#!/bin/bash
#
# Build one tab-separated vocabulary row per word, with example sentences
# taken from a source text.
#
# Usage: SCRIPT SENTENCE_SOURCE < WORD_LIST
#   SENTENCE_SOURCE  UTF-8 text file; it is split into sentences on "。".
#   WORD_LIST        stdin, one word per line in the form mecab reports as a
#                    feature field; blank lines and "*" are skipped.
#
# Output (stdout), one line per word, fields separated by TAB:
#   WORD  READING  MEANING  DEFINITION  [EXAMPLE ...]
# with at most MAX_EXAMPLES example sentences.
#
# NOTE(review): the copy of this script that was reviewed had been damaged
# (markup-like text was stripped, leaving an invalid sed expression and an
# unterminated quote, so it could not even be parsed). The following pieces
# were reconstructed and must be confirmed against the upstream script:
#   1. the separator that joins multi-line jmdict output (assumed "<br>");
#   2. the second sed of the MEANING pipeline and the row prefix printed
#      before the examples (the column order above is a guess);
#   3. whether grep's colour markers were meant to be replaced by markup
#      instead of being deleted (they are deleted here, as the surviving
#      code did).
#
# Uses jmdict http://jmdict.sourceforge.net/
# Uses kakasi (required as part of Anki)
# Uses mecab; its output is expected to be "surface TAB feature,feature,...".
#
# Strict mode (set -e / pipefail) is deliberately not enabled: grep returning
# 1 for "no match" is a normal outcome in several pipelines below.

readonly LINE_BREAK='<br>'        # NOTE(review): reconstructed, see header.
readonly MIN_PREFERRED_LENGTH=50  # Examples at least this long are listed first.
readonly MAX_EXAMPLES=5

WORK_DIR=''

# Removes the scratch directory created by main (installed as EXIT trap).
cleanup() {
  if [[ -n "$WORK_DIR" ]]; then
    rm -rf -- "$WORK_DIR"
  fi
}

# mecab and kakasi are fed EUC-JP and their output converted back.
# NOTE(review): presumably because the installed dictionaries are EUC-JP.
to_eucjp() { iconv -c -f UTF-8 -t EUCJP; }
to_utf8() { iconv -c -f EUCJP -t UTF-8; }

# Prints the distinct mecab output lines for the file $1.
# "sort | uniq" is kept instead of "sort -u": in some locales sort -u drops
# lines that merely collate as equal.
list_morphemes() {
  to_eucjp < "$1" | mecab | to_utf8 | sed -e '/EOS/d' -e '/^$/d' | sort | uniq
}

# Prints the file $1 with every "。" turned into a line break (GNU sed).
split_sentences() {
  sed 's/。/\n/g' < "$1"
}

# Prints the kakasi -JH conversion of the word $1.
lookup_reading() {
  printf '%s\n' "$1" | to_eucjp | kakasi -JH | to_utf8
}

# Joins stdin lines into a single line separated by LINE_BREAK. Runs of
# blanks/tabs are squeezed to one space so the result is safe as a TSV field.
join_definition_lines() {
  awk -v sep="$LINE_BREAK" '
    { gsub(/[ \t]+/, " "); sub(/^ /, ""); sub(/ $/, "") }
    NR > 1 { printf "%s", sep }
    { printf "%s", $0 }
    END { if (NR) print "" }'
}

# Prints the jmdict entry for the word $1 as one line: the "match(es) found"
# line is dropped and each line is cut after the first ") (" sequence.
# stdin is redirected so jmdict can never consume the word list.
lookup_definition() {
  jmdict -j "$1" < /dev/null \
    | grep -vF 'match(es) found' \
    | sed 's:) (.*$:):' \
    | join_definition_lines
}

# Prints the text following the last "1) " of the definition $1, up to the
# next LINE_BREAK. NOTE(review): the LINE_BREAK cut is reconstructed.
first_meaning() {
  printf '%s\n' "$1" | sed -e 's:.*1) ::' -e "s:${LINE_BREAK}.*\$::"
}

# Prints every distinct sentence of file $3 containing a surface form whose
# mecab line in file $2 has ",$1," among its features. Matches carry grep's
# colour markers; GREP_COLORS is pinned so the markers are predictable.
# Fixed-string matching keeps words from being interpreted as regexes.
find_examples() {
  local word=$1 morphemes=$2 sentences=$3 surface
  grep -F -- ",${word}," "$morphemes" | cut -f1 | while read -r surface; do
    [[ -n "$surface" ]] || continue  # an empty pattern would match everything
    GREP_COLORS='ms=01;31' grep -F --color=always -- "$surface" "$sentences"
  done | sort | uniq
}

# Orders stdin lines: first those at least MIN_PREFERRED_LENGTH long, then
# the shorter ones, each group shortest first; prints the first MAX_EXAMPLES.
# As before, the measured length includes any colour markers, and dropping
# the length column rebuilds the line with single spaces between fields.
rank_examples() {
  awk '{ print length, $0 }' \
    | sort -n \
    | awk -v min="$MIN_PREFERRED_LENGTH" '
        $1 >= min { print; next }
        { short[++n] = $0 }
        END { for (i = 1; i <= n; i++) print short[i] }' \
    | head -n "$MAX_EXAMPLES" \
    | awk '{ $1 = ""; sub(/^ /, ""); print }'
}

# Deletes grep's match-start / match-end colour sequences (including the ESC
# bytes) from stdin.
strip_match_markers() {
  sed -e $'s:\033\\[01;31m\033\\[K::g' -e $'s:\033\\[m\033\\[K::g'
}

# Entry point. $1 = sentence source file; words are read from stdin.
# Returns 2 on a usage error, 1 if no scratch directory could be created.
main() {
  if (( $# < 1 )) || [[ ! -r "$1" ]]; then
    printf 'Usage: %s SENTENCE_SOURCE < WORD_LIST\n' "${0##*/}" >&2
    return 2
  fi

  local sentence_source=$1
  local morphemes sentences word reading definition meaning

  WORK_DIR=$(mktemp -d) || return 1
  trap cleanup EXIT
  morphemes="$WORK_DIR/morphemes"
  sentences="$WORK_DIR/sentences"
  list_morphemes "$sentence_source" > "$morphemes"
  split_sentences "$sentence_source" > "$sentences"

  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while read -r word || [[ -n "$word" ]]; do
    [[ -n "$word" && "$word" != "*" ]] || continue

    reading=$(lookup_reading "$word")
    definition=$(lookup_definition "$word")
    meaning=$(first_meaning "$definition")

    # NOTE(review): reconstructed row prefix, see header.
    printf '%s\t%s\t%s\t%s' "$word" "$reading" "$meaning" "$definition"
    find_examples "$word" "$morphemes" "$sentences" \
      | rank_examples \
      | strip_match_markers \
      | awk '{ printf "\t%s", $0 }'
    printf '\n'
  done
}

main "$@"