#! /usr/bin/nawk -f
#
# Convert tokenizer output into CoNLL-U-style token rows.
#
# Input (one record per line, split with awk's default whitespace FS):
#     <surface> <feature1>,<feature2>,...     one token
#     EOS                                     end of sentence
# Presumably this is MeCab output with a UniDic-style feature list -- the
# feature positions used below (1-4, 8, 11) should be confirmed against the
# dictionary actually in use.
#
# Output: for every token, ten tab-separated columns
#     index, surface, lemma, UPOS, XPOS, _, _, _, _, MISC
# and an empty line after every sentence.
#
# NOTE(review): because the default FS is kept, a feature list containing a
# space would be truncated at that space; consider FS="\t" if the producer
# guarantees tab separation.

BEGIN {
    # Direct mapping from the first feature (coarse part of speech) to UPOS.
    # 名詞, 助詞 and 接尾辞 need the second feature as well and are handled
    # in upos_for().
    u["接頭辞"]   = "NOUN"
    u["代名詞"]   = "PRON"
    u["連体詞"]   = "DET"
    u["動詞"]     = "VERB"
    u["形容詞"]   = "ADJ"
    u["形状詞"]   = "ADJ"
    u["副詞"]     = "ADV"
    u["感動詞"]   = "INTJ"
    u["助動詞"]   = "AUX"
    u["接続詞"]   = "CCONJ"
    u["補助記号"] = "PUNCT"
    u["記号"]     = "SYM"
    u["空白"]     = "SYM"
}

# Return the UPOS tag for a token.
#   f     - feature array as produced by split(); f[1] and f[2] are read
#   lemma - the token's lemma (only consulted for 接続助詞)
# Falls back to "X" when f[1] is not in the table u.
function upos_for(f, lemma) {
    if (f[1] == "名詞") {
        if (f[2] == "固有名詞") return "PROPN"
        if (f[2] == "数詞")     return "NUM"
        return "NOUN"
    }
    if (f[1] == "助詞") {
        if (f[2] == "終助詞")   return "PART"
        if (f[2] == "接続助詞") return (lemma == "て") ? "SCONJ" : "CCONJ"
        return "ADP"
    }
    if (f[1] == "接尾辞")
        return (f[2] == "名詞的") ? "NOUN" : "PART"
    # "in" avoids creating an empty table entry for every unknown tag.
    return (f[1] in u) ? u[f[1]] : "X"
}

# Return the XPOS string: features 1-4 joined with "-".
# Components after the first that are "*" or missing are left out, so a
# short feature list no longer yields dangling hyphens; "_" is returned when
# nothing is left.
function build_xpos(f,    i, xpos) {
    xpos = f[1]
    for (i = 2; i <= 4; i++) {
        if (f[i] == "" || f[i] == "*")
            continue
        xpos = (xpos == "") ? f[i] : xpos "-" f[i]
    }
    return (xpos == "") ? "_" : xpos
}

# Sentence boundary: restart token numbering and emit the blank separator.
# Only a bare "EOS" line counts; a token whose surface happens to be "EOS"
# carries a feature field and must be emitted like any other token.
$1 == "EOS" && NF == 1 {
    n = 0
    printf("\n")
    next
}

# Blank input lines carry no token; emitting them would produce a row with
# an empty surface and lemma.
NF == 0 {
    next
}

# Ordinary token line.
{
    split($2, a, ",")

    # The 8th feature is used as the lemma; fall back to the surface form.
    lemma = (a[8] > "") ? a[8] : $1

    upos = upos_for(a, lemma)
    xpos = build_xpos(a)

    # Every token is marked as having no following space; the 11th feature,
    # when present, is attached as Translit.
    misc = "SpaceAfter=No"
    if (a[11] > "")
        misc = misc "|Translit=" a[11]

    n++
    printf("%d\t%s\t%s\t%s\t%s\t_\t_\t_\t_\t%s\n", n, $1, lemma, upos, xpos, misc)
}