#! /usr/bin/python -i
"""Small client for a UDPipe web service plus a parser for its tabular output.

The code deliberately runs on both Python 2 and Python 3, so it avoids
f-strings and annotations and falls back to the Python 2 ``urllib``/``urllib2``
modules when the Python 3 ones cannot be imported.
"""


class UDPipeEntry:
    """Either a whole parse result or one token line of it.

    * If ``result`` contains a newline it is treated as a multi-line result:
      every line is parsed as a token, lines whose ID is not a positive
      integer are dropped, and the remaining tokens are stored in
      ``self.tokens`` with their ``head`` replaced by the governing
      ``UDPipeEntry`` (the root token's ``head`` is the token itself).
      No per-token attributes (``id``, ``form``, ...) are set on the container.
    * Otherwise ``result`` is treated as one tab-separated line with ten
      columns, stored as ``id, form, lemma, upos, xpos, feats, head, deprel,
      deps, misc`` (``id`` and ``head`` as ``int``).  A line that does not have
      exactly ten columns, or whose ``id``/``head`` is not an integer, gets
      all ten attributes set to ``0``.  ``self.tokens`` is not set.
    """

    def __init__(self, result):
        self.result = result
        if "\n" in result:
            # Keep only real tokens; comment lines, blank lines and lines with
            # non-integer IDs all come back with id == 0.
            tokens = [
                w for w in map(UDPipeEntry, result.split("\n")) if w.id > 0
            ]
            for i, w in enumerate(tokens):
                # head and id are numbered within a sentence, so the governor
                # is found relative to this token's position in the flat list.
                w.head = w if w.head == 0 else tokens[i + w.head - w.id]
            self.tokens = tokens
        else:
            fields = result.split("\t")
            if len(fields) == 10:
                try:
                    fields[0], fields[6] = int(fields[0]), int(fields[6])
                except ValueError:
                    # Non-integer ID or HEAD: treat as "not a token".
                    fields = [0] * 10
            else:
                fields = [0] * 10
            (
                self.id,
                self.form,
                self.lemma,
                self.upos,
                self.xpos,
                self.feats,
                self.head,
                self.deprel,
                self.deps,
                self.misc,
            ) = fields

    def __repr__(self):
        """Return the raw text this entry was built from."""
        return self.result

    def __getitem__(self, item):
        """Return the token(s) at ``item``; only valid for multi-line entries."""
        return self.tokens[item]

    def __len__(self):
        """Return the number of tokens; only valid for multi-line entries."""
        return len(self.tokens)

    def browse(self):
        """Open the SVG viewer in a web browser with this entry's text.

        The percent-encoded text of the entry is passed as the URL fragment.
        """
        import webbrowser
        try:
            from urllib.parse import quote
        except ImportError:  # Python 2
            from urllib import quote
        h = "http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/kyodokenkyu/ud-kanbun/conllusvg/viewer.svg"
        webbrowser.open(h + "#" + quote(str(self)))


class UDPipe2UD:
    """Callable wrapper around the UDPipe ``process`` web API.

    Parameters
    ----------
    lang:
        Value of the ``model`` query parameter.
    option:
        Additional query string appended verbatim after ``model`` (it is
        expected to contain its own ``&`` separators).
    """

    def __init__(self, lang="ja", option="tokenizer&tagger&parser"):
        self.parseURL = "http://lindat.mff.cuni.cz/services/udpipe/api/process?model=" + lang + "&" + option

    def __call__(self, sentence):
        """Send ``sentence`` to the service and return a ``UDPipeEntry``.

        The sentence is percent-encoded and sent as the ``data`` query
        parameter; the ``"result"`` member of the JSON response is parsed.
        Errors raised by the request itself (invalid URL, network failure,
        malformed JSON) propagate to the caller unchanged.
        """
        import json
        from contextlib import closing
        # Only the import is version-dependent; keeping the request outside
        # this try block stops real request errors from being masked by an
        # ImportError for urllib2 on Python 3.
        try:
            from urllib.request import urlopen
            from urllib.parse import quote
        except ImportError:  # Python 2
            from urllib2 import urlopen
            from urllib import quote
        # closing() works for both the Python 2 and Python 3 response objects.
        with closing(urlopen(self.parseURL + "&data=" + quote(sentence))) as r:
            q = r.read().decode("utf-8")
        return UDPipeEntry(json.loads(q)["result"])