def randomdep(path, cutoff):
    """Print the sentences read from ``path`` with some heads reset to "-1".

    Every token is unpacked into exactly four fields; the third one (index 2,
    presumably the head index -- confirm against ``depio``) is the only one
    inspected.  When that field is not already "-1", one ``random.random()``
    draw is compared with ``cutoff`` and, if the draw is smaller, the field is
    overwritten with "-1" in place.  Each sentence is then emitted through
    ``depio.depprint``.

    path   -- file handed to ``depio.depread``
    cutoff -- threshold the random draw is compared against
    """
    for sentence in depio.depread(path):
        for position, (_word, _tag, head, _label) in enumerate(sentence):
            if head == "-1":
                # Nothing to reset; no random number is consumed here.
                continue
            if random.random() < cutoff:
                sentence[position][2] = "-1"
        depio.depprint(sentence)
def randomdep(path, cutoff):
    """Print the sentences read from ``path`` with some heads reset to '-1'.

    Every token is unpacked into exactly four fields; the third one (index 2,
    presumably the head index -- confirm against ``depio``) is the only one
    inspected.  When that field is not already '-1', one ``random.random()``
    draw is compared with ``cutoff`` and, if the draw is smaller, the field is
    overwritten with '-1' in place.  Each sentence is then emitted through
    ``depio.depprint``.

    path   -- file handed to ``depio.depread``
    cutoff -- threshold the random draw is compared against
    """
    for sentence in depio.depread(path):
        for position, (_word, _tag, head, _label) in enumerate(sentence):
            if head == '-1':
                # Nothing to reset; no random number is consumed here.
                continue
            if random.random() < cutoff:
                sentence[position][2] = '-1'
        depio.depprint(sentence)
def mergeconllpos(conll_path, pos_path):
    """Copy tags from a POS file into the sentences of a CoNLL file and print.

    Sentences from ``depio.depread(conll_path)`` are paired, in order, with
    the entries produced by ``posio.posread(pos_path)``.  For each pair the
    lengths must match, and field 1 of every CoNLL row must equal field 0 of
    the corresponding POS entry; both conditions are enforced with ``assert``.
    Field 1 of the POS entry is then written into field 4 of the CoNLL row,
    and the updated sentence is emitted through ``depio.depprint``.

    conll_path -- file handed to ``depio.depread``
    pos_path   -- file handed to ``posio.posread``
    """
    tagged_sentences = posio.posread(pos_path)
    for conll_sent in depio.depread(conll_path):
        tagged_sent = tagged_sentences.next()
        assert len(conll_sent) == len(tagged_sent)
        for idx, row in enumerate(conll_sent):
            pair = tagged_sent[idx]
            # Both files must agree on the token before its tag is copied.
            assert row[1] == pair[0]
            row[4] = pair[1]
        depio.depprint(conll_sent)
def mergeconllpos(conll_path, pos_path):
    """Copy tags from a POS file into the sentences of a CoNLL file and print.

    Sentences from ``depio.depread(conll_path)`` are paired, in order, with
    the entries produced by ``posio.posread(pos_path)``.  For each pair the
    lengths must match, and field 1 of every CoNLL row must equal field 0 of
    the corresponding POS entry; both conditions are enforced with ``assert``.
    Field 1 of the POS entry is then written into field 4 of the CoNLL row,
    and the updated sentence is emitted through ``depio.depprint``.

    conll_path -- file handed to ``depio.depread``
    pos_path   -- file handed to ``posio.posread``
    """
    tagged_sentences = posio.posread(pos_path)
    for conll_sent in depio.depread(conll_path):
        tagged_sent = tagged_sentences.next()
        assert len(conll_sent) == len(tagged_sent)
        for idx, row in enumerate(conll_sent):
            pair = tagged_sent[idx]
            # Both files must agree on the token before its tag is copied.
            assert row[1] == pair[0]
            row[4] = pair[1]
        depio.depprint(conll_sent)
if len(args) != 3: print "The alignment file must be provided with -ia" sys.exit(0) model = readModel(args[1], True) alignFile = readAlign(args[2]) elif sInput == 'c': model = None #readModel(args[1], False) else: print 'The input format is invalid' sys.exit(0) for tree in depio.depread(args[0]): if alignFile: align = alignFile.next() if align == None: depio.depprint(tree) assert align != None dept = dep.CDep(tree) reorder(dept, align, model, bDebug) if sOutput == 'd': print dept elif sOutput == 'p': print dept.toPOS() elif sOutput == 'r': print dept.toRaw() elif sOutput == 'i': print printOrder(dept) else: print "The output type is not valid" sys.exit(0) if alignFile:
# Script: reads dependency sentences from the file named by sys.argv[1] via
# depio.depread, builds a replace.CReplace from sys.argv[2], rewrites field 0
# of tokens through its replace() method while the counter n is below N, and
# emits sentences with depio.depprint.
# NOTE(review): the line breaks and indentation of this script were lost in
# this copy, so it cannot be told from here whether n counts sentences or
# tokens -- confirm against the upstream file before
# reformatting.
import sys import depio import replace # how many changed N = 10000 r = replace.CReplace(sys.argv[2]) n = 0 for sent in depio.depread(sys.argv[1]): if n < N: for i in range(len(sent)): sent[i][0] = r.replace(sent[i][0]) n += 1 depio.depprint(sent)
if count>maxcnt: maxcnt = count print 'Overall', ':', maxcnt if __name__ == "__main__": if len(sys.argv) != 3: print "depop.py options input >output" sys.exit(1) option = sys.argv[1] input = sys.argv[2] if option == "remove": for sent in depio.depread(input): if len(sent) <= 1: continue index = random.randint(0, len(sent)-1) depio.depprint(removeword(sent, index)) if option == "headdepcount": counts = {} headdepcount(input, counts) for head, dep, head_pos, dep_pos, head_direction in counts: print head, head_pos, dep, dep_pos, head_direction, counts[(head, dep, head_pos, dep_pos, head_direction)] if option == 'maxleftmodifycount': if len(sys.argv) != 3: print 'depio.py maxleftmodifycount path' sys.exit(0) maxmodifycount(sys.argv[2], lambda x,y:x>y) if option == 'maxrightmodifycount': if len(sys.argv) != 3: print 'depio.py maxrightmodifycount path' sys.exit(0) maxmodifycount(sys.argv[2], lambda x,y:x<y)
maxcnt = count print 'Overall', ':', maxcnt if __name__ == "__main__": if len(sys.argv) != 3: print "depop.py options input >output" sys.exit(1) option = sys.argv[1] input = sys.argv[2] if option == "remove": for sent in depio.depread(input): if len(sent) <= 1: continue index = random.randint(0, len(sent) - 1) depio.depprint(removeword(sent, index)) if option == "headdepcount": counts = {} headdepcount(input, counts) for head, dep, head_pos, dep_pos, head_direction in counts: print head, head_pos, dep, dep_pos, head_direction, counts[( head, dep, head_pos, dep_pos, head_direction)] if option == 'maxleftmodifycount': if len(sys.argv) != 3: print 'depio.py maxleftmodifycount path' sys.exit(0) maxmodifycount(sys.argv[2], lambda x, y: x > y) if option == 'maxrightmodifycount': if len(sys.argv) != 3: print 'depio.py maxrightmodifycount path' sys.exit(0)
def malt2zpar(path):
    """Print the sentences read from ``path`` with field 2 lowered by one.

    For every token, field 2 is parsed as an integer, decremented, and stored
    back as a string in place; each sentence is then emitted through
    ``depio.depprint``.  (Presumably this shifts head indices from the Malt
    convention to the ZPar one -- confirm against the file formats.)

    path -- file handed to ``depio.depread``
    """
    for sentence in depio.depread(path):
        for token in sentence:
            shifted = int(token[2]) - 1
            token[2] = str(shifted)
        depio.depprint(sentence)
def malt2zpar(path):
    """Print the sentences read from ``path`` with field 2 lowered by one.

    For every token, field 2 is parsed as an integer, decremented, and stored
    back as a string in place; each sentence is then emitted through
    ``depio.depprint``.  (Presumably this shifts head indices from the Malt
    convention to the ZPar one -- confirm against the file formats.)

    path -- file handed to ``depio.depread``
    """
    for sentence in depio.depread(path):
        for token in sentence:
            shifted = int(token[2]) - 1
            token[2] = str(shifted)
        depio.depprint(sentence)