def parse_sentence(line):
    """Build an Instance holding the characters and character bigrams of `line`.

    Args:
        line: The raw sentence. Every element of it (whitespace included)
            becomes one entry of ``chars``; nothing is stripped or skipped.

    Returns:
        An ``Instance`` with ``chars`` and ``bichars`` set. Both lists have
        one entry per element of ``line``. ``bichars[i]`` is
        ``chars[i - 1] + chars[i]``, and the first entry uses the literal
        ``'-NULL-'`` as its left-hand context.
    """
    symbols = list(line)

    # Pair every symbol with its predecessor; the first symbol has none, so
    # the '-NULL-' placeholder stands in for it.
    predecessors = ['-NULL-'] + symbols[:-1]
    pairs = [left + right for left, right in zip(predecessors, symbols)]

    result = Instance()
    result.chars = symbols
    result.bichars = pairs
    return result
def parse_conll(info):
    """Build an Instance from the lines of one CoNLL-style sentence block.

    Each line must consist of exactly three tab-separated fields. The first
    field is ignored, the second is taken as the character and the third as
    its gold label.

    Args:
        info: An iterable of strings, one per character of the sentence.
            Lines are split verbatim and not stripped, so a trailing newline
            would remain part of the label -- presumably callers strip lines
            beforehand (TODO confirm against the caller).

    Returns:
        An ``Instance`` with ``chars``, ``bichars`` and ``gold_labels`` set.
        ``bichars[i]`` is ``chars[i - 1] + chars[i]``, and the first entry
        uses the literal ``'-NULL-'`` as its left-hand context.

    Raises:
        ValueError: If a line does not split into exactly three
            tab-separated fields.
    """
    chars = []
    gold_labels = []
    for line in info:
        # The leading field is unused; `_` avoids shadowing the builtin `id`.
        _, char, label = line.split("\t")
        chars.append(char)
        gold_labels.append(label)

    # Pair every character with its predecessor; the first one has none, so
    # the '-NULL-' placeholder stands in for it.
    predecessors = ['-NULL-'] + chars[:-1]
    bichars = [left + right for left, right in zip(predecessors, chars)]

    inst = Instance()
    inst.chars = chars
    # The bigrams used to be computed and then dropped; store them so the
    # result carries the same fields as parse_sentence / parse_sent.
    inst.bichars = bichars
    inst.gold_labels = gold_labels
    return inst
def parse_sent(info):
    """Build an Instance from a sentence whose words are separated by spaces.

    Args:
        info: The segmented sentence. It is split on the single-space
            string ``' '``, so consecutive spaces produce empty-string words;
            these stay in ``words`` but contribute no characters.

    Returns:
        An ``Instance`` with ``words``, ``chars`` and ``bichars`` set.
        ``chars`` is the concatenation of the characters of all words, in
        order. ``bichars[i]`` is ``chars[i - 1] + chars[i]``, and the first
        entry uses the literal ``'-NULL-'`` as its left-hand context. No
        end-of-sentence bigram is appended.
    """
    tokens = info.split(' ')
    result = Instance()

    # Flatten the words into one character sequence.
    symbols = [symbol for token in tokens for symbol in token]

    # Pair every character with its predecessor; the first one has none, so
    # the '-NULL-' placeholder stands in for it.
    predecessors = ['-NULL-'] + symbols[:-1]
    pairs = [left + right for left, right in zip(predecessors, symbols)]

    result.words = tokens
    result.chars = symbols
    result.bichars = pairs
    return result