def read(self, fn):
    """
    Read extractions from a tab-separated file into ``self.oie``.

    Each non-blank line consists of:
    sent, prob, pred, arg1, arg2, ...

    Blank lines and lines with fewer than three fields are skipped.
    ``self.oie`` is set to a dict mapping each sentence to the list of
    Extraction objects built from its lines, in file order.

    :param fn: path of the file to read.
    :raises ValueError: if the ``prob`` field cannot be parsed as a float.
    """
    d = {}
    # Counts accepted extractions only; skipped lines do not consume an index.
    ex_index = 0
    with open(fn) as fin:
        for line in fin:
            data = line.strip().split('\t')
            # A blank line splits to [''], so this single check rejects both
            # blank lines and lines missing one of the mandatory fields.
            if len(data) < 3:
                continue
            text, confidence, rel = data[:3]
            curExtraction = Extraction(
                pred=rel,
                head_pred_index=None,
                sent=text,
                confidence=float(confidence),
                question_dist="./question_distributions/dist_wh_sbj_obj1.json",
                index=ex_index)
            ex_index += 1
            for arg in data[3:]:
                curExtraction.addArg(arg)
            # Append in place instead of rebuilding the list for every line.
            d.setdefault(text, []).append(curExtraction)
    self.oie = d
def read(self, fn):
    """
    Read extractions from a tab-separated file into ``self.oie``.

    Each non-blank line consists of:
    sent, pred, arg1, arg2, ...

    The file carries no confidence column, so every extraction gets a
    confidence of 1.0. Blank lines are skipped. ``self.oie`` is set to a
    defaultdict mapping each sentence to the list of Extraction objects
    built from its lines, in file order.

    :param fn: path of the file to read.
    :raises ValueError: if a non-blank line has fewer than two fields.
    """
    d = defaultdict(list)
    with open(fn) as fin:
        # Enumerate every physical line so ``index`` stays the 0-based line
        # number in the file, even when blank lines are skipped.
        for line_ind, line in enumerate(fin):
            stripped = line.strip()
            if not stripped:
                # A blank line would otherwise fail the two-field unpack below.
                continue
            data = stripped.split('\t')
            text, rel = data[:2]
            curExtraction = Extraction(pred=rel,
                                       head_pred_index=None,
                                       sent=text,
                                       confidence=1.0,
                                       index=line_ind)
            for arg in data[2:]:
                curExtraction.addArg(arg)
            d[text].append(curExtraction)
    self.oie = d
def read(self, fn):
    """
    Read extractions from a tab-separated file into ``self.oie``.

    Each usable line consists of:
    sent, prob, pred, arg1, arg2, ...

    Lines with fewer than four fields (i.e. without at least one argument)
    are skipped. ``self.oie`` is set to a dict mapping each sentence to the
    list of Extraction objects built from its lines, in file order.

    :param fn: path of the file to read.
    :raises ValueError: if the ``prob`` field cannot be parsed as a float.
    """
    d = {}
    with open(fn) as fin:
        for line in fin:
            data = line.strip().split('\t')
            if len(data) < 4:
                continue
            text, confidence, rel = data[:3]
            curExtraction = Extraction(pred=rel,
                                       head_pred_index=-1,
                                       sent=text,
                                       confidence=float(confidence))
            # data[3] is the first argument, the rest follow in order.
            for arg in data[3:]:
                curExtraction.addArg(arg)
            # Append in place instead of rebuilding the list for every line.
            d.setdefault(text, []).append(curExtraction)
    self.oie = d
def read(self, fn):
    """
    Read extractions from a JSON file into ``self.oie``.

    The file is expected to hold an object mapping each sentence to a list
    of tuples; every tuple is a dict with the keys ``pred``, ``arg0``,
    ``arg1``, ``arg2``, ``arg3``, ``temp`` and ``loc``.

    A predicate that is exactly ``<be>`` (ignoring surrounding whitespace)
    becomes ``[is]``; otherwise every ``"<be> "`` occurrence is removed from
    it. Empty-string argument slots are ignored. All extractions get a
    confidence of 1.0.

    ``self.oie`` is set to a defaultdict mapping each sentence to its list
    of Extraction objects.

    :param fn: path of the JSON file to read.
    """
    with open(fn) as fin:
        data = json.load(fin)

    by_sentence = defaultdict(list)
    # Argument slots, in the order they are added to an extraction.
    slots = ("arg0", "arg1", "arg2", "arg3", "temp", "loc")

    for sentence in data:
        for record in data[sentence]:
            predicate = record["pred"]
            rel = ("[is]" if predicate.strip() == "<be>"
                   else predicate.replace("<be> ", ""))
            extraction = Extraction(pred=rel,
                                    head_pred_index=None,
                                    sent=sentence,
                                    confidence=float(1),
                                    index=None)
            for slot in slots:
                if record[slot] != "":
                    extraction.addArg(record[slot])
            by_sentence[sentence].append(extraction)

    self.oie = by_sentence