Пример #1
0
    def read(self, fn):
        """
        Load extractions from a tab-separated file into ``self.oie``.

        Each non-blank line is expected to consist of:
        sent, prob, pred, arg1, arg2, ...

        Blank lines and lines with fewer than three fields are skipped.
        ``self.oie`` is set to a dict mapping each sentence to the list of
        Extraction objects read for it, in file order. Each extraction gets
        a running index that counts accepted lines only.

        :param fn: path of the file to read
        :raises ValueError: if the confidence field cannot be parsed as a
            float
        """
        extractions = {}
        ex_index = 0
        with open(fn) as fin:
            for line in fin:
                data = line.strip().split('\t')
                # An explicit length check replaces the former bare
                # ``except:``; a blank line splits to [''] and is rejected
                # here as well.
                if len(data) < 3:
                    continue
                text, confidence, rel = data[:3]

                curExtraction = Extraction(
                    pred=rel,
                    head_pred_index=None,
                    sent=text,
                    confidence=float(confidence),
                    question_dist=
                    "./question_distributions/dist_wh_sbj_obj1.json",
                    index=ex_index)
                ex_index += 1

                # Every remaining field is an argument of the predicate.
                for arg in data[3:]:
                    curExtraction.addArg(arg)

                # Append in place instead of rebuilding the list each time.
                extractions.setdefault(text, []).append(curExtraction)
        self.oie = extractions
Пример #2
0
    def read(self, fn):
        """
        Load extractions from a tab-separated file into ``self.oie``.

        Each non-blank line is expected to consist of:
        sent, pred, arg1, arg2, ...

        The format carries no confidence, so every extraction is given a
        confidence of 1.0. The extraction index is the zero-based line
        number in the file (blank lines still count towards it).
        ``self.oie`` is set to a defaultdict mapping each sentence to its
        list of Extraction objects.

        :param fn: path of the file to read
        :raises ValueError: if a non-blank line has fewer than two fields
        """
        d = defaultdict(list)
        with open(fn) as fin:
            for line_ind, line in enumerate(fin):
                stripped = line.strip()
                # Skip blank lines (e.g. a trailing empty line) instead of
                # failing on the tuple unpacking below.
                if not stripped:
                    continue
                data = stripped.split('\t')
                text, rel = data[:2]
                args = data[2:]

                curExtraction = Extraction(pred=rel,
                                           head_pred_index=None,
                                           sent=text,
                                           confidence=1.0,
                                           index=line_ind)
                for arg in args:
                    curExtraction.addArg(arg)

                d[text].append(curExtraction)
        self.oie = d
Пример #3
0
    def read(self, fn):
        """
        Load extractions from a tab-separated file into ``self.oie``.

        Each usable line consists of:
        sent, confidence, pred, arg1, arg2, ...

        Lines with fewer than four fields (i.e. without at least one
        argument) are ignored. ``self.oie`` is set to a dict mapping each
        sentence to the list of Extraction objects read for it.

        :param fn: path of the file to read
        """
        by_sentence = {}
        with open(fn) as fin:
            for raw in fin:
                fields = raw.strip().split('\t')
                # Need sentence, confidence, predicate and one argument.
                if len(fields) < 4:
                    continue
                sentence, conf, predicate = fields[:3]

                extraction = Extraction(pred=predicate,
                                        head_pred_index=-1,
                                        sent=sentence,
                                        confidence=float(conf))
                # First argument and any further ones, in file order.
                for argument in fields[3:]:
                    extraction.addArg(argument)
                by_sentence.setdefault(sentence, []).append(extraction)
        self.oie = by_sentence
Пример #4
0
    def read(self, fn):
        """
        Load extractions from a JSON file into ``self.oie``.

        The file holds an object mapping each sentence to a list of tuple
        objects. Every tuple must provide the keys "pred", "arg0", "arg1",
        "arg2", "arg3", "temp" and "loc".

        A predicate that is exactly "<be>" (ignoring surrounding
        whitespace) becomes "[is]"; otherwise every occurrence of "<be> "
        is removed from it. Argument slots holding an empty string are
        skipped. Every extraction gets a confidence of 1.0 and no index.
        ``self.oie`` is set to a defaultdict mapping each sentence to its
        list of Extraction objects.

        :param fn: path of the JSON file to read
        :raises KeyError: if a tuple lacks one of the expected keys
        """
        # Argument slots, in the order they are attached to an extraction.
        arg_keys = ("arg0", "arg1", "arg2", "arg3", "temp", "loc")

        d = defaultdict(list)
        with open(fn) as fin:
            data = json.load(fin)
        for sentence, tuples in data.items():
            for t in tuples:
                pred = t["pred"]
                if pred.strip() == "<be>":
                    rel = "[is]"
                else:
                    rel = pred.replace("<be> ", "")

                curExtraction = Extraction(pred=rel,
                                           head_pred_index=None,
                                           sent=sentence,
                                           confidence=1.0,
                                           index=None)
                for key in arg_keys:
                    if t[key] != "":
                        curExtraction.addArg(t[key])

                d[sentence].append(curExtraction)
        self.oie = d