import json
import math
import random

import numpy as np

import data_util as du
import generate_data as gd


def crazy2_get_feed(path, batch_size, word_to_id, max_premise_length,
                    max_hypothesis_length, num_iter=None, shuffle=False):
    data, _, _ = gd.process_data(1.0)
    premises = []
    premise_lengths = []
    hypotheses = []
    hypothesis_lengths = []
    labels = []
    with open(path + "1256", 'r') as f:
        lines = f.readlines()
        if shuffle:
            random.shuffle(lines)
        for line in lines:
            example = json.loads(line)
            if (" and " in example["sentence1"] or " or " in example["sentence1"]
                    or " then " in example["sentence1"]):
                # Boolean example: rebuild each side as "clause conjunction
                # clause", padding unused slots with "emptystring" tokens.
                parsed_premise = du.parse_sentence(data, example["sentence1"])
                parsed_hypothesis = du.parse_sentence(data, example["sentence2"])
                prem = (parsed_premise[0].emptystring + " " + parsed_premise[1] +
                        " " + parsed_premise[2].emptystring)
                hyp = (parsed_hypothesis[0].emptystring + " " + parsed_hypothesis[1] +
                       " " + parsed_hypothesis[2].emptystring)
            else:
                # Simple example: use the raw sentences.
                prem = example["sentence1"]
                hyp = example["sentence2"]
            premises.append(sentence_to_id(prem, word_to_id, max_premise_length))
            premise_lengths.append(len(prem.split()))
            hypotheses.append(sentence_to_id(hyp, word_to_id, max_hypothesis_length))
            hypothesis_lengths.append(len(hyp.split()))
            # Twelve aligned sub-labels per example.
            labels.append(
                [label_to_num(example["gold_label"][i]) for i in range(12)])
            if num_iter is not None and len(labels) > num_iter * batch_size:
                break
    if num_iter is None:
        num_iter = int(math.ceil(len(labels) / batch_size))
    for i in range(num_iter):
        yield (np.array(premises[i * batch_size:(i + 1) * batch_size]),
               np.array(premise_lengths[i * batch_size:(i + 1) * batch_size]),
               np.array(hypotheses[i * batch_size:(i + 1) * batch_size]),
               np.array(hypothesis_lengths[i * batch_size:(i + 1) * batch_size]),
               np.array(labels[i * batch_size:(i + 1) * batch_size]),
               1256)
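# Usage sketch (hypothetical vocabulary and padding lengths, not values
# taken from this repo's configuration):
#
# vocab = {"emptystring": 0, "every": 1, "wizard": 2}  # word -> id map
# for prem, prem_len, hyp, hyp_len, y, code in crazy2_get_feed(
#         "Data/test", 32, vocab, 12, 12, shuffle=True):
#     # each array has leading dimension <= 32; code is always 1256 here
#     assert y.shape[1] == 12  # twelve sub-labels per example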
import natural_logic_model as nlm
import data_util
import generate_data as gd

data, _, _ = gd.process_data(1.0)
print(
    "Input a premise sentence and hypothesis sentence of the form:\n"
    " Determiner (Adjective) Noun (does not) Verb Determiner Adjective Noun\n"
    " Make sure you conjugate to the present tense and use vocabulary from"
    " the files in the Data folder\n"
    " You can also combine two simple sentences of that form with:"
    " or, and, if...then")
while True:
    premise = data_util.parse_sentence(data, input("Enter a premise sentence:\n"))
    while premise is None:
        premise = data_util.parse_sentence(
            data,
            input("There was some issue with the entered premise\n"
                  "Enter a premise sentence:\n"))
    hypothesis = data_util.parse_sentence(data,
                                          input("Enter a hypothesis sentence:\n"))
    while hypothesis is None:
        hypothesis = data_util.parse_sentence(
            data,
            input("There was some issue with the entered hypothesis\n"
                  "Enter a hypothesis sentence:\n"))
    if len(premise) == 1:
        label = nlm.get_label(
            nlm.compute_simple_relation(premise[0], hypothesis[0]))
    else:
        label = nlm.get_label(
            nlm.compute_boolean_relation(premise[0], premise[1], premise[2],
                                         hypothesis[0], hypothesis[1],
                                         hypothesis[2]))
    print(label)
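# Illustrative session (sentences must come from the Data-folder
# vocabulary; the final line is whatever relation label nlm assigns):
#
#   Enter a premise sentence:
#   every wizard eats some flute
#   Enter a hypothesis sentence:
#   some wizard eats some flute
#   <relation label, e.g. "entails" or "independence">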
example2["gold_label"] = "equivalence" elif example["sentence2"].split( )[i] == "emptystring" and example["sentence2"].split()[ i + 1] == example["sentence1"].split()[i + 1]: example2["gold_label"] = "entails" elif example["sentence1"].split( )[i] == "emptystring" and example["sentence2"].split()[ i + 1] == example["sentence1"].split()[i + 1]: example2["gold_label"] = "reverse entails" else: example2["gold_label"] = "independence" label.append(example2["gold_label"]) example5 = dict() example5["sentence1"] = adjoin(example["sentence1"].split()[-5:]) example5["sentence2"] = adjoin(example["sentence2"].split()[-5:]) premise = du.parse_sentence(data, example["sentence1"])[0] hypothesis = du.parse_sentence(data, example["sentence2"])[0] verb_relation = nlm.standard_lexical_merge(premise.verb, hypothesis.verb) adverb_relation = nlm.standard_lexical_merge( premise.adverb, hypothesis.adverb) object_negation_signature = nlm.negation_merge( premise.object_negation, hypothesis.object_negation) object_determiner_signature = nlm.determiner_merge( premise.natlog_object_determiner, hypothesis.natlog_object_determiner) object_noun_relation = nlm.standard_lexical_merge( premise.object_noun, hypothesis.object_noun) object_adjective_relation = nlm.standard_lexical_merge( premise.object_adjective, hypothesis.object_adjective) VP_relation = nlm.standard_phrase(adverb_relation, verb_relation)
with open("simple_solutions", "r") as f: simple_solutions = json.loads(f.read()) for encoding in simple_solutions: encoding = json.loads(encoding) premise, hypothesis = gd.encoding_to_example(data, encoding) if gd.example_to_encoding(premise, hypothesis) != encoding: print("We have a problem with the simple encoding") nlm_label = nlm.get_label( nlm.compute_simple_relation(premise, hypothesis)) if simple_solutions[json.dumps(encoding)] != nlm_label: print("We have a problem with the simple file") print("simple file is good") with open("boolean_solutions", "r") as f: boolean_solutions = json.loads(f.read()) simple1 = [ (data_util.parse_sentence(data, "some wizard eats some flute")[0], data_util.parse_sentence(data, "some wizard eats some flute")[0]) ] simple1.append( (data_util.parse_sentence(data, "every wizard eats every flute")[0], data_util.parse_sentence(data, "some wizard eats some flute")[0])) simple1.append( (data_util.parse_sentence(data, "some wizard eats some flute")[0], data_util.parse_sentence(data, "every wizard eats every flute")[0])) simple1.append( (data_util.parse_sentence(data, "no wizard eats some flute")[0], data_util.parse_sentence(data, "some wizard eats every flute")[0])) simple1.append(
def crazy_get_feed(path, batch_size, word_to_id, max_premise_length,
                   max_hypothesis_length, num_iter=None, shuffle=False):
    data, _, _ = gd.process_data(1.0)
    # One bucket per source-file suffix, so batches stay type-homogeneous.
    premises = [[], [], [], [], []]
    premise_lengths = [[], [], [], [], []]
    hypotheses = [[], [], [], [], []]
    hypothesis_lengths = [[], [], [], [], []]
    labels = [[], [], [], [], []]
    for i, suffix in enumerate(["", "1", "2", "5", "6"]):
        with open(path + suffix, 'r') as f:
            lines = f.readlines()
            if shuffle:
                random.shuffle(lines)
            for line in lines:
                example = json.loads(line)
                if (" and " in example["sentence1"]
                        or " or " in example["sentence1"]
                        or " then " in example["sentence1"]):
                    # Boolean example: rebuild each side as "clause
                    # conjunction clause" with "emptystring" padding.
                    parsed_premise = du.parse_sentence(data, example["sentence1"])
                    parsed_hypothesis = du.parse_sentence(data, example["sentence2"])
                    prem = (parsed_premise[0].emptystring + " " + parsed_premise[1] +
                            " " + parsed_premise[2].emptystring)
                    hyp = (parsed_hypothesis[0].emptystring + " " + parsed_hypothesis[1] +
                           " " + parsed_hypothesis[2].emptystring)
                else:
                    prem = example["sentence1"]
                    hyp = example["sentence2"]
                # Append into bucket i rather than the outer lists, so the
                # five buckets stay aligned with their label lists.
                premises[i].append(sentence_to_id(prem, word_to_id, max_premise_length))
                premise_lengths[i].append(len(prem.split()))
                hypotheses[i].append(sentence_to_id(hyp, word_to_id, max_hypothesis_length))
                hypothesis_lengths[i].append(len(hyp.split()))
                labels[i].append(label_to_num(example["gold_label"]))
                if num_iter is not None and len(labels[i]) > num_iter * batch_size:
                    break
    if num_iter is None:
        num_iter = int(math.ceil(len(labels[0]) / batch_size))
    # Interleave batches from all five buckets in random order.
    batches = []
    for i in range(num_iter):
        for j in range(5):
            batches.append((i, j))
    # Tag yielded with each batch: 9 for the base file, else the suffix.
    lengths = {0: 9, 1: 1, 2: 2, 3: 5, 4: 6}
    random.shuffle(batches)
    for i, j in batches:
        yield (np.array(premises[j][i * batch_size:(i + 1) * batch_size]),
               np.array(premise_lengths[j][i * batch_size:(i + 1) * batch_size]),
               np.array(hypotheses[j][i * batch_size:(i + 1) * batch_size]),
               np.array(hypothesis_lengths[j][i * batch_size:(i + 1) * batch_size]),
               np.array(labels[j][i * batch_size:(i + 1) * batch_size]),
               lengths[j])
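# Usage sketch (hypothetical path and vocabulary): batches from the five
# files arrive interleaved in random order, and the final element of each
# tuple (9, 1, 2, 5, or 6) identifies the file the batch came from.
#
# for prem, prem_len, hyp, hyp_len, y, code in crazy_get_feed(
#         "Data/train", 32, vocab, 12, 12, shuffle=True):
#     process(code, prem, hyp)  # process() is a placeholder consumer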
    premise, hypothesis = gd.encoding_to_example(data, encoding)
    if gd.example_to_encoding(premise, hypothesis) != encoding:
        print("We have a problem with the simple encoding")
    nlm_label = nlm.get_label(
        nlm.compute_simple_relation(premise, hypothesis))
    if convert(simple_solutions[json.dumps(encoding)]) != nlm_label:
        print("We have a problem with the simple file")
print("simple file is good")

examples = gd.generate_balanced_data("simple_solutions", "boolean_solutions",
                                     100, 0, data,
                                     simple_sampling="level 2",
                                     boolean_sampling="level 1")
gd.save_data(examples, "test")
examples = []
with open("test", "r") as f:
    lines = f.readlines()
    for line in lines:
        examples.append(json.loads(line))
for example in examples:
    premise = data_util.parse_sentence(data, example["sentence1"])
    hypothesis = data_util.parse_sentence(data, example["sentence2"])
    if len(premise) == 1:
        fol_label = fol.get_label(premise[0], hypothesis[0])
        nlm_label = nlm.get_label(
            nlm.compute_simple_relation(premise[0], hypothesis[0]))
        if example["gold_label"] != fol_label or fol_label != nlm_label:
            print(example["gold_label"], fol_label, nlm_label)
            print("We have a problem with simple generation")
    else:
        premise1 = premise[0]
        premise_conjunction = premise[1]
        premise2 = premise[2]
        hypothesis1 = hypothesis[0]
        hypothesis_conjunction = hypothesis[1]
        hypothesis2 = hypothesis[2]
        nlm_label = nlm.get_label(
            nlm.compute_boolean_relation(premise1, premise_conjunction,
                                         premise2, hypothesis1,
                                         hypothesis_conjunction, hypothesis2))
        # Mirror the simple-case check above (the excerpt is truncated here).
        if example["gold_label"] != nlm_label:
            print(example["gold_label"], nlm_label)
            print("We have a problem with boolean generation")