Exemplo n.º 1
0
def test_date_check():
    """Check that the coded meaning of "Carl XVI Gustaf" depends on the date.

    The same parse is turned into a Sentence three times, each with a
    different sentence date, and the meaning of the first child of the tree
    is compared against the expected code list.
    """
    parse = "(S (NP (NNP CARL ) (NN XVI ) (NNP GUSTAF ) ) )"
    text = "Carl XVI Gustaf"

    # (date string, expected codes), checked in this order.
    expectations = (
        ("20150813", ["SWEGOV"]),
        ("19720813", ["SWEELI"]),
        ("19010813", ["SWEELI"]),
    )

    for date_string, expected_codes in expectations:
        sentence = ptree.Sentence(
            parse, text, PETRreader.dstr_to_ordate(date_string))
        noun_phrase = sentence.tree.children[0]
        assert noun_phrase.get_meaning() == expected_codes
def test_reflexive():
    """Check the meaning assigned to a reflexive pronoun phrase.

    The node at tree.children[1].children[1] (presumably the NP holding
    "HIMSELF", if the tree mirrors the parse string) must be coded USAGOV.
    """
    raw_parse = "(S (NP (NNP Obama ) )  (VP (VBD asked ) (NP (PRP himself ) )  (SBAR (WHADVP (WRB why ) ) (S (NP (NNP Biden ) ) (VP (VBD was ) (ADJP (VBN tired ) ) ) ) ) ) )"
    # The parse is upper-cased before being handed to Sentence.
    parse = raw_parse.upper()
    ordinal_date = PETRreader.dstr_to_ordate("20150813")

    sentence = ptree.Sentence(
        parse, "Obama asked himself why Biden was tired", ordinal_date)

    verb_phrase = sentence.tree.children[1]
    reflexive = verb_phrase.children[1]
    assert reflexive.get_meaning() == ["USAGOV"]
def test_noun_meaning3():
    """Check that a conjoined noun phrase yields the codes of both names.

    The order of the returned codes is not checked: both sides are sorted
    before they are compared.
    """
    parse = "(S (NP (NP (NNP BARACK ) (NNP OBAMA ) ) (CC AND ) (NP (NNP VLADIMIR ) (NNP PUTIN ) ) ) )"
    sentence = ptree.Sentence(
        parse, "Barack Obama and Vladimir Putin", "081315")
    compound = sentence.tree.children[0]

    actual_codes = sorted(compound.get_meaning())
    expected_codes = sorted(["USAGOV", "RUSGOV"])
    assert actual_codes == expected_codes
def test_reflexive2():
    """Check the meaning of a reflexive pronoun inside a subordinate clause.

    Starting at the tree root, the second child is taken five times in a
    row; the node reached that way must be coded RUSGOV.
    """
    raw_parse = "(S (NP (NNP Obama ) ) (VP (VBD knew ) (SBAR (IN that ) (S (NP (NNP Putin ) ) (VP (VBD liked ) (NP (PRP himself ) ) ) ) ) )  ) "
    parse = raw_parse.upper()
    ordinal_date = PETRreader.dstr_to_ordate("20150813")

    # NOTE(review): the sentence text names Biden/"him" while the parse names
    # Putin/"himself" -- the two do not match; confirm which one is intended.
    sentence = ptree.Sentence(
        parse, "Obama knew that Biden liked him", ordinal_date)

    node = sentence.tree
    for _ in range(5):
        node = node.children[1]
    assert node.get_meaning() == ["RUSGOV"]
def test_personal1():
    """Check the meaning assigned to a personal pronoun phrase.

    The node at tree.children[1].children[1].children[0].children[0]
    (presumably the NP holding "HE", if the tree mirrors the parse string)
    must be coded USAGOV.
    """
    parse = "(S (NP (NNP Obama ) ) (VP (VBD said ) (SBAR (S (NP (PRP he ) ) (VP (VBD was ) (ADJP (VBN tired ) ) ) ) ) ) ) ".upper()

    test = ptree.Sentence(parse, "Obama said he was tired",
                          PETRreader.dstr_to_ordate("20150813"))
    phrase = test.tree.children[1].children[1].children[0].children[0]
    assert phrase.get_meaning() == ["USAGOV"]
def test_noun_meaning5():
    """Check meaning and head word of "The US commander in Iraq".

    The first child of the tree must be coded USAMIL and the first element
    returned by get_head() must be "COMMANDER".
    """
    parse = "(S (NP (NP (DT THE ) (NNP US ) (NN COMMANDER ) ) (PP (IN IN ) (NP (NNP IRAQ ) ) ) ) )"
    sentence = ptree.Sentence(parse, "The US commander in Iraq", "081315")
    noun_phrase = sentence.tree.children[0]

    # Dump the tree before asserting, as the original test does.
    sentence.tree.print_to_stdout("")

    meaning = noun_phrase.get_meaning()
    assert meaning == ['USAMIL']

    head_info = noun_phrase.get_head()
    assert head_info[0] == "COMMANDER"
def test_noun_meaning4():
    """Check meaning and head of "The rebels from Syria".

    The first child of the tree must be coded SYRREB; get_head() must report
    "REBELS" as its first element and the phrase itself as its second.
    """
    parse = "(S (NP (DT THE ) (NNP REBELS ) (PP (IN FROM ) (NP (NNP SYRIA ) ) ) ) )"
    sentence = ptree.Sentence(parse, "The rebels from Syria", "081315")
    noun_phrase = sentence.tree.children[0]

    meaning = noun_phrase.get_meaning()
    assert meaning == ['SYRREB']

    # get_head() is queried once per assertion, as in the original test.
    first_lookup = noun_phrase.get_head()
    assert first_lookup[0] == "REBELS"
    second_lookup = noun_phrase.get_head()
    assert second_lookup[1] == noun_phrase
def test_prepmeaning():
    """Check meaning, head and preposition of a prepositional phrase.

    The first child of the tree must be coded TUR, expose "MARKET" through
    its `head` attribute and return "TO" from get_prep().
    """
    parse = "(S (PP (IN TO ) (NP (DT THE ) (JJ TURKISH ) (NN MARKET )  ) ) )"
    sentence = ptree.Sentence(parse, "to the market", "081315")
    prep_phrase = sentence.tree.children[0]

    # Keep this order: `head` is read as a plain attribute only after
    # get_meaning() has run (presumably get_meaning() populates it -- verify).
    meaning = prep_phrase.get_meaning()
    assert meaning == ['TUR']

    head_word = prep_phrase.head
    assert head_word == "MARKET"

    preposition = prep_phrase.get_prep()
    assert preposition == "TO"
def test_noun_meaning1():
    """Check head and meaning of "The Islamic State".

    get_head() on the first child of the tree must return "STATE" together
    with the phrase itself, and the phrase must be coded IMGMUSISI.
    """
    # The parse string is balanced: the S node is explicitly closed, as in
    # the other noun-meaning tests.
    parse = "(S (NP (DT THE ) (JJ ISLAMIC ) (NN STATE ) ) )"

    test = ptree.Sentence(parse, "The Islamic State", "081315")

    phrase = test.tree.children[0]

    head, headphrase = phrase.get_head()

    assert head == "STATE"
    assert headphrase == phrase
    assert phrase.get_meaning() == ["IMGMUSISI"]
Exemplo n.º 10
0
def test_noun_meaning2():
    """Check head and meaning of "The North Atlantic Treaty Organization".

    get_head() on the first child of the tree must return "ORGANIZATION"
    together with the phrase itself, and the phrase must be coded IGOWSTNAT.
    """
    parse = "(S (NP (DT THE ) (JJ NORTH ) (NN ATLANTIC ) (NN TREATY ) (NN ORGANIZATION ) ) )  "
    sentence = ptree.Sentence(
        parse, "The North Atlantic Treaty Organization", "081315")
    noun_phrase = sentence.tree.children[0]

    head_info = noun_phrase.get_head()
    head_word, head_phrase = head_info

    assert head_word == "ORGANIZATION"
    assert head_phrase == noun_phrase
    assert noun_phrase.get_meaning() == ["IGOWSTNAT"]
Exemplo n.º 11
0
def do_coding(event_dict):
    """
    Main coding loop: code every parsed sentence of every story in event_dict.

    Note that entering any character other than 'Enter' at the prompt will
    stop the program: this is deliberate.
    <14.02.28>: Bug: PETRglobals.PauseByStory actually pauses after the first
                sentence of the *next* story

    Args:
        event_dict: mapping of story key -> story record. Each story record
            is read through ['meta']['date'] and ['sents']; each sentence
            record is read through 'content' and the optional 'parsed',
            'date' and 'config' keys.

    Returns:
        The same event_dict object, updated in place: coded sentences gain
        'events' and 'meta' (and possibly 'issues') entries, and a story that
        triggers a story-level discard has its 'sents' entry set to None.
    """
    NStory = 0
    NSent = 0
    NEvents = 0
    NEmpty = 0
    NDiscardSent = 0
    NDiscardStory = 0

    logger = logging.getLogger('petr_log')
    times = 0
    sents = 0
    for key, val in sorted(event_dict.items()):
        NStory += 1

        SkipStory = False
        print('\n\nProcessing story {}'.format(key))
        StoryDate = val['meta']['date']
        for sent in val['sents']:
            NSent += 1
            sent_data = val['sents'][sent]
            # Build the ID before the parse check so the "no parse" log line
            # always names the current sentence.
            SentenceID = '{}_{}'.format(key, sent)

            if 'parsed' not in sent_data:
                logger.info(
                    '{} has no parse information. Passing.'.format(SentenceID))
                continue

            if 'config' in sent_data:
                for config in sent_data['config'].values():
                    change_Config_Options(config)

            SentenceText = sent_data['content']
            # A sentence-level date overrides the story date.
            SentenceDate = sent_data.get('date', StoryDate)
            Date = PETRreader.dstr_to_ordate(SentenceDate)

            print("\n", SentenceID)
            parsed = sent_data['parsed']
            disc = check_discards(SentenceText)
            if disc[0] > 0:
                if disc[0] == 1:
                    print("Discard sentence:", disc[1])
                    logger.info('\tSentence discard. {}'.format(disc[1]))
                    NDiscardSent += 1
                    continue
                print("Discard story:", disc[1])
                logger.info('\tStory discard. {}'.format(disc[1]))
                SkipStory = True
                NDiscardStory += 1
                break

            t1 = time.time()
            sentence = PETRtree.Sentence(parsed, SentenceText, Date)
            print(sentence.txt)
            # this is the entry point into the processing in PETRtree
            coded_events, meta = sentence.get_events()
            code_time = time.time() - t1
            if PETRglobals.NullVerbs or PETRglobals.NullActors:
                val['meta'] = meta
                val['text'] = sentence.txt
            elif PETRglobals.NullActors:
                # NOTE(review): unreachable -- the branch above already
                # matches whenever NullActors is true. Left in place because
                # it is unclear which of the two behaviors is intended.
                val['events'] = coded_events
                coded_events = None  # skips additional processing
                val['text'] = sentence.txt
            else:
                # 16.04.30 pas: we're using the key value 'meta' at two very
                # different levels of event_dict -- story level here and
                # sentence level below -- and this is potentially confusing,
                # so it probably would be useful to change one of those
                val['meta']['verbs'] = meta

            del sentence
            times += code_time
            sents += 1

            if coded_events:
                sent_data['events'] = coded_events
                sent_data['meta'] = meta
                if (PETRglobals.WriteActorText or PETRglobals.WriteEventText
                        or PETRglobals.WriteActorRoot):
                    text_dict = utilities.extract_phrases(
                        sent_data, SentenceID)
                    if text_dict:
                        actortext = {}
                        eventtext = {}
                        actorroot = {}
                        sent_meta = sent_data['meta']
                        sent_meta['actortext'] = actortext
                        sent_meta['eventtext'] = eventtext
                        sent_meta['actorroot'] = actorroot
                        for evt in coded_events:
                            # 16.04.30 pas bypasses problems with expansion
                            # of compounds
                            if evt in text_dict:
                                phrases = text_dict[evt]
                                actortext[evt] = phrases[:2]
                                eventtext[evt] = phrases[2]
                                actorroot[evt] = phrases[3:5]

            if coded_events and PETRglobals.IssueFileName != "":
                event_issues = get_issues(SentenceText)
                if event_issues:
                    sent_data['issues'] = event_issues

            if PETRglobals.PauseBySentence:
                if len(input("Press Enter to continue...")) > 0:
                    sys.exit()

            # coded_events may be empty or None; both count as a sentence
            # without events rather than crashing on len(None).
            n_coded = len(coded_events) if coded_events else 0
            NEvents += n_coded
            if n_coded == 0:
                NEmpty += 1

        if SkipStory:
            val['sents'] = None

    print("\nSummary:")
    print("Stories read:", NStory, "   Sentences coded:", NSent,
          "  Events generated:", NEvents)
    print("Discards:  Sentence", NDiscardSent, "  Story", NDiscardStory,
          "  Sentences without events:", NEmpty)
    print("Average Coding time = ", times / sents if sents else 0)
    return event_dict