Пример #1
0
def continueIsIntransitive(my_split, tagged_tokens):
    """Return True when a 'continue' hit should be treated as intransitive.

    The hit counts as intransitive when either

    * ``fl.isIntransitive`` says so and the token right after position
      ``my_split[5]`` is not ``'that'``, or the token right before that
      position is ``'road'``; or
    * the lower-cased text in ``my_split[7]`` matches a
      "continu... <word containing west/east/north/south>" pattern.

    Args:
        my_split: Fields of one result row. Field 5 is converted with
            ``int`` and used as an index into ``tagged_tokens``
            (presumably the verb position -- confirm against the TSV
            layout); field 7 is the text that is pattern-matched.
        tagged_tokens: Sequence of ``(token, tag)`` pairs; only the token
            part (element 0) is inspected here.

    Returns:
        bool: True if the hit is considered intransitive, else False.
    """
    position = int(my_split[5])

    # Look-ahead guarded: when the indexed token is the last one there is no
    # following 'that', so treat it as absent instead of raising IndexError.
    followed_by_that = (position + 1 < len(tagged_tokens)
                        and tagged_tokens[position + 1][0] == 'that')

    # Look-behind guarded: at position 0 an index of -1 would silently wrap
    # around to the LAST token, which is not the preceding word.
    preceded_by_road = (position > 0
                        and tagged_tokens[position - 1][0] == 'road')

    if (fl.isIntransitive(my_split, tagged_tokens)
            and not followed_by_that) or preceded_by_road:
        return True

    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    # The pattern text itself is unchanged.
    if fl.hasPattern(
            my_split[7].lower(),
            r'continu[a-z]+\s[a-z]*(west|east|north|south)[a-z]*\s'):
        # Kept from the original: echoes every sentence accepted via the
        # direction pattern.
        print(my_split[7])
        return True

    return False
Пример #2
0
# Read all result rows up front. The context manager closes the input
# handle, which the original list comprehension left open.
with open(r'results_desire.tsv', 'r', encoding='utf-8') as results_file:
    lineList = list(results_file)

# Output handle for the sample. It is not closed within this section, so it
# is deliberately left open here for the code that follows.
f = open(r"sample_desire.tsv", "w", encoding='utf-8')

# Word forms handed to fl.isAdjectiveOrNoun; constant, so built once instead
# of once per row.
formlist = ['desire', 'desired']

filtered_lines = []
# Row 0 is skipped (presumably a header line -- confirm against the TSV).
for line in lineList[1:]:
    my_split = line.split('\t')
    my_tokens = my_split[7].split(' ')
    tagged_tokens = nltk.pos_tag(my_tokens)

    # A row is dropped as soon as any one of these checks fires. The order
    # is the original one, so short-circuiting behaves the same.
    excluded = (
        fl.isGerund(my_split)
        or fl.hasGerundAfter(my_split, my_tokens)
        or fl.isIntransitive(my_split, tagged_tokens)
        or fl.hasToAfterVerb(my_split, my_tokens)
        or fl.isAdjectiveOrNoun(my_split, my_tokens, tagged_tokens, formlist)
        # Token after position my_split[5] is tagged as some verb form.
        or tagged_tokens[int(my_split[5]) + 1][1].startswith('VB')
        # Token after position my_split[3] is "to" followed by a base verb.
        or (my_tokens[int(my_split[3]) + 1].lower() == 'to'
            and tagged_tokens[int(my_split[3]) + 2][1] == 'VB')
    )
    if not excluded:
        filtered_lines.append(line)

# Pick random positions in filtered_lines, remembering the ones already used
# so that no position is handled twice. The loop runs while the counter n is
# below 100; the rest of the loop body (including where n is advanced) is not
# visible in this section.
n = 0
sampled_numbers = []
while n < 100:
    # NOTE(review): random.randint includes BOTH endpoints, so s can equal
    # len(filtered_lines), which is out of range for the lookup below, and
    # position 0 is never drawn -- confirm whether
    # random.randrange(len(filtered_lines)) was intended.
    s = random.randint(1, len(filtered_lines))
    if not s in sampled_numbers:
        sampled_numbers.append(s)
        my_split = filtered_lines[s].split('\t')
Пример #3
0
        return(False)
        
def dobjTooFarFromVerb(my_split, threshold=10):
    """Return True when field 3 exceeds field 5 by at least ``threshold``.

    Both fields are converted with ``int`` and subtracted as
    ``my_split[3] - my_split[5]``. Judging by the function name these are
    presumably the direct-object and verb positions -- confirm against the
    TSV layout. The difference is signed, so a field 3 that is smaller than
    field 5 never counts as "too far".

    Args:
        my_split: Fields of one result row (indexable; fields 3 and 5 must
            be convertible to ``int``).
        threshold: Smallest difference that counts as too far. Defaults to
            10, the value that used to be hard-coded.

    Returns:
        bool: True if the difference is ``>= threshold``.
    """
    return int(my_split[3]) - int(my_split[5]) >= threshold
        
# Read all result rows up front. The context manager closes the input
# handle, which the original list comprehension left open.
with open(r'results_try.tsv', 'r', encoding='utf-8') as results_file:
    lineList = list(results_file)

# Word forms handed to fl.isAdjective; constant, so built once.
formlist = ['tried', 'try', 'trying', 'tries']

filtered_lines = []
# Row 0 is skipped (presumably a header line -- confirm against the TSV).
for line in lineList[1:]:
    my_split = line.split('\t')
    my_tokens = my_split[7].split(' ')
    tagged_tokens = nltk.pos_tag(my_tokens)

    # A row is dropped as soon as any one of these checks fires. The order
    # is the original one, so short-circuiting behaves the same. Regexes are
    # raw strings now ('\s' in a plain literal is an invalid escape); the
    # pattern text is unchanged.
    excluded = (
        my_split[2][0].isupper()
        or dobjTooFarFromVerb(my_split)
        or dobjIsGerund(my_split)
        or fl.hasPattern(my_split[7].lower(), r"tr(i|y)[^\s]*\s(to|and)\s")
        or fl.hasGerundAfter(my_split, my_tokens)
        or fl.isAdjective(my_split, my_tokens, tagged_tokens, formlist)
        or fl.isPhrasalVerb(my_split, tagged_tokens)
        or fl.hasPattern(
            my_split[7],
            r'\str(y|i)[^\s]*\s[^\s]*\s(luck|hand|best|while|lot|fortune|patience)\s')
        or my_tokens[int(my_split[5]) + 1] in ('out', 'again')
        or my_split[1] in ('case', 'time', 'lot', 'fortune', 'day', 'year',
                           'patience', 'hand')
        or tagged_tokens[int(my_split[3]) + 1][1] == 'RP'
        or tagged_tokens[int(my_split[5]) + 1][1].startswith('VB')
        or fl.isIntransitive(my_split, tagged_tokens)
        or 'Try Tag Rugby' in my_split[7]
    )
    if not excluded:
        filtered_lines.append(line)

# Sample up to 100 distinct positions. filtered_lines has no header row, so
# position 0 is a real candidate (the original range started at 1 and could
# never pick it). Capping the size avoids the ValueError random.sample raises
# when asked for more items than the population holds.
sampleNumbers = random.sample(range(len(filtered_lines)),
                              min(100, len(filtered_lines)))
chosen_positions = set(sampleNumbers)

# Write the sampled rows in their original file order; the context manager
# closes the output even if writing fails.
with open(r"sample_try.tsv", "w", encoding='utf-8') as f:
    f.writelines(line for position, line in enumerate(filtered_lines)
                 if position in chosen_positions)
Пример #4
0
import random
import re
import nltk
import sys
sys.path.insert(0,'..')
import filter_lines as fl

# Read all result rows up front. The context manager closes the input
# handle, which the original list comprehension left open.
with open(r'results_start.tsv', 'r', encoding='utf-8') as results_file:
    lineList = list(results_file)

# Word forms handed to fl.isAdjective; constant, so built once.
formlist = ['starting']

filtered_lines = []
# Row 0 is skipped (presumably a header line -- confirm against the TSV).
for line in lineList[1:]:
    my_split = line.split('\t')
    my_tokens = my_split[7].split(' ')
    tagged_tokens = nltk.pos_tag(my_tokens)

    # A row is dropped as soon as any one of these checks fires. The order
    # is the original one, so short-circuiting behaves the same. The regex
    # is a raw string now ('\s' in a plain literal is an invalid escape);
    # the pattern text is unchanged.
    excluded = (
        fl.isGerund(my_split)
        or fl.isAdjective(my_split, my_tokens, tagged_tokens, formlist)
        or fl.isIntransitive(my_split, tagged_tokens)
        or fl.hasPattern(my_split[7].lower(), r"start[^\s]+\sto\s")
        or fl.hasGerundAfter(my_split, my_tokens)
        or fl.isPhrasalVerb(my_split, tagged_tokens)
        or fl.hasOrdNumAfter(my_split, tagged_tokens)
        or fl.dobjIsOrdinalNumber(my_split, tagged_tokens)
    )
    if not excluded:
        filtered_lines.append(line)

# Sample up to 100 distinct positions. filtered_lines has no header row, so
# position 0 is a real candidate (the original range started at 1 and could
# never pick it). Capping the size avoids the ValueError random.sample raises
# when asked for more items than the population holds.
sampleNumbers = random.sample(range(len(filtered_lines)),
                              min(100, len(filtered_lines)))
chosen_positions = set(sampleNumbers)

# Write the sampled rows in their original file order; the context manager
# closes the output even if writing fails.
with open(r"sample_start.tsv", "w", encoding='utf-8') as f:
    f.writelines(line for position, line in enumerate(filtered_lines)
                 if position in chosen_positions)
© 2019 GitHub, Inc.
Пример #5
0
import random
import re
import nltk
import sys
sys.path.insert(0,'..')
import filter_lines as fl

# Read all result rows up front. The context manager closes the input
# handle, which the original list comprehension left open.
with open(r'results_quit.tsv', 'r', encoding='utf-8') as results_file:
    lineList = list(results_file)

# Word forms handed to fl.isAdjective; constant, so built once.
formlist = ['quit']

filtered_lines = []
# Row 0 is skipped (presumably a header line -- confirm against the TSV).
for line in lineList[1:]:
    my_split = line.split('\t')
    my_tokens = my_split[7].split(' ')
    tagged_tokens = nltk.pos_tag(my_tokens)

    # A row is dropped as soon as any one of these checks fires. The order
    # is the original one, so short-circuiting behaves the same.
    excluded = (
        fl.isGerund(my_split)
        or fl.isAdjective(my_split, my_tokens, tagged_tokens, formlist)
        or fl.isIntransitive(my_split, tagged_tokens)
        or fl.hasGerundAfter(my_split, my_tokens)
        or fl.isPhrasalVerb(my_split, tagged_tokens)
        # "a"/"the" followed by optional non-letters and then "quit".
        or fl.hasPattern(my_split[7].lower(), ' (a|the)[^a-zA-Z]*quit')
    )
    if not excluded:
        filtered_lines.append(line)

# Sample up to 100 distinct positions. filtered_lines has no header row, so
# position 0 is a real candidate (the original range started at 1 and could
# never pick it). Capping the size avoids the ValueError random.sample raises
# when asked for more items than the population holds.
sampleNumbers = random.sample(range(len(filtered_lines)),
                              min(100, len(filtered_lines)))
chosen_positions = set(sampleNumbers)

# Write the sampled rows in their original file order; the context manager
# closes the output even if writing fails.
with open(r"sample_quit.tsv", "w", encoding='utf-8') as f:
    f.writelines(line for position, line in enumerate(filtered_lines)
                 if position in chosen_positions)
Пример #6
0
import sys

sys.path.insert(0, '..')
import filter_lines as fl

# Read all result rows up front. The context manager closes the input
# handle, which the original list comprehension left open.
with open(r'results_begin.tsv', 'r', encoding='utf-8') as results_file:
    lineList = list(results_file)

# Word forms handed to fl.isAdjective; constant, so built once. The list
# used to hold only the misspelling 'begining'; the correct spelling is
# added and the old entry kept so any rows it matched are still matched.
formlist = ['beginning', 'begining']

filtered_lines = []
# Row 0 is skipped (presumably a header line -- confirm against the TSV).
for line in lineList[1:]:
    my_split = line.split('\t')
    my_tokens = my_split[7].split(' ')
    tagged_tokens = nltk.pos_tag(my_tokens)

    # A row is dropped as soon as any one of these checks fires. The order
    # is the original one, so short-circuiting behaves the same. The regex
    # is a raw string now ('\s' in a plain literal is an invalid escape);
    # the pattern text is unchanged.
    excluded = (
        fl.isGerund(my_split)
        or fl.isAdjective(my_split, my_tokens, tagged_tokens, formlist)
        or fl.isIntransitive(my_split, tagged_tokens)
        or fl.hasPattern(my_split[7].lower(),
                         r"(begin|began|begun)[^\s]*\sto\s")
        or fl.hasGerundAfter(my_split, my_tokens)
        or fl.hasOrdNumAfter(my_split, tagged_tokens)
        or fl.dobjIsOrdinalNumber(my_split, tagged_tokens)
    )
    if not excluded:
        filtered_lines.append(line)

# Sample up to 100 distinct positions. filtered_lines has no header row, so
# position 0 is a real candidate (the original range started at 1 and could
# never pick it). Capping the size avoids the ValueError random.sample raises
# when asked for more items than the population holds.
sampleNumbers = random.sample(range(len(filtered_lines)),
                              min(100, len(filtered_lines)))
chosen_positions = set(sampleNumbers)

# Write the sampled rows in their original file order; the context manager
# closes the output even if writing fails.
with open(r"sample_begin.tsv", "w", encoding='utf-8') as f:
    f.writelines(line for position, line in enumerate(filtered_lines)
                 if position in chosen_positions)