# Example #1
0
from output_builder import OutputBuilder
from variants_generation import PrimaryCandidates, SecondaryCandidates
from candidate_selection import Selector

# Annotated development files. Each value keeps its leading slash because it
# is appended by plain string concatenation to this script's directory.
file500 = '/Tweets/tweet-norm-dev500_annotated.txt'
file100 = '/Tweets/tweet-norm-dev100_annotated.txt'

# Destination handed to the output builder.
# NOTE(review): absolute, machine-specific path -- confirm before running on
# another machine.
outputpath = '/home/alangb/Escritorio/result100.txt'

# Input for this run: the dev100 file, resolved against the directory that
# contains this script.
tweets_file = path.dirname(path.abspath(__file__)) + file100

# --- Input stage -----------------------------------------------------------
# Load the tweets file, then build the OOV picker from the splitter's texts.
splitter = Tw_Splitter(tweets_file)
picker = OOVpicker(splitter.texts)

# --- Processing components -------------------------------------------------
# Construction order is kept as-is in case any constructor has side effects.
classifier = OOVclassifier()
# NOTE(review): the meaning of the argument 2 is not visible here -- confirm
# against PrimaryCandidates.
primary = PrimaryCandidates(2)
secondary = SecondaryCandidates()
selector = Selector()
output = OutputBuilder(outputpath)

# --- Shared state for the main loop ----------------------------------------
# oovs: tweet id -> {sentence number -> iterable of (word, pos) pairs}.
# tokenized: indexed the same way, as tokenized[tweet_id][sentence number].
oovs, tokenized = picker.OOV, picker.tokenized
# correct: tweet id -> {sentence number -> list}; the inner dict is created
# on first access.
correct = defaultdict(dict)

for tweet_id, tweet in oovs.items():
    for j, sent in tweet.items():  # j is number of the sent
        for_prev = tokenized[tweet_id][j]
        correct[tweet_id][j] = []
        for word, pos in sent:
            class_number = classifier.classify(word)
            # if class is variant
            if class_number == 0:
                IVcandidates = primary.generate(word)
                # if no primary candidates generated
                if len(IVcandidates) == 0: