from output_builder import OutputBuilder from variants_generation import PrimaryCandidates, SecondaryCandidates from candidate_selection import Selector file500 = '/Tweets/tweet-norm-dev500_annotated.txt' file100 = '/Tweets/tweet-norm-dev100_annotated.txt' outputpath = '/home/alangb/Escritorio/result100.txt' tweets_file = path.split(path.abspath(__file__))[0] + file100 splitter = Tw_Splitter(tweets_file) picker = OOVpicker(splitter.texts) classifier = OOVclassifier() primary = PrimaryCandidates(2) secondary = SecondaryCandidates() selector = Selector() output = OutputBuilder(outputpath) oovs = picker.OOV tokenized = picker.tokenized correct = defaultdict(dict) for tweet_id, tweet in oovs.items(): for j, sent in tweet.items(): # j is number of the sent for_prev = tokenized[tweet_id][j] correct[tweet_id][j] = [] for word, pos in sent: class_number = classifier.classify(word) # if class is variant if class_number == 0: IVcandidates = primary.generate(word) # if no primary candidates generated if len(IVcandidates) == 0: