def split_words_in_tweets(path='tem_unlabelled_201_599.txt', start_id=201):
    """Tag every tweet in a text file and return the results joined by commas.

    Each line of ``path`` is treated as one tweet. A tweet is run through
    ``process_tweet`` and ``temporal_tagger_override.tag``; the tagged text
    is then extended with a trailing ``<value>/TEMPORAL`` token and prefixed
    with ``ID:<n>``. Every tagged tweet is also written, followed by a
    newline, to ``f2``.

    NOTE(review): ``f2`` is not defined in this function; presumably it is a
    module-level, writable file object opened elsewhere -- confirm.

    The per-tweet logic mirrors ``split_words_in_tweets_i``.

    Args:
        path: Text file to read, one tweet per line. Defaults to the file
            name that was previously hard-coded.
        start_id: ID assigned to the first line; later lines count upwards.
            Defaults to the previously hard-coded offset of 201.

    Returns:
        All tagged tweets joined with ``","`` (an empty string when the file
        contains no lines).
    """
    tagged_tweet_list = []
    # The context manager closes the input even if tagging raises; the file
    # is only ever read, so plain read mode is sufficient.
    with open(path, 'r') as tweets_file:
        for tweet_id, tweet in enumerate(tweets_file, start_id):
            if not tweet:
                continue
            tagged_tweet = process_tweet(tweet)
            temporal_tagged_tweet, temporal_value = temporal_tagger_override.tag(tweet)
            if len(temporal_value) != 0:
                try:
                    temporal_part = str(
                        temporal_tagger_override.ground(temporal_tagged_tweet, now())
                    )
                except Exception:
                    # Deliberate best effort: when grounding fails, fall back
                    # to the raw temporal expressions returned by tag().
                    temporal_part = ",".join(temporal_value)
            else:
                temporal_part = "NO"
            tagged_tweet = (
                "ID:" + str(tweet_id) + " " + tagged_tweet
                + " " + temporal_part + "/TEMPORAL"
            )
            f2.write(tagged_tweet)
            f2.write("\n")
            tagged_tweet_list.append(tagged_tweet)
    return ",".join(tagged_tweet_list)
def split_words_in_tweets_i(tweet, id):
    """Tag a single tweet and return it with an ID prefix and temporal suffix.

    The tweet is run through ``process_tweet`` and
    ``temporal_tagger_override.tag``. The result has the form
    ``ID:<id> <processed tweet> <value>/TEMPORAL`` where ``<value>`` is:

    * the string form of ``temporal_tagger_override.ground(...)`` when
      temporal expressions were found and grounding succeeded,
    * the found expressions joined with ``","`` when grounding raised,
    * ``NO`` when no temporal expression was found.

    Args:
        tweet: Raw tweet text.
        id: Identifier placed in the ``ID:`` prefix. The name shadows the
            builtin but is kept so existing keyword callers keep working.

    Returns:
        The tagged tweet string, or ``None`` when ``tweet`` is empty.
    """
    # Empty input has nothing to tag; make that outcome explicit.
    if not tweet:
        return None

    tagged_tweet = process_tweet(tweet)
    temporal_tagged_tweet, temporal_value = temporal_tagger_override.tag(tweet)
    if len(temporal_value) != 0:
        try:
            temporal_part = str(
                temporal_tagger_override.ground(temporal_tagged_tweet, now())
            )
        except Exception:
            # Deliberate best effort: when grounding fails, fall back to the
            # raw temporal expressions returned by tag().
            temporal_part = ",".join(temporal_value)
    else:
        temporal_part = "NO"

    return "ID:" + str(id) + " " + tagged_tweet + " " + temporal_part + "/TEMPORAL"