def parse_all():
    """
    Parse every dataset that has not been parsed yet.

    Each parser's output is written to disk only when its target file is
    missing, so datasets that were already parsed are skipped on re-runs.

    :return: None
    """
    bn2wn = build_bn2wns()

    # Eurosense: parse only when the sentence file is not on disk yet.
    if not os.path.isfile(EURO_SENTENCES):
        write_file(EURO_SENTENCES, parser_eurosense(bn2wn))

    # SEW: parse only when the sentence file is not on disk yet.
    if not os.path.isfile(SEW_SENTENCES):
        write_file(SEW_SENTENCES, parser_sew(bn2wn))

    # TOM: uses the inverse mapping (built lazily, only if needed).
    if not os.path.isfile(TOM_SENTENCES):
        wns2bn = build_wns2bn()
        write_file(TOM_SENTENCES, parser_tom(wns2bn))
# NOTE(review): the section below is a commented-out duplicate of the
# tweet-analysis script that appears later in this file. Its opening triple
# quote was never terminated, which corrupts parsing of everything that
# follows; the string is closed here so the block stays inert. Consider
# deleting it outright once confirmed unused.
'''
def write_csv(data, filename):
    with open(filename, 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)


if __name__ == '__main__':
    # get path for all outputs
    inp, urls, tags, replyats, sentiment, trends = txu.extract_arguments(n=7)
    tweets = twu.get_input(inp)
    tweets_text = "\n".join(tweets)

    # extract all URLs in the tweets and write in a file
    start = time.clock()
    txu.write_file('\n'.join(txu.get_urls(tweets_text)), urls)
    print "Time taken in extracting URLs: ", time.clock() - start

    # extract all hashtags in the tweets and write in a file
    start = time.clock()
    txu.write_file('\n'.join(txu.extract_hashtags(tweets_text)), tags)
    print "Time taken in extracting hashtags: ", time.clock() - start

    # extract all replyats in the tweets and write in a file
    start = time.clock()
    txu.write_file('\n'.join(txu.extract_replyat(tweets_text)), replyats)
    print "Time taken in extracting replyats: ", time.clock() - start

    # get sentiments in the tweets and write in a file
    start = time.clock()
    write_csv(st.get_sentiments(tweets), sentiment)
'''
import time import text_utils as txu import twitter_utils as twu ''' Implementing the first feature which would produce the total count for each word in a file ft1.txt ''' # Calculate the time when the processing starts start = time.clock() inp, outp = txu.extract_arguments() # The mode of input can be a text file or twitter api json tweets = '\n'.join(twu.get_input(inp)) words = txu.extract_words(tweets) counter = txu.get_counter(words) txu.write_file("\n".join("{} \t\t\t\t\t {}".format(k, v) for k, v in sorted(dict(counter).items())),outp) # Calculate the time processing ends end = time.clock() # Print total time taken print "Total time taken in processing word count: ", end - start
import time import text_utils as txu import twitter_utils as twu ''' Implementing the first feature which would produce the total count for each word in a file ft1.txt ''' # Calculate the time when the processing starts start = time.clock() inp, outp = txu.extract_arguments() # The mode of input can be a text file or twitter api json tweets = '\n'.join(twu.get_input(inp)) words = txu.extract_words(tweets) counter = txu.get_counter(words) txu.write_file( "\n".join("{} \t\t\t\t\t {}".format(k, v) for k, v in sorted(dict(counter).items())), outp) # Calculate the time processing ends end = time.clock() # Print total time taken print "Total time taken in processing word count: ", end - start
def write_csv(data, filename): with open(filename, 'wb') as csvfile: writer = csv.writer(csvfile) writer.writerows(data) if __name__ == '__main__': # get path for all outputs inp, urls, tags, replyats, sentiment, trends = txu.extract_arguments(n=7) tweets = twu.get_input(inp) tweets_text = "\n".join(tweets) # extract all URLs in the tweets and write in a file start = time.clock() txu.write_file('\n'.join(txu.get_urls(tweets_text)), urls) print "Time taken in extracting URLs: ", time.clock() - start # extract all hashtags in the tweets and write in a file start = time.clock() txu.write_file('\n'.join(txu.extract_hashtags(tweets_text)), tags) print "Time taken in extracting hashtags: ", time.clock() - start # extract all replyats in the tweets and write in a file start = time.clock() txu.write_file('\n'.join(txu.extract_replyat(tweets_text)), replyats) print "Time taken in extracting replyats: ", time.clock() - start # get sentiments in the tweets and write in a file start = time.clock() write_csv(st.get_sentiments(tweets), sentiment)