Пример #1
0
		keyword = sys.argv[2]
	else:
		csvFile = 'trainingandtestdata/testdata.csv'

	tweetlist = loadTwitterCSV(csvFile)
	#tweetlist = loadTwitterCSV('trainingandtestdata/training.1600000.processed.noemoticon.csv')
	tweetlist = chunked(tweetlist, size)
else:
	tweetlist = None # tweetlist must be defined

# Hand out the work: rank 0 supplies the chunked tweet list (other ranks pass
# None) and every rank receives its own share back from the scatter call.
tweetlist_chunk = comm.scatter(tweetlist, root=0)
#print rank, 'has data:', tweetlist_chunk

# Net sentiment per calendar day for this rank's chunk, keyed by 'YYYYMMDD'.
sentiments = {}
for tweet in tweetlist_chunk:
	# Classify first, then parse the date (same order as the data is consumed).
	label = classifier.classify_tweet(tweet['text'])

	# The timestamp carries a timezone token that the strptime pattern below
	# does not account for, so the known tokens are removed beforehand.
	stamp = tweet['date']
	for zone in ('PDT ', 'UTC ', 'GMT '):
		stamp = stamp.replace(zone, '')
	dayKey = datetime.strptime(stamp, '%a %b %d %H:%M:%S %Y').strftime('%Y%m%d')

	# 'pos' counts as +1; every other label counts as -1.
	score = 1 if label == 'pos' else -1

	# Start a day at zero the first time it is seen, then accumulate.
	sentiments[dayKey] = sentiments.get(dayKey, 0) + score
Пример #2
0
	# Build the evaluation data: load the small test CSV and turn its tweets
	# into labelled feature sets.
	tweetlist = loadTwitterCSV('trainingandtestdata/testdata.csv')
	labeld_features = label_feats_from_tweets(tweetlist)
	# Disabled alternative: derive both training and test set from the test CSV.
	#training_set, test_set = split_label_feats(labeld_features)

	# Build the training data from the large CSV; `tweetlist` is reused, so the
	# test tweets loaded above are only reachable via `labeld_features` now.
	tweetlist = loadTwitterCSV('trainingandtestdata/training.1600000.processed.noemoticon.csv')
	training_set = label_feats_from_tweets(tweetlist)
	# Only the first element returned by split_label_feats is kept; the second
	# is discarded into `garbage`.
	# NOTE(review): the 1.0 argument presumably means "put 100% into the first
	# split" -- confirm against split_label_feats.
	training_set, garbage = split_label_feats(training_set, 1.0)
	test_set, garbage = split_label_feats(labeld_features, 1.0)

	# Report the sizes of both sets before training starts.
	print "training set length: %i  test set length: %i" % (len(training_set), len(test_set))
	# Disabled debug aid: dump the test feature set in readable form.
	#print prettifyFeatureSet(test_set)
	
	if args.algo == 'nb':
		classifier = NBSentimentClassifier().train(training_set)
		print "training NaiveBayes classifier..."
	else:
		classifier = MaxEntSentimentClassifier().train(training_set)
		print "training MaxEnt classifier..."

	# Evaluate the freshly trained classifier against the held-out test set
	# and print the resulting score.
	print "calculating accuracy..."
	print 'accuracy:', classifier.test_accuracy(test_set)
	# Disabled debug aid: list the 30 most informative features.
	#classifier.show_most_informative_features(30)


	# Persist the trained model; the destination is decided inside
	# save_model() and is not visible from here.
	classifier.save_model()

	# Disabled alternative to training above: load a previously serialized,
	# already trained classifier instead.
	#classifier = NBSentimentClassifier().load_model()
	#classifier = MaxEntSentimentClassifier().load_model()
	# Smoke test on a sample tweet; the return value is discarded.
	# NOTE(review): the second positional argument (True) presumably turns on
	# verbose/debug output -- confirm against classify_tweet.
	classifier.classify_tweet("Python rocks!!!", True)