示例#1
0
def main(retrain=False):
    """Run the classifier over every file beneath the Wikipedia data folder.

    The data folder is created when it is missing. Nothing else happens
    unless it contains at least one subdirectory. In that case a model is
    either trained from ``data.sample`` and saved, or loaded from the entry
    of ``models_folder`` whose name sorts last, and then applied to the
    contents of each non-hidden file found under the data folder.

    Args:
        retrain: When true, build, train and save a new model instead of
            loading a saved one. Defaults to ``False``, the value that was
            previously hard-coded inside the function.

    Raises:
        IndexError: If ``retrain`` is false and ``models_folder`` is empty.
    """
    data_folder = 'data/sources/wikipedia'
    models_folder = 'classifier/models'
    # NOTE(review): save_loc is not used by the visible code -- confirm
    # whether results were meant to be written there.
    save_loc = 'site/wikiclassify/wiki'

    if not os.path.exists(data_folder):
        os.makedirs(data_folder)

    # Second item of the first os.walk() entry lists the immediate
    # subdirectories; without any there is nothing to process.
    if not next(os.walk(data_folder))[1]:
        return

    if retrain:
        samples, target, classes = data.sample(data_folder)
        model = classifier.build(samples.shape, target.shape)
        classifier.train(model, samples, target)
        classifier.save(models_folder, model, classes)
    else:
        # The saved model whose name sorts last is the one that gets loaded.
        latest_model = sorted(os.listdir(models_folder))[-1]
        model, classes = classifier.load(models_folder, latest_model)

    for root, dirs, files in os.walk(data_folder):
        for name in files:
            # Skip hidden files such as .DS_Store.
            if name.startswith('.'):
                continue
            with open(os.path.join(root, name)) as f:
                matrix = data.str2mat(f.read())
            # NOTE(review): the result is discarded in the visible code.
            output = classifier.run(model, matrix)
示例#2
0
def retrieve_tweets():
    """Stream tweets, classify the geotagged ones and store tourism tweets.

    Trains the tourism classifier with ``train()``, registers a receiver on
    ``streamer.tweet_retrieved`` and then calls ``streamer.stream()``. Every
    tweet that carries coordinates is classified; tweets labelled
    ``'tourism'`` are also run through ``sentiment_analyzer``, printed and
    stored as ``Tweet`` rows.

    Returns:
        None.
    """
    classif = train()

    # Called whenever a new tweet is retrieved from the stream.
    @receiver(streamer.tweet_retrieved)
    def my_callback(sender, **kwargs):
        tweet = kwargs['tweet']
        if not tweet['coordinates']:
            return

        tweet_id = tweet['id']
        user = tweet['user']['name'].encode('utf-8')
        # Index 1 is read as latitude and index 0 as longitude
        # (presumably GeoJSON [longitude, latitude] order -- confirm).
        point = tweet['coordinates']['coordinates']
        lat = point[1]
        lng = point[0]
        text = tweet['text'].encode('utf-8')

        # Coordinates are compared against None rather than tested for
        # truthiness: 0.0 is a valid latitude/longitude and must not cause
        # the tweet to be dropped.
        if not (tweet_id and user and text):
            return
        if lat is None or lng is None:
            return

        # Classify the tweet (tourism or nontourism).
        classification = classif.classify(
            feature_extractor_lda_tripadvisor_top_words_weights(text))
        if classification != 'tourism':
            return

        # For tourism-related tweets, classify as positive or negative.
        # NOTE(review): the whole tweet, not ``text``, is passed to
        # feature_extractor -- confirm that is intended.
        sentiment = sentiment_analyzer.classify(feature_extractor(tweet))
        # Single-argument call form prints the same text as the Python 2
        # print statement and also parses on Python 3.
        print('Label: {} || Tweet: {}'.format(sentiment, text))

        # objects.create() builds and saves the row in one step (assuming
        # Tweet is a Django model), so no separate save() call is needed.
        Tweet.objects.create(
            tweet_id=tweet_id,
            user=user,
            lat=lat,
            lng=lng,
            text=text,
            classification=classification,
            sentiment=sentiment)

    # Start streaming; the receiver above handles each retrieved tweet.
    streamer.stream()
    return None
示例#3
0
def retrieve_tweets():
    """Stream tweets, classify the geotagged ones and store tourism tweets.

    Trains the tourism classifier with ``train()``, registers a receiver on
    ``streamer.tweet_retrieved`` and then calls ``streamer.stream()``. Every
    tweet that carries coordinates is classified; tweets labelled
    ``'tourism'`` are also run through ``sentiment_analyzer``, printed and
    stored as ``Tweet`` rows.

    Returns:
        None.
    """
    classif = train()

    # Called whenever a new tweet is retrieved from the stream.
    @receiver(streamer.tweet_retrieved)
    def my_callback(sender, **kwargs):
        tweet = kwargs['tweet']
        if not tweet['coordinates']:
            return

        tweet_id = tweet['id']
        user = tweet['user']['name'].encode('utf-8')
        # Index 1 is read as latitude and index 0 as longitude
        # (presumably GeoJSON [longitude, latitude] order -- confirm).
        point = tweet['coordinates']['coordinates']
        lat = point[1]
        lng = point[0]
        text = tweet['text'].encode('utf-8')

        # Coordinates are compared against None rather than tested for
        # truthiness: 0.0 is a valid latitude/longitude and must not cause
        # the tweet to be dropped.
        if not (tweet_id and user and text):
            return
        if lat is None or lng is None:
            return

        # Classify the tweet (tourism or nontourism).
        classification = classif.classify(
            feature_extractor_lda_tripadvisor_top_words_weights(text))
        if classification != 'tourism':
            return

        # For tourism-related tweets, classify as positive or negative.
        # NOTE(review): the whole tweet, not ``text``, is passed to
        # feature_extractor -- confirm that is intended.
        sentiment = sentiment_analyzer.classify(feature_extractor(tweet))
        # Single-argument call form prints the same text as the Python 2
        # print statement and also parses on Python 3.
        print('Label: {} || Tweet: {}'.format(sentiment, text))

        # objects.create() builds and saves the row in one step (assuming
        # Tweet is a Django model), so no separate save() call is needed.
        Tweet.objects.create(
            tweet_id=tweet_id,
            user=user,
            lat=lat,
            lng=lng,
            text=text,
            classification=classification,
            sentiment=sentiment)

    # Start streaming; the receiver above handles each retrieved tweet.
    streamer.stream()
    return None