Пример #1
0
    def test_register(self):
        # Verifies that formats.register() makes a format class visible in
        # the registry under the name it was registered with.
        known_formats = formats.get_registry()

        # Precondition: the custom format is not in the registry yet.
        assert_false(CustomFormat in known_formats.values())

        formats.register('trt', CustomFormat)

        # The mapping fetched *before* registration is inspected again, so
        # this presumably relies on get_registry() returning a live object
        # rather than a copy -- confirm against the formats module.
        assert_true(CustomFormat in known_formats.values())
        assert_true('trt' in known_formats)
Пример #2
0
    def test_register(self):
        # Verifies that formats.register() makes a format class visible in
        # the registry under the name it was registered with.
        known_formats = formats.get_registry()

        # Precondition: the custom format is not in the registry yet.
        assert_false(CustomFormat in known_formats.values())

        formats.register('trt', CustomFormat)

        # The mapping fetched *before* registration is inspected again, so
        # this presumably relies on get_registry() returning a live object
        # rather than a copy -- confirm against the formats module.
        assert_true(CustomFormat in known_formats.values())
        assert_true('trt' in known_formats)
Пример #3
0
    def test_init_with_custom_format(self):
        # Verifies that a classifier constructed with a registered custom
        # format name ends up with the data that format's to_iterable()
        # returns as its train_set.
        training_pairs = [('I like turtles', 'pos'), ('I hate turtles', 'neg')]

        class MockRedisFormat(formats.BaseFormat):
            """Fake format that stores its client/port and yields fixed data."""

            def __init__(self, client, port):
                self.client = client
                self.port = port

            @classmethod
            def detect(cls, stream):
                # Unconditionally reports the stream as matching.
                return True

            def to_iterable(self):
                # Ignores the stored client; returns the canned pairs above.
                return training_pairs

        fake_client = mock.Mock()
        formats.register('redis', MockRedisFormat)

        # `port` is an extra keyword argument; the mock format's __init__
        # requires it, so it is presumably forwarded by the classifier.
        classifier = NaiveBayesClassifier(
            fake_client,
            format='redis',
            port=1234,
        )
        assert_equal(classifier.train_set, training_pairs)
Пример #4
0
import traceback
from textblob.classifiers import NaiveBayesClassifier
from textblob import formats
from xml.etree import ElementTree
import json
import sys
import random
import facebook
import requests
from sklearn.cross_validation import train_test_split


class PipeDelimitedFormat(formats.DelimitedFormat):
    """Delimited-text format whose field delimiter is the pipe character."""

    # Overrides the delimiter used by the DelimitedFormat base class.
    delimiter = '|'

# Make the pipe-delimited format available under the name 'psv'.
formats.register('psv', PipeDelimitedFormat)
# NOTE(review): opened for writing at import time; no matching close() is
# visible in this part of the file -- confirm it is closed elsewhere.
errorFile = open('Error.csv','w')


# Field separator for the .psv files; same character as
# PipeDelimitedFormat.delimiter.
separator="|"
# File names of the processed data sets (relative paths).
processed_data="processed_data.psv"
processed_age_data = "processed_age_data.psv"

# `reload` is a builtin only in Python 2. Reloading `sys` is the usual
# prelude to calling sys.setdefaultencoding(), which is disabled below.
reload(sys)
#sys.setdefaultencoding("utf-8")

import csv
def get_fb_token(app_id, app_secret):
    """Send a client-credentials OAuth request to Facebook's Graph API.

    The app id and secret are POSTed (as query parameters) to the
    ``oauth/access_token`` endpoint.

    NOTE(review): the response is only bound to a local name; nothing is
    parsed or returned in the visible code, so this returns ``None``.
    """
    credentials = {
        'grant_type': 'client_credentials',
        'client_id': app_id,
        'client_secret': app_secret,
    }
    response = requests.post(
        'https://graph.facebook.com/oauth/access_token?',
        params=credentials,
    )
    # Debugging aid: print response.text to see what the FB API returned.
import logging
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
import nltk
from textblob.classifiers import NaiveBayesClassifier
from textblob import formats
from xml.etree import ElementTree

import pickle
import csv
import json
import sys, getopt
class PipeDelimitedFormat(formats.DelimitedFormat):
    """Delimited-text format whose field delimiter is the pipe character."""

    # Overrides the delimiter used by the DelimitedFormat base class.
    delimiter = '|'

# Make the pipe-delimited format available under the name 'psv'.
formats.register('psv', PipeDelimitedFormat)


# Field separator for the .psv file; same character as
# PipeDelimitedFormat.delimiter.
separator="|"
# File name of the processed data set (relative path).
processed_data="processed_data.psv"

# Python 2 only: `reload` is a builtin there, and reloading `sys` makes
# setdefaultencoding() callable again after interpreter start-up removed it.
# This changes the process-wide default str<->unicode codec to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")

import csv


def preprocess(training_data_dir):
    """
    taining_data_dir : path to training dir. This dir should contain
    profile and text dir.