示例#1
0
def build_model():
    """Fit, persist and evaluate a binary sentiment model from the training TSV.

    Reads ``lib/data/train.tsv`` (tab-separated, resolved relative to the
    current working directory), keeps only the rows whose ``Sentiment`` is
    0 or 4, and derives a ``Binary`` label from them (0 for Sentiment 0,
    1 for Sentiment 4). The ``Phrase`` column is vectorized with the model's
    own vectorizer, the data is split into train and test partitions, the
    model is trained on the train partition, and both the classifier and the
    vectorizer are pickled through the model's helper methods. Finally a ROC
    plot is requested for the held-out test partition.

    Returns:
        None. The function works through side effects (console output and
        whatever ``NLPModel`` does when pickling and plotting).

    Raises:
        FileNotFoundError: if ``lib/data/train.tsv`` does not exist.
        KeyError: if the file lacks the ``Sentiment`` or ``Phrase`` column.
    """
    model = NLPModel()

    with open('lib/data/train.tsv') as f:
        data = pd.read_csv(f, sep='\t')

    # Take an explicit copy so that adding the label column below operates on
    # an independent frame instead of triggering SettingWithCopyWarning.
    pos_neg = data[(data['Sentiment'] == 0) | (data['Sentiment'] == 4)].copy()

    # Only 0 and 4 survive the filter, so "not 0" is exactly "is 4".
    pos_neg['Binary'] = (pos_neg['Sentiment'] != 0).astype(int)

    phrases = pos_neg.loc[:, 'Phrase']
    model.vectorizer_fit(phrases)
    print('Vectorizer fit complete')

    X = model.vectorizer_transform(phrases)
    print('Vectorizer tranform complete')
    y = pos_neg.loc[:, 'Binary']

    # NOTE(review): no random_state is passed, so the split differs per run.
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model.train(X_train, y_train)
    print('Model training complete')

    model.pickle_clf()
    model.pickle_vectorizer()

    # Evaluate on the held-out partition: features and labels must both come
    # from the test split.
    model.plot_roc(X_test, y_test)
示例#2
0
def build_model():
    """Fit, persist and evaluate a binary sentiment model from the training TSV.

    Reads ``lib/data/train.tsv`` (tab-separated), keeps only the rows whose
    ``Sentiment`` is 0 or 4, and derives a ``Binary`` label from them (0 for
    Sentiment 0, 1 for Sentiment 4). The ``Phrase`` column is vectorized with
    the model's own vectorizer, the data is split into train and test
    partitions, the model is trained on the train partition, and both the
    classifier and the vectorizer are pickled through the model's helper
    methods. Finally a ROC plot is requested for the held-out test partition.

    Returns:
        None. The function works through side effects (console output and
        whatever ``NLPModel`` does when pickling and plotting).

    Raises:
        FileNotFoundError: if ``lib/data/train.tsv`` does not exist.
        KeyError: if the file lacks the ``Sentiment`` or ``Phrase`` column.
    """
    model = NLPModel()

    # NOTE: the path is relative to the current working directory, not to
    # this module's location.
    with open('lib/data/train.tsv') as f:
        data = pd.read_csv(f, sep='\t')

    # Take an explicit copy so that adding the label column below operates on
    # an independent frame instead of triggering SettingWithCopyWarning.
    keep = data['Sentiment'].isin([0, 4])
    pos_neg = data[keep].copy()

    # Only 0 and 4 survive the filter, so "not 0" is exactly "is 4". The
    # vectorized comparison replaces a row-by-row apply and also works when
    # the filtered frame is empty.
    pos_neg['Binary'] = (pos_neg['Sentiment'] != 0).astype(int)

    phrases = pos_neg.loc[:, 'Phrase']
    model.vectorizer_fit(phrases)
    print('Vectorizer fit complete')

    X = model.vectorizer_transform(phrases)
    print('Vectorizer transform complete')
    y = pos_neg.loc[:, 'Binary']

    # NOTE(review): no random_state is passed, so the split differs per run.
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model.train(X_train, y_train)
    print('Model training complete')

    model.pickle_clf()
    model.pickle_vectorizer()

    model.plot_roc(X_test, y_test, size_x=12, size_y=12)