def load_data():
    """Build labelled feature lists from the two training files.

    Reads ``../training.txt``, where each line is ``<label><TAB><text>`` and
    the label is ``0`` (negative) or ``1`` (positive), and then
    ``../training3.csv``, where comma-separated field 0 is the label
    (``0`` negative, ``4`` positive) and field 5 is the text.  Every labelled
    text is run through ``preprocess`` and ``word_feats`` and stored as a
    ``(features, 'neg' | 'pos')`` tuple.

    A file that cannot be opened is reported on stdout and skipped, so the
    other file is still loaded.

    NOTE(review): the ``pos``, ``neg`` and ``corpus`` lists built here are
    not returned by the visible code; the original return contract (``None``)
    is kept -- confirm how callers are meant to obtain the data.
    """
    pos = []
    neg = []
    corpus = []

    # --- tab-separated file: "<label>\t<text>" ---
    try:
        f = open('../training.txt', 'r')
    except OSError:
        # Best effort: report and carry on with the second file instead of
        # failing later on an unbound file variable.
        print("No file named in training.txt in current directory")
    else:
        with f:
            for eachline in f:
                senti, text = eachline.split('\t')
                corpus.append(text)
                label = int(senti)
                if label == 0:
                    neg.append((word_feats(preprocess(text)), 'neg'))
                elif label == 1:
                    pos.append((word_feats(preprocess(text)), 'pos'))

    # --- comma-separated file: label in field 0, text in field 5 ---
    try:
        f2 = open('../training3.csv', 'r')
    except OSError:
        print("Cant open training3.csv")
    else:
        with f2:
            for i, eachline in enumerate(f2, start=1):
                # NOTE(review): a plain split(',') cuts the text at its first
                # comma and keeps any surrounding quotes; if the file is
                # quoted CSV, the csv module may be the better parser.
                fields = eachline.split(',')
                text = str(fields[5]).strip()
                senti = int(fields[0].strip('""'))
                if senti == 0:
                    pptweet = preprocess(text)
                    # Label belongs in the tuple, not in the word_feats call.
                    neg.append((word_feats(pptweet), 'neg'))
                elif senti == 4:
                    pptweet = preprocess(text)
                    # Features come from the preprocessed tweet, not from the
                    # raw list of CSV fields.
                    pos.append((word_feats(pptweet), 'pos'))
                # Same cut-off as before: stop once line 100001 is processed.
                if i > 100000:
                    break
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ---------------------------------------------------------------------------
# Hyperparameters
# ---------------------------------------------------------------------------
BATCH_SIZE = 64
NUM_CLASESS = 2
EPOCHS = 2000
NUM_FETRUES = 4  # number of features
INPUT_SHAPE = (1, NUM_FETRUES)
VALIDATION_SPLIT = 0.2

# ---------------------------------------------------------------------------
# Data preprocessing
# ---------------------------------------------------------------------------
trainPath = 'train.csv'
testPath = 'test.csv'
X_train, Y_train, X_test = preprocess(trainPath, testPath)

# Convert the labels from shape=(n, 1) into shape=(n, num_classes).
Y_train = utils.to_categorical(Y_train, NUM_CLASESS)

# Report the array shapes before building the model.
print('X_train:', X_train.shape)
print('Y_train:', Y_train.shape)
print('X_test:', X_test.shape)

# ---------------------------------------------------------------------------
# Model and output file names
# ---------------------------------------------------------------------------
model = Inet.buildInet(INPUT_SHAPE, NUM_CLASESS)
modelfile = 'modelweight.model'  # save weight
file_path_history = 'historyfile.bin'  # save history for matplot
def word_feats(words):
    """Turn an iterable of tokens into a presence-feature dictionary.

    :param words: iterable of hashable tokens (for example a list of words).
        The argument is iterated as given, so a plain string yields one key
        per character -- tokenize a sentence before passing it in.
    :return: a dictionary where each distinct token is a key and ``True`` is
        the value
    """
    return {word: True for word in words}


# Labelled training examples, each a (features, 'neg' | 'pos') tuple.
neg = []
pos = []

try:
    f2 = open('training3.csv', 'r')
except OSError:
    # Best effort: report the problem and continue with empty lists instead
    # of failing later on an unbound file variable.
    print("Cant open training3.csv")
else:
    with f2:
        for i, eachline in enumerate(f2, start=1):
            # NOTE(review): a plain split(',') cuts the text at its first
            # comma and keeps any surrounding quotes; if the file is quoted
            # CSV, the csv module may be the better parser.
            temp = eachline.split(',')
            text = str(temp[5]).strip()
            senti = int(temp[0].strip('""'))
            if senti == 0:
                pptweet = preprocess(text)
                neg.append((word_feats(pptweet), 'neg'))
            elif senti == 4:
                pptweet = preprocess(text)
                # Features come from the preprocessed tweet, not from the raw
                # list of CSV fields.
                pos.append((word_feats(pptweet), 'pos'))
            # Same cut-off as before: stop once line 1001 has been processed.
            if i > 1000:
                break

print(neg[:5])
print(pos[:5])