示例#1
0
def load_data():
    """Build labelled feature lists from the two training files.

    ``../training.txt`` is read as tab-separated ``label<TAB>text`` lines
    (label 0 is negative, 1 is positive); every text is also appended to a
    corpus list.  ``../training3.csv`` is then read as comma-separated lines
    with a quoted label in column 0 (0 is negative, 4 is positive) and the
    text in column 5; at most 100001 lines of it are consumed.  Rows with any
    other label are ignored.

    Each kept row becomes a ``(word_feats(preprocess(text)), label)`` tuple
    in the ``neg`` or ``pos`` list.

    :raises OSError: if either file cannot be opened (after printing the
        original diagnostic message)

    NOTE(review): the code visible here never returns ``pos``, ``neg`` or
    ``corpus`` -- confirm whether the function is truncated in this listing.
    """
    pos = []
    neg = []
    corpus = []

    try:
        f = open('../training.txt', 'r')
    except OSError:
        print("No file named in training.txt in current directory")
        # Nothing below can run without the file; fail with the real cause
        # rather than an unbound-variable error further down.
        raise

    with f:
        for eachline in f:
            if not eachline.strip():
                continue  # a blank line has no label/text pair to unpack
            senti, text = eachline.split('\t')
            corpus.append(text)
            label = int(senti)
            if label == 0:
                neg.append((word_feats(preprocess(text)), 'neg'))
            elif label == 1:
                pos.append((word_feats(preprocess(text)), 'pos'))

    try:
        f2 = open('../training3.csv', 'r')
    except OSError:
        print("Cant open training3.csv")
        raise

    # The original counter broke out after consuming line 100001.
    max_lines = 100001
    with f2:
        for line_no, eachline in enumerate(f2, start=1):
            if line_no > max_lines:
                break
            if not eachline.strip():
                continue
            temp = eachline.split(',')
            text = str(temp[5]).strip()
            senti = int(temp[0].strip('"'))
            if senti == 0:
                pptweet = preprocess(text)
                # Append a (features, label) tuple; the label is not an
                # argument of word_feats.
                neg.append((word_feats(pptweet), 'neg'))
            elif senti == 4:
                pptweet = preprocess(text)
                # Features come from the preprocessed tweet, not from the
                # raw list of CSV fields.
                pos.append((word_feats(pptweet), 'pos'))
示例#2
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --- Hyperparameters ---
BATCH_SIZE = 64
NUM_CLASESS = 2
EPOCHS = 2000
NUM_FETRUES = 4  # number of features
INPUT_SHAPE = (1, NUM_FETRUES)
VALIDATION_SPLIT = 0.2

# --- Data preprocessing ---
trainPath = 'train.csv'
testPath = 'test.csv'
X_train, Y_train, X_test = preprocess(trainPath, testPath)

# Per the original note, this turns labels of shape (n, 1) into
# shape (n, num_classes).
Y_train = utils.to_categorical(Y_train, NUM_CLASESS)

print('X_train:', X_train.shape)
print('Y_train:', Y_train.shape)
print('X_test:', X_test.shape)

# --- Model ---
model = Inet.buildInet(INPUT_SHAPE, NUM_CLASESS)

# --- Output file names ---
modelfile = 'modelweight.model'  # file name for the saved weights
file_path_history = 'historyfile.bin'  # file name for the history used for plotting
示例#3
0
def word_feats(words):
    """
    Build a presence-feature dictionary from the items of *words*.

    :param words: any iterable of hashable items; a plain string is iterated
        character by character, so pass already-tokenised words to get
        word-level keys
    :return: a dictionary with one key per distinct item, each mapped to True
    """
    return dict.fromkeys(words, True)

# Accumulators of (feature dict, label) tuples, one list per sentiment class.
neg = []
pos = []

try:
    f2 = open('training3.csv', 'r')
except OSError:
    print("Cant open training3.csv")
    # Nothing below can run without the file; fail with the real cause
    # rather than a NameError on f2.
    raise

with f2:
    for i, eachline in enumerate(f2, start=1):
        # The original counter broke out after consuming line 1001.
        if i > 1001:
            break
        if not eachline.strip():
            continue  # a blank line has no column 5 to index
        temp = eachline.split(',')
        text = str(temp[5]).strip()

        # Column 0 holds the quoted label: 0 is negative, 4 is positive;
        # any other value is ignored.
        senti = int(temp[0].strip('"'))
        if senti == 0:
            pptweet = preprocess(text)
            neg.append((word_feats(pptweet), 'neg'))
        elif senti == 4:
            pptweet = preprocess(text)
            # Features come from the preprocessed tweet, not from the raw
            # list of CSV fields.
            pos.append((word_feats(pptweet), 'pos'))

print(neg[:5])
print(pos[:5])