# Fallback path: loading a cached Doc2Vec model failed. `ex` is bound by the
# enclosing except clause, which begins before this chunk — TODO confirm.
print(ex)
print("Couldn't load Doc2Vec, BUILDING...")  # fixed typo: "Could't"
doc2vec_model = generate_doc2vec(doc2vec_name, datasets)

# Step 3: gather features.
print("FEATURIZING")

# Labels: getSentimentArray output for the RNN (format per that helper —
# TODO confirm), plus flat float tensors for evaluation.
yTrain = getSentimentArray(dataset_A["sentiment"], useSmall=None)
yTrain_list = torch.Tensor(list(dataset_A["sentiment"]))
yDev = getSentimentArray(dataset_B["sentiment"], useSmall=None)
yDev_list = torch.Tensor(list(dataset_B["sentiment"]))

# Word2vec features, split into 4 chunks per review for the recurrent model.
xTrain = getFeatureList(getCleanReviews(dataset_A, useSmall=None),
                        word2vec_model, NUM_FEATURES, num_splits=4)
xDev = getFeatureList(getCleanReviews(dataset_B, useSmall=None),
                      word2vec_model, NUM_FEATURES, num_splits=4)

# Step 4: train the recurrent NN model, or load the cached checkpoint.
MODEL_PATH = "recurrent_model.pt"
doTraining = False
if doTraining:
    print("TRAINING recurrent nn model")
    r_model = nn_recurrent_model.RNN(input_size=NUM_FEATURES)
    # NOTE(review): RNN.train here is a custom training loop that shadows
    # nn.Module.train — verify against nn_recurrent_model.
    r_model.train(xTrain, yTrain, learning_rate=0.01, epochs=4)
    torch.save(r_model, MODEL_PATH)
else:
    # torch.load of a whole pickled model requires nn_recurrent_model to be
    # importable at load time.
    r_model = torch.load(MODEL_PATH)

# Step 5: evaluate performance.
print()
# Fallback path: the cached Doc2Vec model could not be loaded, so rebuild it.
print("Couldn't load Doc2Vec, BUILDING...")  # fixed typo: "Could't"
doc2vec_model = generate_doc2vec(doc2vec_name, datasets)

# Step 3: gather features.
print("FEATURIZING")

# Labels as flat float tensors (A = train split, B = dev split).
yTrain = torch.Tensor(list(dataset_A["sentiment"]))
yDev = torch.Tensor(list(dataset_B["sentiment"]))

# Doc2vec document vectors as the feed-forward network's input features.
xTrainDoc = getDocFeatureVec(getCleanReviews(dataset_A), doc2vec_model,
                             NUM_FEATURES)
xTrainDoc = torch.tensor(xTrainDoc)

# Step 4: train the feed-forward NN model, or load the cached checkpoint.
MODEL_PATH = "feedforward_model.pt"
doTraining = False
if doTraining:
    print("TRAINING nn model")
    l_model = nn_model.NeuralNetwork(input_nodes=NUM_FEATURES)
    l_model.train_model_persample(xTrainDoc, yTrain)
    torch.save(l_model, MODEL_PATH)
else:
    # torch.load of a whole pickled model requires nn_model to be importable
    # at load time.
    l_model = torch.load(MODEL_PATH)

# Step 5: evaluate performance.
header=0, delimiter="\t", quoting=3) dataset_D = pd.read_csv("dataset/processed/D.tsv", header=0, delimiter="\t", quoting=3) dataset_E = pd.read_csv("dataset/processed/E.tsv", header=0, delimiter="\t", quoting=3) datasets = [dataset_A, dataset_D, dataset_E] yTrain = torch.Tensor([y_val for y_val in dataset_A["sentiment"]]) yDev = torch.Tensor([y_val for y_val in dataset_B["sentiment"]]) xTrainRaw = getCleanReviews(dataset_A) xDevRaw = getCleanReviews(dataset_B) # Step 2 Define sweep range evaluationRunSpecifications = [] # for num_features in [100, 150, 200, 250, 300]: # for num_features in [350, 400, 450]: # runSpecification = {} # runSpecification['model'] = 'Standard Neural Network' # runSpecification['num_features'] = num_features # runSpecification['context'] = 5 # runSpecification['optimizing'] = 'num_features' # evaluationRunSpecifications.append(runSpecification)
# Fallback path: loading a cached Doc2Vec model failed. `ex` is bound by the
# enclosing except clause, which begins before this chunk — TODO confirm.
print(ex)
print("Couldn't load Doc2Vec, BUILDING...")  # fixed typo: "Could't"
doc2vec_model = generate_doc2vec(doc2vec_name, datasets)

# Step 3: gather features.
print("FEATURIZING")

# Labels: getSentimentArray output for the RNN (format per that helper —
# TODO confirm), plus flat float tensors for evaluation.
yTrain = getSentimentArray(dataset_A["sentiment"], useSmall=None)
yTrain_list = torch.Tensor(list(dataset_A["sentiment"]))
yDev = getSentimentArray(dataset_B["sentiment"], useSmall=None)
yDev_list = torch.Tensor(list(dataset_B["sentiment"]))

# Word2vec features, split into 4 chunks per review for the recurrent model.
xTrain = getFeatureList(getCleanReviews(dataset_A, useSmall=None),
                        word2vec_model, NUM_FEATURES, num_splits=4)
xDev = getFeatureList(getCleanReviews(dataset_B, useSmall=None),
                      word2vec_model, NUM_FEATURES, num_splits=4)

# Step 4: train the recurrent NN model (training enabled in this variant).
MODEL_PATH = "recurrent_model.pt"
doTraining = True
if doTraining:
    print("TRAINING recurrent nn model")
    r_model = nn_recurrent_model.RNN(input_size=NUM_FEATURES)
    # NOTE(review): RNN.train here is a custom training loop that shadows
    # nn.Module.train — verify against nn_recurrent_model. The chunk is
    # truncated here; torch.save presumably follows outside this view.
    r_model.train(xTrain, yTrain, learning_rate=0.01, epochs=4)