Exemplo n.º 1
0
 def readProcessedReview(self, filename):
     """Return the stopword-filtered entries of column ``'0'`` in ``filename``.

     The column is fetched with ``ReadData(filename, choice='imdb')`` and
     then passed through the ``StopwordRemoval`` steps
     (``stopwordsToRem`` -> ``removefromStopwordList`` -> ``stopwordRemoval``).

     Args:
         filename: Name of the processed dataset file to read.

     Returns:
         The value produced by ``StopwordRemoval.stopwordRemoval``.
     """
     # readDataReview is expected to load the file on its own, so the file is
     # read only once here instead of also loading it into an unused frame.
     reviews = ReadData(filename, choice='imdb').readDataReview('0')
     remover = StopwordRemoval(reviews)
     # NOTE(review): presumably this trims the stopword list before it is
     # applied - confirm against StopwordRemoval.
     adjusted_stopwords = remover.removefromStopwordList(
         remover.stopwordsToRem())
     return remover.stopwordRemoval(adjusted_stopwords)
Exemplo n.º 2
0
 def saveData(self, res, sen):
     """Save ``res`` and ``sen`` as datasets under their ``PROCESSED`` names.

     Args:
         res: Data wrapped in a DataFrame and saved under
             ``PROCESSED['html_review']``.
         sen: Data wrapped in a DataFrame and saved under
             ``PROCESSED['sentiment']``.
     """
     outputs = (('html_review', res), ('sentiment', sen))
     for processed_key, payload in outputs:
         frame = pd.DataFrame(payload)
         target = GetDatasetPath(PROCESSED[processed_key], choice=self.choice)
         target.saveDataset(frame)
Exemplo n.º 3
0
 def readDataReview(self, column_name):
     """Return the values of one column of the processed file as a list.

     Args:
         column_name: Label of the column to extract from the frame returned
             by ``readFileProcessed()``.

     Returns:
         ``df[column_name].values.tolist()`` for the loaded frame.
     """
     source = GetDatasetPath(self.filename, choice=self.choice)
     frame = source.readFileProcessed()
     return frame[column_name].values.tolist()
Exemplo n.º 4
0
 def readProcessedReview(self):
     """Return the stopword-filtered entries of column ``'0'``.

     The column is read from ``self.filename`` (dataset ``self.choice``) via
     ``ReadData`` and then passed through the ``StopwordRemoval`` steps
     (``stopwordsToRem`` -> ``removefromStopwordList`` -> ``stopwordRemoval``).

     Returns:
         The value produced by ``StopwordRemoval.stopwordRemoval``.
     """
     # Only ReadData is needed to obtain the column; no separate
     # GetDatasetPath object is built because nothing would use it.
     reviews = ReadData(self.filename, choice=self.choice).readDataReview('0')
     remover = StopwordRemoval(reviews)
     # NOTE(review): presumably this trims the stopword list before it is
     # applied - confirm against StopwordRemoval.
     adjusted_stopwords = remover.removefromStopwordList(
         remover.stopwordsToRem())
     return remover.stopwordRemoval(adjusted_stopwords)
Exemplo n.º 5
0
 def data_preparation(self):
     """Run the preparation pipeline and persist its outputs.

     Steps, in order:
       1. read column ``'0'`` of ``PROCESSED['lemmatize']``;
       2. apply ``StopwordRemoval.perform_removal``;
       3. convert the text with ``StrToNum(..., 200).convert_str_to_nums()``
          and save the returned word index under ``WORDS['num_words']``;
       4. read ``PROCESSED['sentiment']`` and split with
          ``TrainTestSplit().train_test(..., 0.2)``;
       5. hand the four split parts to ``SaveData().savePrepared``.
     """
     lemmatized = ReadData(PROCESSED['lemmatize']).readDataReview('0')
     cleaned = StopwordRemoval(lemmatized).perform_removal()

     # NOTE(review): 200 is presumably the padded sequence length - confirm
     # against StrToNum.
     converter = StrToNum(cleaned, 200)
     padded_reviews, vocabulary = converter.convert_str_to_nums()
     SaveData(WORDS['num_words']).saveNumWords(vocabulary)

     sentiment_frame = GetDatasetPath(
         PROCESSED['sentiment']).readFileProcessed()
     splitter = TrainTestSplit()
     x_train, y_train, x_test, y_test = splitter.train_test(
         padded_reviews, sentiment_frame, 0.2)

     # savePrepared receives the parts in this fixed order.
     SaveData().savePrepared([x_train, y_train, x_test, y_test])
Exemplo n.º 6
0
    def createmodel(self):
        """Train and save one model for each of three embedding matrices.

        The four split files (``x_train.csv``, ``x_test.csv``, ``y_test.csv``,
        ``y_train.csv``) are loaded once. Then, for the fastText, w2v and
        GloVe entries of ``EMBEDDING``, a model is built with
        ``CreateModel(200, 100).existing_model(matrix, 1)``, trained with
        ``TrainModel(128, 50).model_train(...)`` and saved under the matching
        ``MODEL`` entry.
        """
        print("Getting training and validation files...")
        X_train_pad = GetDatasetPath('x_train.csv').loadDataset()
        X_test_pad = GetDatasetPath('x_test.csv').loadDataset()
        y_test = GetDatasetPath('y_test.csv').loadDataset()
        y_train = GetDatasetPath('y_train.csv').loadDataset()
        print("Getting embedding file...")

        # One row per run: (banner text, EMBEDDING key, MODEL key). The keys
        # are resolved inside the loop so a missing entry only surfaces when
        # its own run starts.
        runs = (
            ("Fasttext embedding - model...", 'fast_emb', 'fasttext'),
            ("w2v embedding - model...", 'w2v_emb', 'w2v'),
            ("Glove embedding - model...", 'glove_emb', 'glove'),
        )
        for banner, embedding_key, model_key in runs:
            print(banner)
            embedding_matrix = ReadWriteEmbedding(
                EMBEDDING[embedding_key]).readEmb()
            print("Creating model...")
            model = CreateModel(200, 100).existing_model(embedding_matrix, 1)
            print("Training model...")
            model = TrainModel(128, 50).model_train(
                model, X_train_pad, y_train, X_test_pad, y_test)
            print("Saving model...")
            GetDatasetPath(MODEL[model_key]).saveModel(model)
Exemplo n.º 7
0
 def prepModel(self, test_samples_tokens_pad):
     """Load the stored model and return its predictions for the input.

     Args:
         test_samples_tokens_pad: Input passed unchanged to
             ``model.predict``.

     Returns:
         The result of ``predict`` on the loaded model.
     """
     # The model is loaded with 'Attention' registered as a custom object.
     with CustomObjectScope({'Attention': Attention}):
         loader = GetDatasetPath(self.model, choice=self.choice)
         restored = loader.loadModel()
     return restored.predict(test_samples_tokens_pad)
Exemplo n.º 8
0
 def saveDataReview(self, res):
     """Wrap ``res`` in a DataFrame and save it under ``self.filename``.

     Args:
         res: Data handed to the ``pd.DataFrame`` constructor.
     """
     frame = pd.DataFrame(res)
     GetDatasetPath(self.filename, choice=self.choice).saveDataset(frame)
Exemplo n.º 9
0
 def savePrepared(self, trainTestVal):
     """Save the four prepared split parts under their ``SPLIT`` names.

     Args:
         trainTestVal: Indexable holding, in order, the data for
             ``X_train_pad``, ``y_train``, ``X_test_pad`` and ``y_test``.
     """
     # Each part travels through the ``data=`` argument; saveDataset is
     # always given the same empty frame.
     placeholder = pd.DataFrame()
     split_names = ('X_train_pad', 'y_train', 'X_test_pad', 'y_test')
     for position, split_name in enumerate(split_names):
         target = GetDatasetPath(SPLIT[split_name],
                                 data=trainTestVal[position],
                                 choice=self.choice)
         target.saveDataset(placeholder)
Exemplo n.º 10
0
 def prepModel(self):
     """Predict on ``self.test_samples_tokens_pad`` with the fastText model.

     The model stored under ``MODEL['fasttext']`` (choice ``'imdb'``) is
     loaded and its predictions are stored in ``self.predictions``.
     """
     # The model is loaded with 'Attention' registered as a custom object.
     with CustomObjectScope({'Attention': Attention}):
         loader = GetDatasetPath(MODEL['fasttext'], choice='imdb')
         restored = loader.loadModel()
     self.predictions = restored.predict(self.test_samples_tokens_pad)
Exemplo n.º 11
0
 def readProcessedSentiment(self, filename):
     """Store column ``'0'`` of ``filename`` in ``self.tes_res``.

     Args:
         filename: Name of the processed dataset file, read with
             ``choice='imdb'``.
     """
     # readDataReview is expected to load the file on its own, so the file is
     # read only once here instead of also loading it into an unused frame.
     self.tes_res = ReadData(filename, choice='imdb').readDataReview('0')
Exemplo n.º 12
0
 def readProcessedSentiment(self):
     """Return column ``'0'`` of ``self.filename`` as a list.

     Returns:
         The value returned by ``ReadData.readDataReview('0')`` for
         ``self.filename`` / ``self.choice``.
     """
     # readDataReview is expected to load the file on its own, so the file is
     # read only once here instead of also loading it into an unused frame.
     return ReadData(self.filename, choice=self.choice).readDataReview('0')