Example #1
def load_config():
    LoggerHelper.info("Loading Config...")
    pwd = os.path.dirname(os.path.abspath(__file__))
    if platform.system() == "Windows":
        Config.add_config_ini('%s\\initialization\\main_w.ini' % pwd)
    else:
        Config.add_config_ini('%s/initialization/main.ini' % pwd)
    LoggerHelper.info("Loading is loaded.")
Example #2
 def get_network_input_size(self):
     size = self.config["wordEmbedding"]["size"]
     if self.config["options"]["wiki"]["enabled"]:
         size = size + self.config["options"]["wiki"]["multiply_factors"]
     if self.config["options"]["twitter"]["enabled"]:
         size = size + self.config["options"]["twitter"]["multiply_factors"]
     LoggerHelper.info("Network Input Size :" + str(size))
     return size
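
For illustration, a hypothetical config fragment matching the keys read above; with both options enabled and a factor of 1 each, the input size becomes 102, which matches the input_size=102 default in Example #12:

# Hypothetical config fragment, for illustration only.
config = {
    "wordEmbedding": {"size": 100},
    "options": {
        "wiki": {"enabled": True, "multiply_factors": 1},
        "twitter": {"enabled": True, "multiply_factors": 1},
    },
}
# get_network_input_size() would then return 100 + 1 + 1 = 102.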
Example #3
 def calculate_hidden_size(self):
     samples_in_training_data = 116100
     scaling_factor = 5
     input_neurons = self.input_size
     output_neurons = self.output_size
     size = int(samples_in_training_data / (scaling_factor * (input_neurons + output_neurons)))
     LoggerHelper.info('Calculated hidden size is ' + str(size))
     if size == 0:
         LoggerHelper.error('Calculated hidden size is 0; falling back to 2')
         return 2
     else:
         return size
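
This is the common rule of thumb N_h = N_s / (alpha * (N_i + N_o)): samples in the training data divided by a scaling factor times the sum of input and output neurons. A quick sanity check with hypothetical layer sizes:

# Hypothetical sizes, for illustration only.
samples_in_training_data = 116100
scaling_factor = 5
input_neurons, output_neurons = 102, 3
print(samples_in_training_data // (scaling_factor * (input_neurons + output_neurons)))  # 221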
Example #4
    def save_model(self):
        # Serialize a checkpoint dict with torch.save (pickle, not JSON): a fresh
        # template model and optimizer are stored alongside the trained state dicts.
        save_file_name = self.get_save_file_name()
        checkpoint = {
            'model': NewsDnnGeneralModel(),
            'model_state_dict': self.model.state_dict(),
            'optimizer': optim.Adam(self.model.parameters(), lr=self.model.lr),
            'optimizer_state_dict': self.optimizer.state_dict()
        }

        torch.save(checkpoint, save_file_name)
        LoggerHelper.info("Model Saved to disk")
Example #5
 def load_model(self, path):
     checkpoint = torch.load(path)
     self.model = checkpoint['model']
     self.model.load_state_dict(checkpoint['model_state_dict'])
     self.optimizer = checkpoint['optimizer']
     self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
     LoggerHelper.info("**Model Info**"
                       + "\nbatch_size : " + str(self.reader.batch_size)
                       + "\nsequence_length : " + str(self.reader.sequence_length)
                       + "\ninput_size : " + str(self.model.input_size)
                       + "\nnum_layers : " + str(self.model.num_layers)
                       + "\ndrop_prob : " + str(self.model.drop_prob)
                       + "\nlr : " + str(self.model.lr))
     LoggerHelper.info("Model loaded from disk")
Example #6
def get_news_type(dnn_type):
    dnn_type = dnn_type.strip()
    if dnn_type == "CNN":
        return NewsCnnMain()
    elif dnn_type == "RNN":
        return NewsDnnGeneralMain()
    elif dnn_type == "TA":
        return TaMain()
    elif dnn_type == "PriceRNN":
        return PriceRnnMain()
    elif dnn_type == "CATE":
        return NewsCateMain()
    else:  # Default RNN
        LoggerHelper.error("DNN type (" + dnn_type + ") is not found. Default RNN (NewsDnnGeneralMain) is used.")
        return NewsDnnGeneralMain()
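
The same dispatch can be written as a dict lookup, which keeps the mapping in one place. A sketch, assuming the same constructors and LoggerHelper are importable:

_NEWS_TYPES = {
    "CNN": NewsCnnMain,
    "RNN": NewsDnnGeneralMain,
    "TA": TaMain,
    "PriceRNN": PriceRnnMain,
    "CATE": NewsCateMain,
}

def get_news_type(dnn_type):
    cls = _NEWS_TYPES.get(dnn_type.strip())
    if cls is None:  # Default RNN
        LoggerHelper.error("DNN type (%s) is not found. Default RNN (NewsDnnGeneralMain) is used." % dnn_type)
        cls = NewsDnnGeneralMain
    return cls()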
Example #7
 def get_count(self, fetch_type=1):
     if fetch_type == NewsDnnBaseDataReader.DictDataTerm["Train"]:
         if self.__train_cursor is None:
             self.fetch_data(NewsDnnBaseDataReader.DictDataTerm["Train"])
         self.train_count = self.__train_cursor.count()
         return self.train_count
     elif fetch_type == NewsDnnBaseDataReader.DictDataTerm["Validate"]:
         if self.__validate_cursor is None:
             self.fetch_data(NewsDnnBaseDataReader.DictDataTerm["Validate"])
         self.validate_count = self.__validate_cursor.count()
         return self.validate_count
     elif fetch_type == NewsDnnBaseDataReader.DictDataTerm["Test"]:
         if self.__test_cursor is None:
             self.fetch_data(NewsDnnBaseDataReader.DictDataTerm["Test"])
         self.test_count = self.__test_cursor.count()
         return self.test_count
     else:
         LoggerHelper.critical('Unable To Fetch')
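
Note that Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0. On newer drivers the count is taken on the collection instead; a one-line sketch, assuming hypothetical collection and query attributes on the reader:

# Hypothetical names: assumes the reader keeps the collection handle and the
# filter used to build the train cursor.
self.train_count = self.collection.count_documents(self.train_query)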
Example #8
    def test(self):
        LoggerHelper.info("Test Started...")
        self.timer.start()
        df = pandas.DataFrame(columns=['Accuracy', 'Test Accuracy', 'Mean Test Loss'])
        # Tracking variables
        val_losses = []
        predictions, true_labels = [], []

        test_set = self.reader.get_data(NewsCateDataReader.DictDataTerm["Test"],
                                        NewsCateDataReader.DictDataType[
                                            self.config["options"]["network_type"]])
        self.model.eval()
        accuracy = 0
        for batch in test_set:
            # Add batch to GPU
            batch = tuple(t.to(self.device) for t in batch)

            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch

            with torch.no_grad():
                # Forward pass, calculate logit predictions
                outputs = self.model(b_input_ids, token_type_ids=None,
                                     attention_mask=b_input_mask)

                logits = outputs[0]

                # Move logits and labels to CPU
                logits = logits.detach().cpu().numpy()
                label_ids = b_labels.to('cpu').numpy()

                # Calculate the accuracy for this batch of test sentences.
                label, acc = self.calculate_accuracy(logits, label_ids)
                accuracy += acc

                # Store predictions and true labels
                predictions.append(label)
                true_labels.append(label_ids)
        scores = self.calculate_scores(predictions, true_labels)
        df = self.log_test(df, accuracy, self.test_count, val_losses, scores)
        Export.append_df_to_excel(df, self.current_date)
        self.timer.stop(time_for="Test")
Example #9
 def get_data(self, fetch_type=1, data_type=1):
     if fetch_type == NewsDnnBaseDataReader.DictDataTerm["Train"]:
         cursor = self.__train_cursor
     elif fetch_type == NewsDnnBaseDataReader.DictDataTerm["Validate"]:
         cursor = self.__validate_cursor
     elif fetch_type == NewsDnnBaseDataReader.DictDataTerm["Test"]:
         cursor = self.__test_cursor
     else:
         LoggerHelper.critical('Unable To Get Cursor (Check Fetch Type)')
         return None
     cursor.rewind()
     self.clear_data()
     if data_type == NewsDnnBaseDataReader.DictDataType["News"]:
         return self.get_data_news(cursor)
     elif data_type == NewsDnnBaseDataReader.DictDataType["Wiki"]:
         return self.get_data_wiki(cursor)
     elif data_type == NewsDnnBaseDataReader.DictDataType["WikiAndTweet"]:
         return self.get_data_wiki_and_tweet(cursor)
     else:
         LoggerHelper.critical('Unknown Data Type (data_type)')
         return None
Example #10
 def validate(self, df, epoch, losses):
     LoggerHelper.info("Validation Started...")
     # Get validation loss
     val_losses = []
     predictions, true_labels = [], []
     self.model.eval()
     accuracy = 0
     steps = 0
     validate_set = self.reader.get_data(NewsCateDataReader.DictDataTerm["Validate"],
                                         NewsCateDataReader.DictDataType[
                                             self.config["options"]["network_type"]])
     for batch in validate_set:  # Evaluate data for one epoch
         # Add batch to GPU
         batch = tuple(t.to(self.device) for t in batch)
         # Unpack the inputs from our dataloader
         b_input_ids, b_input_mask, b_labels = batch
         with torch.no_grad():  # Not to compute or store gradients
             outputs = self.model(b_input_ids,
                                  token_type_ids=None,
                                  attention_mask=b_input_mask)
             logits = outputs[0]
             # Move logits and labels to CPU
             logits = logits.detach().cpu().numpy()
             label_ids = b_labels.to('cpu').numpy()
             # Calculate the accuracy for this batch of test sentences.
             label, tmp_eval_accuracy = self.calculate_accuracy(logits, label_ids)
             # Accumulate the total accuracy.
             accuracy += tmp_eval_accuracy
             # Track the number of batches
             steps += 1
             # Store predictions and true labels
             predictions.append(label)
             true_labels.append(label_ids)
     # Report the final accuracy for this validation run.
     LoggerHelper.info("Accuracy: {0:.2f}".format(accuracy / steps))
     scores = self.calculate_scores(predictions, true_labels)
     self.model.train()  # reset to train mode after iterating through validation data
     return self.log_validate_without_loss(df, epoch, 0, self.validate_count, scores)
Example #11
    def __init__(self,
                 input_size=1,
                 hidden=None,
                 n_layers=2,
                 drop_prob=0.2,
                 lr=0.001,
                 training_data_size=100000,
                 output_size=3,
                 use_gpu=True):
        super().__init__()
        self.should_use_gpu = use_gpu
        self.input_size = input_size
        self.training_data_size = training_data_size
        self.num_layers = n_layers
        self.drop_prob = drop_prob
        self.lr = lr
        self.output_size = output_size
        if hidden is None:
            self.hidden = self.calculate_hidden_size()
        else:
            self.hidden = hidden

        self.lstm = nn.LSTM(
            self.input_size,    # Expected features in the input
            self.hidden,        # Features in the hidden state
            self.num_layers,    # Number of stacked LSTM layers
            bias=True,          # Whether to use bias weights
            dropout=drop_prob,  # Dropout on the output of each LSTM layer except the last
            batch_first=True,   # Input and output tensors are provided as (batch, seq, feature)
            bidirectional=False)  # Unidirectional LSTM

        # Additional Dropout Layer
        self.dropout = nn.Dropout(drop_prob)

        # Fully-Connected Output Layer
        self.fc = nn.Linear(self.hidden, output_size)

        # LogSoftmax Output Layer
        self.sig = nn.LogSoftmax(dim=1)

        # Check GPU Usage
        self.can_use_gpu = torch.cuda.is_available()
        if self.can_use_gpu:
            if self.should_use_gpu:
                LoggerHelper.info('Training on GPU!')
            else:
                LoggerHelper.info('GPU usage is disabled by config.json')
        else:
            LoggerHelper.info(
                'No GPU available, training on CPU; consider making n_epochs very small.'
            )
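
Since the output layer is nn.LogSoftmax, the natural training criterion is nn.NLLLoss; the pair is equivalent to nn.CrossEntropyLoss on raw logits. A minimal sketch of the pairing:

import torch
import torch.nn as nn

log_probs = nn.LogSoftmax(dim=1)(torch.randn(4, 3))  # (batch, classes)
targets = torch.tensor([0, 2, 1, 2])                 # class index per sample
loss = nn.NLLLoss()(log_probs, targets)
print(loss.item())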
Example #12
    def __init__(self,
                 input_size=102,
                 n_layers=2,
                 drop_prob=0.2,
                 n_filters=100,
                 filter_sizes=[3, 4, 5],
                 lr=0.001,
                 output_size=3,
                 use_gpu=True):
        super().__init__()
        self.should_use_gpu = use_gpu
        self.input_size = input_size
        self.output_size = output_size
        self.num_layers = n_layers
        self.drop_prob = drop_prob
        self.lr = lr

        # 1D Convolution Layers (one per filter size)
        self.conv_0 = nn.Conv1d(in_channels=input_size,
                                out_channels=n_filters,
                                kernel_size=filter_sizes[0])

        self.conv_1 = nn.Conv1d(in_channels=input_size,
                                out_channels=n_filters,
                                kernel_size=filter_sizes[1])

        self.conv_2 = nn.Conv1d(in_channels=input_size,
                                out_channels=n_filters,
                                kernel_size=filter_sizes[2])

        # Additional Dropout Layer
        self.dropout = nn.Dropout(drop_prob)
        # Fully-Connected Output Layer
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_size)
        # Softmax Output Layer
        self.sig = nn.Softmax(dim=1)
        # Check GPU Usage
        self.can_use_gpu = torch.cuda.is_available()
        if self.can_use_gpu:
            if self.should_use_gpu:
                LoggerHelper.info('Training on GPU!')
            else:
                LoggerHelper.info('GPU usage is disabled by config.json')
        else:
            LoggerHelper.info('No GPU available, training on CPU; consider making n_epochs very small.')
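
The constructor does not show how the three branches combine; in a Kim-style text CNN the usual forward pass is convolve, ReLU, max-pool over time, concatenate, dropout, then the linear layer. A hedged sketch of such a forward (an assumption, not this class's actual method), with input shaped (batch, input_size, seq_len) and seq_len at least max(filter_sizes):

import torch
import torch.nn.functional as F

def forward(self, x):
    # x: (batch, input_size, seq_len). Each branch yields (batch, n_filters, L'),
    # which is max-pooled over the time axis down to (batch, n_filters).
    conved = [F.relu(conv(x)) for conv in (self.conv_0, self.conv_1, self.conv_2)]
    pooled = [F.max_pool1d(c, c.shape[2]).squeeze(2) for c in conved]
    cat = self.dropout(torch.cat(pooled, dim=1))  # (batch, len(filter_sizes) * n_filters)
    return self.sig(self.fc(cat))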
Example #13
        LoggerHelper.info("News Stock Prediction is ended.")
        # WordEmbedding(path=Config.word_embedding.path)
        # news_dnn = NewsDnnMain(epochs=int(Config.training.epochs),
        #                        batch_size=int(Config.training.batch_size),
        #                        seq_length=int(Config.training.sequence_length),
        #                        lr=float(Config.training.lr))
    if args.statistics:
        LoggerHelper.info("Starting Statistic Collection Mode...")
        Statistics().collect()
        LoggerHelper.info("Statistic Collection is ended...")

    if args.test:
        LoggerHelper.info("Starting Test Mode...")
        TransformersTest.sentiment_analysis_test()
        LoggerHelper.info("Test Mode is ended...")

    if args.webservice:
        web_manager = WebManager()
        web_manager.add_static_files()
        web_manager.add_news_root()
        web_manager.run()


if __name__ == "__main__":
    try:
        main()
    except Exception as exception:
        LoggerHelper.error("Ex: " + str(exception))
        LoggerHelper.error(traceback.format_exc())
    exit()
Example #14
def main():
    # Load Config
    load_config()
    # Load arg
    args = load_arg()

    if args.fdc:
        LoggerHelper.info("Starting Financial Data Collector Mode...")
        fdc = FDC()
        fdc.collect()
        LoggerHelper.info("Financial Data Collector Mode is ended.")

    if args.wiki:
        LoggerHelper.info("Starting Wikipedia Load Mode...")
        wiki = WikiRecorder()
        wiki.collect_all()
        LoggerHelper.info("Wikipedia Load Mode is ended.")

    if args.organize:
        LoggerHelper.info("Starting News Organizer Mode...")
        collector = NewsOrganizer()
        collector.dnn_organizer_for_dnn_filtered_news()
        LoggerHelper.info("News Organizer Mode is ended.")

    if args.ind:
        LoggerHelper.info("Starting Indicators Collector Mode...")
        ind_collector = IndicatorsCollector()
        if args.ind == "zip":
            ind_collector.collect_from_zip()
        else:
            ind_collector.collect()
        LoggerHelper.info("Indicators Collector Mode is ended.")

    if args.news is not None:
        LoggerHelper.info("Starting Stock Prediction Mode...")
        news_dnn = get_news_type(args.news)
        news_dnn.train(print_every=int(Config.training.print_every))
        news_dnn.test()
        LoggerHelper.info("News Stock Prediction is ended.")
        # WordEmbedding(path=Config.word_embedding.path)
        # news_dnn = NewsDnnMain(epochs=int(Config.training.epochs),
        #                        batch_size=int(Config.training.batch_size),
        #                        seq_length=int(Config.training.sequence_length),
        #                        lr=float(Config.training.lr))
    if args.statistics:
        LoggerHelper.info("Starting Statistic Collection Mode...")
        Statistics().collect()
        LoggerHelper.info("Statistic Collection is ended...")

    if args.test:
        LoggerHelper.info("Starting Test Mode...")
        TransformersTest.sentiment_analysis_test()
        LoggerHelper.info("Test Mode is ended...")

    if args.webservice:
        web_manager = WebManager()
        web_manager.add_static_files()
        web_manager.add_news_root()
        web_manager.run()
Example #15
 def get_network_input_size(self):
     size = 1
     LoggerHelper.info("Network Input Size :" + str(size))
     return size
Example #16
 def stop(self, time_for=None):
     end_dt = dt.datetime.now()
     if time_for is None:
         LoggerHelper.info('Time taken: %s' % (end_dt - self.start_dt))
     else:
         LoggerHelper.info('Time taken for ' + time_for + ' : %s' % (end_dt - self.start_dt))
Example #17
    def train(self, print_every=20):
        df = pandas.DataFrame(columns=['Epoch', 'Step',
                                       'Train Mean Loss Cumulative', 'Train Accuracy',
                                       'Val Mean Loss', 'Val Accuracy'])
        self.timer.start()
        self.model.train()  # Set mode of model
        losses = []
        train_set = self.reader.get_data(fetch_type=NewsCateDataReader.DictDataTerm["Train"],
                                         data_type=NewsCateDataReader.DictDataType[
                                             self.config["options"]["network_type"]])
        for e in range(self.epochs):
            print(self.config["options"]["network_type"])
            print(NewsCateDataReader.DictDataType[
                      self.config["options"]["network_type"]])
            self.model.train()  # Set to Train Mode
            total_loss_for_epoch = 0

            epoch_timer = Timer()
            epoch_timer.start()
            for step, batch in enumerate(train_set): # For each batch of training data...
                # Progress update every print_every batches.
                if step % print_every == 0:
                    # Report progress.
                    print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(train_set)))
                # Get Data
                b_input_ids = batch[0].to(self.device)
                b_input_mask = batch[1].to(self.device)
                b_labels = batch[2].to(self.device)

                # Clear previously accumulated gradients before the backward pass.
                # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
                self.model.zero_grad()

                # Perform a forward pass (evaluate the model on this training batch).
                # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
                outputs = self.model(b_input_ids,
                                     token_type_ids=None,
                                     attention_mask=b_input_mask,
                                     labels=b_labels)
                loss = outputs[0]
                total_loss_for_epoch += loss.item()

                # Perform a backward pass to calculate the gradients.
                loss.backward()

                # This is to help prevent the "exploding gradients" problem.
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

                # Update parameters based on their gradients, the learning rate, etc.
                self.optimizer.step()

                # Update the learning rate.
                self.scheduler.step()

            # Calculate the average loss over the training data.
            avg_train_loss = total_loss_for_epoch / len(train_set)

            # Store the loss value for plotting the learning curve.
            losses.append(avg_train_loss)
            LoggerHelper.info("  Average training loss: {0:.2f}".format(avg_train_loss))
            epoch_timer.stop(time_for="Epoch")

            timer = Timer(start=True)
            df = self.validate(df, e, losses)
            timer.stop(time_for="Validate")
            self.model.train()
        self.timer.stop(time_for="Train")
        self.save_model()
        self.current_date = DateHelper.get_current_date()
        Export.append_df_to_excel(df, self.current_date)
        Export.append_df_to_excel(self.get_info(), self.current_date)
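
The per-batch self.scheduler.step() is consistent with the transformers linear warmup schedule used in BERT fine-tuning tutorials. A sketch of how such a scheduler is typically constructed (an assumption; the class's actual setup is not shown here):

from transformers import get_linear_schedule_with_warmup

# Hypothetical setup: total steps = batches per epoch * number of epochs.
total_steps = len(train_set) * self.epochs
self.scheduler = get_linear_schedule_with_warmup(self.optimizer,
                                                 num_warmup_steps=0,
                                                 num_training_steps=total_steps)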
Example #18
 async def __random_news_handler(self, request):
     request = await request.json()
     print(request)
     default = self.get_news_data(self.db, self.defaultCollection,
                                  request['object_id'])
     if default is None:
         res = {'isError': True, 'Message': "Object Is Not Found."}
         res = JSONEncoder().encode(res)
         return web.json_response(res)
     else:
         try:
             self.toCollection.insert({
                 "_id": default["_id"],
                 "title": default["title"],
                 "summery": default["summery"],
                 "article": default["article"],
                 "url": default["url"],
                 "category": request["categories"],
                 "price_after_minute": default["price_after_minute"],
                 "price_after_hour": default["price_after_hour"],
                 "price_after_day": default["price_after_day"],
                 "price_before": default["price_before"],
                 "wiki_relatedness": default["wiki_relatedness"],
                 "tweet_count": default["tweet_count"],
                 "tweet_percentage": default["tweet_percentage"],
                 "wiki_relatedness_nor": default["wiki_relatedness_nor"],
                 "tweet_count_nor": default["tweet_count_nor"],
                 "date": default["date"],
                 "authors": default["authors"],
                 "comment": request['comment'],
                 "price_effect": request['effect']
             })
             default['is_controlled'] = True
             default['is_incorrect'] = False
             self.record_one_field(self.defaultCollection, default)
             # price_effect effect
             res = {'isError': False, 'Message': "Success"}
         except Exception as exception:
             res = {
                 'isError': True,
                 'Message': "Insert Error. Please inform the Admin"
             }
             LoggerHelper.error(type(exception).__name__)
             LoggerHelper.error("Ex: " + str(exception))
             LoggerHelper.error(traceback.format_exc())
         res = JSONEncoder().encode(res)
         return web.json_response(res)
Example #19
 def save_model(self):
     save_file_name = self.get_save_file_name()
     FileHelper.create_path_if_not_exists(save_file_name)
     self.model.save_pretrained(save_file_name)  # save
     self.reader.tokenizer.save_pretrained(save_file_name)  # save
     LoggerHelper.info("Model Saved to disk")