def __init_model(self):
        """Load the training data, persist run parameters and build the model.

        Creates ``self.train_manager`` from the training JSON file, hands a
        parameter dict to ``self.save_params``, constructs ``self.model`` as a
        ``BiLSTMCRF`` and finally calls ``self.restore_model()``.
        """
        train_path = 'D://nlp work//gwx//task//NER//dataset//CLUE-NER2020//train.json'

        self.train_manager = DataGetter(train_path)
        # Author's note (translated): in data_getter's bio_convert, the label
        # and sentence lists are expanded to equal length and short sentences
        # are padded with [0] up to the longest sentence. It is unclear whether
        # that step is needed, but the original data_manager did it, and
        # data_getter is supposed to produce the same output as data_manager.

        # NOTE(review): "tag_map" stores `get_tag2idx` without calling it,
        # while the model below reads `tag_map` - confirm `get_tag2idx` is a
        # property and not a method.
        params = {
            "batch_size": 20,
            "vocab": self.train_manager.vocab,
            "tag_map": self.train_manager.get_tag2idx,
        }
        self.save_params(params)

        # NOTE(review): the dev set is loaded from the same train.json and the
        # resulting object is never used afterwards - confirm this is intended.
        # Author's note (translated): the iteration code is to be ported into
        # data_getter.
        dev_manager = DataGetter(train_path)

        # NOTE(review): vocab_size is fed from Config.max_len - verify.
        model_kwargs = dict(
            tag_map=self.train_manager.tag_map,
            batch_size=self.batch_size,
            vocab_size=Config.max_len,
            dropout=self.dropout,
            embedding_dim=self.embedding_size,
            hidden_dim=self.hidden_size,
        )
        self.model = BiLSTMCRF(**model_kwargs)
        self.restore_model()
 def test_get_standardized_and_normalized_df(self):
     # The standardized/normalized result must be a DataFrame; it is also
     # dumped to an Excel file for manual inspection.
     normalized = DataGetter().get_standardized_and_normalized_df()
     failure_msg = "There was an issue with standardization and normalization."
     self.assertIsInstance(normalized, pd.DataFrame, failure_msg)
     normalized.to_excel("../output/df_norm_std.xlsx")
Пример #3
0
    def _get_training_data_y(self):
        """Build one-hot targets for the training and validation windows.

        For each window the module-level ``config.DF_BASE_*`` dates are
        overwritten, the deltas are fetched, the ``TRAINING_DATA_TARGET``
        column is truncated to the number of rows in the matching feature
        array (``self.X`` / ``self.X_val``) and one-hot encoded with 2 classes.

        Returns:
            Tuple ``(y, y_val)``.

        Raises:
            Exception: if ``self.X`` or ``self.X_val`` is still ``None``.
        """
        getter = DataGetter()
        missing_features_msg = (
            'X needs to be defined before defining Y. Run _get_training_data_x before this method.'
        )

        # --- training window ---
        config.DF_BASE_START_DATE = config.TRAINING_DATE_START
        config.DF_BASE_END_DATE = config.TRAINING_DATE_END
        train_targets = getter.get_deltas()[config.TRAINING_DATA_TARGET].values
        if self.X is None:
            raise Exception(missing_features_msg)
        # Keep only as many targets as there are feature samples.
        y = self._one_hot_encode(train_targets[:self.X.shape[0]], 2)

        # --- validation window ---
        config.DF_BASE_START_DATE = config.VALIDATION_DATE_START
        config.DF_BASE_END_DATE = config.VALIDATION_DATE_END
        val_targets = getter.get_deltas()[config.TRAINING_DATA_TARGET].values
        if self.X_val is None:
            raise Exception(missing_features_msg)
        y_val = self._one_hot_encode(val_targets[:self.X_val.shape[0]], 2)

        return y, y_val
Пример #4
0
 def _get_testing_data_x(self):
     """Fetch the LSTM-shaped feature data for the testing date window.

     Overwrites ``config.DF_BASE_START_DATE`` / ``config.DF_BASE_END_DATE``
     with the testing dates, stores the result on ``self.X`` and returns it.
     """
     getter = DataGetter()
     config.DF_BASE_START_DATE = config.TESTING_DATE_START
     config.DF_BASE_END_DATE = config.TESTING_DATE_END
     lstm_input = getter.get_reshaped_data_for_lstm()
     self.X = lstm_input
     return lstm_input
 def test_get_first_reached_targets(self):
     # The first-reached-targets result must be a DataFrame; it is also
     # written to an Excel file for manual inspection.
     reached = DataGetter().get_first_reached_targets()
     failure_msg = "There was an issue calculating the targets reached."
     self.assertIsInstance(reached, pd.DataFrame, failure_msg)
     reached.to_excel("../output/df_reached_targets.xlsx")
 def test_get_deltas(self):
     # The deltas result must be a DataFrame; it is also written to an
     # Excel file for manual inspection.
     deltas = DataGetter().get_deltas()
     failure_msg = "There was an issue calculating the delta values."
     self.assertIsInstance(deltas, pd.DataFrame, failure_msg)
     deltas.to_excel("../output/df_deltas.xlsx")
 def test_get_single_currency_raw_data_with_base(self):
     # For every currency directory reported by config, the raw data joined
     # with the base must come back as a DataFrame.
     data_getter = DataGetter()
     for currency_dir in config.get_currency_dir_paths():
         df_base_raw = data_getter.get_single_currency_raw_data_with_base(
             currency_dir)
         # Report the directory being processed. The previous code passed the
         # builtin `dir` function here, so the message never named the
         # failing directory.
         self.assertIsInstance(
             df_base_raw, pd.DataFrame,
             "There was an issue creating df_base_raw from the files in dir - {dir}."
             .format(dir=currency_dir))
 def test_get_single_currency_raw_data_from_excel(self):
     # For every currency directory reported by config, the raw Excel data
     # must load into a DataFrame.
     getter = DataGetter()
     for path in config.get_currency_dir_paths():
         raw = getter.get_raw_data_from_excel_files_for_single_currency(path)
         failure_msg = (
             "There was an issue creating a dataframe from a file in - {}."
             .format(path))
         self.assertIsInstance(raw, pd.DataFrame, failure_msg)
 def test_tf_resampled_single_currency_raw_data_with_base(self):
     # For every currency directory reported by config, the resampled
     # raw-with-base data must come back as a DataFrame.
     getter = DataGetter()
     for path in config.get_currency_dir_paths():
         resampled = getter.get_tf_resampled_single_currency_raw_data_with_base(
             path)
         failure_msg = (
             "There was an issue creating df_resampled from dir - {dir}."
             .format(dir=path))
         self.assertIsInstance(resampled, pd.DataFrame, failure_msg)
 def test_get_df_with_indicators_single_currency(self):
     # For every currency directory reported by config, the frame with
     # indicators must come back as a DataFrame.
     getter = DataGetter()
     for path in config.get_currency_dir_paths():
         with_indicators = getter.get_df_with_indicators_single_currency(path)
         failure_msg = (
             "There was an issue creating the df_result with the data in this dir - {dir}"
             .format(dir=path))
         self.assertIsInstance(with_indicators, pd.DataFrame, failure_msg)
Пример #11
0
 def _get_testing_data_y(self):
     """Build one-hot targets matching the testing features in ``self.X``.

     Fetches the deltas, takes the ``config.TRAINING_DATA_TARGET`` column,
     truncates it to ``self.X.shape[0]`` rows and one-hot encodes it with
     2 classes.

     NOTE(review): this method does not set the ``config.DF_BASE_*`` dates
     itself - presumably it relies on ``_get_testing_data_x`` having set
     them; confirm.

     Raises:
         Exception: if ``self.X`` is still ``None``.
     """
     targets = DataGetter().get_deltas()[config.TRAINING_DATA_TARGET].values
     if self.X is None:
         raise Exception(
             'X needs to be defined before defining Y. Run _get_testing_data_x before this method.'
         )
     # Keep only as many targets as there are feature samples.
     sample_count = self.X.shape[0]
     return self._one_hot_encode(targets[:sample_count], 2)
Пример #12
0
    def _get_training_data_x(self):
        """Fetch LSTM-shaped features for the training and validation windows.

        For each window the module-level ``config.DF_BASE_*`` dates are
        overwritten before the data is requested. The results are stored on
        ``self.X`` and ``self.X_val``.

        Returns:
            Tuple ``(X_train, X_val)``.
        """
        getter = DataGetter()

        # --- training window ---
        config.DF_BASE_START_DATE = config.TRAINING_DATE_START
        config.DF_BASE_END_DATE = config.TRAINING_DATE_END
        train_features = getter.get_reshaped_data_for_lstm()
        self.X = train_features

        # --- validation window ---
        config.DF_BASE_START_DATE = config.VALIDATION_DATE_START
        config.DF_BASE_END_DATE = config.VALIDATION_DATE_END
        val_features = getter.get_reshaped_data_for_lstm()
        self.X_val = val_features

        return train_features, val_features
Пример #13
0
    def __init_model(self):
        """Load the training data, persist run parameters and build the model.

        Creates ``self.train_manager`` from the training JSON file, hands a
        parameter dict to ``self.save_params``, constructs ``self.model`` as a
        ``BiLSTMCRF`` and finally calls ``self.restore_model()``.
        """
        train_path = 'D://nlp work//gwx//task//NER//dataset//CLUE-NER2020//train.json'

        self.train_manager = DataGetter(train_path)

        # NOTE(review): "tag_map" stores `get_tag2idx` without calling it,
        # while the model below reads `tag_map` - confirm `get_tag2idx` is a
        # property and not a method.
        params = {
            "batch_size": 20,
            "vocab": self.train_manager.vocab,
            "tag_map": self.train_manager.get_tag2idx,
        }
        self.save_params(params)

        # NOTE(review): the dev set is loaded from the same train.json and the
        # resulting object is never used afterwards - confirm this is intended.
        dev_manager = DataGetter(train_path)

        # NOTE(review): vocab_size is fed from Config.max_len - verify.
        model_kwargs = dict(
            tag_map=self.train_manager.tag_map,
            batch_size=self.batch_size,
            vocab_size=Config.max_len,
            dropout=self.dropout,
            embedding_dim=self.embedding_size,
            hidden_dim=self.hidden_size,
        )
        self.model = BiLSTMCRF(**model_kwargs)
        self.restore_model()
Пример #14
0
 def runBacktestOffline(self, instrument, granularity, startep, endep):
     """Run a backtest between ``startep`` and ``endep`` for one instrument.

     Builds a getter chain (``OandaGetter`` wrapped by ``MSSQLGetter`` wrapped
     by ``DataGetter``), feeds it to a ``BackTestTicker`` and passes that
     ticker to ``self._run``.
     """
     oanda_source = OandaGetter(instrument, granularity)
     sql_source = MSSQLGetter(oanda_source)
     getter = DataGetter(sql_source)
     ticker = BackTestTicker(getter, startep, endep)
     self._run(ticker)
 def test_get_reshaped_data_for_lstm(self):
     # The reshaped LSTM data must be non-empty; it is also written to a
     # text file for manual inspection.
     lstm_data = DataGetter().get_reshaped_data_for_lstm()
     self.assertTrue(len(lstm_data))
     output_path = "../output/reshaped_data.txt"
     utils.write_3d_np_array_to_file(output_path, lstm_data)
 def test_get_targets_long_short(self):
     # The long/short targets result must be a DataFrame; it is also
     # written to an Excel file for manual inspection.
     targets = DataGetter().get_targets_long_short()
     failure_msg = "There was an issue calculating the targets."
     self.assertIsInstance(targets, pd.DataFrame, failure_msg)
     targets.to_excel("../output/df_with_targets.xlsx")
 def test_get_df_with_indicators_multicurrency(self):
     # The multi-currency frame with indicators must be a DataFrame.
     multi = DataGetter().get_df_with_indicators_multicurrency()
     failure_msg = "There was an issue creating the multicurrency df."
     self.assertIsInstance(multi, pd.DataFrame, failure_msg)
Пример #18
0
from data_getter import DataGetter

# Module-level data source, created once at import time; its results are read
# by DuichengWindow.__init__.
D = DataGetter()
# Indexable collection of names; DuichengWindow reads indices 0 through 4.
D_name = D.get_name()
# Indexable collection of numeric parameters; DuichengWindow reads indices 0 through 5.
D_number = D.get_number()


class DuichengWindow():
    """Window calculation driven by the module-level ``D_name`` / ``D_number``.

    NOTE(review): the physical meaning of the numeric fields (L, W, G, num1,
    num2, L_a) is not visible here - confirm against DataGetter.
    """

    def __init__(self):
        # Names used as labels in the printed output.
        names = D_name
        self.L1_name = names[0]
        self.L2_name = names[1]
        self.L3_name = names[2]
        self.L4_name = names[3]
        self.L5_name = names[4]

        # Raw numeric inputs.
        numbers = D_number
        self.L = numbers[0]
        self.W = numbers[1]
        self.G = numbers[2]
        self.num1 = numbers[3]
        self.num2 = numbers[4]
        self.L_a = numbers[5]

        # Derived sizes: G is subtracted twice from each outer dimension.
        double_gap = 2 * self.G
        inner_width = self.W - double_gap
        self.L_b = self.L - double_gap - self.L_a
        self.W_a1 = inner_width // self.num1
        self.W_a2 = inner_width // self.num2

    def SumWindow(self):
        """Print the two computed totals when ``num2`` equals 1.

        NOTE(review): only the ``num2 == 1`` case is handled in the visible
        code; other values print nothing.
        """
        if self.num2 == 1:
            # Perimeter of the L x W rectangle after removing G on every side.
            frame_total = 2 * ((self.L - self.G * 2) + (self.W - self.G * 2))
            print('窗框{}:{}'.format(self.L1_name, frame_total))
            # num1 times the perimeter of an L_a x W_a1 rectangle.
            sash_total = self.num1 * (2 * (self.L_a + self.W_a1))
            print('窗扇{}:{}'.format(self.L2_name, sash_total))
Пример #19
0
"""
This module builds and executes the mailer system.
"""

import os
import sys
import csv
import requests
from mailer import Mailer
from data_getter import DataGetter
from parameters import (MAIL_SENDER, DATABASE_LOCATION, TARGET_DATABASE,
                        TEMPLATE_FILE, ATTACHMENTS_FOLDER, MAIL_COLUMN_NAME)

# Shared module-level mailer, constructed at import time and used by
# generate_message.
mailer = Mailer()
# Data source constructed at import time from the configured DATABASE_LOCATION.
getter = DataGetter(DATABASE_LOCATION)


def get_mail_title():
    """Return the mail subject taken from the command line.

    All arguments after the script name are joined with single spaces; when
    there are none, the default title is returned.
    """
    extra_args = sys.argv[1:]
    if extra_args:
        return " ".join(extra_args)
    return 'Sample Diffusion'


def generate_message(template, subject, attachments, **kwargs):
    """Generates the adequate message depending on attachments."""
    text = template.format(**kwargs)
    if not attachments:
        return mailer.create_text_message(MAIL_SENDER,
                                          kwargs[MAIL_COLUMN_NAME], subject,
                                          text)
    return mailer.create_attachments_message(MAIL_SENDER,
 def test_get_df_base(self):
     # The base frame must come back as a DataFrame.
     base = DataGetter().get_df_base()
     failure_msg = "There was an issue creating df_base."
     self.assertIsInstance(base, pd.DataFrame, failure_msg)