def __init_model(self):
    """Initialize data managers and the BiLSTM-CRF model, then restore weights.

    Loads the CLUE-NER2020 training split, persists a few run parameters via
    save_params, and builds the model from hyperparameters held on self.
    """
    self.train_manager = DataGetter(
        'D://nlp work//gwx//task//NER//dataset//CLUE-NER2020//train.json')
    # Translation of the note below: in data_getter's bio_convert, labels and
    # sentences are expanded into equal-length lists, padding short sentences
    # with [0] up to the longest sentence length. Unclear whether this step is
    # useful, but the original data_manager did it too; data_getter and
    # data_manager should produce equivalent objects.
    '''
    在data_getter中bio_convert中lebel和setence增扩为元素相同长度相同的列表,对长度不足的句子补[0],对齐最长句句子长度,这步不知道有
    没有用,但是在原代码的data_manager中有这一步,data_getter和data_manager输出对象应该是相同的
    '''
    #self.total_size = len(self.train_manager.sentence)
    data = {
        "batch_size": 20,
        #"input_size": self.train_manager.input_size,
        "vocab": self.train_manager.vocab,
        # NOTE(review): get_tag2idx is stored without parentheses — if it is a
        # method (not a property) this saves the bound method, not the mapping.
        # Confirm against DataGetter.
        "tag_map": self.train_manager.get_tag2idx,
    }
    self.save_params(data)
    # NOTE(review): the "dev" manager loads train.json, the same file as the
    # training manager — looks like a copy-paste placeholder; verify the
    # intended dev split path. dev_manager is currently unused.
    dev_manager = DataGetter(
        'D://nlp work//gwx//task//NER//dataset//CLUE-NER2020//train.json')
    #self.dev_batch = dev_manager.iteration()
    # Translation of the note below: the iteration code was ported into
    # data_getter.
    '''
    将iteration代码移植到data_getter中
    '''
    self.model = BiLSTMCRF(
        tag_map=self.train_manager.tag_map,
        batch_size=self.batch_size,
        # vocab_size is set from Config.max_len — presumably a sequence-length
        # cap rather than a vocabulary size; TODO confirm.
        vocab_size=Config.max_len,
        dropout=self.dropout,
        embedding_dim=self.embedding_size,
        hidden_dim=self.hidden_size,
    )
    # Load previously saved weights, if any.
    self.restore_model()
def test_get_standardized_and_normalized_df(self):
    """Standardized/normalized data must come back as a DataFrame."""
    frame = DataGetter().get_standardized_and_normalized_df()
    self.assertIsInstance(
        frame, pd.DataFrame,
        "There was an issue with standardization and normalization.")
    # Dump for manual inspection alongside the other test outputs.
    frame.to_excel("../output/df_norm_std.xlsx")
def _get_training_data_y(self):
    """Build one-hot encoded targets for the training and validation windows.

    Returns:
        (y, y_val): one-hot encoded target arrays, each trimmed to the row
        count of the corresponding inputs (self.X / self.X_val).

    Raises:
        Exception: if _get_training_data_x has not been run first.
    """
    data_getter = DataGetter()
    # The two windows differ only in their date range and the X array used
    # for trimming — factored into one helper instead of duplicated code.
    y = self._build_target_window(
        data_getter, config.TRAINING_DATE_START, config.TRAINING_DATE_END,
        self.X)
    y_val = self._build_target_window(
        data_getter, config.VALIDATION_DATE_START, config.VALIDATION_DATE_END,
        self.X_val)
    return y, y_val

def _build_target_window(self, data_getter, start_date, end_date, x):
    """Fetch deltas for one date window and one-hot encode the target column,
    trimmed to x's first dimension."""
    # DataGetter reads its window from these module-level config fields.
    config.DF_BASE_START_DATE = start_date
    config.DF_BASE_END_DATE = end_date
    df_result = data_getter.get_deltas()
    y = df_result[config.TRAINING_DATA_TARGET].values
    if x is None:
        raise Exception(
            'X needs to be defined before defining Y. Run _get_training_data_x before this method.'
        )
    # Align target rows with the (possibly shorter) reshaped input.
    y = y[0:x.shape[0]]
    return self._one_hot_encode(y, 2)
def _get_testing_data_x(self):
    """Load LSTM-shaped inputs for the testing window and cache them on self.X."""
    # DataGetter reads its window from these module-level config fields.
    config.DF_BASE_START_DATE = config.TESTING_DATE_START
    config.DF_BASE_END_DATE = config.TESTING_DATE_END
    self.X = DataGetter().get_reshaped_data_for_lstm()
    return self.X
def test_get_first_reached_targets(self):
    """First-reached-targets computation must yield a DataFrame."""
    reached = DataGetter().get_first_reached_targets()
    self.assertIsInstance(
        reached, pd.DataFrame,
        "There was an issue calculating the targets reached.")
    # Dump for manual inspection alongside the other test outputs.
    reached.to_excel("../output/df_reached_targets.xlsx")
def test_get_deltas(self):
    """Delta calculation must yield a DataFrame."""
    deltas = DataGetter().get_deltas()
    self.assertIsInstance(
        deltas, pd.DataFrame,
        "There was an issue calculating the delta values.")
    # Dump for manual inspection alongside the other test outputs.
    deltas.to_excel("../output/df_deltas.xlsx")
def test_get_single_currency_raw_data_with_base(self):
    """Each currency directory must produce a DataFrame with base currency data."""
    data_getter = DataGetter()
    for currency_dir in config.get_currency_dir_paths():
        df_base_raw = data_getter.get_single_currency_raw_data_with_base(
            currency_dir)
        self.assertIsInstance(
            df_base_raw, pd.DataFrame,
            "There was an issue creating df_base_raw from the files in dir - {dir}."
            # BUG FIX: was .format(dir=dir), which interpolated the builtin
            # `dir` function into the failure message instead of the actual
            # directory under test (sibling tests use the directory variable).
            .format(dir=currency_dir))
def test_get_single_currency_raw_data_from_excel(self):
    """Raw Excel files of every currency directory must load into a DataFrame."""
    getter = DataGetter()
    for path in config.get_currency_dir_paths():
        raw = getter.get_raw_data_from_excel_files_for_single_currency(path)
        self.assertIsInstance(
            raw, pd.DataFrame,
            "There was an issue creating a dataframe from a file in - {}.".
            format(path))
def test_tf_resampled_single_currency_raw_data_with_base(self):
    """Timeframe resampling must yield a DataFrame for every currency dir."""
    getter = DataGetter()
    for path in config.get_currency_dir_paths():
        resampled = getter.get_tf_resampled_single_currency_raw_data_with_base(
            path)
        self.assertIsInstance(
            resampled, pd.DataFrame,
            "There was an issue creating df_resampled from dir - {dir}.".
            format(dir=path))
def test_get_df_with_indicators_single_currency(self):
    """Indicator enrichment must yield a DataFrame for every currency dir."""
    getter = DataGetter()
    for path in config.get_currency_dir_paths():
        enriched = getter.get_df_with_indicators_single_currency(path)
        self.assertIsInstance(
            enriched, pd.DataFrame,
            "There was an issue creating the df_result with the data in this dir - {dir}"
            .format(dir=path))
def _get_testing_data_y(self):
    """Build one-hot encoded targets for the testing window.

    NOTE(review): unlike the training variant, this does not set
    config.DF_BASE_START_DATE/END_DATE itself — presumably it relies on
    _get_testing_data_x having set the testing window first; confirm.

    Raises:
        Exception: if _get_testing_data_x has not been run first.
    """
    deltas = DataGetter().get_deltas()
    targets = deltas[config.TRAINING_DATA_TARGET].values
    if self.X is None:
        raise Exception(
            'X needs to be defined before defining Y. Run _get_testing_data_x before this method.'
        )
    # Align target rows with the (possibly shorter) reshaped input.
    targets = targets[0:self.X.shape[0]]
    return self._one_hot_encode(targets, 2)
def _get_training_data_x(self):
    """Load LSTM-shaped inputs for the training and validation windows.

    Caches them on self.X / self.X_val and returns both.
    """
    getter = DataGetter()
    # DataGetter reads its window from these module-level config fields.
    config.DF_BASE_START_DATE = config.TRAINING_DATE_START
    config.DF_BASE_END_DATE = config.TRAINING_DATE_END
    self.X = getter.get_reshaped_data_for_lstm()
    config.DF_BASE_START_DATE = config.VALIDATION_DATE_START
    config.DF_BASE_END_DATE = config.VALIDATION_DATE_END
    self.X_val = getter.get_reshaped_data_for_lstm()
    return self.X, self.X_val
def __init_model(self): self.train_manager = DataGetter( 'D://nlp work//gwx//task//NER//dataset//CLUE-NER2020//train.json') #self.total_size = len(self.train_manager.sentence) data = { "batch_size": 20, #"input_size": self.train_manager.input_size, "vocab": self.train_manager.vocab, "tag_map": self.train_manager.get_tag2idx, } self.save_params(data) dev_manager = DataGetter( 'D://nlp work//gwx//task//NER//dataset//CLUE-NER2020//train.json') #self.dev_batch = dev_manager.iteration() self.model = BiLSTMCRF( tag_map=self.train_manager.tag_map, batch_size=self.batch_size, vocab_size=Config.max_len, dropout=self.dropout, embedding_dim=self.embedding_size, hidden_dim=self.hidden_size, ) self.restore_model()
def runBacktestOffline(self, instrument, granularity, startep, endep):
    """Run a backtest over [startep, endep] using the layered offline data
    pipeline: Oanda source -> MSSQL cache -> DataGetter."""
    source = OandaGetter(instrument, granularity)
    getter = DataGetter(MSSQLGetter(source))
    self._run(BackTestTicker(getter, startep, endep))
def test_get_reshaped_data_for_lstm(self):
    """Reshaped LSTM input must be non-empty."""
    reshaped = DataGetter().get_reshaped_data_for_lstm()
    self.assertTrue(len(reshaped))
    # Dump for manual inspection alongside the other test outputs.
    utils.write_3d_np_array_to_file("../output/reshaped_data.txt", reshaped)
def test_get_targets_long_short(self):
    """Long/short target calculation must yield a DataFrame."""
    targets = DataGetter().get_targets_long_short()
    self.assertIsInstance(targets, pd.DataFrame,
                          "There was an issue calculating the targets.")
    # Dump for manual inspection alongside the other test outputs.
    targets.to_excel("../output/df_with_targets.xlsx")
def test_get_df_with_indicators_multicurrency(self):
    """Multicurrency indicator frame must be a DataFrame."""
    combined = DataGetter().get_df_with_indicators_multicurrency()
    self.assertIsInstance(
        combined, pd.DataFrame,
        "There was an issue creating the multicurrency df.")
from data_getter import DataGetter

# Module-level data pull: labels and dimensions are read once at import time
# and shared by the class below.
D = DataGetter()
D_name = D.get_name()
D_number = D.get_number()


class DuichengWindow():
    """Symmetric ("duicheng") window calculator.

    Derives frame/sash dimensions from the labels (D_name) and numbers
    (D_number) supplied by DataGetter and prints component perimeters.
    NOTE(review): the meaning of each index below is inferred from usage —
    confirm against DataGetter's output order.
    """

    def __init__(self):
        # Component labels — presumably frame/sash part names, in the order
        # DataGetter emits them.
        self.L1_name = D_name[0]
        self.L2_name = D_name[1]
        self.L3_name = D_name[2]
        self.L4_name = D_name[3]
        self.L5_name = D_name[4]
        # Overall dimensions: L looks like total width, W total height,
        # G a frame gap/border, num1/num2 sash counts, L_a one sash width.
        self.L = D_number[0]
        self.W = D_number[1]
        self.G = D_number[2]
        self.num1 = D_number[3]
        self.num2 = D_number[4]
        self.L_a = D_number[5]
        # Remaining width after the border gaps and the L_a sash.
        self.L_b = self.L - 2 * self.G - self.L_a
        # Per-sash heights for the two sash groups (integer division).
        self.W_a1 = (self.W - 2 * self.G) // self.num1
        self.W_a2 = (self.W - 2 * self.G) // self.num2

    def SumWindow(self):
        # Only the num2 == 1 layout is handled in this view; other layouts may
        # be handled by code outside this chunk — TODO confirm.
        if self.num2 == 1:
            # Outer frame perimeter, inset by the gap G on every side.
            L1 = 2 * ((self.L - self.G * 2) + (self.W - self.G * 2))
            print('窗框{}:{}'.format(self.L1_name, L1))
            # Total sash perimeter: num1 sashes of size L_a x W_a1.
            L2 = self.num1 * (2 * (self.L_a + self.W_a1))
            print('窗扇{}:{}'.format(self.L2_name, L2))
""" This module builds and executes the mailer system. """ import os import sys import csv import requests from mailer import Mailer from data_getter import DataGetter from parameters import (MAIL_SENDER, DATABASE_LOCATION, TARGET_DATABASE, TEMPLATE_FILE, ATTACHMENTS_FOLDER, MAIL_COLUMN_NAME) mailer = Mailer() getter = DataGetter(DATABASE_LOCATION) def get_mail_title(): """Gets the title for the mail.""" trailing = sys.argv[1:] return 'Sample Diffusion' if not trailing else " ".join(trailing) def generate_message(template, subject, attachments, **kwargs): """Generates the adequate message depending on attachments.""" text = template.format(**kwargs) if not attachments: return mailer.create_text_message(MAIL_SENDER, kwargs[MAIL_COLUMN_NAME], subject, text) return mailer.create_attachments_message(MAIL_SENDER,
def test_get_df_base(self):
    """The base frame must come back as a DataFrame."""
    base = DataGetter().get_df_base()
    self.assertIsInstance(base, pd.DataFrame,
                          "There was an issue creating df_base.")