def __init__(self, window=50, datasource='local', preprocesses=['None'],
             datadir=None, reward='ROI', use_market_profile=False):
    """Load the price data, run the requested preprocessing and set the spaces.

    Args:
        window: Stored as ``self.window``; also the first dimension of the
            default observation-space shape.
        datasource: ``'local'`` loads data through ``self.load_normal``.
            ``'robinhood'`` and ``'iex'`` are not implemented.
        preprocesses: Preprocessing step names, applied in order. Recognised
            names are ``'None'``, ``'MinMax'``, ``'renko'``,
            ``'log_transform'`` and ``'autoencode'``; any other name is
            ignored. NOTE: the default list object is shared between calls
            and stored as-is on ``self.preprocesses``, so it must not be
            mutated in place.
        datadir: Data directory handed to ``self.load_normal``; required when
            ``datasource`` is ``'local'``.
        reward: Stored as ``self.reward_meth``.
        use_market_profile: Stored as ``self.use_market_profile``.

    Raises:
        ValueError: If ``datasource`` is ``'local'`` and ``datadir`` is
            missing.
        NotImplementedError: If ``datasource`` is ``'robinhood'`` or
            ``'iex'``.
    """
    self.window = window
    self.reward_meth = reward
    # Formerly: self.action_space = spaces.Discrete(3)
    self.action_space = dict(type='int', shape=1, num_actions=3,
                             min_value=0, max_value=2)
    self.use_market_profile = use_market_profile
    self.preprocesses = preprocesses
    self.fig = None
    # FIXME: is this correct?
    self.commission = 0.1 / 100

    if datasource == 'local':
        # The old check was only `datadir != datadir`, which is True for NaN
        # but never for None, so a missing directory slipped through. Test
        # for None explicitly and keep the NaN rejection for compatibility.
        if datadir is None or datadir != datadir:
            raise ValueError('Error: please specify data directory.')
        self.data = self.load_normal(datadir=datadir)
    elif datasource in ('robinhood', 'iex'):
        # Must be raised, not returned: returning a value from __init__
        # makes object construction fail with an unrelated TypeError.
        raise NotImplementedError(
            "datasource '{}' is not implemented".format(datasource))

    for preprocess in preprocesses:
        if preprocess == 'None':
            continue
        if preprocess == 'MinMax':  # normalized 0 to 1, wouldn't recommend
            self.data = self.preprocess_MinMax()
        elif preprocess == 'renko':  # blocks
            self.data = self.preprocess_renko()
        elif preprocess == 'log_transform':  # log return values
            self.data = self.preprocess_log_transform()
        elif preprocess == 'autoencode':
            self.data = self.preprocess_autoencode()

    # Presumably a preprocessing step (or a class attribute) may already have
    # set observation_space -- TODO confirm. getattr keeps the fallback
    # working when the attribute does not exist at all.
    if getattr(self, 'observation_space', None) is None:
        self.observation_space = dict(
            type='float', shape=[self.window, self.data.shape[1]])
    # Formerly: self.observation_space = spaces.Box(
    #     low=0, high=10000, shape=(self.window, self.data.shape[1]))

    # data should be loaded and processed
    print_data_info(self.data)
log_device_placement = False # log placement of operations on devices # Data Preparation # ================================================== train, test = data.load_dataset(args.dataset, out=args.out, vocab_size=args.vocab_size) x_train = train.data.astype(np.float32) x_test = test.data.astype(np.float32) y_train = train.labels y_test = test.labels # Print information about the dataset utils.print_data_info(train, x_train, x_test, y_train, y_test) # To print for results.csv data_str = "{{format: '{}', vocab_size: {}}}".format(args.out, len(train.vocab)) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto(allow_soft_placement=allow_soft_placement, log_device_placement=log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): mlp = MLP(vocab_size=len(train.vocab), num_classes=len(train.class_names),
# data split MAX_LEN = 25 # NOTE: we filter out a lot of sentences for speed train_data, valid_data, test_data = datasets.IWSLT.splits( exts=('.en', '.de'), fields=(SRC, TRG), filter_pred=lambda x: len(vars(x)['src']) <= MAX_LEN and len( vars(x)['trg']) <= MAX_LEN) MIN_FREQ = 5 # NOTE: we limit the vocabulary to frequent words for speed SRC.build_vocab(train_data.src, min_freq=MIN_FREQ) TRG.build_vocab(train_data.trg, min_freq=MIN_FREQ) PAD_INDEX = TRG.vocab.stoi[PAD_TOKEN] # print data info print_data_info(train_data, valid_data, test_data, SRC, TRG) # define iterator train_iter = data.BucketIterator(train_data, batch_size=params['batch_size'], train=True, sort_within_batch=True, sort_key=lambda x: (len(x.src), len(x.trg)), repeat=False, device=DEVICE) valid_iter = data.Iterator(valid_data, batch_size=1, train=False, sort=False, repeat=False,
# Build the batch iterators: a bucketed, length-sorted iterator for training
# and single-example, unsorted iterators for validation and test.
train_iter = data.BucketIterator(
    train_data,
    batch_size=params['BATCH_SIZE'],
    device=DEVICE,
    sort_within_batch=True,
    sort_key=lambda example: len(example.text),
    train=True,
    repeat=False,
)
# Alternative kept from the original (one example per batch, no sorting):
# train_iter = data.Iterator(train_data, batch_size=1, train=False,
#                            sort=False, repeat=False, device=DEVICE)

# Validation and test use identical iterator settings, so create both from
# the same call; the generator is consumed in order (valid first, then test).
valid_iter, test_iter = (
    data.Iterator(split, batch_size=1, train=False, sort=False,
                  repeat=False, device=DEVICE)
    for split in (valid_data, test_data)
)

print_data_info(train_data, valid_data, test_data, SRC, LABEL)

#############################
# Run LRP over the test iterator using the saved model file.
run_lrp(test_iter, vocab=SRC.vocab, model_file='sa_model4.pt')
def __init__(self, data_info, time_info):
    """Report the dataset, choose per-algorithm settings and reset state.

    Args:
        data_info: Passed to ``extract`` together with ``time_info``.
        time_info: Passed to ``extract`` together with ``data_info``.

    The algorithm-specific attributes (budget threshold, training-size
    limits, the ``delta_*`` values, ``max_evaluation`` and
    ``param_choice_fixed``) are assigned only when ``params['algo']`` equals
    ``Algo.ORIGINAL``, ``Algo.FACEBOOK_LR`` or ``Algo.BASIC``; for any other
    value they are left unset.
    """
    # Print data information
    info_dict = extract(data_info, time_info)
    print_data_info(info_dict)

    # # Install hyperopt and lightgbm
    # pip_install('hyperopt')
    # pip_install('lightgbm')

    algo = params['algo']
    print('Using algo: {}'.format(algo))

    # Only six numbers differ between the supported algorithms, in the order:
    # (max train rows, batch size, n_estimators step, num_leaves step,
    #  fixed n_estimators, fixed num_leaves).
    # FACEBOOK_LR and BASIC share the same values.
    if algo == Algo.ORIGINAL:
        sizing = (200000, 50000, 100, 20, 400, 50)
    elif algo == Algo.FACEBOOK_LR or algo == Algo.BASIC:
        sizing = (100000, 25000, 50, 10, 75, 15)
    else:
        sizing = None

    if sizing is not None:
        (max_rows, rows_per_batch, estimators_step, leaves_step,
         fixed_estimators, fixed_leaves) = sizing

        # Settings
        self._dataset_budget_threshold = 0.8
        self._max_train_data = max_rows
        self.batch_size = rows_per_batch
        self.delta_n_estimators = estimators_step
        self.delta_num_leaves = leaves_step
        self.delta_learning_rate = 0.005
        self.delta_max_depth = 1
        self.delta_feature_fraction = 0.1
        self.delta_bagging_fraction = 0.1
        self.delta_bagging_freq = 1
        self.max_evaluation = 30
        # A fresh dict per instance, with the same key order as before.
        self.param_choice_fixed = {
            'n_estimators': fixed_estimators,
            'learning_rate': 0.01,
            'num_leaves': fixed_leaves,
            'feature_fraction': 0.6,
            'bagging_fraction': 0.6,
            'bagging_freq': 2,
            'boosting_type': 'gbdt',
            'objective': 'binary',
            'metric': 'auc',
        }

    # Empty training buffers and bookkeeping.
    self._train_data = np.array([])
    self._train_labels = np.array([])
    self._transformed_train_data = np.array([])
    self.best_hyperparams = {}
    self._classifier = None
    self._classifier2 = None

    # Collaborator objects.
    self._data_processor = DataProcessor(info_dict)
    self._sampler = Sampler()
    self.mdl = StreamSaveRetrainPredictor()