def test_train_method(self):
    self.test_data_loader = DataLoader()
    self.test_japanese_wiki_data = 'test/test_data/jawiki_test.txt'
    test_word2index, test_index2word, test_window_data, \
        test_X_ik, test_weighting_dict = self.test_data_loader.load_data(
            file_name=self.test_japanese_wiki_data)

    self.test_prepare_train_data = PrepareTrainData()
    test_train_data = \
        self.test_prepare_train_data.prepare_train_data_method(
            window_data=test_window_data,
            word2index=test_word2index,
            weighting_dic=test_weighting_dict,
            X_ik=test_X_ik)

    self.model = Glove(vocab_size=len(test_word2index))
    self.trainer = Trainer(model=self.model)
    self.trainer.train_method(train_data=test_train_data)

    word_similarity = self.trainer.word_similarity(
        target=self.test_data_loader.vocab[0],
        vocab=self.test_data_loader.vocab,
        word2index=test_word2index,
        top_rank=2)

    # On this tiny test corpus, the nearest neighbours should come from
    # the sentence-marker tokens in the vocabulary.
    word_similarity_check = ['<', '>', 's']
    word_similarity_bool = False
    for word in word_similarity:
        if word[0] in word_similarity_check:
            word_similarity_bool = True

    assert word_similarity_bool is True
def run(param):
    """
    Train SidNet

    :param param: parameters
    """
    data_loader = DataLoader(random_seed=param.random_seed,
                             reduction_dimension=param.reduction_dimension,
                             reduction_iterations=param.reduction_iterations)

    # data = {train, test}, train = {X, y}, test = {X, y} according to heldout_ratio
    data = data_loader.load(data_path=param.data_path,
                            heldout_ratio=param.heldout_ratio)

    logger.info('Start training SidNet with the hyperparameters...')

    # training
    trainer = SidNetTrainer(param)
    trained_model = trainer.train_with_hyper_param(
        data=data,
        hyper_param=param.hyper_param,
        epochs=param.epochs)

    # save model
    logger.info('Save the trained model at {}...'.format(param.output_home))
    logger.info('- Path for the trained model: {}'.format(
        param.paths.model_output_path))
    logger.info('- Path for the hyperparameters used in the model: {}'.format(
        param.paths.param_output_path))

    torch.save(trained_model.state_dict(), param.paths.model_output_path)

    # replace the device field with a plain value before dumping the
    # parameters, since a torch.device is not JSON-serializable
    param.device = 0
    with open(param.paths.param_output_path, 'w') as out_file:
        json.dump(param, out_file)
def main(args):
    # Load data.
    print("Loading data from {}".format(args.data_path))
    data_loader = DataLoader(path=args.data_path)
    n_users, n_items = data_loader.n_users, data_loader.n_items
    print("n_users: {}, n_items: {}".format(n_users, n_items))
    train_data = data_loader.load_train_data()
    test_data = data_loader.load_test_data()

    attack_eval_args = Bunch(args.attack_eval_args)

    # Load fake data (and combine with normal training data) if path provided.
    n_fakes = 0
    if attack_eval_args.fake_data_path:
        fake_data = load_fake_data(attack_eval_args.fake_data_path)
        train_data = stack_csrdata(train_data, fake_data)
        n_fakes = fake_data.shape[0]
        print("Statistics of fake data: "
              "n_fakes={}, avg_clicks={:.2f}".format(
                  n_fakes, fake_data.sum(1).mean()))

    # Evaluate victim model performance.
    for victim_args in attack_eval_args.victims:
        print(victim_args)
        victim_args = Bunch(victim_args)
        trainer_class = victim_args.model["trainer_class"]
        trainer = trainer_class(n_users=n_users + n_fakes,
                                n_items=n_items,
                                args=victim_args)
        trainer.fit(train_data, test_data)
def get_descs_and_labels(net: MLNet, sess: tf.Session, modal,
                         paths_with_labels, process_fn, batch_size):
    """
    This function computes description vectors for image and text samples.
    """
    if net.is_training:
        raise Exception("should not run this in training mode")
    if net.is_retrieving:
        raise Exception("should not run this in retrieving mode")

    descriptors = []
    labels = []
    loader = DataLoader(paths_with_labels, batch_size,
                        shuffle=False, process_fn=process_fn)

    # Full batches.
    for batch in range(loader.n_batches):
        batch_data, batch_labels = loader.get_batch_by_index(batch)
        batch_data = split_and_pack(batch_data)
        if modal == 1:
            feed_dict = {}
            for ph, data in zip(net.ph1, batch_data):
                feed_dict[ph] = data
            batch_descs = net.descriptors_1.eval(session=sess, feed_dict=feed_dict)
        elif modal == 2:
            feed_dict = {}
            for ph, data in zip(net.ph2, batch_data):
                feed_dict[ph] = data
            batch_descs = net.descriptors_2.eval(session=sess, feed_dict=feed_dict)
        else:
            raise Exception("modal should be either 1 or 2")
        descriptors.append(batch_descs)
        labels.append(batch_labels)

    # Remaining samples that do not fill a complete batch.
    if loader.n_remain > 0:
        batch_data, batch_labels = loader.get_remaining()
        batch_data = split_and_pack(batch_data)
        if modal == 1:
            feed_dict = {}
            for ph, data in zip(net.ph1, batch_data):
                feed_dict[ph] = data
            batch_descs = net.descriptors_1.eval(session=sess, feed_dict=feed_dict)
        elif modal == 2:
            feed_dict = {}
            for ph, data in zip(net.ph2, batch_data):
                feed_dict[ph] = data
            batch_descs = net.descriptors_2.eval(session=sess, feed_dict=feed_dict)
        else:
            raise Exception("modal should be either 1 or 2")
        descriptors.append(batch_descs[:loader.n_remain])
        labels.append(batch_labels[:loader.n_remain])

    descriptors = np.concatenate(descriptors, axis=0)
    labels = np.concatenate(labels, axis=0)
    return descriptors, labels
def load_data(output="label_bbox", batch_size=32, channels=1,
              tl_preprocess=False, model_type="resnet", seed=23):
    data = DataLoader('./data/cars_train',
                      './data/cars_test',
                      './data/devkit',
                      batch_size=batch_size)
    n_classes = len(data.df_test['label'].unique())
    labels = data.labels
    print(f'{n_classes} CLASSES, Random Chance: {1/n_classes}')

    gen = data.get_pipeline(type='test',
                            output=output,
                            channels=channels,
                            apply_aug=False,
                            tl_preprocess=tl_preprocess,
                            model_type=model_type,
                            seed=seed)
    steps = np.ceil(len(data.df_test) / data.batch_size)

    return labels, n_classes, gen, steps
def test_load_data(self):
    word2index = {'/': 0, '<': 1, '>': 2, 's': 3, '、': 4, '。': 5, 'が': 6,
                  'た': 7, 'で': 8, 'に': 9, 'の': 10, 'は': 11, 'を': 12}
    index2word = {0: '/', 1: '<', 2: '>', 3: 's', 4: '、', 5: '。', 6: 'が',
                  7: 'た', 8: 'で', 9: 'に', 10: 'の', 11: 'は', 12: 'を'}
    window_data = [('<', '/'), ('<', 's'), ('<', '>'), ('/', '<'), ('/', 's'),
                   ('/', '>'), ('s', '<'), ('s', '/'), ('s', '>'), ('>', '<'),
                   ('>', '/'), ('>', 's')]
    X_ik = {('/', '<'): 2, ('<', '/'): 2, ('/', '>'): 2, ('>', '/'): 2,
            ('/', 's'): 2, ('s', '/'): 2, ('<', '>'): 2, ('>', '<'): 2,
            ('<', 's'): 2, ('s', '<'): 2, ('>', 's'): 2, ('s', '>'): 2}

    self.test_data_loader = DataLoader()
    self.test_japanese_wiki_data = 'test/test_data/jawiki_test.txt'
    test_word2index, test_index2word, test_window_data, \
        test_X_ik, test_weighting_dict = self.test_data_loader.load_data(
            file_name=self.test_japanese_wiki_data)

    # Reference
    # https://stackoverflow.com/questions/11026959/writing-a-dict-to-txt-file-and-reading-it-back
    APP_PATH = os.path.dirname(__file__)
    with open(APP_PATH + '/test_data/test_weighting_dict.pkl', 'rb') as handle:
        weighting_dict = pickle.loads(handle.read())

    print(test_word2index)
    print(test_index2word)
    print(test_window_data)
    print(test_X_ik)

    assert word2index == test_word2index
    assert index2word == test_index2word
    assert window_data == test_window_data
    assert test_X_ik == X_ik
    assert test_weighting_dict == weighting_dict
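# Note: the exact weighting scheme used by this project's DataLoader is not
# shown here; the pickled weighting_dict is only compared for equality. As a
# point of reference, a minimal sketch of the standard GloVe weighting
# function f(X_ik) from Pennington et al. (2014), with the usual defaults
# x_max = 100 and alpha = 0.75 (the repository's constants may differ):
def glove_weighting(x_ik, x_max=100, alpha=0.75):
    """Down-weight rare co-occurrences; cap frequent ones at 1.0."""
    if x_ik < x_max:
        return (x_ik / x_max) ** alpha
    return 1.0

# Illustrative only: derive a weighting dict from co-occurrence counts.
example_X_ik = {('/', '<'): 2, ('<', '/'): 2}
example_weighting_dict = {pair: glove_weighting(count)
                          for pair, count in example_X_ik.items()}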
def test_compare_order_metrics(self):
    sim_root = self.config.sim_root + self.sim_st.date().isoformat() \
        + "/" + self.sim_st.time().isoformat() + "/"
    all_sims = DataLoader().load_sim_data(sim_root)
    all_sim_limit_orders = list(
        map(lambda sim: DataSplitter.get_limit_orders(sim[0].compute()), all_sims))
    all_sim_market_orders = list(
        map(lambda sim: DataSplitter.get_market_orders(sim[0].compute()), all_sims))
    all_sim_trades = list(map(lambda sim: sim[1].compute(), all_sims))
    all_sim_cancels = list(map(lambda sim: sim[2].compute(), all_sims))

    feed_df = DataLoader().load_feed(
        self.config.real_root, self.sim_st,
        self.sim_st + timedelta(seconds=self.config.simulation_window),
        self.config.product)

    real_orders = DataSplitter.get_orders(feed_df)
    real_limit_orders = DataSplitter.get_limit_orders(real_orders)
    real_market_orders = DataSplitter.get_market_orders(real_orders)

    real_trades = DataSplitter.get_trades(feed_df)
    real_trades['size'] = pd.to_numeric(real_trades['remaining_size'])

    real_cancels = DataSplitter.get_cancellations(feed_df)
    real_cancels['size'] = pd.to_numeric(real_cancels['remaining_size'])

    print("Order Buy/Sell limit metrics")
    Evaluation.compare_order_metrics(real_limit_orders, all_sim_limit_orders)
    print("Order Buy/Sell market metrics")
    Evaluation.compare_order_metrics(real_market_orders, all_sim_market_orders)
    print("Cancel metrics")
    Evaluation.compare_order_metrics(real_cancels, all_sim_cancels)
    print("Trade metrics")
    Evaluation.compare_metrics(real_trades, all_sim_trades)
def main(input_home='../output', dataset='BITCOIN_ALPHA', gpu_id=0):
    """
    Evaluate SidNet

    :param input_home: directory where a trained model is stored
    :param dataset: dataset name
    :param gpu_id: gpu id
    """
    device = torch.device(f"cuda:{gpu_id}" if (
        torch.cuda.is_available() and gpu_id >= 0) else "cpu")

    param_output_path = f'{input_home}/{dataset}/param.json'
    model_output_path = f'{input_home}/{dataset}/model.pt'

    with open(param_output_path, 'r') as in_file:
        param = DotMap(json.load(in_file))
        param.device = device

    if param.use_torch_random_seed:
        torch.manual_seed(param.torch_seed)

    data_loader = DataLoader(
        random_seed=param.random_seed,
        reduction_dimension=param.reduction_dimension,
        reduction_iterations=param.reduction_iterations)

    # data = {train, test}, train = {X, y}, test = {X, y} according to heldout_ratio
    data = data_loader.load(data_path=param.data_path,
                            heldout_ratio=param.heldout_ratio)

    trainer = SidNetTrainer(param)
    hyper_param = param.hyper_param
    converted_data = trainer.convert_data(data)

    model = SidNet(hid_dims=hyper_param.hid_dims,
                   in_dim=hyper_param.in_dim,
                   device=device,
                   num_nodes=converted_data.num_nodes,
                   num_layers=hyper_param.num_layers,
                   num_diff_layers=hyper_param.num_diff_layers,
                   c=hyper_param.c).to(device)

    model.load_state_dict(
        torch.load(model_output_path, map_location=device))

    loss = model(nApT=converted_data.train.nApT,
                 nAmT=converted_data.train.nAmT,
                 X=converted_data.H,
                 edges=converted_data.train.edges,
                 y=converted_data.train.y)

    model.eval()
    auc, f1_scores, _ = model.evaluate(
        test_edges=converted_data.test.edges,
        test_y=converted_data.test.y)

    logger.info('test auc: {:.4f}'.format(auc))
    logger.info('test f1_macro: {:.4f}'.format(f1_scores.macro))
def test_visualize(self):
    self.test_data_loader = DataLoader()
    self.test_japanese_wiki_data = '../data/raw/jawiki_only_word_random_choose.txt'
    test_word2index, test_index2word, test_window_data, \
        test_X_ik, test_weighting_dict = self.test_data_loader.load_data(
            file_name=self.test_japanese_wiki_data)

    model_name = '../models/glove_wiki/glove_model_40.pth'
    self.test_glove_visualize = GloveVisualize(model_name=model_name)
    self.test_glove_visualize.visualize(vocab=self.test_data_loader.vocab)
def test_visualize(self):
    self.test_data_loader = DataLoader()
    self.test_japanese_wiki_data = '../data/raw/source_replay_twitter_data_sort.txt'
    test_word2index, test_index2word, test_window_data, \
        test_X_ik, test_weighting_dict = self.test_data_loader.load_data(
            file_name=self.test_japanese_wiki_data)

    model_name = '../models/glove_model_40.pth'
    # register an <UNK> token at the end of the vocabulary index
    test_word2index.update({'<UNK>': len(test_word2index)})
    self.test_glove_visualize = GloveVisualize(model_name=model_name)
    self.test_glove_visualize.visualize(vocab=self.test_data_loader.vocab)
def __init__(self, paths_with_labels_1, paths_with_labels_2, batch_size,
             n_classes, shuffle, n_threads=8,
             process_fn_1=None, process_fn_2=None):
    random.seed(int(1e6 * (time.time() % 1)))
    self.paths_with_labels_1 = paths_with_labels_1
    self.paths_with_labels_2 = paths_with_labels_2

    # parameters
    self.n_classes = n_classes
    self.batch_size = batch_size
    self.n_threads = n_threads
    self.n_samples_1 = len(self.paths_with_labels_1)
    self.n_samples_2 = len(self.paths_with_labels_2)
    self.dtype_labels = 'int32'

    # generate data pairs
    indices_1, indices_2 = self.generate_pair_indices()
    if shuffle:
        indices = list(zip(indices_1, indices_2))
        random.shuffle(indices)
        indices_1, indices_2 = zip(*indices)
    loader_1_list = [self.paths_with_labels_1[i] for i in indices_1]
    loader_2_list = [self.paths_with_labels_2[i] for i in indices_2]

    # initialize loaders
    self.loader_1 = DataLoader(loader_1_list,
                               batch_size=self.batch_size,
                               n_threads=n_threads,
                               process_fn=process_fn_1)
    self.loader_2 = DataLoader(loader_2_list,
                               batch_size=self.batch_size,
                               n_threads=n_threads,
                               process_fn=process_fn_2)

    # state
    self.n_pairs = len(indices_1)
    self.n_batches = math.floor(self.n_pairs / self.batch_size)
    self.n_remain = self.n_pairs % batch_size
    self.i = 0

    # async_load
    self.async_load_pool = [None, None, None]
    self.async_load_thread = None
def __init__(self):
    self.opt = TrainOptions().parse()
    self.dataset = DataLoader(self.opt)
    print('# training images = %d' % len(self.dataset))

    self.model = DIFLModel(self.opt)
    self.visualizer = Visualizer(self.opt)
    self.total_steps = 0
class TestClassifier(TestCase):
    def test_classify(self):
        model_name = '../models/glove_wiki/glove_model_40.pth'
        output_file = 'test/test_data/glove_classify_model.pkl'
        compare_output_file = 'glove_classify_model.pkl'
        classifier = Classifier(model_name=model_name)
        classifier.classify()
        assert True is filecmp.cmp(output_file, compare_output_file)

    def test_classify_predict(self):
        self.test_data_loader = DataLoader()
        self.test_japanese_wiki_data = 'test/test_data/jawiki_test.txt'
        test_word2index, test_index2word, test_window_data, \
            test_X_ik, test_weighting_dict = self.test_data_loader.load_data(
                file_name=self.test_japanese_wiki_data)

        model_name = '../models/glove_wiki/glove_model_40.pth'
        output_file = 'test/test_data/glove_classify_model.pkl'
        classifier = Classifier(model_name=model_name)
        print(test_word2index)

        # a word in the vocabulary maps to a known class
        classes = classifier.classify_predict(word='の',
                                              classify_model_name=output_file,
                                              word2index=test_word2index)
        assert 2 == classes

        # a word outside the vocabulary is expected to return 9999
        classes = classifier.classify_predict(word='どうよ?',
                                              classify_model_name=output_file,
                                              word2index=test_word2index)
        assert 9999 == classes
def __init__(self):
    # Parse test options. Note that the test code only supports nThreads=1
    # and batchSize=1.
    self.opt = TestOptions().parse()
    self.opt.nThreads = 1
    self.opt.batchSize = 1

    self.dataset = DataLoader(self.opt)
    self.model = DIFLModel(self.opt)

    # Read groundtruth poses of the database from txt files for each slice.
    # Note that the poses have already been transformed to R,t, not the
    # original R,c of the CMU-Seasons dataset.
    self.split_file = os.path.join(
        self.opt.dataroot, 's' + str(self.opt.which_slice),
        'pose_new_s' + str(self.opt.which_slice) + '.txt')
    self.names = np.loadtxt(self.split_file, dtype=str, delimiter=' ',
                            skiprows=0, usecols=(0))
    with open(self.split_file, 'r') as pose_file:
        self.poses = pose_file.read().splitlines()

    if self.opt.test_using_cos:
        metric_mode = "cos"
    else:
        metric_mode = "l2"

    # Open the result txt file
    self.result_file = open(
        self.opt.results_dir + self.opt.name + "_" + str(self.opt.which_epoch)
        + '_s' + str(self.opt.which_slice) + "_" + metric_mode + ".txt", 'w')
def single_run(index):
    custom_reader = Reader('../demo/credit_data', 'train.pkl',
                           'train_target.pkl', 'test.pkl')
    custom_spliter = Spliter()
    data = DataLoader(custom_reader, custom_spliter)
    data.load()

    lgb_custom = LGB(config)
    base_model = Model(lgb_custom)
    evaler = Evaler()

    print("[KFold Time] Num: %d" % (index + 1))
    kfoldEnsemble = KFoldEnsemble(base_model=base_model,
                                  evaler=evaler,
                                  nfold=5,
                                  seed=index,
                                  nni_log=False)
    kfoldEnsemble.fit(data)
    return kfoldEnsemble
def main():
    params = {}
    data_loader = DataLoader(data_dir='../data/international-airline-passengers.csv')
    dataset = data_loader(batch_size=8, training=True)

    # model: seq2seq, tcn, transformer
    model = Model(use_model='seq2seq', use_loss='mse', use_optimizer='adam')
    # mode can be either 'eager' or 'fit'
    model.train(dataset, n_epochs=10, mode='eager')
def main():
    data_loader = DataLoader()
    dataset = data_loader(data_dir=params['data_dir'], batch_size=8,
                          training=True, sample=0.8)
    valid_dataset = data_loader(data_dir=params['data_dir'], batch_size=8,
                                training=True, sample=0.2)

    # model: seq2seq, tcn, transformer
    model = Model(params=params, use_model=params['use_model'],
                  use_loss='mse', use_optimizer='adam')
    # mode can be either 'eager' or 'fit'
    model.train(dataset, n_epochs=10, mode='eager', export_model=True)
    model.eval(valid_dataset)
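# The module-level `params` dict these time-series entry points rely on is
# not shown in the snippets above. A hypothetical example of its shape,
# inferred from how it is indexed (keys and values are illustrative only):
params = {
    'data_dir': '../data/international-airline-passengers.csv',
    'use_model': 'seq2seq',  # or 'tcn', 'transformer'
}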
def main(configs: Configs = None, data_loader: DataLoader = None):
    """
    Main entry point of the data processor: loads the raw SAP files and
    writes tables to the database to be consumed by the forecast model.

    usage example:
        $ python spike-challenge/src/make_dataset.py
    """
    if configs is None:
        configs = Configs('default_config.yaml')
    if data_loader is None:
        data_loader = DataLoader()
    data_loader.load_data()
def setup(self, mocker: MockerFixture):
    data_provider = MockDataProvider()

    mock = mocker.patch.object(data_provider, 'get_heart_records')
    mock.return_value = {
        date(year=2020, month=1, day=1): {
            'resting_heart': 60
        }
    }

    mock = mocker.patch.object(data_provider, 'get_body_records')
    mock.return_value = {
        date(year=2020, month=1, day=1): {
            'weight': 75,
            'fat': 15.12,
            'bmi': 21.12
        },
        date(year=2020, month=1, day=2): {
            'weight': 74,
            'fat': 14.12,
            'bmi': 20.12
        }
    }

    mock = mocker.patch.object(data_provider, 'get_sleep_records')
    mock.return_value = {
        date(year=2020, month=1, day=1): {
            'sleep_duration': 480,
            'sleep_efficiency': 80
        }
    }

    mock = mocker.patch.object(data_provider, 'get_activity_records')
    mock.return_value = {
        date(year=2020, month=1, day=2): {
            'total_calories': 1600,
            'active_calories': 400,
            'sedentary_minutes': 600,
            'lightly_active_minutes': 10,
            'fairly_active_minutes': 10,
            'highly_active_minutes': 10
        }
    }

    self.data_loader = DataLoader(data_provider)

    yield self.data_loader

    if os.path.exists(self.TEST_CSV_FILE):
        os.remove(self.TEST_CSV_FILE)
def main():
    parser = argparse.ArgumentParser(description="Training glove model")
    parser.add_argument(
        "-c", "--train_data",
        metavar="train_data",
        # type=str, default='../data/raw/jawiki_only_word_random_choose.txt',
        type=str,
        default='../data/raw/source_replay_twitter_data_sort.txt',
        dest="train_data",
        help="set the training data")
    parser.add_argument(
        "-e", "--embedding_size",
        metavar="embedding_size",
        type=int,
        default=300,
        dest="embedding_size",
        help="set the embedding size")
    args = parser.parse_args()

    data_loader = DataLoader()
    japanese_wiki_data = args.train_data
    word2index, index2word, window_data, X_ik, weighting_dict = \
        data_loader.load_data(file_name=japanese_wiki_data)
    print(word2index)

    prepare_train_data = PrepareTrainData()
    train_data = \
        prepare_train_data.prepare_train_data_method(
            window_data=window_data,
            word2index=word2index,
            weighting_dic=weighting_dict,
            X_ik=X_ik)

    model = Glove(vocab_size=len(word2index),
                  projection_dim=args.embedding_size)
    trainer = Trainer(model=model)
    trainer.train_method(train_data=train_data)

    word_similarity = trainer.word_similarity(target=data_loader.vocab[0],
                                              vocab=data_loader.vocab,
                                              word2index=word2index,
                                              top_rank=2)
    print(word_similarity)
def main():
    config = get_args()

    if config.load_model is not None:
        model, features, target_feature = load_model(config)
        data_loader = DataLoader(config, split=False, pretrained=True)
        data_loader.setup(features, target_feature)
        evaluator = Evaluator(config)
        evaluator.evaluate_pretrianed(model, data_loader, target_feature)
        exit(0)

    if config.load_checkpoint:
        auc, acc, pred, classes, completed = load_checkpoint(config)

    data_loader = DataLoader(config, split=not config.active_features)
    evaluator = Evaluator(config)
    trainer = Trainer(config, data_loader, evaluator)

    if config.load_checkpoint:
        evaluator.set_checkpoint(auc, acc, pred, classes)
        trainer.set_completed(completed)

    trainer.train()

    if not config.active_features:
        print(f"AUC ({config.evaluation_mode}): {evaluator.get_auc()}")
        print(f"Accuracy ({config.evaluation_mode}): {evaluator.get_accuracy()}")
        evaluator.save(data_loader.getFeatures())

    display_runtime(config)
def test_prepare_train_data_method(self):
    self.test_data_loader = DataLoader()
    self.test_japanese_wiki_data = 'test/test_data/jawiki_test.txt'
    test_word2index, test_index2word, test_window_data, \
        test_X_ik, test_weighting_dict = self.test_data_loader.load_data(
            file_name=self.test_japanese_wiki_data)

    self.test_prepare_train_data = PrepareTrainData()
    test_train_data = \
        self.test_prepare_train_data.prepare_train_data_method(
            window_data=test_window_data,
            word2index=test_word2index,
            weighting_dic=test_weighting_dict,
            X_ik=test_X_ik)

    APP_PATH = os.path.dirname(__file__)
    output_file = APP_PATH + '/test_data/train_data.pkl'
    compare_output_file = APP_PATH + '/test_data/test_train_data.pkl'
    with open(output_file, 'wb') as handle:
        pickle.dump(test_train_data, handle)

    assert True is filecmp.cmp(output_file, compare_output_file)
def sample_mode():
    sampling_window_start_time = config.start_time - datetime.timedelta(
        seconds=config.sampling_window)
    sampling_window_end_time = config.start_time

    orders_df, trades_df, cancels_df = DataLoader.load_split_data(
        config.real_root,
        sampling_window_start_time,
        sampling_window_end_time,
        config.product)

    params = Sample.generate_sim_params(orders_df, trades_df, cancels_df,
                                        graph=True)
    print(params)
    Writer.json_to_file(params, config.params_output_root + "params.json")
def init_data(self):
    with h5py.File(self.params["data_file"], 'r') as f:
        images = np.array(f["train/images"])
        depth_maps = np.array(f["train/depths"])
        sparse_maps = np.array(f["train/sparse"])
    tr_loader = DataLoader(images, sparse_maps, depth_maps,
                           self.sess, self.params)
    return tr_loader
def __fetch_real_prices(self):
    df = DataLoader().load_feed(
        self.config.real_root,
        self.sim_st,
        self.sim_st + timedelta(seconds=self.config.simulation_window),
        self.config.product)

    trades_df = DataSplitter.get_trades(df)
    trades_df['time'] = DataUtils().get_times_in_seconds_after_start(trades_df['time'])
    trades_df['price'].iloc[0] = DataUtils().get_first_non_nan(trades_df['price'])
    return trades_df[['time', 'price']]
def test_plot_monte(self):
    root = "/Users/jamesprince/project-data/random-walk/"
    files = DataLoader.get_files_in_dir(root)
    # overlay every random-walk simulation on a single figure
    for file in files:
        sim = pd.read_csv(root + file)
        plt.plot(sim['time'], sim['price'])
    plt.show()
def get_all_data(st: datetime, config):
    # Get all data which we will use to reconstruct the order book
    all_ob_start_time = st - datetime.timedelta(seconds=config.orderbook_window)
    all_ob_end_time = st
    all_ob_data = DataLoader().load_split_data(config.real_root,
                                               all_ob_start_time,
                                               all_ob_end_time,
                                               config.product)

    # Assume orderbook_window > sampling_window, and therefore filter
    # the already loaded order book data
    all_sample_start_time = st - datetime.timedelta(seconds=config.sampling_window)
    all_sample_end_time = st
    all_sampling_data = map(
        lambda x: DataSplitter.get_between(x, all_sample_start_time,
                                           all_sample_end_time),
        all_ob_data)

    # Get future data
    all_future_data_start_time = st
    all_future_data_end_time = st + datetime.timedelta(seconds=config.sampling_window)
    all_future_data = DataLoader().load_split_data(config.real_root,
                                                   all_future_data_start_time,
                                                   all_future_data_end_time,
                                                   config.product)

    return all_ob_data, all_sampling_data, all_future_data
def main():
    data_loader = DataLoader(data_dir=params['data_dir'])
    dataset = data_loader(batch_size=8, training=True)

    # model: seq2seq, tcn, transformer
    model = Model(use_model=params['use_model'], params=params,
                  use_loss='mse', use_optimizer='adam')
    # mode can be either 'eager' or 'fit'
    model.train(dataset, n_epochs=10, mode='eager')
def orderbook_mode(st: datetime.datetime = None):
    closest_ob_state, closest_ob_state_str = OrderBookCreator.locate_closest_ob_state(
        config.orderbook_output_root, st)
    orders_df, trades_df, cancels_df = DataLoader.load_split_data(
        config.real_root, closest_ob_state, st, config.product)

    ob_state_path = config.root_path + closest_ob_state_str
    ob_state = OrderBookCreator.load_orderbook_state(ob_state_path)
    orderbook = OrderBookCreator.get_orderbook(orders_df, trades_df,
                                               cancels_df, ob_state)

    output_file = ("/Users/jamesprince/project-data/orderbook-"
                   + st.isoformat() + ".csv")
    OrderBookCreator.orderbook_to_file(orderbook, output_file)
    logger.info("Orderbook saved to: " + output_file)
def test_DELETE(self):
    df = pd.read_parquet("/Users/jamesprince/project-data/real/2018-03-25.parquet")
    df = DataLoader().format_dd(df)

    for hour in range(0, 3):
        start_time = datetime.datetime(year=2018, month=3, day=25,
                                       hour=hour, minute=0, second=0)
        end_time = datetime.datetime(year=2018, month=3, day=25,
                                     hour=hour + 1, minute=0, second=0)

        hour_df = df[start_time < df['time']]
        hour_df = hour_df[hour_df['time'] < end_time]

        hour_df.to_parquet("/Users/jamesprince/project-data/real/2018-03-25/"
                           + str(hour) + ".parquet")