def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv is parsed whole: its first element is consumed by the
    'invocation' positional, and the text before the first '.' in it
    becomes arg.me, which names the output directory.

    Side effects: prints argv, optionally drops into pdb (--trace), and
    asks dirutility to make sure the output directory exists.
    """
    print(argv)

    cli = argparse.ArgumentParser()
    cli.add_argument('invocation')
    cli.add_argument('training_data', choices=arg_type.training_data_choices)
    cli.add_argument('neighborhood', type=arg_type.neighborhood)
    cli.add_argument('model', choices=['en', 'gb', 'rf'])
    cli.add_argument('n_processes', type=int)
    cli.add_argument('--test', action='store_true')
    cli.add_argument('--trace', action='store_true')
    cli.add_argument('--year', type=arg_type.year)
    arg = cli.parse_args(argv)
    arg.me = arg.invocation.split('.')[0]

    if arg.trace:
        pdb.set_trace()

    # --test runs write into a sibling "<me>-test" directory
    out_name = (arg.me + '-test') if arg.test else arg.me
    path_out_dir = os.path.join(Path.Path().dir_working(), out_name)
    dirutility.assure_exists(path_out_dir)

    return Bunch.Bunch(
        arg=arg,
        path_out_log=os.path.join(path_out_dir, '0log.txt'),
        timer=Timer.Timer(),
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv is parsed whole; element 0 fills the 'invocation' positional and
    its stem (text before the first '.') becomes arg.me.  Output files go
    under <working>/<me>, or <working>/<me>-test when --test is given.

    Side effects: prints argv, seeds the `random` module with 123, and
    asks dirutility to make sure the output directory exists.
    """
    print(argv)

    cli = argparse.ArgumentParser()
    cli.add_argument('invocation')
    for flag in ('--test', '--trace'):
        cli.add_argument(flag, action='store_true')
    arg = cli.parse_args(argv)
    arg.me = arg.invocation.split('.')[0]

    if arg.trace:
        pdb.set_trace()

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()
    suffix = '-test' if arg.test else ''
    dir_out = os.path.join(dir_working, arg.me + suffix)
    dirutility.assure_exists(dir_out)

    def in_out_dir(file_name):
        # every output file lives directly inside dir_out
        return os.path.join(dir_out, file_name)

    return Bunch(
        arg=arg,
        path_in_samples=os.path.join(dir_working, 'samples2', 'train.csv'),
        path_out_report_by_price=in_out_dir('report-by-price.txt'),
        path_out_report_by_n_trades=in_out_dir('report-by-n-trades.txt'),
        path_out_log=in_out_dir('0log.txt'),
        random_seed=random_seed,
        timer=Timer(),
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv[0] is skipped; only the remaining elements are parsed.  arg.me is
    derived from the parser's program name (text before the first '.').
    The output directory is <working>/<me>; --test does not alter it.

    Side effects: prints argv, seeds the `random` module with 123, and
    asks dirutility to make sure the output directory exists.
    """
    print(argv)

    cli = argparse.ArgumentParser()
    for flag in ('--test', '--trace'):
        cli.add_argument(flag, action='store_true')
    arg = cli.parse_args(argv[1:])
    arg.me = cli.prog.split('.')[0]

    if arg.trace:
        pdb.set_trace()

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()
    dir_out = os.path.join(dir_working, arg.me)
    dirutility.assure_exists(dir_out)

    def in_out_dir(file_name):
        # every output file lives directly inside dir_out
        return os.path.join(dir_out, file_name)

    return Bunch(
        arg=arg,
        path_in_dir=os.path.join(dir_working, 'fit-predict-v2'),
        path_out_no_data=in_out_dir('no_data.pickle'),
        path_out_reduction=in_out_dir('reduction.pickle'),
        path_out_reduction_2007=in_out_dir('reduction_2007.pickle'),
        path_out_reduction_200701=in_out_dir('reduction_200701.pickle'),
        path_out_log=in_out_dir('0log.txt'),
        random_seed=random_seed,
        timer=Timer(),
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv is parsed whole; element 0 fills the 'invocation' positional and
    its stem (text before the first '.') becomes arg.me.

    The 'processes' positional is kept as the raw string in arg.processes
    and additionally converted to an int in arg.processes_int.

    Returns a Bunch with arg, path_out_log and timer.

    Raises:
        ValueError: if 'processes' cannot be converted to an int.  The
            explanatory message is printed and also carried by the
            exception.

    Side effects: prints argv, optionally drops into pdb (--trace), and
    asks dirutility to make sure the output directory exists.
    """
    print(argv)

    parser = argparse.ArgumentParser()
    parser.add_argument('invocation')
    parser.add_argument('model', choices=('en', 'gb', 'rf'))
    parser.add_argument('processes')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--trace', action='store_true')
    arg = parser.parse_args(argv)
    arg.me = arg.invocation.split('.')[0]

    if arg.trace:
        pdb.set_trace()

    try:
        arg.processes_int = int(arg.processes)
    except ValueError:
        # Catch only the conversion failure; a bare except would also trap
        # KeyboardInterrupt/SystemExit and hide unrelated errors.
        message = 'processes is not an int; was: %s' % arg.processes
        print(message)
        raise ValueError(message)

    dir_working = Path.Path().dir_working()
    # --test runs write into a sibling "<me>-test" directory
    path_out_dir = os.path.join(
        dir_working,
        arg.me + '-test' if arg.test else arg.me,
    )
    dirutility.assure_exists(path_out_dir)

    return Bunch.Bunch(
        arg=arg,
        path_out_log=os.path.join(path_out_dir, '0log.txt'),
        timer=Timer.Timer(),
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv[0] (the invocation name) is skipped.  arg.me is fixed to 'bds'.
    All output paths are built by string concatenation onto the value
    returned by dirutility.assure_exists for
    '../data/working/<me>/<ticker>/'.

    Side effects: prints argv, seeds the `random` module with 123, and
    calls dirutility.assure_exists on the output directory.
    """
    # single pre-formatted string: prints the same text as "print 'argv', argv"
    print('argv %s' % (argv,))

    cli = argparse.ArgumentParser()
    cli.add_argument('ticker')
    cli.add_argument('--test', action='store_true')
    cli.add_argument('--trace', action='store_true')
    arg = cli.parse_args(argv[1:])  # drop the invocation name
    arg.me = 'bds'

    if arg.trace:
        pdb.set_trace()

    random_seed = 123
    random.seed(random_seed)

    # everything this run writes goes under one per-ticker directory
    requested_dir = '../data/working/' + arg.me + '/' + arg.ticker + '/'
    path_out_dir = dirutility.assure_exists(requested_dir)

    input_dir = (
        '../data/input/7chord_team_folder/NYU/7chord_ticker_universe_nyu_poc/'
    )
    return Bunch.Bunch(
        arg=arg,
        path_in_dir=input_dir,
        path_in_glob='*.csv',
        path_out_dir=path_out_dir,
        path_out_log=path_out_dir + '0log.txt',
        # the two templates keep a %s placeholder for the caller to fill
        path_out_report_ticker_maturity_template=path_out_dir + '%s.txt',
        path_out_ticker_maturity_template_csv=path_out_dir + '%s.csv',
        path_out_report_counts=path_out_dir + '0counts.txt',
        path_out_report_na=path_out_dir + '0na.txt',
        random_seed=random_seed,
        test=arg.test,
        timer=Timer.Timer(),
    )
def make_control(argv):
    # Build and return the control Bunch for this run.
    #
    # argv is parsed whole; element 0 fills the 'invocation' positional and
    # its stem (text before the first '.') becomes arg.base_name.
    # usage() is called when argv does not have 1 or 2 elements.
    # Side effects: prints argv, asks dirutility to make sure the output
    # directory exists, and seeds the `random` module with 123456.
    print(argv)
    if not 1 <= len(argv) <= 2:
        usage('invalid number of arguments')

    cli = argparse.ArgumentParser()
    cli.add_argument('invocation')
    cli.add_argument('--test', action='store_true')
    arg = cli.parse_args(argv)
    arg.base_name = arg.invocation.split('.')[0]

    test_suffix = '-test' if arg.test else ''

    # paths are built by plain concatenation onto Path's directory strings
    path = Path()
    dir_out = dirutility.assure_exists(
        path._dir_working + arg.base_name + test_suffix) + '/'
    file_out = (
        path._dir_working + arg.base_name + '-derived' + test_suffix + '.csv'
    )
    path_in = path._dir_input + 'neighborhood-data/census.csv'

    random_seed = 123456
    random.seed(random_seed)

    return Bunch(
        arg=arg,
        debug=False,
        path_in=path_in,
        path_out=file_out,
        path_out_log=dir_out + '0log.txt',
        random_seed=random_seed,
        test=arg.test,
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv[0] is skipped; arg.me comes from the parser's program name (text
    before the first '.').  Output goes to
    <working>/<me>/<training_data>-<neighborhood>-<model>[-test].

    The --dry flag carries the original annotation "don't write output";
    nothing in this function acts on it.

    Side effects: prints argv, seeds the `random` module with 123, and
    asks dirutility to make sure the output directory exists.
    """
    print(argv)

    cli = argparse.ArgumentParser()
    cli.add_argument('training_data', choices=arg_type.training_data_choices)
    cli.add_argument('neighborhood', type=arg_type.neighborhood)
    cli.add_argument('model', choices=arg_type.model_choices)
    cli.add_argument('n_processes', type=arg_type.n_processes)
    for flag in ('--cache', '--test', '--testmapper', '--trace', '--dry'):
        cli.add_argument(flag, action='store_true')
    arg = cli.parse_args(argv[1:])
    arg.me = cli.prog.split('.')[0]

    if arg.trace:
        pdb.set_trace()

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()
    dir_final = '%s-%s-%s%s' % (
        arg.training_data,
        arg.neighborhood,
        arg.model,
        '-test' if arg.test else '',
    )
    dir_out = os.path.join(dir_working, arg.me, dir_final)
    dirutility.assure_exists(dir_out)

    samples_dir = os.path.join(dir_working, 'samples2')
    return Bunch(
        arg=arg,
        path_cache=os.path.join(dir_out, 'cache.pickle'),
        # trailing '' keeps a path separator on the end; TODO: remove v2
        path_in_dir_fit_predict=os.path.join(dir_working, 'fit-predict-v2', ''),
        path_in_query_samples_all=os.path.join(samples_dir, 'all.csv'),
        path_in_query_samples_train=os.path.join(samples_dir, 'train.csv'),
        path_out_dir=dir_out,
        path_out_log=os.path.join(dir_out, '0log.txt'),
        random_seed=random_seed,
        timer=Timer(),
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv is parsed whole; element 0 fills the 'invocation' positional and
    its stem (text before the first '.') becomes arg.me.

    Fitted models are read from the directory tagged with the month before
    transaction_month; predictions are written to the directory tagged
    with transaction_month itself.

    Side effects: prints argv, seeds the `random` module with 123, and
    asks dirutility to make sure the output directory exists.
    """
    print(argv)

    cli = argparse.ArgumentParser()
    cli.add_argument('invocation')
    cli.add_argument('samples', choices=['all', 'train'])
    cli.add_argument('model', choices=['en', 'gb', 'rf'])
    cli.add_argument('transaction_month')
    cli.add_argument('neighborhood')
    cli.add_argument('--test', action='store_true')
    cli.add_argument('--trace', action='store_true')
    arg = cli.parse_args(argv)
    arg.me = arg.invocation.split('.')[0]

    if arg.trace:
        pdb.set_trace()

    # derive arg.city: None means "all"; otherwise underscores become spaces
    if arg.neighborhood == 'all':
        arg.city = None
    else:
        arg.city = arg.neighborhood.replace('_', ' ')

    random_seed = 123
    random.seed(random_seed)

    def tagged_dir(month):
        # directory name shared by the fit inputs and the prediction outputs
        return '%s-%s-%s-%s' % (arg.samples, arg.model, month, arg.neighborhood)

    prior_month = Month(arg.transaction_month).decrement().as_str()
    in_dir = tagged_dir(prior_month)
    out_dir = tagged_dir(arg.transaction_month)

    dir_working = Path().dir_working()
    me_dir = arg.me + '-test' if arg.test else arg.me
    # trailing '' keeps a path separator on the end of the directory
    output_dir = os.path.join(dir_working, me_dir, out_dir, '')
    dirutility.assure_exists(output_dir)

    return Bunch(
        arg=arg,
        path_in_fitted=os.path.join(dir_working, 'fit', in_dir, ''),
        path_in_samples=os.path.join(dir_working, 'samples2', arg.samples + '.csv'),
        path_out_file=os.path.join(output_dir, 'predictions.pickle'),
        path_out_log=os.path.join(output_dir, '0log.txt'),
        random_seed=random_seed,
        timer=Timer(),
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv is parsed whole; element 0 fills the 'invocation' positional and
    its stem (text before the first '.') becomes arg.me.  Output goes to
    <working>/<me>[-test]/<data>-<model>-<last_month>-<neighborhood>/.

    The --dry flag carries the original annotation "don't write output";
    nothing in this function acts on it.

    Side effects: prints argv, seeds the `random` module with 123, and
    asks dirutility to make sure the output directory exists.
    """
    print(argv)

    cli = argparse.ArgumentParser()
    cli.add_argument('invocation')
    cli.add_argument('data', choices=['all', 'train'])
    cli.add_argument('model', choices=['en', 'gb', 'rf'])
    cli.add_argument('last_month')
    cli.add_argument('neighborhood')
    for flag in ('--test', '--trace', '--dry'):
        cli.add_argument(flag, action='store_true')
    arg = cli.parse_args(argv)
    arg.me = arg.invocation.split('.')[0]

    if arg.trace:
        pdb.set_trace()

    # constructing Month converts the string and validates it
    arg.last = Month(arg.last_month)

    # derive arg.city: None means "all"; otherwise underscores become spaces
    if arg.neighborhood == 'all':
        arg.city = None
    else:
        arg.city = arg.neighborhood.replace('_', ' ')

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()
    me_dir = arg.me + '-test' if arg.test else arg.me
    fit_dir = os.path.join(dir_working, me_dir)
    last_dir = '%s-%s-%s-%s' % (
        arg.data, arg.model, arg.last_month, arg.neighborhood)
    # trailing '' keeps a path separator on the end of the directory
    path_out_dir = os.path.join(fit_dir, last_dir, '')
    dirutility.assure_exists(path_out_dir)

    return Bunch(
        arg=arg,
        path_in_dir=os.path.join(dir_working, 'samples2', ''),
        path_out_dir=path_out_dir,
        path_out_log=os.path.join(path_out_dir, '0log.txt'),
        random_seed=random_seed,
        timer=Timer(),
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv is parsed whole; element 0 fills the 'invocation' positional.
    arg.me is that invocation's stem (text before the first '.') with
    '-v2' appended.  Output goes to
    <working>/<me>[-test]/<training_data>-<neighborhood>-<model>-<month>/.

    The --dry flag carries the original annotation "don't write output";
    nothing in this function acts on it.

    Side effects: prints argv, seeds the `random` module with 123, and
    asks dirutility to make sure the output directory exists.
    """
    print(argv)

    cli = argparse.ArgumentParser()
    cli.add_argument('invocation')
    cli.add_argument('training_data', choices=arg_type.training_data_choices)
    cli.add_argument('neighborhood', type=arg_type.neighborhood)
    cli.add_argument('model', choices=arg_type.model_choices)
    cli.add_argument('prediction_month', type=arg_type.month)
    for flag in ('--test', '--trace', '--dry'):
        cli.add_argument(flag, action='store_true')
    arg = cli.parse_args(argv)
    arg.me = arg.invocation.split('.')[0] + '-v2'

    if arg.trace:
        pdb.set_trace()

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path().dir_working()
    me_dir = arg.me + '-test' if arg.test else arg.me
    fit_dir = os.path.join(dir_working, me_dir)
    result_dir = '%s-%s-%s-%s' % (
        arg.training_data, arg.neighborhood, arg.model, arg.prediction_month)
    # trailing '' keeps a path separator on the end of the directory
    path_out_dir = os.path.join(fit_dir, result_dir, '')
    dirutility.assure_exists(path_out_dir)

    samples_dir = os.path.join(dir_working, 'samples2')

    def out_file(file_name):
        return os.path.join(path_out_dir, file_name)

    return Bunch(
        arg=arg,
        path_in_query_samples=os.path.join(samples_dir, 'all.csv'),
        path_in_training_samples=os.path.join(samples_dir, arg.training_data + '.csv'),
        path_out_actuals=out_file('actuals.pickle'),
        path_out_transaction_ids=out_file('transaction_ids.pickle'),
        path_out_predictions_attributes=out_file("predictions-attributes.pickle"),
        path_out_dir=path_out_dir,
        path_out_feature_names=out_file('feature_names.pickle'),
        path_out_log=out_file('0log.txt'),
        random_seed=random_seed,
        timer=Timer(),
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv[0] is skipped; arg.me comes from the parser's program name (text
    before the first '.').  Output file names are keyed by
    '<filename stem>-<lookback>-<typical_bid_offer>' so that runs with
    different arguments do not overwrite each other, log included.

    Side effects: prints argv, seeds the `random` module with 123, and
    asks dirutility to make sure the output directory exists.
    """
    print(argv)

    cli = argparse.ArgumentParser()
    cli.add_argument('filename', type=arg_type.filename_csv)
    cli.add_argument('lookback', type=arg_type.positive_int)
    cli.add_argument('typical_bid_offer', type=arg_type.positive_int)
    cli.add_argument('--test', action='store_true')
    cli.add_argument('--trace', action='store_true')
    arg = cli.parse_args(argv[1:])
    arg.me = cli.prog.split('.')[0]

    if arg.trace:
        pdb.set_trace()

    random_seed = 123
    random.seed(random_seed)

    dir_working = seven.path.working()
    # --test runs write into a sibling "<me>-test" directory
    dir_out = os.path.join(dir_working, arg.me + ('-test' if arg.test else ''))
    dirutility.assure_exists(dir_out)

    filename_stem = arg.filename.split('.')[0]
    args_str = '%s-%s-%s' % (filename_stem, arg.lookback, arg.typical_bid_offer)

    return Bunch(
        arg=arg,
        path_in_file=os.path.join(seven.path.midpredictor_data(), arg.filename),
        path_out_file_csv=os.path.join(dir_out, args_str + '.csv'),
        path_out_file_pickle=os.path.join(dir_out, args_str + '.pickle'),
        path_out_log=os.path.join(dir_out, '0log-' + args_str + '.txt'),
        random_seed=random_seed,
        timer=Timer(),
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    arg.me comes from the parser's program name (text before the first
    '.').  Output goes to <working>/<me>, or <working>/<me>-test with
    --test.

    NOTE(review): all of argv is parsed and the parser declares no
    positional argument, so argv must hold only option flags.  Confirm the
    caller does not pass the program name as argv[0].

    Side effects: prints argv, seeds both `random` and `np.random` with
    123, and asks dirutility to make sure the output directory exists.
    """
    print(argv)

    cli = argparse.ArgumentParser()
    for flag in ('--test', '--trace'):
        cli.add_argument(flag, action='store_true')
    arg = cli.parse_args(argv)
    arg.me = cli.prog.split('.')[0]

    if arg.trace:
        pdb.set_trace()

    random_seed = 123
    random.seed(random_seed)
    np.random.seed(random_seed)

    dir_working = Path().dir_working()
    suffix = '-test' if arg.test else ''
    dir_out = os.path.join(dir_working, arg.me + suffix)
    dirutility.assure_exists(dir_out)

    # the csv and txt outputs come in an "_all" and a "_selected" flavour
    stem = 'city_medianprice_ntrades'
    stem_all = stem + '_all'
    stem_selected = stem + '_selected'

    def in_out_dir(file_name):
        return os.path.join(dir_out, file_name)

    return Bunch(
        arg=arg,
        # bare file name: resolved against the current directory
        path_in_column_defs='column_defs.json',
        path_in_samples=os.path.join(dir_working, 'samples2', 'train.csv'),
        path_out_csv_all=in_out_dir(stem_all + '.csv'),
        path_out_csv_selected=in_out_dir(stem_selected + '.csv'),
        path_out_report_all=in_out_dir(stem_all + '.txt'),
        path_out_report_selected=in_out_dir(stem_selected + '.txt'),
        path_out_log=in_out_dir('0log.txt'),
        random_seed=random_seed,
        timer=Timer(),
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv is parsed whole; element 0 fills the 'invocation' positional and
    its stem (text before the first '.') becomes arg.me.  Output goes to
    <working>/<me>/, or <working>/<me>-test/ with --test.

    Side effects: prints argv, seeds the `random` module with 123, and
    asks dirutility to make sure the output directory exists.
    """
    # single pre-formatted string: prints the same text as "print 'argv', argv"
    print('argv %s' % (argv,))

    cli = argparse.ArgumentParser()
    cli.add_argument('invocation')
    cli.add_argument(
        '--test',
        action='store_true',
        help='if present, truncated input and enable test code',
    )
    cli.add_argument(
        '--trace',
        action='store_true',
        help='if present, call pdb.set_trace() early in run',
    )
    arg = cli.parse_args(argv)  # argv[0] is consumed as 'invocation'
    arg.me = arg.invocation.split('.')[0]

    if arg.trace:
        pdb.set_trace()

    random_seed = 123
    random.seed(random_seed)

    dir_working = Path.Path().dir_working()
    me_dir = arg.me + '-test' if arg.test else arg.me
    # trailing '' keeps a path separator on the end of the directory
    path_out_dir = os.path.join(dir_working, me_dir, '')
    dirutility.assure_exists(path_out_dir)

    def out_file(file_name):
        return os.path.join(path_out_dir, file_name)

    return Bunch.Bunch(
        arg=arg,
        path_in_test=os.path.join(dir_working, 'samples-test.csv'),
        path_in_train=os.path.join(dir_working, 'samples-train.csv'),
        path_out_log=out_file('0log.txt'),
        path_out_test=out_file('test.csv'),
        path_out_train=out_file('train.csv'),
        path_out_all=out_file('all.csv'),
        path_out_duplicates=out_file('duplicates.pickle'),
        path_out_uniques=out_file('uniques.pickle'),
        random_seed=random_seed,
        test=arg.test,
        timer=Timer.Timer(),
    )
def make_control(argv):
    """Parse the command line and return a Bunch of controls.

    argv[0] (the invocation name) is skipped.  arg.me, the ticker ('ms')
    and the maturity ('2012-04-01') are fixed in code.  All output paths
    are built by string concatenation onto the value returned by
    dirutility.assure_exists for '../data/working/<me>/<ticker>/'.

    Side effects: prints argv, seeds the `random` module with 123, and
    calls dirutility.assure_exists on the output directory.
    """
    # single pre-formatted string: prints the same text as "print 'argv', argv"
    print('argv %s' % (argv,))

    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--test',
        action='store_true',
        help='if present, truncated input and enable test code',
    )
    cli.add_argument(
        '--trace',
        action='store_true',
        help='if present, call pdb.set_trace() early in run',
    )
    arg = cli.parse_args(argv[1:])  # drop the invocation name
    arg.me = 'regression_test_seven_classify_dealer_trades'

    if arg.trace:
        pdb.set_trace()

    random_seed = 123
    random.seed(random_seed)

    # everything this run writes goes under one per-ticker directory
    ticker = 'ms'
    requested_dir = '../data/working/' + arg.me + '/' + ticker + '/'
    path_out_dir = dirutility.assure_exists(requested_dir)

    return Bunch.Bunch(
        arg=arg,
        ticker=ticker,
        maturity='2012-04-01',
        path_cache=path_out_dir + 'cache.pickle',
        path_in=seven.path('poc', ticker),
        path_out_log=path_out_dir + '0log.txt',
        path_out_nareport_original=path_out_dir + 'nareport-original.txt',
        path_out_nareport_transformed=path_out_dir + 'nareport-transformed.txt',
        path_out_report_classify=path_out_dir + 'report-classify.txt',
        path_out_report_remaining=path_out_dir + 'report-remaining.txt',
        random_seed=random_seed,
        test=arg.test,
        timer=Timer.Timer(),
    )
def make_control(argv):
    """Parse the command line and return the run's control Bunch.

    argv is parsed whole; element 0 fills the 'invocation' positional and
    its stem (text before the first '.') becomes arg.base_name.

    The 'fhl' positional is split on '-' into arg.features, arg.hsheps and
    arg.locality.  The locality must be 'global' or 'city', and --norwalk
    requires 'city'; both are checked with assert.

    Several returned paths are templates that still contain %-style
    placeholders for the caller to fill.  Paths are built by plain string
    concatenation onto the working directory string.

    Side effects: prints argv, seeds the `random` module with 123, and
    calls dirutility.assure_exists on <working><base_name> and on the
    per-fhl directory beneath it.
    """
    print(argv)

    cli = argparse.ArgumentParser()
    cli.add_argument('invocation')
    cli.add_argument('fhl', type=arg_type.features_hps_locality)
    for flag in (
        '--data',
        '--test',
        '--debug',
        '--subset',
        '--norwalk',
        '--all',
        '--trace',
        '--use-samples-train-analysis-test',
    ):
        cli.add_argument(flag, action='store_true')
    arg = cli.parse_args(argv)  # arg.__dict__ contains the bindings
    arg.base_name = arg.invocation.split('.')[0]

    # for now, we only know how to process global files
    # local files will probably have a different path in WORKING/valavm/
    # details to be determined
    arg.features, arg.hsheps, arg.locality = arg.fhl.split('-')
    assert arg.locality in ('global', 'city'), arg.fhl
    assert not arg.norwalk or arg.locality == 'city', argv

    if arg.trace:
        pdb.set_trace()

    random_seed = 123
    random.seed(random_seed)

    # assure output directories exist
    dir_working = Path().dir_working()
    dir_out_reduction = dirutility.assure_exists(dir_working + arg.base_name) + '/'
    dir_out = dirutility.assure_exists(dir_out_reduction + arg.fhl) + '/'

    validation_months = (
        '200612',
        '200701', '200702', '200703', '200704', '200705', '200706',
        '200707', '200708', '200709', '200710', '200711',
    )
    validation_months_long = (
        '200512',
        '200601', '200602', '200603', '200604', '200605', '200606',
        '200607', '200608', '200609', '200610', '200611', '200612',
        '200701', '200702', '200703', '200704', '200705', '200706',
        '200707', '200708', '200709', '200710', '200711', '200712',
        '200801', '200802', '200803', '200804', '200805', '200806',
        '200807', '200808', '200809', '200810', '200811', '200812',
        '200901', '200902',
    )

    # k values: 1..30 in steps of 1, then 40..200 in steps of 10
    k_values = list(range(1, 31)) + list(range(40, 201, 10))

    is_global = arg.locality == 'global'

    # --subset wins over --norwalk when choosing which data file to read
    if arg.subset:
        data_file = '0data-subset.pickle'
    elif arg.norwalk:
        data_file = '0data-norwalk.pickle'
    else:
        data_file = '0data.pickle'

    transactions_suffix = (
        '-test' if arg.use_samples_train_analysis_test else ''
    )
    log_suffix = '-data' if arg.data else ''

    # the non-global variants carry an extra %s placeholder
    a_name = 'a.pdf' if is_global else 'a-%s.pdf'
    h_stem = 'h-%03d-%6s' if is_global else 'h-%s-%03d-%6s'
    i_stem = 'i' if is_global else 'i-%s'

    return Bunch(
        all_k_values=k_values,
        arg=arg,
        column_definitions=ColumnDefinitions(),
        debug=arg.debug,
        errors=[],
        exceptions=[],
        path_in_valavm='%svalavm/%s/*.pickle' % (dir_working, arg.fhl),
        path_in_chart_01_reduction=dir_working + 'chart01/0data.pickle',
        path_in_data=dir_out + data_file,
        path_in_interesting_cities=dir_working + 'interesting_cities.txt',
        path_in_transactions=(
            dir_working
            + 'samples-train-analysis%s/transactions.csv' % transactions_suffix
        ),
        path_all_price_histories=dir_out + '0all_price_histories.pickle',
        path_out_a=dir_out + a_name,
        path_out_b=dir_out + 'b-%d.txt',
        path_out_cd=dir_out + '%s.txt',
        path_out_c_pdf=dir_out + 'c.pdf',
        path_out_b_pdf_subplots=dir_out + 'b.pdf',
        path_out_b_pdf=dir_out + 'b-%d.pdf',
        path_out_d=dir_out + 'd.txt',
        path_out_e_txt=dir_out + 'e-%04d-%6s.txt',
        path_out_e_pdf=dir_out + 'e-%04d.pdf',
        path_out_f=dir_out + 'f-%04d.txt',
        path_out_g=dir_out + 'g.txt',
        path_out_h_template=dir_out + h_stem + '.txt',
        path_out_i_template=dir_out + i_stem + '.txt',
        path_out_i_all_1_only_pdf=dir_out + 'i1-only.pdf',
        path_out_i_all_1_skip_pdf=dir_out + 'i1-skip.pdf',
        path_out_i_all_12_pdf=dir_out + 'i12-all.pdf',
        path_out_i_le_50_12_pdf=dir_out + 'i12-le50.pdf',
        path_out_data=dir_out + '0data.pickle',
        path_out_data_report=dir_out + '0data-report.txt',
        path_out_data_subset=dir_out + '0data-subset.pickle',
        path_out_data_norwalk=dir_out + '0data-norwalk.pickle',
        path_out_log=dir_out + '0log' + log_suffix + '.txt',
        random_seed=random_seed,
        sampling_rate=0.02,
        selected_cities=(
            'BEVERLY HILLS', 'CANYON COUNTRY',  # low number of transactions; high/low price
            'SHERMAN OAKS', 'POMONA',  # high number of transactions; high/low price
            'LOS ANGELES',
        ),
        test=arg.test,
        timer=Timer(),
        validation_months=validation_months,
        validation_months_long=validation_months_long,
    )
def create_dir(path1, path2):
    """Join path1 and path2, make sure that directory exists, and return it.

    The existence check is delegated to dirutility.assure_exists; the
    value returned here is the joined path, not that call's result.
    """
    joined = os.path.join(path1, path2)
    dirutility.assure_exists(joined)
    return joined