def main():
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('read_path')
    arg_parser.add_argument('write_path')
    args = arg_parser.parse_args()
    with open(args.read_path, 'r') as source:
        text = source.read()
    lexer = Lexer(text)
    tokens = lexer.lex()
    parser = Parser(tokens)
    ast = parser.parse()
    symbolizer = Symbolizer(ast)
    symbolizer.symbolize()
    optimizer = Optimizer(ast)
    optimizer.optimize()
    grapher = Grapher(ast)
    grapher.graph()
    generator = Generator(ast)
    generator.generate(args.write_path)
    runner = Runner(ast)
    runner.run()
def main():
    try:
        config = Config.load()
        init_logging(config)
        runner = Runner(config)
        runner.run()
        return 0
    except KeyboardInterrupt:
        _logger.info("aborted.")
        return 0
    except MessageException as ex:
        _logger.error(ex)
        _logger.error("aborted!")
        return 1
    except Exception as ex:
        _logger.exception(ex)
        _logger.error("aborted!")
        # no runner.close() to signal abnormal termination!
        return 1
def test_run_proc(self, cfg_read):
    with mock.patch('src.runner.multiprocessing', autospec=True) as multi_process_mock:
        runner = Runner()
        procs = [runner.web_monitor_proc]
        runner.run_procs(procs, cfg_read)
        multi_process_mock.Process.assert_called_once_with(
            target=procs[0], args=(cfg_read, ))
        multi_process_mock.Process.return_value.join.assert_called_once()
def main(is_debug):
    """ Training Pipeline """
    with open("./config.yaml") as yf:
        config = yaml.safe_load(yf)

    # run single models
    for config_ in config["models"]:
        pprint.pprint(config_)
        runner = Runner(settings, AttrDict(config_))  # `settings` and `args` come from module scope
        runner.run(is_debug=is_debug, multi_gpu=args.multi_gpu)
def test_web_monitor_proc(self, asyncio_mock, web_monitor_app_mock, cfg_read):
    runner = Runner()
    runner.web_monitor_proc(cfg_read)
    asyncio_mock.get_event_loop.assert_called_once()
    loop_mock = asyncio_mock.get_event_loop.return_value
    loop_mock.stop.assert_called_once()
    loop_mock.run_until_complete.assert_called_once()
    web_monitor_app_mock.return_value.run.assert_called_once()
    web_monitor_app_mock.return_value.stop.assert_called_once()
def test_stats_consumer_proc(self, asyncio_mock, consumer_app_mock, db_mock, cfg_read):
    runner = Runner()
    runner.stats_consumer_proc(cfg_read)
    asyncio_mock.get_event_loop.assert_called_once()
    loop_mock = asyncio_mock.get_event_loop.return_value
    loop_mock.stop.assert_called_once()
    loop_mock.run_until_complete.assert_called_once()
    db_mock.return_value.clean_up.assert_called_once()
    consumer_app_mock.return_value.run.assert_called_once()
    consumer_app_mock.return_value.stop.assert_called_once()
def test_find_existing_file_or_new_mail_path_1(self):
    # test that file already exists!
    class DummyMail:
        def __init__(self):
            self.uid = 123
            self.date = datetime(2020, 9, 10, 18, 7, 6)
            self.subject = " Fwd: Re: Fwd: fwD: re: 123 "
            self.to = (
                "*****@*****.**",
                "*****@*****.**",
            )
            self.from_ = "*****@*****.**"
            self.raw_data = bytearray(b"\x00\x11\x0F")  # no mail data

    mail = DummyMail()
    test_path = os.path.join(os.path.dirname(__file__), "../__test__/find")
    os.makedirs(test_path, exist_ok=True)
    mail_path = os.path.join(test_path, "orig.no-eml")
    with open(mail_path, "wb") as file:
        file.write(mail.raw_data)
    result = Runner.find_existing_file_or_new_mail_path(mail, mail_path)
    self.assertEqual(result, None)  # file exists already
def main():
    server_socket = get_server_socket()
    while True:
        connection, address = server_socket.accept()
        connection.send('accepted'.encode())
        print('server start')
        Runner(ClientInfo(connection, address)).start()
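# A hypothetical client counterpart to the accept loop above, purely for
# illustration; the host, port, and what the Runner does with the connection
# are assumptions not shown in the original snippet.
import socket

def connect_client(host="localhost", port=8000):
    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    client.connect((host, port))
    # the server sends 'accepted' immediately after accepting the connection
    print(client.recv(1024).decode())
    return client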
def measurement__set_csv_writer(self, args):
    import os
    from src.bank import Bank
    from src.household import Household
    from src.firm import Firm
    from src.environment import Environment
    from src.transaction import Transaction
    from src.market import Market
    from src.runner import Runner
    from src.measurement import Measurement

    text = "This test checks measurement.set_csv_writer \n"
    self.print_info(text)

    #
    # INITIALIZATION
    #
    environment_directory = str(args[0])
    identifier = str(args[1])
    log_directory = str(args[2])

    # Configure logging parameters so we get output while the program runs
    logging.basicConfig(format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        filename=log_directory + identifier + ".log",
                        level=logging.INFO)
    logging.info('START logging for test measurement__set_csv_writer in run: %s',
                 environment_directory + identifier + ".xml")

    # Construct household filename
    environment = Environment(environment_directory, identifier)

    # Construct a runner
    runner = Runner(environment)

    # generate a bank
    bank = Bank()
    bank.identifier = "test_bank"
    environment.banks.append(bank)

    # generate a firm
    firm = Firm()
    firm.identifier = "test_firm"
    environment.firms.append(firm)

    # generate a household
    household = Household()
    household.identifier = "test_household"
    environment.households.append(household)

    #
    # TESTING
    #
    import csv
    file_new = open("__init__.py", "r")
    csv_writer = csv.writer(file_new, lineterminator='\n')
    measurement = Measurement(environment, runner)
    print("Measurement's csv_writer:")
    print(measurement.get_csv_writer())
    measurement.set_csv_writer(csv_writer)
    print("Measurement's csv_writer:")
    print(measurement.get_csv_writer())
def initialize_shock(self, shock_config):
    from src.runner import Runner
    runner = Runner(self)
    from src.shock import Shock
    shock = Shock(self, runner)
    shock.read_xml_config_file(shock_config)
    self.shocks.append(shock)
def main():
    args = parse_args()
    set_global_seeds(666)
    config = get_config(args.config)
    pprint(config)
    config['train_params']['name'] = f'{config["train_params"]["name"]}/fold{args.fold}'
    factory = Factory(config['train_params'])
    data_factory = DataFactory(config['data_params'], fold=args.fold)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    callbacks = create_callbacks(config['train_params']['name'], config['dumps'])
    trainer = Runner(stages=config['stages'],
                     factory=factory,
                     callbacks=callbacks,
                     device=device)
    trainer.fit(data_factory)
class TestRunner(unittest.TestCase):
    def setUp(self):
        self.logger = MockLogger()
        self.logger.record_results = mock.MagicMock()
        self.gdb_manager = MockGdbManager()
        self.gdb_manager.read_energy = mock.MagicMock()
        self.runner = Runner(self.logger, self.gdb_manager)

    def testLoggerExists(self):
        self.assertEqual(self.logger, self.runner.logger)

    def testGdbManagerExists(self):
        self.assertEqual(self.gdb_manager, self.runner.gdb_manager)

    def testRunBinaryCallsGdbManager(self):
        old_num_calls = len(self.gdb_manager.read_energy.call_args_list)
        self.runner.run_binary(None, None)
        self.assertEqual(1, len(self.gdb_manager.read_energy.call_args_list) - old_num_calls)
def init(variant, ckpt="lve", base="", prefix="", graph_file=None, device=None):
    # Initialize model
    # If graph file is specified in config, that will be used
    # If config specifies directory, we'll use `graph_file` for the filename
    # If `graph_file` is None, the (alphabetically) first file will be used
    run_type = "eval"
    exp_config = osp.join("../configs", prefix, f"{variant}.yaml")
    if base != "":
        exp_config = [osp.join("../configs", f"{base}.yaml"), exp_config]
    ckpt_path = f"{variant}.{ckpt}.pth"
    config, ckpt_path = prepare_config(exp_config, run_type, ckpt_path, [
        "USE_TENSORBOARD", False,
        "SYSTEM.NUM_GPUS", 1,
    ], suffix=prefix, graph_file=graph_file)

    if graph_file is None and osp.isdir(config.MODEL.GRAPH_FILE):
        config.defrost()
        graphs = sorted(f for f in os.listdir(config.MODEL.GRAPH_FILE) if f.endswith('.edgelist'))
        graph = graphs[0]  # ! Oh shoot. I messed this up.
        config.MODEL.GRAPH_FILE = osp.join(config.MODEL.GRAPH_FILE, graph)
        graph_id = graph[:5]
        add_suffix(config, graph_id)
        ckpt_dir, ckpt_fn = osp.split(ckpt_path)
        ckpt_path = osp.join(ckpt_dir, graph_id, ckpt_fn)  # Update relative path
        # Incorporate graph file into this loading. Currently, it will use the default one in the config.
        config.freeze()

    runner = Runner(config)
    runner.logger.clear_filehandlers()
    runner.load_device(device=device)
    return runner, ckpt_path
def runner__do_run(self, args):
    import os
    from src.bank import Bank
    from src.household import Household
    from src.firm import Firm
    from src.environment import Environment  # needed for the Directory
    from src.runner import Runner
    from src.measurement import Measurement

    text = "This test checks runner.do_run \n"
    self.print_info(text)

    #
    # INITIALIZATION
    #
    environment_directory = str(args[0])
    identifier = str(args[1])
    log_directory = str(args[2])

    # Configure logging parameters so we get output while the program runs
    logging.basicConfig(
        format="%(asctime)s %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        filename=log_directory + identifier + ".log",
        level=logging.INFO,
    )
    logging.info("START logging for test runner_do_run in run: %s",
                 environment_directory + identifier + ".xml")

    # Construct bank filename
    environment = Environment(environment_directory, identifier)

    # get the bank_directory from the environment
    bank_directory = environment.bank_directory
    # and loop over all banks in the directory for testing purposes (just one bank)
    listing = os.listdir(bank_directory)
    bank_filename = bank_directory + listing[0]

    # making an instance of the Runner class
    runner = Runner(environment)

    #
    # TESTING
    #
    runner.do_run(environment)
def setup(params, epsilon_action_modifier, parallel_size):
    env_creator = EnvCreator(params.env_name, parallel_size,
                             wrapper=params.env_wrapper, seed=12)
    agent = DQNAgent(env_creator, params.network_fn, epsilon_action_modifier,
                     params.gamma, params.learning_rate, params.target_net_sync,
                     params.use_double_q)
    runner = Runner(env_creator, agent)
    return agent, runner
def start():
    try:
        path = "var/accounts/" + os.getenv('ACCOUNT_FILE_NAME')
        if os.path.isfile(path):
            logger.info('Found file ' + path)
            with open(path) as json_file:
                data = json.load(json_file)
                if not data[0]['username'] or not data[0]['password']:
                    logger.error('Username and password are required')
                for account in data:
                    runner = Runner(account, os.getenv('API_URL'), logger)
                    try:
                        runner.start()
                    except:
                        runner.driver.quit()
        else:
            if not os.getenv('ACCOUNT_FILE_NAME'):
                logger.error('ACCOUNT_FILE_NAME environment variable not set')
            else:
                logger.error('Could not find file: ' + path)
    except Exception as error:
        just_the_string = traceback.format_exc()
        logger.error(just_the_string)
        logger.exception(error)
    return 'Finished'
def initialize_shock(self, shock_config):
    from src.runner import Runner
    runner = Runner(self)
    from src.shock import Shock
    shock = Shock(self, runner)
    shock.read_xml_config_file(shock_config)
    self.shocks.append(shock)
    shock.measure_intitial_shock(self)

    for k, v in shock.legend.iteritems():
        if shock.legend[k] != 0:
            self.shock_measure = (k, v)

    # df_shock = pd.DataFrame[]
    # you can use this code below to see if the function of reading the shock worked
    for key in shock.asset_returns:
        if shock.asset_returns[key] != 0.0:
            # print "0. ***ENV.PY*** When shock is initialised: The asset class", key, "is shocked by", shock.asset_returns[key] * 100, "%"
            pass
ax.set_ylabel('Accuracy', size=14)
ax.tick_params(labelsize=14)
plt.savefig(f'../model/tuning/{NAME}-NB.png', dpi=300)


if __name__ == '__main__':
    base_params = {'alpha': 1.0, 'fit_prior': True, 'class_prior': None}
    params_NB = dict(base_params)
    param_grid_ = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}
    features = ["bow", "n-gram", "tf-idf", "n-gram-tf-idf"]
    results = []
    NAME = ":".join(features)

    for name in features:
        x = Runner.load_x_train(name)
        y = Runner.load_y_train()
        model = ModelMultinomialNB(name, **dict(params_NB))
        search = GridSearchCV(model,
                              cv=6,
                              param_grid=param_grid_,
                              return_train_score=True,
                              verbose=10,
                              refit=True)
        search.fit(x, y)
        results.append((search, name))
        logger.info(
            f'{name} - bestscore : {search.best_score_} - result :{search.cv_results_["mean_test_score"]}'
        )

    res = pd.DataFrame.from_dict(
params = {'kernel': 'linear', 'gamma': 0.001}
params_SVC = dict(params)
param_grid_ = [{
    'n_components': [10, 30, 50, 100],
    'n_iter': [8, 16],
    'C': [1, 10, 100, 1000]
}, {
    'apply_svd': [False],
    'C': [1, 10, 100, 1000]
}]
feature = [["mfcc", "delta", "power"]]
results = []

x = Runner.load_x_train(feature)
y = Runner.load_y_train()
model = ModelSVC("SVC", **params_SVC)
search = GridSearchCV(model, cv=5, param_grid=param_grid_, return_train_score=True)
search.fit(x, y)
results.append((search, feature))
logger.info(
    f'{feature} - bestscore : {search.best_score_} - result :{search.best_params_}'
)

for search, name in results:
    logger.info(f'{name} - bestscore : {search.best_score_}')
# load neighbourhood data
with open('parameters/ineffective_lock_down/neighbourhood_data.json') as json_file:
    neighbourhood_data = json.load(json_file)

# load age data
age_distribution = pd.read_csv('age_dist.csv', sep=';', index_col=0)
age_distribution_per_ward = dict(age_distribution.transpose())

# Monte Carlo simulation
for seed in range(parameters['monte_carlo_runs']):
    # make new folder for seed, if it does not exist
    if not os.path.exists('measurement/inef_lockdown/seed{}'.format(seed)):
        os.makedirs('measurement/inef_lockdown/seed{}'.format(seed))

    # initialization
    environment = EnvironmentNetwork(seed, parameters, neighbourhood_data, age_distribution_per_ward)

    # running the simulation
    runner = Runner()
    runner.ineffective_lock_down(environment, seed)

    # save network
    if not parameters["high_performance"]:
        for idx, network in enumerate(environment.infection_states):
            for i, node in enumerate(network.nodes):
                network.nodes[i]['agent'] = network.nodes[i]['agent'].status
            idx_string = '{0:04}'.format(idx)
            nx.write_graphml_lxml(network, "measurement/inef_lockdown/seed{}/network_time{}.graphml".format(seed, idx_string))
environment_directory = str(args[1])
identifier = str(args[2])
log_directory = str(args[3])
measurement_directory = str(args[4])

# Configure logging parameters so we get output while the program runs
logging.basicConfig(format='%(asctime)s %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S',
                    filename=log_directory + identifier + ".log",
                    level=logging.INFO)
logging.info('START logging for run: %s',
             environment_directory + identifier + ".xml")

environment = Environment()
environment.initialize(environment_directory, identifier)
runner = Runner()
measurement = Measurement()

#
# UPDATE STEP
#
for i in range(environment.parameters.numSimulations):
    logging.info('  STARTED with run %s', str(i))
    environment.initialize(environment_directory, identifier)
    # check that environment file has been read correctly
    # environment.write_environment_file(identifier)
    runner.initialize(environment)
    measurement.initialize()  # clear the previous measurement

    # do the run
    runner.do_run(measurement, "info")
# -------------------------------------------------------------------------
#
# MAIN
#
# -------------------------------------------------------------------------
if __name__ == '__main__':

    from src.environment import Environment
    from src.runner import Runner

    args = ["configs/environment/", "environment_config", "log/"]

    #
    # INITIALIZATION
    #
    environment_directory = str(args[0])
    identifier = str(args[1])
    log_directory = str(args[2])

    environment = Environment(environment_directory, identifier)
    runner = Runner(environment)

    #
    # UPDATE STEP
    #
    # for i in range(int(environment.static_parameters['num_simulations'])):
    #     environment.initialize(environment_directory, identifier)
    #     runner.initialize(environment)
    #     # do the run
    #     runner.do_run(environment)
def setUp(self):
    self.logger = MockLogger()
    self.logger.record_results = mock.MagicMock()
    self.gdb_manager = MockGdbManager()
    self.gdb_manager.read_energy = mock.MagicMock()
    self.runner = Runner(self.logger, self.gdb_manager)
import os, sys
sys.path.append(os.path.abspath('..'))
from src.runner import Runner

'''
Run this script to train and pickle models for app use.
Pickled models get saved to the models directory.
The script also produces various visualizations of the data that can be found in the media directory.
'''

if __name__ == '__main__':
    runner = Runner()

    '''TF-IDF'''
    runner.run_naive_bayes_sentiment()
    runner.run_naive_bayes_topic()

    '''Doc2Vec'''
    runner.run_doc2vec_logreg()
    runner.run_doc2vec_naivebayes()

    '''EDA Plots of Data'''
    # runner.make_plots()
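# A minimal sketch (not from the original repo) of how the app side might load
# one of the pickled models; the filename "naive_bayes_sentiment.pkl" and the
# "models" directory layout are assumptions based on the docstring above.
import pickle

def load_pickled_model(path="models/naive_bayes_sentiment.pkl"):
    # Unpickle a previously trained model so the app can call predict() on it.
    with open(path, "rb") as f:
        return pickle.load(f)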
    'input_dropout': 0.05,
    'optimizer': {
        'lr': 0.0015,
        'type': 'adam'
    }
}

# tf-idf = {'batch_norm': 'no', 'batch_size': 192.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.0, 'hidden_layers': 2.0, 'hidden_units': 192.0, 'input_dropout': 0.05, 'optimizer': {'lr': 0.0018, 'type': 'adam'}}
# word2vec_mean = {'batch_norm': 'before_act', 'batch_size': 256.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 2.0, 'hidden_units': 128.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00037, 'type': 'adam'}}
# word2vec_max = {'batch_norm': 'no', 'batch_size': 32.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.3, 'hidden_layers': 3.0, 'hidden_units': 160.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00016, 'type': 'adam'}}
# word2vec_concat = {'batch_norm': 'before_act', 'batch_size': 32.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 2.0, 'hidden_units': 96.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00075, 'type': 'sgd'}}
# word2vec_hier = {'batch_norm': 'no', 'batch_size': 96.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0024, 'type': 'sgd'}}
# fasttext_mean = {'batch_norm': 'before_act', 'batch_size': 224.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.3, 'hidden_layers': 2.0, 'hidden_units': 192.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0032, 'type': 'sgd'}}
# fasttext_max = {'batch_norm': 'no', 'batch_size': 160.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 128.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.00016, 'type': 'adam'}}
# fasttext_concat = {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.6, 'hidden_layers': 2.0, 'hidden_units': 224.0, 'input_dropout': 0.15, 'optimizer': {'lr': 0.00048, 'type': 'adam'}}
# fasttext_hier = {'batch_norm': 'no', 'batch_size': 64.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.3, 'hidden_layers': 2.0, 'hidden_units': 128.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.00025, 'type': 'adam'}}
# doc2vec-dbow = {'batch_norm': 'no', 'batch_size': 96.0, 'hidden_activation': 'prelu', 'hidden_dropout': 0.25, 'hidden_layers': 4.0, 'hidden_units': 160.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0017, 'type': 'sgd'}}
# doc2vec-dmpv = {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 4.0, 'hidden_units': 224.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0040, 'type': 'sgd'}}
# doc2vec-concat = {'batch_norm': 'no', 'batch_size': 160.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.05, 'optimizer': {'lr': 0.0025, 'type': 'sgd'}}
# sdv = {'batch_norm': 'before_act', 'batch_size': 192.0, 'hidden_activation': 'relu', 'hidden_dropout': 0.25, 'hidden_layers': 3.0, 'hidden_units': 256.0, 'input_dropout': 0.2, 'optimizer': {'lr': 0.0029, 'type': 'sgd'}}

params.update(bow)
params_MLP = dict(params)

# Predict with the MLP
feature = "bow"
runner = Runner(run_name='MLP1',
                model_cls=ModelMLP,
                features=feature,
                params=params_MLP)
runner.run_train_cv()
import sys, os
sys.path.append('../')

import numpy as np
import pandas as pd

from src.runner import Runner
from src.model_NB import ModelMultinomialNB

if __name__ == '__main__':

    params = {
        'alpha': 1.0,
        'fit_prior': True,
        'class_prior': None
    }

    #### Best Parameters
    bow = {'alpha': 1.0}
    # tf-idf = {'alpha': 1.0}
    # n-gram = {'alpha': 1.0}
    # ngram-tf-idf = {'alpha': 0.1}

    params.update(bow)
    params_NB = dict(params)

    # Analysis with Naive Bayes
    feature = "bow"
    runner = Runner(run_name='NB1',
                    model_cls=ModelMultinomialNB,
                    features=feature,
                    params=params_NB)
    runner.run_train_cv()
def runner__init__(self, args):
    import os
    from src.bank import Bank
    from src.household import Household
    from src.firm import Firm
    from src.environment import Environment  # needed for the Directory
    from src.runner import Runner

    text = "This test checks runner.__init__ \n"
    self.print_info(text)

    #
    # INITIALIZATION
    #
    environment_directory = str(args[0])
    identifier = str(args[1])
    log_directory = str(args[2])

    # Configure logging parameters so we get output while the program runs
    logging.basicConfig(
        format="%(asctime)s %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        filename=log_directory + identifier + ".log",
        level=logging.INFO,
    )
    logging.info("START logging for test runner__init__ in run: %s",
                 environment_directory + identifier + ".xml")

    # Construct bank filename
    environment = Environment(environment_directory, identifier)

    # get the bank_directory from the environment
    bank_directory = environment.bank_directory
    # and loop over all banks in the directory for testing purposes (just one bank)
    listing = os.listdir(bank_directory)
    bank_filename = bank_directory + listing[0]

    # generate a household
    household = Household()
    household.identifier = "test_household"
    environment.households.append(household)

    # generate a firm
    firm = Firm()
    firm.identifier = "test_firm"
    environment.firms.append(firm)

    # generate the bank
    bank = Bank()
    environment.banks.append(bank)
    helper = Helper()
    helper.initialize_standard_bank(bank, environment)

    # making an instance of the Runner class
    runner = Runner(environment)

    #
    # TESTING
    #
    text = "Identifier: "
    text = text + runner.get_identifier()
    print(text)

    text = "Number of sweeps: "
    text = text + str(runner.num_sweeps)
    print(text)

    text = "Updater: "
    text = text + str(runner.updater)
    print(text)
#
# INITIALIZATION
#
environment_directory = str(args[1])
identifier = str(args[2])
log_directory = str(args[3])

# Configure logging parameters so we get output while the program runs
logging.basicConfig(format='%(asctime)s %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S',
                    filename=log_directory + identifier + ".log",
                    level=logging.INFO)
logging.info('START logging for run: %s',
             environment_directory + identifier + ".xml")

environment = Environment(environment_directory, identifier)
runner = Runner(environment)

#
# UPDATE STEP
#
for i in range(int(environment.num_simulations)):
    logging.info('  STARTED with run %s', str(i))
    environment.initialize(environment_directory, identifier)
    runner.initialize(environment)

    # do the run
    runner.do_run(environment)
    logging.info('  DONE')

#
# MEASUREMENT AND LOGGING
#
identifier = str(args[2])
log_directory = str(args[3])
measurement_directory = str(args[4])

# Configure logging parameters so we get output while the program runs
logging.basicConfig(
    format="%(asctime)s %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    filename=log_directory + identifier + ".log",
    level=logging.INFO,
)
logging.info("START logging for run: %s",
             environment_directory + identifier + ".xml")

environment = Environment(environment_directory, identifier)
# environment.initialize(environment_directory, identifier)
runner = Runner()
measurement = Measurement()

#
# UPDATE STEP
#
for i in range(environment.static_parameters["numSimulations"]):
    logging.info("  STARTED with run %s", str(i))
    environment.initialize(environment_directory, identifier)
    # check that environment file has been read correctly
    # environment.write_environment_file(identifier)
    runner.initialize(environment)
    measurement.initialize()  # clear the previous measurement

    # do the run
    runner.do_run(measurement, "info")
        'lr': hp.loguniform('sgd_lr', np.log(0.00001), np.log(0.01))
    }]),
    'batch_size': hp.quniform('batch_size', 32, 128, 32)
}

features = ["tf-idf", "n-gram", "n-gram-tf-idf"]
NAME = "-".join(features)
result = {}

# Number of parameter combinations to try for a single feature
max_evals = 100

# Search for the best parameters for each feature in the list `features`
for i, name in enumerate(features):
    train_x = Runner.load_x_train(name)
    train_y = Runner.load_y_train()

    skf = StratifiedKFold(n_splits=6, shuffle=True, random_state=71)
    tr_idx, va_idx = list(skf.split(train_x, train_y))[0]
    tr_x, va_x = train_x[tr_idx], train_x[va_idx]
    tr_y, va_y = train_y[tr_idx], train_y[va_idx]

    # Run the parameter search with hyperopt
    trials = Trials()
    history = []
    fmin(objective, param_space, algo=tpe.suggest, trials=trials, max_evals=max_evals)
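# A minimal sketch of what the `objective` callable passed to fmin above might
# look like; the real one is not shown in this snippet, so the ModelMLP wrapper,
# its train/predict signatures, and the accuracy metric are assumptions used
# purely for illustration.
from hyperopt import STATUS_OK
from sklearn.metrics import accuracy_score

def objective_sketch(space_params):
    # Fit on the fold split prepared above and score on the validation part.
    model = ModelMLP("hyperopt", **space_params)   # hypothetical model wrapper
    model.train(tr_x, tr_y, va_x, va_y)            # hypothetical train signature
    score = accuracy_score(va_y, model.predict(va_x))
    history.append((space_params, score))
    # hyperopt minimises the returned loss, so negate the accuracy
    return {'loss': -score, 'status': STATUS_OK}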
def main():
    # =========================================
    # === Settings
    # =========================================
    # Get logger
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config',
        default='model_lgb_hakubishin_20200317/configs/model_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config
    config = json.load(open(args.config))
    config.update({
        'args': {
            'config': args.config,
            'debug': args.debug
        }
    })
    config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (
        pathlib.Path(config['model_dir_name']) /
        pathlib.Path(config['dataset']['output_directory']) /
        model_no
    )
    if not model_output_dir.exists():
        model_output_dir.mkdir()
    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({'model_output_dir': str(model_output_dir)})

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')
    logger.info(f'targets: {config["target"]}')
    logger.info(f'features: {config["features"]}')

    # features
    x_train = FeatureLoader(
        data_type="training", debugging=args.debug
    ).load_features(config["features"])

    # targets
    y_train_set = FeatureLoader(
        data_type="training", debugging=args.debug
    ).load_features(config["target"])

    # folds
    folds_train = FeatureLoader(
        data_type="training", debugging=args.debug
    ).load_features(config["folds"])

    logger.debug(f'y_train_set: {y_train_set.shape}')
    logger.debug(f'x_train: {x_train.shape}')

    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')

    # Get target values
    y_train = y_train_set["Target_answered_correctly"].values

    # Get folds
    trn_idx = folds_train.query("Fold_val != 1").index
    val_idx = folds_train.query("Fold_val == 1").index
    folds_ids = [(trn_idx, val_idx)]
    logger.debug(f"n_trn={len(trn_idx)}, n_val={len(val_idx)}")
    logger.debug(f"trn_pos={y_train[trn_idx].sum()}, val_pos={y_train[val_idx].sum()}")

    # Train and predict
    model_cls = model_map[config['model']['name']]
    model_params = config['model']
    runner = Runner(
        model_cls, model_params, model_output_dir, f'{model_cls.__name__}', n_fold=1,
    )
    oof_preds, evals_result, importances = runner.train_cv(
        x_train, y_train, folds_ids)
    config.update(evals_result)

    # Save importances
    importances.mean(axis=1).reset_index().rename(
        columns={"index": "feature", 0: "value"}
    ).sort_values("value", ascending=False).to_csv(
        model_output_dir / "importances.csv", index=False
    )

    # Save oof-pred file
    oof_preds_file_name = "oof_pred"
    np.save(model_output_dir / oof_preds_file_name, oof_preds)
    logger.info(f'Save oof-pred file: {model_output_dir / oof_preds_file_name}')

    # Save files (override)
    logger.info('Save files')
    save_path = model_output_dir / 'output.json'
    json_dump(config, save_path)
    logger.info(f'Save model log: {save_path}')

    # =========================================
    # === Upload to GCS
    # =========================================
    if not args.debug:
        logger.info('Upload to GCS')
        bucket_dir_name = config["model_dir_name"] + "/" + model_no
        logger.info(f'bucket_dir_name: {bucket_dir_name}')
        files = list(model_output_dir.iterdir())
        upload_to_gcs(bucket_dir_name, files)
import sys, os
sys.path.append('../')

import numpy as np
import pandas as pd

from src.runner import Runner
from src.model_GaussNB import ModelGaussNB

if __name__ == '__main__':

    params = {'priors': None, 'var_smoothing': 1e-09}
    params_NB = dict(params)

    # Run with the specified feature set
    feature = "bow"
    runner = Runner(run_name='GNB1',
                    model_cls=ModelGaussNB,
                    features=feature,
                    params=params_NB)

    # Run a single fold only
    # runner.train_fold(0)

    # Run with cross-validation
    runner.run_train_cv()
def main():
    # =========================================
    # === Settings
    # =========================================
    # Get logger
    logger = get_logger(__name__)
    logger.info('Settings')

    # Get argument
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default='./configs/model_0.json')
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    logger.info(f'config: {args.config}')
    logger.info(f'debug: {args.debug}')

    # Get config
    config = json.load(open(args.config))
    config.update({'args': {'config': args.config, 'debug': args.debug}})
    if config["model"]["name"] == "lightgbm":
        config["model"]["model_params"]["nthread"] = cpu_count()

    # Create a directory for model output
    model_no = pathlib.Path(args.config).stem
    model_output_dir = (pathlib.Path(config['dataset']['output_directory']) / model_no)
    if not model_output_dir.exists():
        model_output_dir.mkdir()
    logger.info(f'model_output_dir: {str(model_output_dir)}')
    logger.debug(f'model_output_dir exists: {model_output_dir.exists()}')
    config.update({'model_output_dir': str(model_output_dir)})

    # =========================================
    # === Loading data
    # =========================================
    logger.info('Loading data')

    # Get train and test
    input_dir = pathlib.Path(config['dataset']['input_directory'])
    train = pd.read_csv(input_dir / 'train.csv')
    test = pd.read_csv(input_dir / 'test.csv')

    # Get target values
    target_column = config['data_type']['target']
    y_train = train[target_column].values

    # =========================================
    # === Loading features
    # =========================================
    logger.info('Loading features')

    # Get features
    x_train, x_test = load_features(config)
    feature_name = x_test.columns
    logger.debug(f'number of features: {len(feature_name)}')

    # =========================================
    # === Adversarial Validation
    # =========================================
    logger.info("adversarial validation")
    train_adv = x_train
    test_adv = x_test
    train_adv['target'] = 0
    test_adv['target'] = 1
    train_test_adv = pd.concat([train_adv, test_adv], axis=0, sort=False).reset_index(drop=True)

    target = train_test_adv['target'].values
    train_set, val_set = train_test_split(train_test_adv, test_size=0.33, random_state=71, shuffle=True)
    x_train_adv = train_set[feature_name]
    y_train_adv = train_set['target']
    x_val_adv = val_set[feature_name]
    y_val_adv = val_set['target']
    logger.debug(f'the number of train set: {len(x_train_adv)}')
    logger.debug(f'the number of valid set: {len(x_val_adv)}')

    train_lgb = lgb.Dataset(x_train_adv, label=y_train_adv)
    val_lgb = lgb.Dataset(x_val_adv, label=y_val_adv)
    lgb_model_params = config["adversarial_validation"]["lgb_model_params"]
    lgb_train_params = config["adversarial_validation"]["lgb_train_params"]
    clf = lgb.train(lgb_model_params,
                    train_lgb,
                    valid_sets=[train_lgb, val_lgb],
                    valid_names=['train', 'valid'],
                    **lgb_train_params)

    feature_imp = pd.DataFrame(sorted(
        zip(clf.feature_importance(importance_type='gain'), feature_name)),
        columns=['value', 'feature'])
    plt.figure(figsize=(20, 10))
    sns.barplot(x='value',
                y='feature',
                data=feature_imp.sort_values(by='value', ascending=False).head(20))
    plt.title('LightGBM Features')
    plt.tight_layout()
    plt.savefig(model_output_dir / "feature_importance_adv.png")

    config.update({
        'adversarial_validation_result': {
            'score': clf.best_score,
            'feature_importances': feature_imp.set_index("feature").sort_values(
                by="value", ascending=False).head(20).to_dict()["value"]
        }
    })

    # =========================================
    # === Train model and predict
    # =========================================
    logger.info('Train model and predict')

    # Get features
    x_train, x_test = load_features(config)
    feature_name = x_test.columns
    logger.debug(f'number of features: {len(feature_name)}')

    # Get folds
    folds_ids = Fold(
        n_splits=config['cv']['n_splits'],
        shuffle=config['cv']['shuffle'],
        random_state=config['cv']['random_state']).get_stratifiedkfold(x_train, y_train)

    # Train and predict
    model_name = config['model']['name']
    model_cls = model_map[model_name]
    params = config['model']
    runner = Runner(model_cls, params, model_output_dir, f'Train_{model_cls.__name__}')
    oof_preds, evals_result = runner.train_cv(x_train, y_train, folds_ids)
    config.update(evals_result)
    test_preds = runner.predict_cv(x_test)

    # =========================================
    # === Make submission file
    # =========================================
    sub = create_submission(test, test_preds, target_column)
    sub.to_csv(model_output_dir / 'submission.csv', index=False, header=True)

    # =========================================
    # === Save files
    # =========================================
    save_path = model_output_dir / 'output.json'
    json_dump(config, save_path)
    pd.DataFrame(oof_preds, columns=["target"]).to_csv(
        model_output_dir / 'oof.csv', index=False, header=True)
# load neighbourhood data
with open('parameters/lock_down/neighbourhood_data.json') as json_file:
    neighbourhood_data = json.load(json_file)

# load age data
age_distribution = pd.read_csv('age_dist.csv', sep=';', index_col=0)
age_distribution_per_ward = dict(age_distribution.transpose())

# Monte Carlo simulation
for seed in range(parameters['monte_carlo_runs']):
    # make new folder for seed, if it does not exist
    if not os.path.exists('measurement/lockdown/seed{}'.format(seed)):
        os.makedirs('measurement/lockdown/seed{}'.format(seed))

    # initialization
    environment = EnvironmentNetwork(seed, parameters, neighbourhood_data, age_distribution_per_ward)

    # running the simulation
    runner = Runner()
    runner.lock_down(environment, seed)

    # save network
    if not parameters["high_performance"]:
        for idx, network in enumerate(environment.infection_states):
            for i, node in enumerate(network.nodes):
                network.nodes[i]['agent'] = network.nodes[i]['agent'].status
            idx_string = '{0:04}'.format(idx)
            nx.write_graphml_lxml(network, "measurement/lockdown/seed{}/network_time{}.graphml".format(seed, idx_string))
# Dataset1: principal component analysis of the MFCC features
import os, sys
sys.path.append('../')

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

from src.runner import Runner

X = Runner.load_x_train(["mfcc"])
print(X.shape)

# Standardize the features
scaler = StandardScaler()
scaler.fit(X)
standard_X = scaler.transform(X)

# Reduce from 12 dimensions to 6
dim = 6
params = {
    'n_components': dim,
    'random_state': 71,
}

# Principal component analysis
clf = PCA(**params)
clf.fit(standard_X)
pca = clf.transform(standard_X)
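# A short follow-up sketch (not part of the original snippet): after fitting,
# sklearn's PCA exposes explained_variance_ratio_, which shows how much of the
# total variance the 6 retained components capture.
explained = clf.explained_variance_ratio_
print("explained variance per component:", explained)
print("cumulative explained variance:", np.cumsum(explained))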