def main():
    """Entry point: launch the websocket connection manager in a child
    process, then run the Flask server in the current process."""
    ensure_config()
    wsserver_ip, wsserver_port = config["wsserver_addr"].split(':')
    # The queue bridges Flask request handlers and the websocket manager.
    queue = multiprocessing.Queue()
    proc = multiprocessing.Process(
        target=connectionManager.init,
        args=(queue, wsserver_ip, int(wsserver_port)))
    proc.start()
    create_logger()
    flask_init(queue)
    server_ip, server_port = config["server_addr"].split(':')
    start_flask(server_ip, int(server_port))
# Evaluate a community-conditioned language model on the test split and write
# per-example negative log-likelihoods to <model_dir>/nll.csv — one column per
# community when conditioning is enabled, otherwise a single 'nll' column.
# NOTE(review): this block arrived with its newlines collapsed; code is left
# byte-identical. The trailing `nlls_batch[j][comm] =` on the first physical
# line continues with `nll.item()` on the second.
def cli(model_family_dir, model_name, data_dir, batch_size, max_seq_len, file_limit, gpu_id): model_family_dir = Path(model_family_dir) model_dir = model_family_dir / model_name device = torch.device(f'cuda:{gpu_id}' if gpu_id is not None else 'cpu') log = util.create_logger('test', os.path.join(model_dir, 'testing.log'), True) log.info(f"Loading data from {data_dir}.") fields = data.load_fields(model_family_dir) fields['text'].include_lengths = True test_data = data.load_data(data_dir, fields, 'test', max_seq_len, file_limit) vocab_size = len(fields['text'].vocab.itos) comm_vocab_size = len(fields['community'].vocab.itos) comms = fields['community'].vocab.itos pad_idx = fields['text'].vocab.stoi['<pad>'] log.info(f"Loaded {len(test_data)} test examples.") model_args = json.load(open(model_dir / 'model_args.json')) lm = model.CommunityConditionedLM.build_model(**model_args).to(device) lm.load_state_dict(torch.load(model_dir / 'model.bin')) lm.to(device) lm.eval() log.debug(str(lm)) test_iterator = tt.data.BucketIterator(test_data, device=device, batch_size=batch_size, sort_key=lambda x: -len(x.text), shuffle=True, train=False) def batchify_comm(comm, batch_size): comm_idx = fields['community'].vocab.stoi[comm] return torch.tensor(comm_idx).repeat(batch_size).to(device) with torch.no_grad(), open(model_dir / 'nll.csv', 'w') as f: meta_fields = ['community', 'example_id', 'length'] data_fields = comms if lm.use_community else ['nll'] writer = csv.DictWriter(f, fieldnames=meta_fields + data_fields) writer.writeheader() for i, batch in enumerate(test_iterator): nlls_batch = [ dict(zip(meta_fields, meta_values)) for meta_values in zip( [comms[i] for i in batch.community.tolist()], batch.example_id.tolist(), batch.text[1].tolist()) ] for comm in comms: if lm.use_community: batch_comm = batchify_comm(comm, batch.batch_size) else: batch_comm = None nlls_comm = batch_nll(lm, batch, pad_idx, comm=batch_comm) for j, nll in enumerate(nlls_comm): nlls_batch[j][comm] = 
nll.item() writer.writerows(nlls_batch) log.info(f"Completed {i+1}/{len(test_iterator)}")
def __init__(self, sess, dataset, conf):
    """Set up NGCF: copy hyper-parameters from `conf`, cache dataset
    statistics, and build the normalized adjacency matrix."""
    super(NGCF, self).__init__(dataset, conf)
    self.logger = create_logger(conf)
    # Optimisation hyper-parameters.
    self.learning_rate = conf["learning_rate"]
    self.learner = conf["learner"]
    self.batch_size = conf["batch_size"]
    self.num_epochs = conf["epochs"]
    self.reg = conf["reg"]
    # Architecture settings.
    self.emb_dim = conf["embedding_size"]
    self.weight_size = conf["layer_size"]
    self.n_layers = len(self.weight_size)  # one propagation layer per entry
    self.adj_type = conf["adj_type"]
    self.alg_type = conf["alg_type"]
    self.node_dropout_flag = conf["node_dropout_flag"]
    self.node_dropout_ratio = conf["node_dropout_ratio"]
    self.mess_dropout_ratio = conf["mess_dropout_ratio"]
    # Initialisation settings and bookkeeping.
    self.data_name = conf["data.input.dataset"]
    self.embed_init_method = conf["embed_init_method"]
    self.weight_init_method = conf["weight_init_method"]
    self.stddev = conf["stddev"]
    self.verbose = conf["verbose"]
    # Dataset-derived state.
    self.dataset = dataset
    self.num_users = dataset.num_users
    self.num_items = dataset.num_items
    self.graph = dataset.train_matrix.toarray()
    self.norm_adj = self.get_adj_mat()
    self.pretrain_data = None
    self.sess = sess
    self.logger.info(conf)
# Tag negation cues in the input sequences with a rule-based spaCy Matcher,
# deduplicate the resulting sequences, and write both a JSONL file and an HTML
# heatmap visualisation next to the input file ("#cues.jsonl" / "#cues.html").
# NOTE(review): newlines were collapsed in this block; code left byte-identical.
# The `fout.close()` calls appear redundant given the `with` blocks — confirm
# their original indentation against the upstream source.
def main(args): logger = create_logger('logger') # it doesn't matter which models we load here because we only do white space or rule-based tokenization anyway nlp = spacy.load("en_core_web_sm") nlp.tokenizer = Tokenizer(nlp.vocab) matcher = Matcher(nlp.vocab) triggers = read_file(args.cue_list) setup_matcher(triggers, matcher) # load data data = load_data(args.input_file) tagged_sentences = [] for seqid, _ in enumerate(data): observed_sequences = set() out, idxs = process_doc(nlp, matcher, ' '.join(data[seqid]['seq']), split_sents=False) for sid, elm in enumerate(out[0]): if ' '.join(elm) not in observed_sequences: tagged_sentences.append({ 'uid': len(tagged_sentences), 'seq': elm, 'sid': '{}_{}'.format(seqid, sid) }) observed_sequences.add(' '.join(elm)) logger.info('Writing tagged sequences to {}'.format( '.'.join(args.input_file.split('.')[:-1]) + '#cues.jsonl')) with open('.'.join(args.input_file.split('.')[:-1]) + '#cues.jsonl', 'w') as fout: for elm in tagged_sentences: fout.write(json.dumps(elm) + '\n') fout.close() # produce html with colored negation cues logger.info('Writing html for visualization to {}'.format( '.'.join(args.input_file.split('.')[:-1]) + '#cues.html')) html = [] for seq in tagged_sentences: seq = seq['seq'] labels = ['O'] * len(seq) for i, tok in enumerate(seq): if tok == '[CUE]': labels[i] = 'CUE' if i < len(labels) - 1: labels[i + 1] = 'CUE' html.append( html_heatmap( words=[elm for elm in seq if elm != '[CUE]'] + ['<br>'], labels=[ elm for i, elm in enumerate(labels) if seq[i] != '[CUE]' ] + ['O'])) with open('.'.join(args.input_file.split('.')[:-1]) + '#cues.html', 'w') as fout: for elm in html: fout.write(elm + '\n') fout.close()
def __init__(self, sess, dataset, conf):
    """Initialise LightGCN from the configuration dict and dataset."""
    super(LightGCN, self).__init__(dataset, conf)
    self.logger = create_logger(conf)
    # Numeric options are cast explicitly; conf values may arrive as strings.
    self.learning_rate = float(conf["learning_rate"])
    self.embedding_size = int(conf["embedding_size"])
    self.num_epochs = int(conf["epochs"])
    self.batch_size = int(conf["batch_size"])
    self.reg = float(conf["reg"])
    self.learner = conf["learner"]
    self.verbose = conf["verbose"]
    self.init_method = conf["init_method"]
    self.stddev = conf["stddev"]
    self.weight_size = conf["weight_size"]
    self.n_layers = len(self.weight_size)
    self.data_name = conf["data.input.dataset"]
    # Dataset statistics and interaction structures.
    self.dataset = dataset
    self.num_users = dataset.num_users
    self.num_items = dataset.num_items
    self.R = dataset.train_matrix
    self.graph = dataset.train_matrix.tolil()
    self.norm_adj = self.get_adj_mat()
    self.sess = sess
    self.logger.info(conf)
def __init__(self):
    """Resolve action/config file locations from the environment and compile
    the project-name validation pattern."""
    self.logger = create_logger(__name__)
    # Fall back to the class-level defaults when the env vars are unset.
    self.actions_file = os.getenv(self.ACTIONS_FILE, self.ACTIONS_FILE_WRITE_LOCATION)
    self.conf_file = os.getenv(self.LEGACY_CONFIG, None)
    # Valid project names: lowercase alphanumerics and dashes, max 63 chars.
    self.projectre = re.compile(r'^[a-z0-9]([-a-z0-9]*[a-z0-9])?$')
    self.projectmaxlen = 63
def __init__(self):
    """Spawn the generator worker process and start polling for replies."""
    super().__init__()
    self.logger = util.create_logger("idea-bot")
    # One pipe end stays with the bot, the other goes to the worker.
    bot_end, worker_end = Pipe()
    self.conn = bot_end
    self.generator_process = GeneratorProcess(conn=worker_end)
    self.generator_process.start()
    self.loop.create_task(self.check_responses())
def __init__(self):
    """Build the curator command list and an (initially empty) crontab."""
    self.logger = create_logger(__name__)
    cmd_builder = CuratorCmd()
    self.cmd_list = cmd_builder.build_cmd_list()
    self.defaults = cmd_builder.get_defaults()
    # Schedule defaults: midnight UTC unless configured otherwise.
    self.hour = self.defaults.get('runhour', 0)
    self.minute = self.defaults.get('runminute', 0)
    self.timezone = self.defaults.get('timezone', 'UTC')
    self.job_list = CronTab()
def main():
    """Run the PYKE embedding pipeline end to end: parse the KG, learn
    100-dimensional embeddings, and write them to CSV."""
    try:
        print("Starting embeddings generation")
        random_state = 1
        np.random.seed(random_state)
        # Model parameters.
        K = 45
        num_of_dims = 100
        bound_on_iter = 5  # epochs
        omega = 0.45557
        e_release = 0.0414
        kg_root = '../dbpedia/pyke_data'
        kg_path = kg_root + '/'
        print("Path to KG: ", kg_path)
        storage_path, experiment_folder = ut.create_experiment_folder()
        logger = ut.create_logger(name='PYKE', p=storage_path)
        logger.info("Storage path: " + storage_path + "\texperiment folder: " + experiment_folder)
        parser = Parser(p_folder=storage_path, k=K)
        parser.set_logger(logger)
        logger.info("Setting similarity measure")
        parser.set_similarity_measure(PPMI)
        logger.info("Model init")
        model = PYKE(logger=logger)
        logger.info("Analyzer init")
        analyser = DataAnalyser(p_folder=storage_path, logger=logger)
        # For illustration purposes only the first 5000 ntriples per file are
        # processed; to reproduce reported results run the full pipeline.
        holder = parser.pipeline_of_preprocessing(kg_path)
        vocab_size = len(holder)
        logger.info("Vocab size: " + str(vocab_size))
        embeddings = ut.randomly_initialize_embedding_space(vocab_size, num_of_dims)
        learned_embeddings = model.pipeline_of_learning_embeddings(
            e=embeddings,
            max_iteration=bound_on_iter,
            energy_release_at_epoch=e_release,
            holder=holder,
            omega=omega)
        logger.info("Writing to file.")
        learned_embeddings.to_csv(storage_path + '/PYKE_100_embd.csv')
        logger.info("Done!")
        # Free the big structures eagerly to keep peak memory down.
        del holder
        del embeddings
    except Exception as e:
        # Top-level best-effort guard: report and exit quietly.
        print(e)
def run(self):
    """Worker loop: service requests arriving on the pipe until told to stop."""
    self.logger = util.create_logger(__name__)
    self.logger.info("Starting GeneratorProcess")
    try:
        # Block on the pipe and dispatch each incoming request.
        while not self.terminate:
            self.handle_request(self.conn.recv())
    except KeyboardInterrupt:
        # Parent is shutting down; exit quietly.
        return
def __init__(self, config_file):
    """Remember the config location and load scheduling defaults from env."""
    self.config_file = config_file
    # Only these time units are accepted in the curator config.
    self.allowed_units = {unit: unit for unit in ('days', 'weeks', 'months')}
    self.default_time_unit = 'days'
    self.default_count = int(os.getenv('CURATOR_DEFAULT_DAYS', 31))
    self.runhour = int(os.getenv('CURATOR_RUN_HOUR', 0))
    self.runminute = int(os.getenv('CURATOR_RUN_MINUTE', 0))
    self.timezone = str(os.getenv('CURATOR_RUN_TIMEZONE', 'UTC'))
    self.logger = create_logger(__name__)
    self.internal_config_yaml = {}
def __init__(self):
    """Load the curator configuration and prepare command bookkeeping."""
    config_file = os.getenv('CURATOR_CONF_LOCATION', 'config.yaml')
    parser = Parser(config_file)
    self.conf = parser.parse()
    # Kubernetes-style project names: lowercase, dashes, max 63 chars.
    self.projectre = re.compile(r'^[a-z0-9]([-a-z0-9]*[a-z0-9])?$')
    self.projectmaxlen = 63
    self.allowed_operations = {'delete': 'delete'}
    self.allowed_params = {'raw_regex': self.RAW_REGEX}
    self.curator_settings = {'delete': {}}
    self.logger = create_logger(__name__)
    # Default log level is INFO here (the original inline comment claimed
    # ERROR, contradicting the code).
    self.curator_log_level = os.getenv('CURATOR_LOG_LEVEL', 'INFO')
    self.commands = []
def __init__(self):
    """Parse curator settings and initialise validation/command state."""
    config_file = os.getenv('CURATOR_CONF_LOCATION', '/etc/curator/settings/config.yaml')
    self.conf = Parser(config_file).parse()
    # Kubernetes-style project names: lowercase, dashes, max 63 chars.
    self.projectre = re.compile(r'^[a-z0-9]([-a-z0-9]*[a-z0-9])?$')
    self.projectmaxlen = 63
    self.allowed_operations = {'delete': 'delete'}
    self.allowed_params = {'raw_regex': self.RAW_REGEX}
    self.curator_settings = {'delete': {}}
    self.logger = create_logger(__name__)
    self.curator_log_level = os.getenv('CURATOR_LOG_LEVEL', 'ERROR')
    self.commands = []
def __init__(self):
    """Collect curator commands, schedule defaults, and ES connection info."""
    self.logger = create_logger(__name__)
    cmd_source = CuratorCmd()
    self.cmd_list = cmd_source.build_cmd_list()
    self.defaults = cmd_source.get_defaults()
    self.hour = self.defaults.get('runhour', 0)
    self.minute = self.defaults.get('runminute', 0)
    self.timezone = self.defaults.get('timezone', 'UTC')
    self.job_list = CronTab()
    # TLS material and endpoint for the Elasticsearch client.
    self.ca = os.getenv('ES_CA', '/etc/curator/keys/ca')
    self.cert = os.getenv('ES_CLIENT_CERT', '/etc/curator/keys/cert')
    self.key = os.getenv('ES_CLIENT_KEY', '/etc/curator/keys/key')
    self.es_host = os.getenv('ES_HOST', 'logging-es')
    self.es_port = os.getenv('ES_PORT', '9200')
def __init__(self, config_file):
    """Capture the config path plus environment-driven curator defaults."""
    self.logger = create_logger(__name__)
    self.config_file = config_file
    self.internal_config_yaml = {}
    # Accepted age units for index retention rules.
    self.allowed_units = {
        'days': 'days',
        'weeks': 'weeks',
        'months': 'months',
    }
    self.default_time_unit = 'days'
    self.default_count = int(os.getenv('CURATOR_DEFAULT_DAYS', 31))
    self.runhour = int(os.getenv('CURATOR_RUN_HOUR', 0))
    self.runminute = int(os.getenv('CURATOR_RUN_MINUTE', 0))
    self.timezone = str(os.getenv('CURATOR_RUN_TIMEZONE', 'UTC'))
def __init__(self):
    """Assemble curator commands, schedule defaults, and the ES endpoint
    (including the combined host:port string)."""
    self.logger = create_logger(__name__)
    builder = CuratorCmd()
    self.cmd_list = builder.build_cmd_list()
    self.defaults = builder.get_defaults()
    self.hour = self.defaults.get('runhour', 0)
    self.minute = self.defaults.get('runminute', 0)
    self.timezone = self.defaults.get('timezone', 'UTC')
    self.job_list = CronTab()
    # Client credentials and endpoint for Elasticsearch.
    self.ca = os.getenv('ES_CA', '/etc/curator/keys/ca')
    self.cert = os.getenv('ES_CLIENT_CERT', '/etc/curator/keys/cert')
    self.key = os.getenv('ES_CLIENT_KEY', '/etc/curator/keys/key')
    self.es_host = os.getenv('ES_HOST', 'logging-es')
    self.es_port = os.getenv('ES_PORT', '9200')
    self.es_hostport = self.es_host + ':' + self.es_port
def __init__(self, sess, dataset, config):
    """Configure FastLightGCN and select the adjacency matrix variant."""
    super(FastLightGCN, self).__init__(dataset, config)
    self.logger = create_logger(config)
    # Model bookkeeping.
    self.model_type = config['recommender']
    self.epoch = config["epoch"]
    self.adj_type = config["adj_type"]
    self.alg_type = config["alg_type"]
    # Dataset statistics.
    self.n_users = dataset.num_users
    self.n_items = dataset.num_items
    self.num_ratings = dataset.num_r
    self.sparsity = dataset.sparsity
    self.R = dataset.train_matrix
    self.dataset = dataset
    self.data_name = config["data.input.dataset"]
    self.n_fold = 100
    # Hyper-parameters.
    self.lr = config["learning_rate"]
    self.emb_dim = config["embed_size"]
    self.weight_size = config["weight_size"]
    self.node_dropout_flag = config["node_dropout_flag"]
    self.node_dropout = config["node_dropout"]
    self.mess_dropout = config["mess_dropout"]
    self.n_layers = len(self.weight_size)
    self.r_alpha = config["r_alpha"]
    self.fast_reg = config["fast_reg"]
    self.logger.info(
        "\"num_users\": %d,\"num_items\":%d, \"num_ratings\":%d, \"sparsity\":%.4f"
        % (self.n_users, self.n_items, self.num_ratings, self.sparsity))
    self.logger.info(config)
    self.sess = sess
    # Choose the propagation adjacency matrix according to adj_type.
    plain_adj, norm_adj, mean_adj, pre_adj = self.get_adj_mat()
    adj_choice = config["adj_type"]
    if adj_choice == 'plain':
        self.norm_adj = plain_adj
        print('use the plain adjacency matrix')
    elif adj_choice == 'norm':
        self.norm_adj = norm_adj
        print('use the normalized adjacency matrix')
    elif adj_choice == 'gcmc':
        self.norm_adj = mean_adj
        print('use the gcmc adjacency matrix')
    elif adj_choice == 'pre':
        self.norm_adj = pre_adj
        print('use the pre adjcency matrix')
    else:
        self.norm_adj = mean_adj + sp.eye(mean_adj.shape[0])
        print('use the mean adjacency matrix')
    self.n_nonzero_elems = self.norm_adj.count_nonzero()
# Behaviour-cloning training for the permutation-invariant student policy:
# stream (prev_act, obs, act) trajectories, roll the policy through each
# trajectory with noised previous actions, and regress MSE against the
# teacher's actions. Checkpoints every 1000 iterations and once at the end.
# NOTE(review): newlines were collapsed in this block; the code is left
# byte-identical rather than re-indented, because the nesting of the trailing
# `save_model` calls cannot be recovered with certainty from this form.
def main(log_dir): logger = util.create_logger(name='bc_training', log_dir='pretrained/ant_pi') device = torch.device('cuda:0') policy = PIStudent( act_dim=ACT_DIM, msg_dim=32, pos_em_dim=8, hidden_dim=32, ).to(device) batch_size = 8 data = load_data(os.path.join(log_dir, 'data.npz')) batches = sample_batch_data(data, batch_size=batch_size) criterion = nn.MSELoss() optimizer = torch.optim.Adam(policy.parameters(), lr=3e-4) max_iter = 1000000 noise_sd = 0.1 for i in range(max_iter): batch_data = torch.from_numpy(next(batches)).float().to(device) optimizer.zero_grad() # This is only to show how BC training works, it is inefficient. seq_len = batch_data.shape[1] losses = [] for traj in batch_data: pred_act = [] policy.attention_neuron.reset() # Reset AttentionNeuron's hx. for t in range(seq_len): prev_act, obs = traj[t][:ACT_DIM], traj[t][ACT_DIM:-ACT_DIM] prev_act = prev_act + torch.randn(ACT_DIM).to(device) * noise_sd pred_act.append(policy(obs, prev_act)) pred_act = torch.vstack(pred_act) act = traj[:, -ACT_DIM:] losses.append(criterion(input=pred_act, target=act)) loss = sum(losses) / batch_size loss.backward() torch.nn.utils.clip_grad_norm_(policy.parameters(), max_norm=.1) optimizer.step() logger.info('iter={}, loss={}'.format(i, loss.item())) if i % 1000 == 0: save_model(policy, i) save_model(policy, max_iter)
def __init__(self, sess, dataset, config):
    """Configure the Local_end recommender and choose its adjacency matrix."""
    super(Local_end, self).__init__(dataset, config)
    self.logger = create_logger(config)
    self.logger.info(config)
    # Model bookkeeping.
    self.model_type = config['recommender']
    self.epoch = config["epoch"]
    self.adj_type = config["adj_type"]
    self.alg_type = config["alg_type"]
    # Dataset statistics.
    self.n_users = dataset.num_users
    self.n_items = dataset.num_items
    self.R = dataset.train_matrix
    self.dataset = dataset
    self.data_name = config["data.input.dataset"]
    # Hyper-parameters.
    self.lr = config["learning_rate"]
    self.emb_dim = config["embed_size"]
    self.weight_size = config["weight_size"]
    self.node_dropout_flag = config["node_dropout_flag"]
    self.node_dropout = config["node_dropout"]
    self.mess_dropout = config["mess_dropout"]
    self.n_layers = len(self.weight_size)
    self.r_alpha = config["r_alpha"]
    self.fast_reg = config["fast_reg"]
    self.localmodel = config['localmodel']
    self.bw = config['d']  # bandwidth-like parameter from config key 'd'
    self.sess = sess
    self.verbose = config['verbose']
    # Adjacency selection; the default branch keeps pre_adj around and uses
    # the random variant for propagation.
    plain_adj, norm_adj, mean_adj, pre_adj, random_adj = self.get_adj_mat()
    kind = config["adj_type"]
    if kind == 'plain':
        self.norm_adj = plain_adj
        print('use the plain adjacency matrix')
    elif kind == 'norm':
        self.norm_adj = norm_adj
        print('use the normalized adjacency matrix')
    elif kind == 'gcmc':
        self.norm_adj = mean_adj
        print('use the gcmc adjacency matrix')
    elif kind == 'pre':
        self.norm_adj = pre_adj
        print('use the pre adjcency matrix')
    else:
        self.pre_adj = pre_adj
        self.norm_adj = random_adj
        print('use the random adjacency matrix')
def main(config):
    """Evaluate a saved solution for several episodes, logging per-episode
    reward plus aggregate reward and rollout-time statistics."""
    logger = util.create_logger(name='test_solution', log_dir=config.log_dir)
    task = util.create_task(logger=logger)
    task.seed(config.seed)
    solution = util.create_solution(device='cpu:0')
    solution.load(os.path.join(config.log_dir, config.model_filename))
    rewards = []
    time_costs = []
    for ep in range(config.n_episodes):
        tick = time.perf_counter()
        reward = task.rollout(solution=solution, evaluation=True)
        time_costs.append(time.perf_counter() - tick)
        rewards.append(reward)
        logger.info('Episode: {0}, reward: {1:.2f}'.format(ep + 1, reward))
    logger.info('Avg reward: {0:.2f}, sd: {1:.2f}'.format(
        np.mean(rewards), np.std(rewards)))
    logger.info('Time per rollout: {}s'.format(np.mean(time_costs)))
# Roll out a trained policy in AntBulletEnv-v0 and save 1000 fixed-length
# (500-step) trajectories of [prev_act, obs, act] rows to data.npz; episodes
# that terminate early are discarded.
# NOTE(review): newlines were collapsed in this block; code left byte-identical
# because the exact indentation of the post-episode logging/branching cannot be
# recovered with certainty from this form.
def main(log_dir): logger = util.create_logger(name='data_collection') solution = util.create_solution(device='cpu:0') model_file = os.path.join(log_dir, 'model.npz') solution.load(model_file) trajectories = [] env = gym.make('AntBulletEnv-v0') # Collect trajectories from rollouts. max_ep_cnt = 1000 traj_len = 500 ep_saved = 0 while ep_saved < max_ep_cnt: ep_reward = 0 ep_steps = 0 obs = env.reset() prev_act = np.zeros(8) ep_traj = [] done = False while not done and ep_steps < traj_len: act = solution.get_action(obs) ep_traj.append(np.concatenate([prev_act, obs, act], axis=0)) obs, reward, done, info = env.step(act) ep_reward += reward ep_steps += 1 logger.info('Episode:{0}, steps:{1}, reward:{2:.2f}'.format( ep_saved + 1, ep_steps, ep_reward)) if ep_steps >= traj_len: trajectories.append(np.vstack(ep_traj)) ep_saved += 1 else: logger.info('Trajectory too short, discard.') trajectories = np.stack(trajectories) logger.info('trajectories.shape={}'.format(trajectories.shape)) np.savez(os.path.join(log_dir, 'data.npz'), data=trajectories)
# Script fragment (truncated mid-statement): prepares the save directory
# (prompting before overwriting), persists the CLI args as JSON, sets up the
# training logger and device, loads word/tag vocabularies, then begins
# selecting the utterance encoder. The final `weights = ...` line is cut off.
# NOTE(review): newlines were collapsed; code left byte-identical.
# create the save directory (for trianed model paremeters, logs, arguments) if not os.path.exists('models'): os.mkdir('models') if os.path.exists(save_dir): go_ahead = input("Overwriting files in {}. Continue? (y/n): ".format(save_dir)) if go_ahead == 'y': util.rm_dir(save_dir) else: exit() os.mkdir(save_dir) # save the args so we can recover hyperparameters, etc. with open(os.path.join(save_dir, 'args.json'), 'w') as f: json.dump(args.__dict__, f) log = util.create_logger(args.verbose, os.path.join(save_dir, 'train.log')) eval_model.log = log # set the eval_model logger to go to 'train.log' device = torch.device('cuda:{}'.format(args.gpu_id) if args.cuda and torch.cuda.is_available() else 'cpu') log.info("Training on {}.".format(device)) word_vocab, word2id = data.load_vocab(args.vocab_file) tag_vocab, tag2id = data.load_vocab(args.tag_vocab_file) n_tags = len(tag_vocab) # select an utt_encoder and compatible utt tokenization log.info("Utt encoder: {}".format(args.utt_encoder)) log.info("DAR model uses LSTM: {}".format(args.lstm)) if args.utt_encoder == 'wordvec-avg': if args.use_glove: weights = torch.FloatTensor(data.load_glove(args.utt_dims, word_vocab))
# Script fragment (truncated): loads settings and login credentials, wires up
# the general logger (and optional PyGithub request logging), and begins the
# login flow. The `__main__` body continues beyond this excerpt.
# NOTE(review): newlines were collapsed; code left byte-identical.
import util from bot_issue_finder import find_issues from repo_finder import find_repos from repo_cloner import clone_repos import pre_bot_issue_finder import repo_analyser_v2 if __name__ == "__main__": settings = util.load_settings('settings.json') util.verify_loglevels(settings.get('loglevels')) loglevels = settings.get('loglevels') logoutputs = settings.get('logoutputs') # General logger logger = util.create_logger('bot_issue_finder', loglevels.get('general'), logoutputs.get('general')) util.g_logger = logger logger.info("======SETTINGS======") util.verify_settings(settings) if settings.get('log-pygithub-requests'): util.load_gh_logger(settings.get('shorten-pygithub-requests')) # Load GitHub Login information login_settings = util.load_settings('login.json') token_or_username = login_settings.get('login_or_token') if token_or_username and login_settings.get('password'): # Someone logged in with their username/password combination logger.info(f"Logged in as {token_or_username}")
from util import create_logger

# Demo script: print each word and emit log records at several severities.
logger = create_logger('get_words', 'log.csv')
for word in ['first', 'second', 'third']:
    print(word)
    logger.debug('debug message')
    # Fix: Logger.warn() is a deprecated alias of Logger.warning().
    logger.warning('warn message')
    logger.critical('critical message')
import sys
from util import create_logger

# Count lines read from stdin, logging each observation, then print the total.
logger = create_logger('count_words', 'ERROR', 'log.csv')
count = 0
for line in sys.stdin:
    count += 1
    message = 'counter {} for {}'.format(count, line.strip())
    logger.debug(message)
    # Fix: Logger.warn() is deprecated; warning() logs at the same level.
    logger.warning(message)
    logger.error(message)
print(count)
# Module fragment (truncated inside Data.__init__): imports, module logger, and
# the start of a Data class that either reads a workbook via OpenPyXL (when a
# filename is given) or takes run data through its keyword parameters. The
# try-block around load_workbook is cut off mid-way.
# NOTE(review): newlines were collapsed; code left byte-identical.
import math import os import statistics from typing import Tuple, List, Union, Optional import numpy from openpyxl import load_workbook from openpyxl.cell import ReadOnlyCell from openpyxl.workbook.defined_name import DefinedName from util import create_logger logger = create_logger(__name__) class Data: def __init__(self, filename: str = None, locations: List[str] = None, vehicle_types: List[str] = None, distance_cost: List[float] = None, time_cost: List[float] = None, pallet_capacity: List[int] = None, available_vehicles: List[int] = None, hired_cost_multiplier: List[float] = None, demand: List[int] = None, window_start: List[float] = None, window_end: List[float] = None, average_unload_time: List[float] = None, distances: List[List[float]] = None, times: List[List[float]] = None): """If filename provided, will read in run_data from .xlsx using OpenPyXL. If no filename provided, will check named parameters for values.""" if filename: try: self.workbook = load_workbook(filename=filename, read_only=True, data_only=True) # one_dimension_sheet: Worksheet = self.workbook["1D"]
# Dynamics-extraction entry point: collects (or reloads cached) latent/label
# pairs from policy rollouts via ray workers, then trains one small Model per
# dynamics parameter (damping, mass, com) to regress it from the policy's
# hidden state, logging train/test losses and saving extractor checkpoints.
# NOTE(review): newlines were collapsed across three physical lines (including
# split `print(` / `torch.save(` calls); code left byte-identical because the
# original statement boundaries and indentation cannot be safely recovered.
def run_experiment(args): """ The entry point for the dynamics extraction algorithm. """ from util import create_logger locale.setlocale(locale.LC_ALL, '') policy = torch.load(args.policy) env_fn = env_factory(True) layers = [int(x) for x in args.layers.split(',')] env = env_fn() policy.init_hidden_state() policy(torch.tensor(env.reset()).float()) latent_dim = get_hiddens(policy).shape[0] models = [] opts = [] for fn in [env.get_damping, env.get_mass, env.get_ipos]: output_dim = fn().shape[0] model = Model(latent_dim, output_dim, layers=layers) models += [model] opts += [optim.Adam(model.parameters(), lr=args.lr, eps=1e-5)] model.policy_path = args.policy logger = create_logger(args) best_loss = None actor_dir = os.path.split(args.policy)[0] create_new = True if os.path.exists(os.path.join(logger.dir, 'test_latents.pt')): x = torch.load(os.path.join(logger.dir, 'train_latents.pt')) test_x = torch.load(os.path.join(logger.dir, 'test_latents.pt')) damps = torch.load(os.path.join(logger.dir, 'train_damps.pt')) test_damps = torch.load(os.path.join(logger.dir, 'test_damps.pt')) masses = torch.load(os.path.join(logger.dir, 'train_masses.pt')) test_masses = torch.load(os.path.join(logger.dir, 'test_masses.pt')) ipos = torch.load(os.path.join(logger.dir, 'train_ipos.pt')) test_ipos = torch.load(os.path.join(logger.dir, 'test_ipos.pt')) if args.points > len(x) + len(test_x): create_new = True else: create_new = False if create_new: if not ray.is_initialized(): ray.init(num_cpus=args.workers) print("Collecting {:4d} timesteps of data.".format(args.points)) points_per_worker = max(args.points // args.workers, 1) start = time.time() damps, masses, ipos, x = concat( ray.get([ collect_data.remote(policy, points=points_per_worker) for _ in range(args.workers) ])) split = int(0.8 * len(x)) test_x = x[split:] x = x[:split] test_damps = damps[split:] damps = damps[:split] test_masses = masses[split:] masses = masses[:split] test_ipos = ipos[split:] ipos = ipos[:split] print( 
"{:3.2f} to collect {} timesteps. Training set is {}, test set is {}" .format(time.time() - start, len(x) + len(test_x), len(x), len(test_x))) torch.save(x, os.path.join(logger.dir, 'train_latents.pt')) torch.save(test_x, os.path.join(logger.dir, 'test_latents.pt')) torch.save(damps, os.path.join(logger.dir, 'train_damps.pt')) torch.save(test_damps, os.path.join(logger.dir, 'test_damps.pt')) torch.save(masses, os.path.join(logger.dir, 'train_masses.pt')) torch.save(test_masses, os.path.join(logger.dir, 'test_masses.pt')) torch.save(ipos, os.path.join(logger.dir, 'train_ipos.pt')) torch.save(test_ipos, os.path.join(logger.dir, 'test_ipos.pt')) for epoch in range(args.epochs): random_indices = SubsetRandomSampler(range(len(x) - 1)) sampler = BatchSampler(random_indices, args.batch_size, drop_last=False) for j, batch_idx in enumerate(sampler): batch_x = x[batch_idx] #.float() batch = [damps[batch_idx], masses[batch_idx], ipos[batch_idx]] losses = [] for model, batch_y, opt in zip(models, batch, opts): loss = 0.5 * (batch_y - model(batch_x)).pow(2).mean() opt.zero_grad() loss.backward() opt.step() losses.append(loss.item()) print("Epoch {:3d} batch {:4d}/{:4d} ".format( epoch, j, len(sampler) - 1), end='\r') train_y = [damps, masses, ipos] test_y = [test_damps, test_masses, test_ipos] order = ['damping', 'mass', 'com'] with torch.no_grad(): print("\nEpoch {:3d} losses:".format(epoch)) for model, y_tr, y_te, name in zip(models, train_y, test_y, order): loss_total = 0.5 * (y_tr - model(x)).pow(2).mean().item() preds = model(test_x) test_loss = 0.5 * (y_te - preds).pow(2).mean().item() pce = torch.mean(torch.abs((y_te - preds) / (y_te + 1e-5))) err = torch.mean(torch.abs((y_te - preds))) logger.add_scalar(logger.arg_hash + '/' + name + '_loss', test_loss, epoch) logger.add_scalar(logger.arg_hash + '/' + name + '_percenterr', pce, epoch) logger.add_scalar(logger.arg_hash + '/' + name + '_abserr', err, epoch) model.dyn_parameter = name torch.save(model, 
os.path.join(logger.dir, name + '_extractor.pt')) print( "\t{:16s}: train loss {:7.6f} test loss {:7.6f}, err {:5.4f}, percent err {:3.2f}" .format(name, loss_total, test_loss, err, pce))
# Module fragment (truncated inside `message`): game-UI globals (message
# buffer, severity levels, repeat-collapsing state) plus the start of the
# `message()` function that logs, wraps, and queues a line of game text. The
# wrap loop is cut off mid-way.
# NOTE(review): newlines were collapsed; code left byte-identical.
__game_state__ = "playing" __fov_recompute__ = True __lookmode__ = False __msgs__ = [] __msg_history__ = [] MSG_WIDTH = 50 MSG_COUNT = 10 __hp_warning__ = 0.5 __trap_low_hp_warning__ = 0.3 __show_chapter__ = False __chapter_text__ = 'Chapter 1. Departure' log = util.create_logger() #DEBUG - debug only (wiz mode), NONE -general info, WARN - warning (hp/mp), # critical - critical hits, stepping on traps, critical hp level, #info - general info on skills level up etc message_levels = { 'DEBUG' : 0, 'NONE' : 1, 'WARN' : 2, 'CRITICAL' : 3, 'INFO' : 4, 'DAMAGE': 5} prev_message = None prev_message_count = 1 def message(text, level = 1): log.info(text) global prev_message, prev_message_count if isinstance(level, str): level = message_levels[level] wraped_msg = textwrap.wrap(text, MSG_WIDTH) if prev_message == hash(text): prev_message_count += 1 for line in wraped_msg:
# Fix: `Iterable` must come from `collections.abc`; the plain `collections`
# import was deprecated since 3.3 and removed in Python 3.10.
from collections.abc import Iterable
import os
from random import randrange, random, choice, shuffle
from features import features
import thirdparty.libtcod.libtcodpy as libtcod
import util
import des
from items import Item, items
from types import FunctionType
from critters import mobs
from maputils import replace_feature, replace_feature_atxy, find_feature, square_search_nearest

logger = util.create_logger('DG')
ft = util.NamedMap(features)

# Legend used when parsing ASCII map templates into terrain features.
default_map_chars = {
    'X': ft.fixed_wall,
    '#': ft.rock_wall,
    ' ': ft.none,
    '.': ft.floor,
    ',': ft.grass,
    '+': ft.door,
    '0': ft.window,
    '{': ft.fountain,
    '<': ft.stairs_up,
    '>': ft.stairs_down,
    'h': ft.chair,
    'T': ft.table,
    '8': ft.bed,
}
# Script fragment (truncated inside the training loop): selects train/test
# mode from argv, loads vocab and GloVe embeddings, builds the HARM model, and
# begins max-margin training over the query/doc loader. Cut off right after
# `optimizer.zero_grad()`.
# NOTE(review): newlines were collapsed; code left byte-identical.
dev_file = "data/dev.csv" test_file = "data/test.csv" mode = sys.argv[1] if mode != "train" and mode != "test": raise ValueError # Running with open(vocab_file, 'r') as f: vocab = json.load(f) glove_emb = torch.load(glove_emb_file) model = HARM_Model(len(vocab), glove_emb).to(device) if mode == "train": best_mrr = 0. logger = create_logger("log/", "train.log") optimizer = torch.optim.Adadelta(model.parameters(), lr=lr) clock = LossClock(["loss"], interval=20) ds_train = TrainLoader(train_file, vocab, device) ds_dev = TestLoader(dev_file, vocab, device) for epoch in range(total_epochs): # train logger.info("=" * 30 + f"Train epoch {epoch}" + "=" * 30) for query, docs in ds_train(): r = model(query, docs) margin_loss = max_margin_loss(r[:1].expand(r[1:].size(0)), r[1:]) # update optimizer.zero_grad()
import unittest
import random
import itertools
import logging
import util
import model
import train
import torch.optim as optim
import torch.nn as nn

log = util.create_logger(logging.DEBUG)
train.log = log  # route training output through this test logger

# Synthetic training data: 5 dialogues, each 2-15 utterances of 3-10 tokens
# drawn from a 50-token vocabulary, with one of 7 DA tags per utterance.
random.seed(777)
input_vocab = list(range(50))
label_vocab = list(range(7))
data = [
    [random.choices(input_vocab, k=random.randint(3, 10))
     for r in range(random.randint(2, 15))]
    for i in range(5)
]
labels = [random.choices(label_vocab, k=len(convo)) for convo in data]
utt_dims = 250
n_hidden = 50
# Fix: import `Iterable` from `collections.abc` — the `collections` re-export
# was deprecated since 3.3 and removed in Python 3.10.
from collections.abc import Iterable
import os
from random import randrange, random, choice, shuffle
from features import features
import thirdparty.libtcod.libtcodpy as libtcod
import util
import des
from items import Item, items
from types import FunctionType
from critters import mobs
from maputils import replace_feature, replace_feature_atxy, find_feature, square_search_nearest

logger = util.create_logger('DG')
ft = util.NamedMap(features)

# Legend mapping template characters to terrain features.
default_map_chars = {
    'X': ft.fixed_wall,
    '#': ft.rock_wall,
    ' ': ft.none,
    '.': ft.floor,
    ',': ft.grass,
    '+': ft.door,
    '0': ft.window,
    '{': ft.fountain,
    '<': ft.stairs_up,
    '>': ft.stairs_down,
    'h': ft.chair,
    'T': ft.table,
    '8': ft.bed,
}
# Multi-task BERT training entry point: seeds RNGs, prepares a (possibly
# uuid-suffixed) output dir and logger, loads task specs and per-split data,
# builds the multi-task batch sampler and MTLModel, then fits with a warmup-
# linear schedule.
# NOTE(review): newlines were collapsed across three physical lines, and a
# commented-out debugging region ("#with open('iulaconv_test...") is fused
# into the text — code left byte-identical; restoring indentation here could
# silently change which statements are commented out.
def main(args): config = configparser.ConfigParser( interpolation=configparser.ExtendedInterpolation()) config.read(args.config) random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) # check if output dir exists. if so, assign a new one if os.path.isdir(args.outdir): # create new output dir outdir = os.path.join(args.outdir, str(uuid.uuid4())) else: outdir = args.outdir # make the output dir os.makedirs(outdir) if args.save_best: os.makedirs(os.path.join(outdir, 'best_model')) # create a logger logger = create_logger(__name__, to_disk=True, log_file='{}/{}'.format(outdir, args.logfile)) tasks = [] for task_name in args.tasks.split(','): task = load_task( os.path.join(args.task_spec, '{}.yml'.format(task_name))) tasks.append(task) device = "cuda" if torch.cuda.is_available() else "cpu" device = torch.device(device) if not args.load_checkpoint: tokenizer = setup_customized_tokenizer(model=args.tokenizer, do_lower_case=False, config=config, tokenizer_class=BertTokenizer) else: tokenizer = BertTokenizer.from_pretrained(args.checkpoint) train_datasets = {} dev_dataloaders = {} test_dataloaders = {} for task_id, task in enumerate(tasks): task.set_task_id(task_id) logging.info('Task {}: {} on {}'.format(task_id, task.task_type, task.dataset)) if 'train' in task.splits: train_datasets[task_id] = get_data(task=task, split='train', config=config, tokenizer=tokenizer) train_datasets[task_id].set_task_id(task_id) task.set_label_map(train_datasets[task_id].label_map) if 'dev' in task.splits: dev_data = get_data(task=task, split='dev', config=config, tokenizer=tokenizer) dev_data.set_task_id(task_id) dev_dataloader = DataLoader(dev_data, shuffle=False, batch_size=8, collate_fn=dev_data.collate_fn) dev_dataloaders[task_id] = dev_dataloader if 'test' in task.splits: test_data = get_data(task=task, split='test', config=config, tokenizer=tokenizer) test_data.set_task_id(task_id) if task.dataset == 'iulaconv': import json #with 
open('iulaconv_test.json.analsyis', 'w') as f: # for elm in test_data: # f.write(json.dumps(elm) + '\n') #f.close() test_dataloader = DataLoader(test_data, shuffle=False, batch_size=8, collate_fn=test_data.collate_fn) test_dataloaders[task_id] = test_dataloader padding_label = train_datasets[0].padding_label sorted_train_datasets = [ds for _, ds in sorted(train_datasets.items())] mtl_dataset = MultiTaskDataset(sorted_train_datasets) multi_task_batch_sampler = MultiTaskBatchSampler( sorted_train_datasets, batch_size=args.bs, mix_opt=args.mix_opt, extra_task_ratio=args.extra_task_ratio, annealed_sampling=args.annealed_sampling, max_epochs=args.epochs) mtl_train_dataloader = DataLoader(mtl_dataset, batch_sampler=multi_task_batch_sampler, collate_fn=mtl_dataset.collate_fn, pin_memory=False) model = MTLModel(bert_encoder=args.bert_model, device=device, tasks=tasks, padding_label_idx=padding_label, load_checkpoint=args.load_checkpoint, checkpoint=os.path.join(args.checkpoint, 'model.pt'), tokenizer=tokenizer) # get optimizer # TODO: in case of loading from checkpoint, initialize optimizer using saved optimizer state dict optimizer = get_optimizer(optimizer_name='adamw', model=model, lr=args.lr, eps=args.eps, decay=args.decay) # get lr schedule total_steps = (len(mtl_dataset) / args.grad_accumulation_steps) * args.epochs warmup_steps = args.warmup_frac * total_steps logger.info( 'Bs_per_device={}, gradient_accumulation_steps={} --> effective bs= {}' .format(args.bs, args.grad_accumulation_steps, args.bs * args.grad_accumulation_steps)) logger.info('Total steps: {}'.format(total_steps)) logger.info('Scheduler: {} with {} warmup steps'.format( 'warmuplinear', warmup_steps)) scheduler = get_scheduler(optimizer, scheduler='warmuplinear', warmup_steps=warmup_steps, t_total=total_steps) model.fit(tasks, optimizer, scheduler, gradient_accumulation_steps=args.grad_accumulation_steps, train_dataloader=mtl_train_dataloader, dev_dataloaders=dev_dataloaders, 
test_dataloaders=test_dataloaders, epochs=args.epochs, evaluation_step=args.evaluation_steps, save_best=args.save_best, outdir=outdir, predict=args.predict)
import torch
from model import fn_cls
import util
from transformers import BertTokenizer, BertModel
import torch.nn as nn
import numpy as np
import time
import argparse

# Fix: ArgumentParser's first positional parameter is `prog` (the program name
# shown in usage/help), not the description.  Passing the description
# positionally made `usage: Text-cls training task. [-h] ...` appear in help
# output; pass it as `description=` instead.
parser = argparse.ArgumentParser(description="Text-cls training task.")
parser.add_argument("--epoch", type=int, default=10)

logger = util.create_logger("Bert Training", log_file='train.log')
args = parser.parse_args()


def main():
    """Prepare data, model, loss and optimizer for BERT text classification.

    Loads train/dev data via the project tokenizer, wraps a pretrained
    Chinese BERT in the ``fn_cls`` head, moves it to GPU and builds an Adam
    optimizer.  (The training loop itself is not part of this chunk.)
    """
    logger.info("Loading data...")
    tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
    train_loader = util.profile('train.txt', tokenizer)
    # test_loader = util.profile('test.txt', tokenizer)
    dev_loader = util.profile('dev.txt', tokenizer)

    logger.info("Build model...")
    bert = BertModel.from_pretrained('bert-base-chinese')
    model = fn_cls(bert)
    # BCELoss expects probabilities in [0, 1]; the separate Sigmoid is kept
    # so the model's raw logits can be squashed before the loss.
    criterion = nn.BCELoss()
    sigmoid = nn.Sigmoid()
    max_epoch = args.epoch
    model.cuda()
    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=1e-5)
def main(config):
    """Train a solution with CMA-ES, evaluating candidates in a process pool.

    Args:
        config: parsed run configuration (log_dir, seed, population_size,
            init_sigma, reps, num_workers, max_iter, save_interval,
            load_model, ...).

    Side effects: writes logs, config copy and ``.npz`` checkpoints into
    ``config.log_dir``.  Also reads the module-level ``args`` namespace
    (``args.num_gpus``, ``args.config``), which is not visible in this chunk.
    """
    logger = util.create_logger(name='train_log', log_dir=config.log_dir)
    # NOTE(review): the logger is created before the directory check below;
    # presumably util.create_logger tolerates/creates the directory — confirm.
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir, exist_ok=True)
    util.save_config(config.log_dir, config.config)
    logger.info('Logs and models will be save in {}.'.format(config.log_dir))

    rnd = np.random.RandomState(seed=config.seed)

    solution = util.create_solution(device='cpu:0')
    num_params = solution.get_num_params()
    # Optionally warm-start CMA-ES from a previously saved model's parameters.
    if config.load_model is not None:
        solution.load(config.load_model)
        print('Loaded model from {}'.format(config.load_model))
        init_params = solution.get_params()
    else:
        init_params = None
    solver = cma.CMAEvolutionStrategy(
        x0=np.zeros(num_params) if init_params is None else init_params,
        sigma0=config.init_sigma,
        inopts={
            'popsize': config.population_size,
            # seed=42 fallback: cma treats seed<=0 specially, so force a fixed one.
            'seed': config.seed if config.seed > 0 else 42,
            'randn': np.random.randn,
        },
    )

    best_so_far = -float('Inf')
    ii32 = np.iinfo(np.int32)  # upper bound for randomly drawn task seeds
    repeats = [config.reps] * config.population_size

    # NOTE(review): device selection comes from the global `args`, while pool
    # sizing comes from `config` — confirm this mix is intentional.
    device_type = 'cpu' if args.num_gpus <= 0 else 'cuda'
    num_devices = mp.cpu_count() if args.num_gpus <= 0 else args.num_gpus
    # 'spawn' context: safe with CUDA, and each worker re-initializes itself
    # via worker_init.
    with mp.get_context('spawn').Pool(
            initializer=worker_init,
            initargs=(args.config, device_type, num_devices),
            processes=config.num_workers,
    ) as pool:
        for n_iter in range(config.max_iter):
            params_set = solver.ask()
            # List repetition: ONE seed is drawn and shared by the whole
            # population this iteration, so all candidates are scored on the
            # same task instance (fair comparison within an iteration).
            task_seeds = [rnd.randint(0, ii32.max)] * config.population_size
            fitnesses = []
            ss = 0
            # Evaluate candidates in chunks of at most num_workers at a time.
            while ss < config.population_size:
                ee = ss + min(config.num_workers, config.population_size - ss)
                fitnesses.append(
                    pool.map(func=get_fitness,
                             iterable=zip(params_set[ss:ee],
                                          task_seeds[ss:ee],
                                          repeats[ss:ee])))
                ss = ee
            fitnesses = np.concatenate(fitnesses)
            if isinstance(solver, cma.CMAEvolutionStrategy):
                # CMA minimizes, so negate the (maximized) fitnesses.
                solver.tell(params_set, -fitnesses)
            else:
                # NOTE(review): the non-CMA branch passes only fitnesses (no
                # solutions) to tell() — confirm this matches that solver's API.
                solver.tell(fitnesses)
            logger.info(
                'Iter={0}, '
                'max={1:.2f}, avg={2:.2f}, min={3:.2f}, std={4:.2f}'.format(
                    n_iter, np.max(fitnesses), np.mean(fitnesses),
                    np.min(fitnesses), np.std(fitnesses)))
            # Checkpoint whenever the best fitness seen so far improves.
            best_fitness = max(fitnesses)
            if best_fitness > best_so_far:
                best_so_far = best_fitness
                model_path = os.path.join(config.log_dir, 'best.npz')
                save_params(solver=solver,
                            solution=solution,
                            model_path=model_path)
                logger.info(
                    'Best model updated, score={}'.format(best_fitness))
            # Periodic checkpoint every save_interval iterations.
            if (n_iter + 1) % config.save_interval == 0:
                model_path = os.path.join(
                    config.log_dir, 'iter_{}.npz'.format(n_iter + 1))
                save_params(solver=solver,
                            solution=solution,
                            model_path=model_path)
# -*- coding: utf-8 -*- from __future__ import print_function from node import Node from random import choice from twisted.internet import reactor from twisted.web.error import Error from twisted.web.resource import NoResource from twisted.web.resource import Resource from twisted.web.server import NOT_DONE_YET from twisted.web.server import Site import cgi import json import util logger = util.create_logger("node_api.log") class NodeAPI(Resource): """ Handles the basic routing of requests to the appropirate resource. '/{register, unregister}/<service_name>' maps to the 'Register' resource. '/<service_name>' maps to the 'Service' resource. """ def __init__(self, node, web_port): Resource.__init__(self) self.node = node reactor.listenTCP(web_port, Site(self)) logger.info("API listening on: {}".format(web_port))
from npc import *
import world
from random import choice, shuffle, randrange
from itertools import combinations, chain
# NOTE(review): `sys` and `os` are not imported here — presumably they come in
# via `from npc import *`; confirm.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
import util
import items
import acquire

logger = util.create_logger('plot')

# Roll tuples are (number_of_dice, die_size, modifier), i.e. NdS + M.
# 3d10 + 20 roll for mNPC
MNPC_ROLL = (3, 10, 20)
# 2d3 + 2 roll for deities
DEITY_ROLL = (2, 3, 2)
# 2d3 + 4 (yields 6-10 quests)
QUEST_GIVER_ROLL = (2, 3, 4)
# 1d4 - 1 roll for immobile NPCs (means 0-3).
IMMOBILE_NPC_ROLL = (1, 4, -1)
# uniques roll 5d2 + 3 (from 8 to 13)
UNIQUES_ROLL = (5, 2, 3)
# traders roll: we want at least 10 of them, 20 at max (2d6 + 8).
TRADERS_ROLL = (2, 6, 8)
# min artefacts count (4d3 + 4 yields 8-16)
ARTEFACTS_COUNT = (4, 3, 4)
CITIES_COUNT = 6
# the chance of a certain NPC to become holder of some artefact
ARTEFACT_OWNER_CHANCE = 4
debug_names = True


def assign_random_name_from_list(instances, names, demon = False):
    """Assign names from ``names`` to as-yet-unnamed NPCs.

    NOTE(review): the visible body iterates world.mNPC (not ``instances``)
    and only skips already-named NPCs; the rest of the body appears to be
    truncated in this chunk — confirm against the full file.
    """
    for npc in world.mNPC:
        # Skip NPCs that already have a name.
        if npc.name is not None:
            continue