def run_on_gpu(device_id, od, device2max, max2frac, cmd_pre):
    """Run one training command on the given GPU device and block until it exits.

    Mutates *od* in place with the GPU id / memory fraction, builds a shell
    command from the option dict, launches it, and returns ``(device_id, grid_id)``
    so the scheduler knows which device is free again.
    """
    # Inject the device id and its memory fraction into the option dict.
    frac = max2frac[device2max[device_id]]
    od.update({gi_: device_id, gp_: frac})
    # Ensure every option key carries a leading dash before serialization.
    flagged = []
    for key, val in od.items():
        flag = key if key.startswith('-') else '-' + key
        flagged.append((flag, val))
    command = cmd_pre + au.entries2name(flagged, inter=' ', inner=' ')
    # Single concurrent job: inherit the console (None); multiple jobs:
    # redirect stdin/stdout through V to avoid interleaved output.
    pipe = V if sum(device2max.values()) != 1 else None
    Popen(command, cwd='./', shell=True, stdin=pipe, stdout=pipe,
          stderr=None).communicate()
    return device_id, od[gid_]
def update_od_list(od_list, log_path, shuffle):
    """Assign a grid id and the log path to every option dict, optionally shuffle.

    Args:
        od_list: list of option dicts, one per hyper-parameter configuration.
        log_path: log directory recorded into each dict under ``lg_``.
        shuffle: when truthy, randomize the order via ``au.shuffle``.

    Returns:
        The (possibly reshuffled) list of option dicts.
    """
    for i, od in enumerate(od_list):
        od[gid_] = i
        od[lg_] = log_path
    if shuffle:
        od_list = au.shuffle(od_list)
    for i, od in enumerate(od_list):
        # Fix: the original abused a conditional expression for its side effect
        # (`print(...) if i <= 10 else None`); use a plain `if` statement.
        # Only preview the first few configurations to keep output short.
        if i <= 10:
            print(au.entries2name(od, inner='=', inter=' '))
    return od_list
def __init__(self, args: dict):
    """Cache run hyper-parameters from *args*, open the logger, and
    (when recording is enabled) prepare a checkpoint directory."""
    self.args = args
    # Mirror the frequently used hyper-parameters as plain attributes.
    attr2key = (
        ('gid', C.gid), ('gpu_id', C.gi), ('gpu_frac', C.gp),
        ('epoch_num', C.ep), ('batch_size', C.bs), ('neg_size', C.ns_),
        ('data_name', C.dn), ('model_name', C.vs), ('w_init', C.wini_),
        ('c_init', C.cini_), ('scale', C.sc), ('c_num', C.cn_),
        ('log_path', C.lg),
    )
    for attr, key in attr2key:
        setattr(self, attr, args[key])
    # Name the log file after every argument that was actually supplied.
    supplied = [(k, v) for k, v in args.items() if v is not None]
    log_name = au.entries2name(supplied, exclude={C.gi, C.gp, C.lg},
                               postfix='.txt')
    self.log_file = iu.join(self.log_path, log_name)
    self.logger = lu.get_logger(self.log_file)
    # self.is_record = Nodes.is_1702()
    self.is_record = False
    if self.is_record:
        # Recording mode: persist hyper-parameters and model checkpoints.
        self.writer_path = iu.join(self.log_path, 'gid={}'.format(self.gid))
        self.param_file = iu.join(self.writer_path, 'model.ckpt')
        self.hyper_file = iu.join(self.writer_path, 'hyper')
        iu.mkdir(self.writer_path)
        iu.dump_json(self.hyper_file, args)
    self.history = []
    self.writer_step = 0
    self.ppp(args)
def __init__(self, args: dict):
    """Consume *args* (destructively, via ``pop``) to configure one training run.

    Keys popped here are removed from *args*; whatever remains is handed to the
    model as ``self.model_args``. A full copy is kept only for logging.
    NOTE(review): the pop order below is load-bearing — do not reorder.
    """
    full_args = args.copy()
    if args.get(K.lg, None) is not None:
        # A log path was supplied: set up file logging and a writer directory.
        log_path = args.pop(K.lg)
        entries = [(k, v) for k, v in args.items() if v is not None]
        log_name = au.entries2name(entries, exclude={K.gi, K.gp, K.lg},
                                   postfix='.txt')
        self.logger = lu.get_logger(str(iu.Path(log_path) / log_name))
        self.writer_path = str(
            iu.Path(log_path) / 'gid={}'.format(args.pop(K.gid)))
        self.param_file = str(iu.Path(self.writer_path) / 'model_param')
        iu.mkdir(self.writer_path)
    else:
        # No log path: run without logger/checkpointing.
        # NOTE(review): K.gid is only popped in the branch above, so without a
        # log path it stays in *args* and leaks into self.model_args — confirm
        # whether that is intended.
        self.logger = self.writer_path = self.param_file = None
    gpu_id, gpu_frac = args.pop(K.gi), args.pop(K.gp)
    self.data_name, self.model_name = args.pop(K.dn), args.pop(K.vs)
    self.epoch_num, self.early_stop = args.pop(K.ep), args.pop(K.es)
    self.is_full_data = args.pop(K.fda)
    self.model_cls = name2m_class[self.model_name]
    # Everything not popped above is treated as model hyper-parameters.
    self.model_args = args
    self.save_model_params = False
    self.data = self.model = None
    self.train_size = self.valid_size = self.test_size = None
    self.brk_cnt = 0
    self.best_valid = None
    self.ppp(iu.dumps(full_args))
    self.ppp(
        iu.dumps({
            'writer_path': self.writer_path,
            'param_file': self.param_file
        }))
    self.sess = get_session(gpu_id, gpu_frac, Nodes.is_1702())
def main(self):
    """Aggregate per-run log files under the log path into score tables.

    For each ``gid*.txt`` log: parse the JSON score lines, average the last
    three epochs that carry test metrics, then assemble one row per run and
    print/export per-model summaries.
    """
    log_path = self.get_log_path()
    print('log path:', log_path)
    log_files = iu.list_children(log_path, pattern=r'^gid.+\.txt$',
                                 full_path=True)
    best_list = list()
    for file in log_files:
        # Hyper-parameters are encoded in the file name itself.
        entries = au.name2entries(name=iu.get_name(file), postfix='.txt',
                                  exclude=self.exclude)
        # Score lines are JSON objects containing validation NDCG.
        scores = [
            iu.loads(l) for l in iu.read_lines(file)
            if (l.startswith('{') and 'v_NDCG' in l)
        ]
        scores_with_test = [s for s in scores if 't_NDCG' in s]
        if len(scores) == 0 or len(scores_with_test) == 0:
            print(au.entries2name(entries), 'lacks test info')
            continue
        # Average the last (up to) 3 epochs that include test metrics.
        best_scores = scores_with_test[-3:]
        name2score = pd.DataFrame()
        for idx, rvs2scores in enumerate(best_scores):
            rvs2scores.pop('brk_cnt')
            for title, value in rvs2scores.items():
                name2score.loc[idx, title] = value
        # for rvs, score in rvs2scores.items():
        #     for name, value in score.items():
        #         title = '{}_{}'.format(rvs[0], name)
        name2score = name2score.mean(axis=0).round(4)
        name2score['ep'] = len(scores)  # epochs seen = number of score lines
        best_list.append((dict(entries), name2score.to_dict()))
    # One row per run: hyper-parameters + averaged scores.
    table = pd.DataFrame()
    for i, (name2param, name2score) in enumerate(best_list):
        for k, v in list(name2param.items()) + list(name2score.items()):
            table.loc[i, k] = v
    table.fillna('-', inplace=True)
    # Sort runs by the sum of the three test metrics, then drop helper columns.
    temp = 'mmm'
    pre = 't'
    table[temp] = table['%s_NDCG' % pre] + table['%s_MAP' % pre] + table[
        '%s_MRR' % pre]
    table = table.sort_values(by=temp)
    table.drop([temp, K.lr, K.reg], axis=1, inplace=True)
    # table = table.query('dpt=="1"')
    if self.args.s:
        table.to_csv(iu.join(log_path, 'summary.csv'))
    # print(table.columns)
    # print(table)
    # group_col = [K.dn, K.mix, K.act, K.dpt]
    for value, df in table.groupby(K.vs):
        # Per-model-version summary: mean scores per dataset, dumped to CSV.
        df.pop(K.ep)
        print(value)
        print(df)
        mean = df.groupby(K.dn).mean()
        print(mean)
        mean.to_csv('%s.csv' % value)
    return
    # NOTE(review): everything below this `return` is unreachable dead code —
    # presumably an older summary path kept for reference. Confirm and delete,
    # or move above the return if it is still wanted.
    group_col = [K.dn]
    grouped = table.groupby(group_col)
    kv_df_list = list()
    summ = pd.DataFrame()
    import numpy as np
    for idx, (values, table) in enumerate(grouped):
        # print(list(zip(group_col, values)))
        kv = dict(zip(group_col, values))
        kv['final'] = np.mean(table['v_NDCG'] + table['v_MAP'] +
                              table['v_MRR']) / 3
        kv['final'] = kv['final'].round(3)
        kv_df_list.append([kv, table])
        columns = [
            '%s_%s' % (a, b) for a in ['v', 't']
            for b in ['NDCG', 'MAP', 'MRR']
        ]
        s = table[columns].mean(0)
        print(dict(s))
        # print(s.index)
        # print(s[s.index])
        # print(list(s.data))
        # summ.loc[idx, 'data'] = values
        # summ.loc[idx, columns] = list(s.data)
        # NOTE(review): DataFrame.append returns a new frame; the result is
        # discarded here, so `summ` stays empty (and .append is removed in
        # pandas >= 2.0 — would need `summ = pd.concat(...)` if revived).
        summ.append(dict(s), ignore_index=True)
        # print(table, '\n')
    print(summ)