Example #1
# Popen comes from the standard-library subprocess module; au, gi_, gp_,
# gid_, and V are module-level names defined elsewhere in this codebase.
def run_on_gpu(device_id, od, device2max, max2frac, cmd_pre):
    # record the GPU id and memory fraction chosen for this run
    od.update({gi_: device_id, gp_: max2frac[device2max[device_id]]})
    # turn each option into a command-line flag, adding a leading '-' where missing
    entries = [(k if k.startswith('-') else '-' + k, v) for k, v in od.items()]
    command = cmd_pre + au.entries2name(entries, inter=' ', inner=' ')
    # inherit the parent's stdio when only one process runs in total;
    # otherwise redirect through V (defined elsewhere in this module)
    v = None if sum(device2max.values()) == 1 else V
    Popen(command, cwd='./', shell=True, stdin=v, stdout=v,
          stderr=None).communicate()
    return device_id, od[gid_]
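Example #1 formats an option dict into command-line flags and launches one training process per GPU. The standalone sketch below shows the same pattern using only the standard library; entries_to_args, the gi key, and the 'python train.py' prefix are illustrative stand-ins for au.entries2name and the module-level constants, not the project's actual helpers.

import subprocess

def entries_to_args(entries, inner=' ', inter=' '):
    # join each (flag, value) pair with `inner`, then join the pairs with `inter`
    return inter.join(f'{k}{inner}{v}' for k, v in entries)

def run_command_on_gpu(device_id, params, cmd_pre='python train.py '):
    params = dict(params, gi=device_id)  # hypothetical key recording the GPU id
    entries = [(k if str(k).startswith('-') else '-' + str(k), v)
               for k, v in params.items()]
    command = cmd_pre + entries_to_args(entries)
    # communicate() blocks until the child process exits
    subprocess.Popen(command, cwd='./', shell=True).communicate()
    return device_id

# run_command_on_gpu(0, {'ep': 10, 'bs': 64})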
Example #2
def update_od_list(od_list, log_path, shuffle):
    # assign a grid id and the shared log path to every option dict
    for i, od in enumerate(od_list):
        od[gid_] = i
        od[lg_] = log_path
    if shuffle:
        od_list = au.shuffle(od_list)
    # preview the first few configurations
    for i, od in enumerate(od_list):
        if i <= 10:
            print(au.entries2name(od, inner='=', inter=' '))
    return od_list
Example #3
    def __init__(self, args: dict):
        # unpack run-time settings and model hyper-parameters
        self.args = args
        self.gid = args[C.gid]
        self.gpu_id = args[C.gi]
        self.gpu_frac = args[C.gp]
        self.epoch_num = args[C.ep]
        self.batch_size = args[C.bs]
        self.neg_size = args[C.ns_]
        self.data_name = args[C.dn]
        self.model_name = args[C.vs]

        self.w_init = args[C.wini_]
        self.c_init = args[C.cini_]
        self.scale = args[C.sc]
        self.c_num = args[C.cn_]

        self.log_path = args[C.lg]
        # name the log file after every non-None argument, excluding
        # run-time-only keys (gpu id, gpu fraction, log path)
        entries = [(k, v) for k, v in args.items() if v is not None]
        log_name = au.entries2name(entries,
                                   exclude={C.gi, C.gp, C.lg},
                                   postfix='.txt')
        self.log_file = iu.join(self.log_path, log_name)
        self.logger = lu.get_logger(self.log_file)

        # self.is_record = Nodes.is_1702()
        self.is_record = False  # checkpoint / hyper-parameter dumping disabled
        if self.is_record:
            self.writer_path = iu.join(self.log_path,
                                       'gid={}'.format(self.gid))
            self.param_file = iu.join(self.writer_path, 'model.ckpt')
            self.hyper_file = iu.join(self.writer_path, 'hyper')
            iu.mkdir(self.writer_path)
            iu.dump_json(self.hyper_file, args)

        self.history = list()
        self.writer_step = 0
        self.ppp(args)
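Example #3 derives a log-file name from the non-None arguments and attaches a logger to it via au.entries2name, iu.join, and lu.get_logger. A minimal standard-library sketch of that setup, assuming entries2name joins 'key=value' pairs with underscores (an assumption based on how it is called here; the key names are also placeholders):

import logging
import os

def entries_to_name(entries, exclude=(), postfix=''):
    # assumed behavior of au.entries2name: 'key=value' pairs joined by '_'
    return '_'.join(f'{k}={v}' for k, v in entries if k not in exclude) + postfix

def make_logger(log_path, args, exclude=('gi', 'gp', 'lg')):
    entries = [(k, v) for k, v in args.items() if v is not None]
    log_file = os.path.join(log_path, entries_to_name(entries, exclude, '.txt'))
    logger = logging.getLogger(log_file)
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.FileHandler(log_file))
    return logger

# logger = make_logger('./log', {'dn': 'data1', 'bs': 64, 'lr': None})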
Example #4
    def __init__(self, args: dict):
        full_args = args.copy()
        # set up file logging only when a log path is provided
        if args.get(K.lg, None) is not None:
            log_path = args.pop(K.lg)
            entries = [(k, v) for k, v in args.items() if v is not None]
            log_name = au.entries2name(entries,
                                       exclude={K.gi, K.gp, K.lg},
                                       postfix='.txt')
            self.logger = lu.get_logger(str(iu.Path(log_path) / log_name))
            self.writer_path = str(
                iu.Path(log_path) / 'gid={}'.format(args.pop(K.gid)))
            self.param_file = str(iu.Path(self.writer_path) / 'model_param')
            iu.mkdir(self.writer_path)
        else:
            self.logger = self.writer_path = self.param_file = None

        # pop run-time arguments; whatever remains in args is forwarded to the model
        gpu_id, gpu_frac = args.pop(K.gi), args.pop(K.gp)
        self.data_name, self.model_name = args.pop(K.dn), args.pop(K.vs)
        self.epoch_num, self.early_stop = args.pop(K.ep), args.pop(K.es)
        self.is_full_data = args.pop(K.fda)
        self.model_cls = name2m_class[self.model_name]
        self.model_args = args

        self.save_model_params = False
        self.data = self.model = None
        self.train_size = self.valid_size = self.test_size = None
        self.brk_cnt = 0
        self.best_valid = None

        self.ppp(iu.dumps(full_args))
        self.ppp(
            iu.dumps({
                'writer_path': self.writer_path,
                'param_file': self.param_file
            }))
        self.sess = get_session(gpu_id, gpu_frac, Nodes.is_1702())
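get_session itself is not shown in these examples. Given that it receives a GPU id and a memory fraction, a TF1-era implementation would plausibly look like the sketch below; this is an assumption about the helper, not its actual source.

import os
import tensorflow as tf

def get_session_sketch(gpu_id, gpu_frac):
    # restrict the process to a single physical GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    # cap this process's GPU memory at the requested fraction
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_frac)
    config = tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True)
    return tf.Session(config=config)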
Example #5
    # pd is pandas; au / iu are project utility modules; K holds the argument-key constants
    def main(self):
        log_path = self.get_log_path()
        print('log path:', log_path)
        log_files = iu.list_children(log_path,
                                     pattern=r'^gid.+\.txt$',
                                     full_path=True)
        best_list = list()
        for file in log_files:
            entries = au.name2entries(name=iu.get_name(file),
                                      postfix='.txt',
                                      exclude=self.exclude)
            # one JSON line per epoch; keep those that report validation NDCG
            scores = [
                iu.loads(line) for line in iu.read_lines(file)
                if line.startswith('{') and 'v_NDCG' in line
            ]
            scores_with_test = [s for s in scores if 't_NDCG' in s]
            if len(scores) == 0 or len(scores_with_test) == 0:
                print(au.entries2name(entries), 'lacks test info')
                continue
            best_scores = scores_with_test[-3:]
            name2score = pd.DataFrame()
            for idx, rvs2scores in enumerate(best_scores):
                rvs2scores.pop('brk_cnt')
                for title, value in rvs2scores.items():
                    name2score.loc[idx, title] = value
                # for rvs, score in rvs2scores.items():
                #     for name, value in score.items():
                #         title = '{}_{}'.format(rvs[0], name)
            name2score = name2score.mean(axis=0).round(4)
            name2score['ep'] = len(scores)
            best_list.append((dict(entries), name2score.to_dict()))

        table = pd.DataFrame()
        for i, (name2param, name2score) in enumerate(best_list):
            for k, v in list(name2param.items()) + list(name2score.items()):
                table.loc[i, k] = v
        table.fillna('-', inplace=True)
        # rank runs by the sum of the three test-set metrics
        temp = 'mmm'
        pre = 't'
        table[temp] = (table['%s_NDCG' % pre] + table['%s_MAP' % pre] +
                       table['%s_MRR' % pre])
        table = table.sort_values(by=temp)
        table.drop([temp, K.lr, K.reg], axis=1, inplace=True)
        # table = table.query('dpt=="1"')
        if self.args.s:
            table.to_csv(iu.join(log_path, 'summary.csv'))

        # print(table.columns)
        # print(table)
        # group_col = [K.dn, K.mix, K.act, K.dpt]

        for value, df in table.groupby(K.vs):
            df.pop(K.ep)
            print(value)
            print(df)
            mean = df.groupby(K.dn).mean()
            print(mean)
            mean.to_csv('%s.csv' % value)
        return

        # NOTE: the early return above makes everything below unreachable
        group_col = [K.dn]
        grouped = table.groupby(group_col)
        kv_df_list = list()
        summ = pd.DataFrame()
        import numpy as np
        for idx, (values, table) in enumerate(grouped):
            # print(list(zip(group_col, values)))
            kv = dict(zip(group_col, values))
            kv['final'] = np.mean(table['v_NDCG'] + table['v_MAP'] +
                                  table['v_MRR']) / 3
            kv['final'] = kv['final'].round(3)
            kv_df_list.append([kv, table])
            columns = [
                '%s_%s' % (a, b) for a in ['v', 't']
                for b in ['NDCG', 'MAP', 'MRR']
            ]
            s = table[columns].mean(0)
            print(dict(s))
            # print(s.index)
            # print(s[s.index])
            # print(list(s.data))
            # summ.loc[idx, 'data'] = values
            # summ.loc[idx, columns] = list(s.data)
            # DataFrame.append returns a new frame, so reassign to keep the row
            summ = summ.append(dict(s), ignore_index=True)
            # print(table, '\n')
        print(summ)
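The summary loop above groups runs by model version (K.vs) and averages the metrics per dataset (K.dn). A compact, self-contained sketch of that aggregation, with plain string keys in place of the K constants and made-up metric values purely for illustration:

import pandas as pd

runs = [
    {'vs': 'modelA', 'dn': 'data1', 'v_NDCG': 0.31, 't_NDCG': 0.29},
    {'vs': 'modelA', 'dn': 'data2', 'v_NDCG': 0.35, 't_NDCG': 0.33},
    {'vs': 'modelB', 'dn': 'data1', 'v_NDCG': 0.28, 't_NDCG': 0.27},
]
table = pd.DataFrame(runs)
for version, df in table.groupby('vs'):  # one summary block per model version
    print(version)
    print(df.groupby('dn').mean(numeric_only=True).round(4))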