Example #1
    def _gen_doc_file(filepath):
        doc = {"filepath": filepath, "doc": ""}

        # Creates file and writes json content
        dump_json(filepath, doc)

        return None
Example #2
def main(args):
    stats = {}

    for config_dir in tqdm(sorted(glob(join(args.data_root, '*'))),
                           desc="Camera-light configurations"):
        id_ = basename(config_dir)

        stats[id_] = {
            'alpha': join(config_dir, 'alpha.png'),
            'cam': join(config_dir, 'cam.json'),
            'cvis': join(config_dir, 'cvis.png'),
            'diffuse': join(config_dir, 'diffuse.png'),
            'light': join(config_dir, 'light.json'),
            'lvis': join(config_dir, 'lvis.png'),
            'nn': join(config_dir, 'nn.json'),
            'rgb': join(config_dir, 'rgb.png'),
            'rgb_camspc': join(config_dir, 'rgb_camspc.png'),
            'uv2cam': join(config_dir, 'uv2cam.npy')
        }

        # Check existence
        all_exist = True
        for _, v in stats[id_].items():
            all_exist = all_exist and exists(v)
        stats[id_]['complete'] = all_exist

        # Make the paths relative, to reduce the file size and make it
        # root-independent
        for k, v in stats[id_].items():
            if k != 'complete':
                stats[id_][k] = relpath(v, args.data_root)

    dump_json(stats, args.out_json)
Example #3
def gen_file_list(args):
    filelist = {}

    for config_dir in tqdm(xm.os.sortglob(args.data_root, '*'),
                           desc="Generating file list"):
        id_ = basename(config_dir)

        filelist[id_] = {
            'cam': join(config_dir, 'cam.json'),
            'cvis': join(config_dir, 'cvis.png'),
            'diffuse': join(config_dir, 'diffuse.png'),
            'light': join(config_dir, 'light.json'),
            'lvis': join(config_dir, 'lvis.png'),
            'nn': join(config_dir, 'nn.json'),
            'uv2cam': join(config_dir, 'uv2cam.npy')
        }

        if id_.startswith('trainvali_'):
            filelist[id_]['alpha'] = join(config_dir, 'alpha.png')
            filelist[id_]['rgb'] = join(config_dir, 'rgb.png')
            filelist[id_]['rgb_camspc'] = join(config_dir, 'rgb_camspc.png')

        # Check existence
        all_exist = True
        for _, v in filelist[id_].items():
            all_exist = all_exist and exists(v)
        filelist[id_]['complete'] = all_exist

        # Make the paths relative, to reduce the file size and make it
        # root-independent
        for k, v in filelist[id_].items():
            if k != 'complete':
                filelist[id_][k] = relpath(v, args.data_root)

    dump_json(filelist, args.out_json)
Example #4
    def generate_subtree(self):
        if not self.exists:
            try:
                self.parse_wiki_table()

                self.subtree["root"] = self.raw_subtree["root"]

                # self.subtree["origins"] = parse_origin_dates(
                #     self.raw_subtree["cultural origins"][0])

                self.subtree["parents"] = filter_lists(
                    set(self.raw_subtree["stylistic origins"]))

                self.subtree["children"] = filter_lists(
                    self.raw_subtree["fusion genres"] +
                    self.raw_subtree["subgenres"] +
                    self.raw_subtree["derivative forms"])

                self.subtree["instruments"] = filter_lists(
                    set(self.raw_subtree["typical instruments"]))

                dump_json(file_path=self.file_path, data=self.subtree)

            except IndexError:
                print("Could not parse", self.endpoint)
                self.subtree["children"] = []
Example #5
    def _gen_hist_file(filepath):
        hist = {"filepath": filepath, "history": []}

        # Creates file and writes json content
        dump_json(filepath, hist)

        return None
Example #6
    def dump_dag(self):

        dump_json(
            DATA_PATH + "data.json", {
                "num_genres": len(self.get_nodes()),
                "edges": self.get_edges(),
                "nodes": self.get_nodes(),
                "subgenres": self.subgenres
            })
Example #7
 def calc(self, codes, use_cache=True):
     code_list = codes.split(',')
     if len(codes) < 5 or len(code_list) < 1:
         return {'code': {'error': 'codes is error:' + codes}}
     dts = datetime.datetime.now().strftime('%Y%m%d')
     cachefile = '.cache/' + codes + '-' + dts + '.json'
     if use_cache is True:
         try:
             redata = util.load_json(cachefile)
             alog.info('hit cache:' + codes + ' cache file:' + cachefile,
                       'good_price', 'calc')
             return redata
         except:
             pass
     self.chinabond10 = DataGether.chinabond10()
     self.market_pe = DataGether.szpe()
     market_data = {
         CN_chinabond10: "%.3f%%" % (self.chinabond10 * 100),
         CN_szpe: str(self.market_pe)
     }
     alog.info(
         'market_pe:' + str(self.market_pe) + ' chinabond10:' +
         str(self.chinabond10), 'good_price', 'calc')
     redata = {}
     for code in code_list:
         symbol = util.code2symbal(code)
         alog.info(symbol + '...', 'good_price', 'calc')
         try:
             ttm_data = DataGether.quote_ttm(symbol)
             itemdata = {}
             itemdata['a'] = ttm_data
             itemdata['b'] = market_data
             itemdata['c'] = self.calc_good_price(symbol, ttm_data)
             redata[symbol] = itemdata
         except Exception as e1:
             print("[req_quote] error:" + str(e1))
             itemdata = {}
             itemdata['a'] = {CN_symbol: symbol}
             itemdata['b'] = market_data
             itemdata['c'] = {}
             redata[symbol] = itemdata
             continue
         time.sleep(2)
     util.dump_json(cachefile, redata)
     return redata
Example #8
    def _gen_conf_file(filepath):
        conf = {
            "filepath": filepath,
            "default_tabs": ["Saved", "History", "Documentation"],
            "user_defined_tabs": []
        }

        # Creates file and writes json content
        dump_json(filepath, conf)

        return None
Example #9
def main():
    flags = initialize()
    logging.debug(f'Loading from {flags.in_path}')
    a = np.load(flags.in_path, allow_pickle=True)
    all_results_3d = {}
    for image_path, coords3d_pred in zip(a['image_path'], a['coords3d_pred_world']):
        image_path = image_path.decode('utf8')
        all_results_3d.setdefault(
            image_path, []).append(coords3d_pred.tolist())

    logging.info(f'Writing to file {flags.out_path}')
    util.dump_json(all_results_3d, flags.out_path)
Example #10
def fox_etf_history_all(all_filename, outpath, fox_type='new'):
    jsdata = util.load_json(all_filename)
    cnt = 0
    timestamp = str(int(time.time()*1000))
    for item in jsdata:
        fn = outpath+item['symbol']+'_sina.json'
        if fox_type == 'new' and os.path.exists(fn) is True:
            continue
        dlist = sina.history_etf(item['code'], timestamp=timestamp)
        util.dump_json(fn, dlist)
        cnt += 1
        if cnt % 5 == 0:
            time.sleep(10)
Example #11
def generate_poseviz_gt(i_subject, activity_name, camera_id):
    camera_names = ['54138969', '55011271', '58860488', '60457274']
    camera_name = camera_names[camera_id]
    data, camera = get_examples(i_subject,
                                activity_name,
                                camera_id,
                                frame_step=1,
                                correct_S9=True)

    results = []
    examples = []
    for image_relpath, world_coords, bbox in data:
        results.append({
            'gt_poses': [world_coords.tolist()],
            'camera_intrinsics': camera.intrinsic_matrix.tolist(),
            'camera_extrinsics': camera.get_extrinsic_matrix().tolist(),
            'image_path': image_relpath,
            'bboxes': [bbox.tolist()]
        })
        ex = ps3d.Pose3DExample(image_relpath,
                                world_coords,
                                bbox,
                                camera,
                                activity_name=activity_name)
        examples.append(ex)

    joint_names = ('rhip,rkne,rank,lhip,lkne,lank,tors,neck,head,htop,'
                   'lsho,lelb,lwri,rsho,relb,rwri,pelv'.split(','))
    edges = ('htop-head-neck-lsho-lelb-lwri,neck-rsho-relb-rwri,'
             'neck-tors-pelv-lhip-lkne-lank,pelv-rhip-rkne-rank')
    joint_info = ps3d.JointInfo(joint_names, edges)
    ds = ps3d.Pose3DDataset(joint_info, test_examples=examples)
    util.dump_pickle(
        ds,
        f'{paths.DATA_ROOT}/h36m/poseviz/S{i_subject}_{activity_name}_{camera_name}.pkl'
    )

    output = {}
    output['joint_names'] = joint_info.names
    output['stick_figure_edges'] = joint_info.stick_figure_edges
    output['world_up'] = camera.world_up.tolist()
    output['frame_infos'] = results
    util.dump_json(
        output,
        f'{paths.DATA_ROOT}/h36m/poseviz/S{i_subject}_{activity_name}_{camera_name}.json'
    )
Example #12
def login_user(handler,
               union_id=None,
               corp_open_id=None,
               person_id=None,
               open_id=None):
    profile = {}
    if union_id:
        profile['authorized'] = yield handler.fetchone_db(
            "select * from t_person where auth_id=%s", union_id)

    if corp_open_id:
        profile['authorized'] = yield handler.fetchone_db(
            "select * from t_person where corp_open_id=%s", corp_open_id)
        logging.info('corp_open_id :%s' % corp_open_id)

    if person_id:
        profile['authorized'] = yield handler.fetchone_db(
            "select * from t_person where person_id=%s", person_id)

    if open_id:
        profile['authorized'] = yield handler.fetchone_db(
            "select * from t_person where open_id=%s", open_id)

    if profile.get('authorized'):
        person_id = profile['authorized']['person_id']
        logging.info("login_user:%s" % person_id)
        profile['member'] = yield handler.query_db(
            """(select t.*, c.code_name as member_type_name
                from t_member as t
                left join t_codes as c on c.code_id = t.member_type
                where t.person_id=%s)
               UNION
               (select t.*, c.code_name as member_type_name
                from t_org_person as op
                left join t_member as t on op.org_id = t.org_id
                left join t_codes as c on c.code_id = t.member_type
                where op.is_primary = 1
                and op.person_id = %s)""",
            person_id, person_id)
        profile['person'] = yield handler.fetchone_db(
            "select a.* from t_person as a where a.person_id=%s", person_id)
        profile['org'] = yield handler.fetchone_db(
            "select a.* from t_org as a, t_person as b, t_org_person as c where b.person_id=%s and c.person_id = b.person_id and c.org_id = a.org_id order by c.updated DESC",
            person_id)
        profile['roles'] = yield handler.query_db(
            "select a.*, b.code_name as role_name from t_auth_role as a, t_codes as b where a.person_id=%s and b.code_id = a.role_id and b.code_type = 'mgr_role'",
            person_id)

        handler.set_cache("user-profile-" + person_id, dump_json(profile),
                          86400 * 300)
        handler.set_secure_cookie("user", person_id, expires_days=300)
        handler.session.set("user-section", person_id)

        handler.current_user_profile = profile
        raise Return((True, "登录成功"))  # "login successful"
    else:
        logout_user(handler)
        raise Return((False, "登录失败"))  # "login failed"
Example #13
    def refresh_user_profile(self):
        user_id = self.current_user
        handler = self
        if not user_id or handler.get_cache(
                "user-profile-refreshed-%s" % user_id):
            return

        profile = {}
        profile['authorized'] = yield handler.fetchone_db(
            "select * from t_person where auth_id=%s", user_id)
        profile['member'] = yield handler.fetchone_db(
            "select t.* , c.code_name as member_type_name from t_member as t left join t_codes as c on c.code_id = t.member_type  where t.auth_id=%s",
            user_id)
        profile['person'] = yield handler.fetchone_db(
            "select a.* from t_person as a where a.auth_id=%s", user_id)
        profile['org'] = yield handler.fetchone_db(
            "select a.* from t_org as a, t_person as b, t_org_person as c where b.auth_id=%s and c.person_id = b.person_id and c.org_id = a.org_id",
            user_id)
        profile['roles'] = yield handler.query_db(
            "select a.* from t_auth_role as a where a.auth_id=%s", user_id)

        handler.set_cache("user-profile-" + user_id, dump_json(profile),
                          86400 * 300)
        handler.set_secure_cookie("user", user_id, expires_days=300)
        handler.set_cache("user-profile-refreshed-%s" % user_id, '1', 60)
Example #14
    def train_1fold(self, fold, params, params_custom):
        X_train, X_valid, y_train, y_valid, X_test, vdx, tdx = self.get_fold_data(fold)

        if fold == 0:
            X_train.dtypes.to_csv(self.models_path + "/dtypes.csv")
            logger.info(f"X_train.shape = {X_train.shape}")

        params2 = copy.deepcopy(params)
        if params2["random_state"] is not None:
            params2["random_state"] = params2["random_state"] + fold
            logger.info(f"Set {self.model_type} train random_state = {params2['random_state']}")

        model = self.model_class(**params2)
        model.fit(X_train, y_train)

        joblib.dump(model, self.models_path + f'/model-{self.model_type}-f{fold:02d}.joblib')
        # save the coefficient metadata as JSON alongside the joblib model dump
        util.dump_json({
            "coef": list(model.coef_[0]),
            "intercept": model.intercept_[0],
            "coef_name": list(X_train.columns)
            },
            self.models_path + f'/model-{self.model_type}-f{fold:02d}.json'
            )

        preds_valid = self.predict_proba(model, X_valid)
        self.preds_valid_all.loc[vdx, "pred"] = preds_valid

        preds_train = self.predict_proba(model, X_train)
        self.preds_train_all.append(pd.DataFrame({fold:preds_train}, index=tdx))

        preds_test = self.predict_proba(model, X_test)
        self.preds_test_all.append(preds_test)

        acc_valid = accuracy_score(y_valid, np.round(preds_valid))
        acc_train = accuracy_score(y_train, np.round(preds_train))
        logloss_valid = log_loss(y_valid, preds_valid)
        logloss_train = log_loss(y_train, preds_train)

        ms = [fold, acc_train, acc_valid, logloss_train, logloss_valid, None]
        self.mets.append(ms)
        show_mets(*ms)

        imp = pd.Series(model.coef_[0], index=X_train.columns)
        imp.name = fold
        imp.index.name = "feature"
        self.importance["coef_abs"].append(imp.abs())
        self.importance["coef"].append(imp)
Example #15
 def __init__(self,value):
     JsonItem.__init__(self)
     self.setEditable(True)
     JsonItem.setData(
         self,
         dump_json(value),
         QtCore.Qt.DisplayRole
     )
Example #16
 def on_save():
     json_object = model.to_json()
     filename = get_file(WRITE)
     if filename is None:
         return
     with open(filename,'w') as f:
         f.write(dump_json(json_object))
     update_filename_label(filename)
Example #17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--in-path', type=str, required=True)
    parser.add_argument('--out-path', type=str, default=None)
    options.initialize(parser)

    if FLAGS.out_path is None:
        FLAGS.out_path = FLAGS.in_path.replace('.npz', '.json')

    logging.debug(f'Loading from {FLAGS.in_path}')
    a = np.load(FLAGS.in_path, allow_pickle=True)
    all_results_3d = collections.defaultdict(list)
    for image_path, coords3d_pred in zip(a['image_path'],
                                         a['coords3d_pred_world']):
        all_results_3d[image_path.decode('utf8')].append(
            coords3d_pred.tolist())
    logging.info(f'Writing to file {FLAGS.out_path}')
    util.dump_json(all_results_3d, FLAGS.out_path)
Example #18
    def do_task(self, func, queue, args):
        key = 'task-working-' + genenate_file_key(dump_json(args))
        if self.get_cache(key):
            raise tornado.web.gen.Return(None)

        res = None
        try:
            self.set_cache(key, args, 60)
            res = yield tornado.gen.Task(func, queue=queue, args=args)
            self.set_cache(key, '', 1)
        except:
            self.set_cache(key, '', 1)
            raise tornado.web.gen.Return(None)

        raise tornado.web.gen.Return(res)
Example #19
 def log_op(self, op_type, op_id, op_content):
     '''
     This code path is unused; it records back-office operation logs.
     :param op_type:
     :param op_id:
     :param op_content:
     :return:
     '''
     mgr_id = self.current_mgr
     yield self.insert_db(
         "insert into ac_mgr_log (mgr_id, op_type, op_id, op_content, created) values (%s, %s, %s, %s, now()) ",
         mgr_id,
         op_type,
         op_id,
         dump_json(op_content)
         if not isinstance(op_content, str) else op_content,
     )
Example #20
 def response_as_json(self, res):
     self.set_header("Content-Type", 'text/html; charset="utf-8"')
     self.write(dump_json(res))
     self.finish()
Example #21
 def set_cache_obj(self, key, value, timeout=60):
     self.set_cache(key, dump_json(value), timeout)
Example #22
 def calc(self, codes, use_cache=True, youji_level=1, user=''):
     alog.debug(codes, 'good_price::calc')
     code_list = codes.split(',')
     if len(codes) < 5 or len(code_list) < 1:
         return {'code': {'error': 'codes is error:' + codes}}
     if (use_cache is True) and len(code_list) == 1:
         # only hit the cache file directly when a single stock code is queried
         symbol = util.code2symbal(codes)
         self.cachefile = util.cache_filename('goodprice', symbol)
         try:
             redata = util.load_json(self.cachefile)
             self.record_history(user, redata, symbol)
             alog.info(
                 'hit cache 1:' + symbol + ' cache file:' + self.cachefile,
                 'good_price', 'calc')
             return redata
         except:
             pass
     self.chinabond10 = DataGether.chinabond10()
     self.market_pe = DataGether.szpe()
     market_data = {
         CN_chinabond10: "%.3f%%" % (self.chinabond10 * 100),
         CN_szpe: str(self.market_pe)
     }
     alog.info(
         'market_pe:' + str(self.market_pe) + ' chinabond10:' +
         str(self.chinabond10), 'good_price', 'calc')
     redata = {}
     for code in code_list:
         symbol = util.code2symbal(code)
         self.cachefile = util.cache_filename('goodprice', symbol)
         if use_cache is True:
             try:
                 cachedata = util.load_json(self.cachefile)
                 self.record_history(user, cachedata, symbol)
                 redata = dict(redata, **cachedata)
                 alog.info(
                     'hit cache 2:' + symbol + ' cache file:' +
                     self.cachefile, 'good_price', 'calc')
                 continue
             except:
                 pass
         alog.info(symbol + '...', 'good_price', 'calc')
         try:
             ttm_data = DataGether.quote_ttm(symbol)
             itemdata = {}
             itemdata['basic'] = ttm_data
             itemdata['market'] = market_data
             itemdata['goodprice'] = self.calc_good_price(
                 symbol, ttm_data, youji_level)
             redata[symbol] = itemdata
             util.dump_json(self.cachefile, redata)
             self.record_history(user, redata, symbol)
         except Exception as e1:
             print("[req_quote] error:" + str(e1))
             itemdata = {}
             itemdata['basic'] = {CN_symbol: symbol}
             itemdata['market'] = market_data
             itemdata['goodprice'] = {}
             redata[symbol] = itemdata
             #self.record_history(user, redata, symbol)
         time.sleep(2)
     return redata
Example #23
    def dump_list(self):
        # materialize the filter object so it is JSON-serializable
        dump_json(self.file_name,
                  {"genres": list(filter(None, self.genre_list))})
Example #24
def main(args):
    config = configparser.ConfigParser(
        interpolation=configparser.ExtendedInterpolation())
    config.read(args.config)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    outdir = args.outdir
    if not os.path.isdir(outdir):
        # make the output dir
        os.makedirs(outdir)

    # create a logger
    logger = create_logger(__name__,
                           to_disk=True,
                           log_file='{}/{}'.format(outdir, args.logfile))

    device = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device)

    tokenizer = BertTokenizer.from_pretrained('/'.join(
        args.model_checkpoint.split('/')[:-1]))

    test_dataloaders = []
    tasks = []
    ds_names = []
    for ds in args.test_datasets.split(','):

        task_name = ''.join(ds.split('_')[:-1])

        splt = ds.split('_')[-1]
        ds_names.append(ds)
        task = load_task(
            os.path.join(args.task_spec, '{}.yml'.format(task_name)))
        task.task_id = 0
        task.num_labels = 5

        test_data = get_data(task=task,
                             split=splt,
                             config=config,
                             tokenizer=tokenizer)
        task.set_label_map(test_data.label_map)
        tasks.append(task)

        test_data.set_task_id(task.task_id)
        test_dataloader = DataLoader(test_data,
                                     shuffle=False,
                                     batch_size=args.bs,
                                     collate_fn=test_data.collate_fn)

        test_dataloaders.append(test_dataloader)

    padding_label = test_data.padding_label

    model = MTLModel(bert_encoder=None,
                     device='cpu',
                     tasks=tasks,
                     padding_label_idx=padding_label,
                     load_checkpoint=True,
                     checkpoint=args.model_checkpoint,
                     tokenizer=tokenizer)

    for task, dl, ds in zip(tasks, test_dataloaders, ds_names):
        logger.info('Evaluating {} with output layer {}'.format(
            task.dataset, task.task_id))
        results = model.evaluate_on_dev(dl, task)
        test_score, test_report, test_predictions = results['score'], results[
            'results'], results['predictions']
        # dump to file
        logger.info('Dumping results to {}'.format(
            os.path.join(outdir, 'results_{}.json'.format(ds))))
        dump_json(fname=os.path.join(outdir, 'results_{}.json'.format(ds)),
                  data={
                      'f1': test_score,
                      'report': test_report,
                      'predictions': test_predictions
                  })
Example #25
    def train_1fold(self, fold, params, params_custom):
        X_train, X_valid, y_train, y_valid, X_test, vdx, tdx = self.get_fold_data(fold)

        cat_feature_idx = []
        for i, c in enumerate(X_train):
            if not is_numeric_dtype(X_train[c]):
                cat_feature_idx.append(i)

        if fold == 0:
            X_train.dtypes.to_csv(self.models_path + "/dtypes.csv")
            logger.info(f"X_train.shape = {X_train.shape}")

        params2 = copy.deepcopy(params)
        if params2["random_seed"] is not None:
            params2["random_seed"] = params2["random_seed"] + fold
            logger.info(f"Set catboost train random_seed = {params2['random_seed']}")

        model = CatBoostClassifier(**params2)

        model.fit(
            X_train, y_train,
            cat_features=cat_feature_idx,
            eval_set=(X_valid, y_valid)
        )

        model.save_model(self.models_path + f'/model-catboost-f{fold:02d}.bin')
        util.dump_json(model.get_all_params(), self.models_path + "/params.json")

        evals = model.get_evals_result()
        evals_df = pd.DataFrame({
            f"logloss_train_f{fold:02d}":evals["learn"]['Logloss'],
            f"accuracy_train_f{fold:02d}":evals["learn"]['Accuracy'],
            f"logloss_valid_f{fold:02d}":evals['validation']['Logloss'],
            f"accuracy_valid_f{fold:02d}":evals['validation']['Accuracy']
        })
        self.evals_df.append(evals_df)

        preds_valid = model.predict_proba(X_valid)[:,1]
        logger.info(f"len(vdx)={len(vdx)} len(preds_valid)={len(preds_valid)}")
        self.preds_valid_all.loc[vdx, "pred"] = preds_valid

        preds_train = model.predict_proba(X_train)[:,1]
        self.preds_train_all.append(pd.DataFrame({fold:preds_train}, index=tdx))

        preds_test = model.predict_proba(X_test)[:,1]
        self.preds_test_all.append(preds_test)

        acc_valid = accuracy_score(y_valid, np.round(preds_valid))
        acc_train = accuracy_score(y_train, np.round(preds_train))
        logloss_valid = log_loss(y_valid, preds_valid)
        logloss_train = log_loss(y_train, preds_train)

        ms = [fold, acc_train, acc_valid, logloss_train, logloss_valid, model.get_best_iteration()]
        self.mets.append(ms)
        show_mets(*ms)

        for it in ["FeatureImportance"]:
            imp = pd.Series(model.get_feature_importance(type=it), index=X_train.columns)
            imp.name = fold
            imp.index.name = "feature"
            self.importance[it].append(imp)
Example #26
    if args.DO_TEST:
        args.features_path = "../features/test"
        util.trash(args.features_path)
    os.makedirs(args.features_path, exist_ok=False)
else:  # write into the existing directory
    if args.DO_TEST:
        args.features_path = "../features/test"
    os.makedirs(args.features_path, exist_ok=True)

logging_config.init(f"{args.features_path}/log_{timestr}.log")

logger.info(f"features_path = {args.features_path}")

logger.info("args =\n" + pp.pformat(vars(args)))

util.dump_json(vars(args), f"{args.features_path}/args_{timestr}.json")
shutil.copytree("../src", args.features_path + "/src_" + timestr)

mprof_timestamp("basic")


def create_team_agg_features_wrp(merge):
    ma = merge[[
        c for c in merge.columns if re.match("A[1234]-(level|rank-int)", c)
    ]]
    mb = merge[[
        c for c in merge.columns if re.match("B[1234]-(level|rank-int)", c)
    ]]
    ma_agg = create_team_agg_features(ma, "A")
    ma234_agg = create_team_agg_features(ma, "A234")
    mb_agg = create_team_agg_features(mb, "B")
Example #27
    def fit(self, tasks, optimizer, scheduler, gradient_accumulation_steps,
            train_dataloader, dev_dataloaders, test_dataloaders, epochs,
            evaluation_step, save_best, outdir, predict):

        # get lr schedule
        total_steps = (len(train_dataloader) /
                       gradient_accumulation_steps) * epochs

        loss_values = []
        global_step = 0

        best_dev_score = 0
        epoch = -1

        task_specific_forward = {t.task_id: 0 for t in tasks}

        accumulated_steps = 0
        for epoch in range(epochs):
            logger.info('Starting epoch {}'.format(epoch))

            total_loss = 0

            for step, batch in enumerate(train_dataloader):

                self.train()

                # batch to device

                batch = batch_to_device(batch, self.device)

                task_id = batch['task_id'][0]

                # perform forward pass
                output = self(tasks[task_id],
                              input_ids=batch['input_ids'],
                              attention_mask=batch['attention_mask'],
                              labels=batch['labels'])

                # compute loss
                loss = output[0]

                total_loss += loss.item()

                # scale the loss before doing backward pass
                loss = loss / gradient_accumulation_steps

                # perform backward pass
                loss.backward()

                # clip the gradients to a max norm of 1.0
                torch.nn.utils.clip_grad_norm_(self.parameters(), 1.0)
                # (optimizer.step() below performs the actual parameter update;
                # the optimizer dictates the "update rule"--how the parameters
                # are modified based on their gradients, the learning rate, etc.)
                accumulated_steps += 1

                # keep track of the task specific steps
                task_specific_forward[task_id.item()] += 1

                #print(accumulated_steps)
                if accumulated_steps > 0 and accumulated_steps % gradient_accumulation_steps == 0:
                    #logger.info('Performing update after accumulating {} batches'.format(accumulated_steps))
                    # take a step and update the model
                    optimizer.step()

                    # Update the learning rate.
                    scheduler.step()
                    optimizer.zero_grad()
                    global_step += 1

                    accumulated_steps = 0

                    # evaluate on dev
                    if global_step > 0 and global_step % evaluation_step == 0:
                        self.eval()
                        dev_results = self.evaluate_on_dev(
                            data_loader=dev_dataloaders[0], task=tasks[0])
                        logger.info(
                            'Epoch {}, global step {}/{}\ttask_specific forward passes: {}\ttrain loss: {:.5f}\t dev score: {}'
                            .format(epoch, global_step, total_steps,
                                    print_steps(task_specific_forward),
                                    total_loss / step, dev_results['score']))

            # Calculate the average loss over the training data.
            avg_train_loss = total_loss / len(train_dataloader)

            # Store the loss value for plotting the learning curve.
            loss_values.append(avg_train_loss)

            # evaluate on dev after epoch is finished
            self.eval()
            for task in tasks:
                dev_results = self.evaluate_on_dev(
                    data_loader=dev_dataloaders[task.task_id], task=task)
                dev_score = dev_results['score']
                logger.info(
                    'End of epoch {}, global step {}/{}\ttrain loss: {:.5f}\t task {} dev score: {:.5f}\ndev report {}'
                    .format(epoch, global_step, total_steps, total_loss / step,
                            task.task_id, dev_results['score'],
                            json.dumps(dev_results['results'], indent=4)))
                # use task 0 dev score for model selection
                if task.task_id == 0:
                    if dev_score >= best_dev_score:
                        logger.info(
                            'New task 0 dev score {:.5f} > {:.5f}'.format(
                                dev_score, best_dev_score))
                        best_dev_score = dev_score
                        if save_best:
                            #save model
                            logger.info(
                                'Saving model after epoch {} as best model to {}'
                                .format(epoch,
                                        os.path.join(outdir, 'best_model')))
                            self.save(
                                os.path.join(
                                    outdir,
                                    'best_model/model_{}.pt'.format(epoch)),
                                optimizer)

            if predict:
                for task in tasks:
                    logger.info(
                        'Predicting {} test data at end of epoch {}'.format(
                            task.dataset, epoch))
                    self.eval()
                    test_results = self.evaluate_on_dev(
                        data_loader=test_dataloaders[task.task_id], task=task)
                    test_score, test_report, test_predictions = test_results[
                        'score'], test_results['results'], test_results[
                            'predictions']
                    # dump to file
                    dump_json(fname=os.path.join(
                        outdir,
                        'test_preds_{}_{}.json'.format(task.dataset, epoch)),
                              data={
                                  'f1': test_score,
                                  'report': test_report,
                                  'predictions': test_predictions
                              })

        if not save_best:
            # save model
            logger.info('Saving model after epoch {} to {}'.format(
                epoch, os.path.join(outdir, 'model_{}.pt'.format(epoch))))
            self.save(os.path.join(outdir, 'model_{}.pt'.format(epoch)),
                      optimizer)
Example #28
 def get_encoded_data(self, data):
     return dump_json(data)
Example #29
            "b": 20,
            "l": 5,
            "r": 5,
            "t": 40
        },
        "xaxis": {
            "showgrid": False,
            "zeroline": False,
            "showticklabels": False
        },
        "yaxis": {
            "showgrid": False,
            "zeroline": False,
            "showticklabels": False
        },
        "annotations": [{
            "text": "<a href='https://github.com/sabbirahm3d/music-genre-nw'>What is this?</a> | <a href='https://github.com/sabbirahm3d/music-genre-nw'>Repository</a>",
            "showarrow": False,
            "xref": "paper",
            "yref": "paper",
            "x": 0.005,
            "y": -0.002
        }],
    }

    dump_json(
        DATA_PATH + "plot_data.json",
        {"layout": layout,
         "data": [make_edge_trace(), make_node_trace()]}
    )
Example #30
def main(args):
    # Open scene
    xm.blender.scene.open_blend(args.scene)

    # Remove existing cameras and lights, if any
    for o in bpy.data.objects:
        o.select = o.type in ('LAMP', 'CAMERA')
    bpy.ops.object.delete()

    # Load camera and light
    cam = load_json(args.cam_json)
    light = load_json(args.light_json)

    # Add camera and light
    cam_obj = xm.blender.camera.add_camera(xyz=cam['position'],
                                           rot_vec_rad=cam['rotation'],
                                           name=cam['name'],
                                           f=cam['focal_length'],
                                           sensor_width=cam['sensor_width'],
                                           sensor_height=cam['sensor_height'],
                                           clip_start=cam['clip_start'],
                                           clip_end=cam['clip_end'])
    xm.blender.light.add_light_point(xyz=light['position'],
                                     name=light['name'],
                                     size=light['size'])

    # Common rendering settings
    xm.blender.render.easyset(n_samples=args.spp, color_mode='RGB')

    # Image and texture resolution
    imw = args.imh / cam['sensor_height'] * cam['sensor_width']
    imw = safe_cast_to_int(imw)
    xm.blender.render.easyset(h=args.imh, w=imw)

    # Render full RGB
    # TODO: Render in .exr to avoid saturated pixels (and tone mapping)
    rgb_camspc_f = join(args.outdir, 'rgb_camspc.png')
    xm.blender.render.render(rgb_camspc_f)
    rgb_camspc = xm.io.img.load(rgb_camspc_f, as_array=True)[:, :, :3]

    # Render diffuse RGB
    obj = bpy.data.objects['object']
    make_diffuse(obj)
    diffuse_camspc_f = join(args.outdir, 'diffuse_camspc.png')
    xm.blender.render.render(diffuse_camspc_f, obj_names=obj.name)
    diffuse_camspc = xm.io.img.load(diffuse_camspc_f, as_array=True)[:, :, :3]

    # Render alpha
    alpha_f = join(args.outdir, 'alpha.png')
    xm.blender.render.render_alpha(alpha_f, samples=args.spp)
    alpha = xm.io.img.load(alpha_f, as_array=True)
    alpha = xm.img.normalize_uint(alpha)

    # Cast rays through all pixels to the object
    xs, ys = np.meshgrid(range(imw), range(args.imh))
    # (0, 0)
    # +--------> (w, 0)
    # |           x
    # |
    # v y (0, h)
    xys = np.dstack((xs, ys)).reshape(-1, 2)
    ray_tos, x_locs, x_objnames, x_facei, x_normals = \
        xm.blender.camera.backproject_to_3d(
            xys, cam_obj, obj_names=obj.name, world_coords=True)
    intersect = {
        'ray_tos': ray_tos,
        'obj_names': x_objnames,
        'face_i': x_facei,
        'locs': x_locs,
        'normals': x_normals
    }

    # Compute mapping between UV and camera space
    uv2cam, cam2uv = calc_bidir_mapping(obj,
                                        xys,
                                        intersect,
                                        args.uvs,
                                        cached_unwrap=args.cached_uv_unwrap)
    uv2cam = add_b_ch(uv2cam)
    cam2uv = add_b_ch(cam2uv)
    uv2cam[alpha < 1] = 0  # mask out interpolated values that fall outside
    xm.io.img.write_arr(uv2cam, join(args.outdir, 'uv2cam.png'), clip=True)
    xm.io.img.write_arr(cam2uv, join(args.outdir, 'cam2uv.png'), clip=True)
    save_float16_npy(uv2cam[:, :, :2], join(args.outdir, 'uv2cam.npy'))
    save_float16_npy(cam2uv[:, :, :2], join(args.outdir, 'cam2uv.npy'))

    # Compute view and light cosines
    lvis_camspc, cvis_camspc = calc_cosines(cam_obj.location,
                                            light['position'], xys, intersect,
                                            obj.name)
    lvis_camspc = xm.img.denormalize_float(np.clip(lvis_camspc, 0, 1))
    cvis_camspc = xm.img.denormalize_float(np.clip(cvis_camspc, 0, 1))
    xm.io.img.write_img(cvis_camspc, join(args.outdir, 'cvis_camspc.png'))
    xm.io.img.write_img(lvis_camspc, join(args.outdir, 'lvis_camspc.png'))

    # Remap buffers to UV space
    cvis = remap(cvis_camspc, cam2uv)
    lvis = remap(lvis_camspc, cam2uv)
    diffuse = remap(diffuse_camspc, cam2uv)
    rgb = remap(rgb_camspc, cam2uv)
    xm.io.img.write_img(cvis, join(args.outdir, 'cvis.png'))
    xm.io.img.write_img(lvis, join(args.outdir, 'lvis.png'))
    xm.io.img.write_img(diffuse, join(args.outdir, 'diffuse.png'))
    xm.io.img.write_img(rgb, join(args.outdir, 'rgb.png'))
    if args.debug:
        # Remap it backwards to check if we get back the camera-space buffer
        # TODO: UV wrapped images may have seams/holes due to interpolation
        # errors (fixable by better engineering), but this should be fine
        # because the network will learn to eliminate such artifacts in
        # trying to match the camera-space ground truth
        cvis_camspc_repro = remap(cvis, uv2cam)
        lvis_camspc_repro = remap(lvis, uv2cam)
        diffuse_camspc_repro = remap(diffuse, uv2cam)
        rgb_camspc_repro = remap(rgb, uv2cam)
        xm.io.img.write_img(cvis_camspc_repro,
                            join(args.outdir, 'cvis_camspc_repro.png'))
        xm.io.img.write_img(lvis_camspc_repro,
                            join(args.outdir, 'lvis_camspc_repro.png'))
        xm.io.img.write_img(diffuse_camspc_repro,
                            join(args.outdir, 'diffuse_camspc_repro.png'))
        xm.io.img.write_img(rgb_camspc_repro,
                            join(args.outdir, 'rgb_camspc_repro.png'))

    # Dump camera and light
    copyfile(args.cam_json, join(args.outdir, 'cam.json'))
    copyfile(args.light_json, join(args.outdir, 'light.json'))

    # Dump neighbor information
    cam_nn = load_json(args.cam_nn_json)
    light_nn = load_json(args.light_nn_json)
    cam_name = name_from_json_path(args.cam_json)
    light_name = name_from_json_path(args.light_json)
    nn = {'cam': cam_nn[cam_name], 'light': light_nn[light_name]}
    dump_json(nn, join(args.outdir, 'nn.json'))
Example #31
                children += 1
                print("Current queue")
                pprint(self.queued[children:])
                print("Processing child:", children, "of queue length:",
                      len(self.queued))
                print()


if __name__ == '__main__':

    failed = set(read_json(DATA_PATH + "failed.json")["genres"])
    genre_list = set(read_json(DATA_PATH + "genres.json")["genres"]) - failed

    for genre in genre_list:

        try:

            obj = Subgenres(genre, failed)
            obj.get_children_subtrees()
            failed.update(obj.failed)

        except KeyboardInterrupt:

            print("\b\bSCRAPING CANCELED\n")
            break

    print("FAILED QUEUE")
    failed = sorted(list(failed))
    pprint(failed)
    dump_json(DATA_PATH + "failed.json", {"genres": failed})