def epoch_step(self, logs=None):
        """Record one epoch of metrics, persist the history and redraw the curves.

        Args:
            logs: Mapping of metric name to this epoch's value. Each value is
                appended to the per-metric list kept in ``self.H``. ``None``
                (the default) is treated as an empty mapping.

        Side effects:
            * Writes ``self.H`` to ``self.json_path`` when that path is set.
            * On the first recorded epoch, builds ``self.paths`` (one output
              path per metric currently in ``self.H``).
            * From the second epoch on, saves one figure per metric whose
              name contains no underscore.

        Raises:
            KeyError: if ``self.H`` has no ``"loss"`` entry, or a plotted
                metric lacks its ``valid_<name>`` (and, with ``self.add_test``,
                ``test_<name>``) counterpart.
        """
        # A fresh dict per call avoids sharing one mutable default object.
        logs = {} if logs is None else logs

        for k, v in logs.items():
            # Original note: np.float32 values raise an error (presumably when
            # the history is written as JSON), so anything that is not already
            # a Python float is converted and rounded to 4 decimals.
            # `np.float` was just an alias of the builtin `float` and has been
            # removed from NumPy, so the builtin is checked directly.
            if not isinstance(v, float):
                v = round(float(v), 4)
            self.H.setdefault(k, []).append(v)

        # Persist the history to file.
        if self.json_path is not None:
            piop.write_json(self.json_path, self.H)

        # Output paths for the training figures, built on the first epoch.
        if len(self.H["loss"]) == 1:
            self.paths = {
                key: self.file_dir / (self.arch + f'_{key.upper()}')
                for key in self.H.keys()
            }

        if len(self.H["loss"]) > 1:
            # Metric curves. Names without an underscore are plotted as the
            # train series; each must come paired with a `valid_<name>` entry.
            keys = [key for key in self.H if '_' not in key]
            for key in keys:
                N = np.arange(0, len(self.H[key]))
                plt.style.use("ggplot")
                plt.figure()
                plt.plot(N, self.H[key], label=f"train_{key}")
                plt.plot(N, self.H[f"valid_{key}"], label=f"valid_{key}")
                if self.add_test:
                    plt.plot(N, self.H[f"test_{key}"], label=f"test_{key}")
                plt.legend()
                plt.xlabel("Epoch #")
                plt.ylabel(key)
                plt.title(f"Training {key} [Epoch {len(self.H[key])}]")
                plt.savefig(str(self.paths[key]))
                # Close the figure so figures do not accumulate across epochs.
                plt.close()
Пример #2
0
                    fname = os.path.join(segpos_path, file)
                    cate_data = piop.read_json(fname)
                    tmp_duty_list.extend(cate_data['duty'])
                    tmp_require_list.extend(cate_data['require'])
                    tmp_dtimes_list.extend(cate_data['dtimes'])

            tmp_duty = get_common(tmp_duty_list, IGNORE, NEEDPOS)
            tmp_require = filter_require(
                tmp_duty, get_common(tmp_require_list, IGNORE, NEEDPOS))

            tmp[post]['duty'] = get_need_item(tmp_duty)
            tmp[post]['require'] = get_need_item(tmp_require)
            tmp[post]['demand'] = get_dtime_item(tmp_dtimes_list)

            duty_list.extend(tmp_duty_list)
            require_list.extend(tmp_require_list)
            dtimes_list.extend(tmp_dtimes_list)

        duty = get_common(duty_list, IGNORE, NEEDPOS)
        require = filter_require(duty, get_common(require_list, IGNORE,
                                                  NEEDPOS))

        res[cate]['duty'] = get_need_item(duty)
        res[cate]['require'] = get_need_item(require)
        res[cate]['demand'] = get_dtime_item(dtimes_list)
        res[cate]['posts'] = tmp
    piop.write_json(os.path.join(model_path, "model.txt"),
                    res,
                    indent=4,
                    ensure_ascii=False)
Пример #3
0
    return res


def flat_all_cates():
    """Flatten the category tables into a list of 4-tuples.

    Walks the module-level ``cate_gw`` mapping (category -> sub-category ->
    ";"-separated positions) and pairs every position with the entry at the
    same index in the ";"-separated string stored under the same keys in
    ``cate_url``.

    Returns:
        list of ``(category, sub_category, position, title)`` tuples, in
        iteration order of ``cate_gw``.
    """
    flattened = []
    for cate, sub_cate in cate_gw.items():
        for scate, gangwei in sub_cate.items():
            positions = gangwei.split(";")
            titles = cate_url[cate][scate].split(";")
            # Index into titles (rather than zip) so a short title list
            # still raises IndexError.
            flattened.extend(
                (cate, scate, gw, titles[idx])
                for idx, gw in enumerate(positions)
            )
    return flattened

if __name__ == '__main__':
    # Fetch the URL list for every flattened (category, sub-category,
    # position, title) entry and dump each one to its own JSON file.
    for cate, gangwei, zw, title in flat_all_cates():
        urlist = get_urlist(title)
        # Report entries with fewer than 30 pages of results.
        if len(urlist) < 15 * 30:
            print(cate, "\t", gangwei, "\t", zw, "\t", len(urlist))
        url_dict = {
            'position': "_".join((cate, gangwei, zw, title)),
            'urlist': urlist,
        }
        out_fpath = os.path.join(urlist_path, url_dict['position'] + ".txt")
        piop.write_json(out_fpath, url_dict, indent=4, ensure_ascii=False)
    
Пример #4
0
def test_write_json():
    """Write a two-entry dict (one ASCII value, one Chinese value) to
    ``outjson.json`` under ``DATA_PATH`` via ``write_json``."""
    payload = {"outjson1": "this is line 1.", "outjson2": "这是第二行。"}
    target = os.path.join(DATA_PATH, 'outjson.json')
    write_json(target, payload, indent=4, ensure_ascii=False)
Пример #5
0

def segpos(text: str) -> list:
    """Segment *text* and tag each token with its part of speech.

    Calls ``client.lexer`` (presumably the Baidu NLP client, judging by the
    error message; confirm against the module setup) and reads the ``items``
    list of the response.

    Args:
        text: The text to analyse.

    Returns:
        A list of ``(word, pos)`` tuples, one per response item. An empty
        list is returned when the lexer call raises.
    """
    res = []
    try:
        bd_resp = client.lexer(text)
    except Exception as e:
        # Best-effort: report the failure and return the empty result.
        # Falling through would hit an unbound `bd_resp` below.
        print("BaiDu Error:", e)
        return res
    for item in bd_resp['items']:
        res.append((item['item'], item['pos']))
    return res


if __name__ == '__main__':
    # Process every file in extract_path and write its result under the
    # same file name in segpos_path.
    for file in os.listdir(extract_path):
        outname = os.path.join(segpos_path, file)
        # Skip dot-files and inputs whose output already exists.
        if file[0] == '.' or os.path.exists(outname):
            continue
        fname = os.path.join(extract_path, file)
        cate, fduty, frequire, fdtimes = get_cate_res(fname)
        data = {
            'cate': cate,
            'duty': fduty,
            'require': frequire,
            'dtimes': fdtimes,
        }
        piop.write_json(outname, data, indent=4, ensure_ascii=False)