Exemplo n.º 1
0
def batch_recongnize():
    """Page through ``get_region_by_page`` and write all rows to a dated CSV.

    Starts at the module-level ``page`` counter (which is advanced as a side
    effect) and keeps requesting pages until ``page`` exceeds the total page
    count reported by the most recent response. Every returned item becomes
    one CSV row with the columns ``departmentId``, ``city``, ``region``,
    ``department`` and ``road``.
    """
    global page
    totalpage = 1  # replaced by the value reported with the first response
    counter = 0
    rows = []
    while page <= totalpage:
        totalpage, items = get_region_by_page(page)
        print(f"【page={page}】")
        print(f"【totalpage={totalpage}】")
        page += 1
        if not items or not totalpage > 0:
            # Nothing usable on this page; move on to the next one.
            continue
        for entry in items:
            rows.append({
                "departmentId": str(entry['department_id']),
                "city": entry['city'],
                "region": entry['region'],
                "department": entry['title'],
                "road": entry['road'],
            })
            counter += 1
            print(f"【location_str.{counter}={entry['title']}】")

    out_path = f'{time_util.now_to_date("%Y-%m-%d")}_dict_department_region_all.csv'
    header = csv_util.get_head_from_arr(rows)
    # force=True is passed through to create_csv exactly as before.
    csv_util.create_csv(out_path, header, force=True)
    csv_util.append_csv(out_path, rows)
Exemplo n.º 2
0
# Closing parentheses that may terminate the prefix of a road string, and the
# full-width comma.  Written as escapes so the full-width forms survive any
# text normalisation applied to this file.
_CLOSE_PARENS = ")\uff09"
_FULLWIDTH_COMMA = "\uff0c"


def _road_after_paren_prefix(road_str):
    """Return the part of *road_str* after its first closing parenthesis.

    When no closing parenthesis is present the whole string is kept instead
    of raising ``ValueError``.  The result is stripped and full-width commas
    are converted to ASCII commas.

    NOTE(review): the previous code called ``.replace(',', ',')``, which
    replaces a comma with itself; presumably the first argument was a
    full-width comma that got lost -- confirm against the input data.
    """
    cut = next(
        (pos + 1 for pos, ch in enumerate(road_str) if ch in _CLOSE_PARENS),
        0,
    )
    return road_str[cut:].strip().replace(_FULLWIDTH_COMMA, ",")


def extra_region(path='2021-03-29_dict_department_shanghai.csv'):
    """Split the rows of a department CSV into valid and rejected addresses.

    Rows with an empty '小区' value are skipped.  Every other row becomes an
    address dict (``departmentId`` is the row's position in the input) that
    is written to ``dict_department_region.csv`` when ``has_error`` returns
    false for its '小区' value, and to ``dict_department_region_error.csv``
    otherwise.

    Args:
        path: CSV file to read; its rows must provide the columns
            '小区', '城市', '区域' and '道路号'.
    """
    rows = csv_util.read_csv2array(path)
    valid = []
    rejected = []
    for row_number, row in enumerate(rows):
        department = row['小区']
        if not department:
            continue
        raw_road = row['道路号']
        address = {
            'departmentId': row_number,
            'city': row['城市'],
            'region': row['区域'],
            'department': department,
            'road': _road_after_paren_prefix(raw_road) if raw_road else '',
        }
        if has_error(department):
            rejected.append(address)
        else:
            valid.append(address)
            print(f"【main({len(valid)}).road={address['city']}.{department}】")

    outputs = (
        ('dict_department_region.csv', valid),
        ('dict_department_region_error.csv', rejected),
    )
    for out_path, out_rows in outputs:
        keys = csv_util.get_head_from_arr(out_rows)
        print(f"【main().keys={keys}】")
        csv_util.create_csv(out_path, keys, force=True)
        csv_util.append_csv(out_path, out_rows)
Exemplo n.º 3
0
def batch_recongnize():
    """Collect moment texts page by page, detect addresses, append to a CSV.

    Starts at the module-level ``page`` counter (advanced as a side effect)
    and requests pages from ``get_moment_by_page`` until ``page`` exceeds the
    reported total.  The collected texts (line breaks removed) are passed to
    ``get_address_by_custom_lac``; each result for which ``getLocFrom``
    returns something truthy yields a row with ``md5_content`` and
    ``address`` (the location parts joined with '/').  Rows are appended to
    ``address_<date>.csv``, which is created first if it does not exist.
    """
    global page
    totalpage = 1  # replaced by the value reported with the first response
    location_arr = []
    md5_contents = []
    while page <= totalpage:
        totalpage, items = get_moment_by_page(page)
        print(f"【page={page}】")
        print(f"【totalpage={totalpage}】")
        # Advance unconditionally: previously the increment only happened
        # when the page had items, so an empty page looped forever.
        page += 1
        if not items or not totalpage > 0:
            continue
        for moment in items:
            text = moment['content'].replace('\n', '').replace('\r', '')
            print(f"【location_str.{len(location_arr)}={text}】")
            location_arr.append(text)
            md5_contents.append(moment['content_md5'])

    addresss = get_address_by_custom_lac(location_arr)
    address_arr = []
    # Results are matched to their source text / md5 by position.
    for index, parsed in enumerate(addresss):
        loc_from = getLocFrom(parsed)
        if not loc_from:
            continue
        joined = '/'.join(loc_from)
        print(f"【文本内容:{location_arr[index]}】")
        print(f"【检测到地址:{loc_from}】")
        print(f"【检测到地址:{joined}】")
        address_arr.append({
            'md5_content': md5_contents[index],
            'address': joined,
        })

    path = f"address_{time_util.now_to_date('%Y-%m-%d')}.csv"
    keys = csv_util.get_head_from_arr(address_arr)
    # Only create the file (with its header) on the first run of the day.
    if not os.path.exists(path):
        csv_util.create_csv(path, keys)
    csv_util.append_csv(path, address_arr)
Exemplo n.º 4
0
        load_dict = json.load(load_f)
    return load_dict


def extra_region(arr):
    """Return the distinct ``"city|region"`` labels found in *arr*.

    Args:
        arr: iterable of mappings that each provide 'city' and 'region'.

    Returns:
        A list of ``f"{city}|{region}"`` strings without duplicates, in the
        order in which each label first appears (the previous ``set``-based
        version returned them in arbitrary order).
    """
    labels = (f"{row['city']}|{row['region']}" for row in arr)
    # dict keys keep insertion order, giving a stable, de-duplicated result.
    return list(dict.fromkeys(labels))


if __name__ == '__main__':
    # Filter the full region dump down to the rows whose city contains '上海'
    # and whose region does not contain '上海周边', then write them to a
    # dated CSV file.
    arr = read_csv2array("2021-02-08_dict_department_region_all.csv")
    region_arr = [
        row for row in arr
        if '上海' in row['city'] and "上海周边" not in row['region']
    ]
    count = len(region_arr)
    print(f"【().count={count}】")
    path = f'{time_util.now_to_date("%Y-%m-%d")}_dict_department_shanghai.csv'
    keys = csv_util.get_head_from_arr(region_arr)
    csv_util.create_csv(path, keys, force=True)
    csv_util.append_csv(path, region_arr)
Exemplo n.º 5
0

if __name__ == '__main__':
    # Read the "20210207" sheet of the contacts workbook and export every row
    # that has a phone value to a dated, GBK-encoded CSV file.
    full_dir = FilePathUtil.get_full_dir("./common/20210207wx_contacts_moments.xls")
    array = excel2array(full_dir, "20210207")
    # Output columns: 姓名 (name) <- nick_name, 电话 (phone) <- phone,
    # 备注 (remark) <- wx_number.
    array_new = [
        {
            "姓名": row['nick_name'],
            "电话": row['phone'],
            "备注": row['wx_number'],
        }
        for row in array
        if row['phone']
    ]
    path = f'{time_util.now_to_date("%Y-%m-%d")}wx_contacts_moments.csv'
    keys = csv_util.get_head_from_arr(array_new)
    csv_util.create_csv(path, keys, force=True, encoding=ENCODING_GBK)
    csv_util.append_csv(path, array_new, encoding=ENCODING_GBK)
    # datas = [{'id': 1, 'text': "dsd", "abs": 1},
    #          {'id': 2, 'text': "dsd2"},
    #          {'id': 3, 'text': "dsd3"}]
    # write_excel(full_dir, 'test', datas)
    # full_dir = FilePathUtil.get_full_dir("wxfriend", "excel", "text")
    # full_dir = FilePathUtil.get_lastmodify_file(full_dir)
    # appendData(full_dir, datas)
    # full_dir = FilePathUtil.get_lastmodify_file( FilePathUtil.get_full_dir("wxfriend", "excel", "pic"))
    # get_full_dir = FilePathUtil.get_full_dir("wxfriend", "excel", "pic", 'test.xls')
    # utf16leToUtf8(full_dir, get_full_dir)
    # content = "🔥《臻水岸》🔥类独栋🌼6米客厅挑空.预留电梯井.🏠带花园、双车位、南北双露台🚗毛坯.款清交房.最后10套💰总价约1300-1500万左右🎁稀缺房源,错过再无看房热线:15921824193"
    # encode = content.encode(encoding='utf-8')
    # decode = encode.decode(encoding="utf-8")
Exemplo n.º 6
0
# Punctuation handled while cleaning road strings.  The full-width forms are
# written as escapes so they survive any text normalisation of this file.
# NOTE(review): the previous code contained `.replace(',', ',')` (a no-op) and
# the same ASCII paren-removal chain twice in a row; presumably one variant of
# each was the full-width character -- confirm against the input data.
_ROAD_CLOSE_PARENS = ")\uff09"
_ROAD_FULLWIDTH_COMMA = "\uff0c"
_ROAD_PAREN_STRIPPER = str.maketrans("", "", "()\uff08\uff09")


def _road_fragments(road):
    """Split a raw road value into cleaned, non-empty fragments.

    Everything up to and including the first closing parenthesis is dropped
    (the whole string is kept when there is none, instead of raising
    ``ValueError``).  The remainder is split on ',' or, failing that, on ';';
    parentheses and surrounding whitespace are removed from each piece.
    """
    cut = next(
        (pos + 1 for pos, ch in enumerate(road) if ch in _ROAD_CLOSE_PARENS),
        0,
    )
    road_str = road[cut:].strip().replace(_ROAD_FULLWIDTH_COMMA, ",")
    if ',' in road_str:
        pieces = road_str.split(",")
    elif ';' in road_str:
        pieces = road_str.split(";")
    else:
        pieces = [road_str]
    cleaned = (piece.translate(_ROAD_PAREN_STRIPPER).strip() for piece in pieces)
    return [piece for piece in cleaned if piece]


def extra_road():
    """Build ``dict_department_road_street.csv`` from the department CSV.

    Reads ``2021-03-29_dict_department_shanghai.csv``.  For a row with a
    'road' value, each fragment of that value is passed through
    ``trim_road``; a fragment is emitted once (first occurrence wins) when
    ``is_road_type`` accepts it and it is shorter than 11 characters.  A row
    without a 'road' value is always emitted, using its 'department' value as
    the road.  Each emitted row carries ``id`` (position of the source row),
    ``city``, ``region``, ``road`` and ``department``.
    """
    data = csv_util.read_csv2array("2021-03-29_dict_department_shanghai.csv")
    array = []
    seen_roads = set()  # O(1) duplicate check instead of scanning a list
    for index_position, item in enumerate(data):
        road = item['road']
        if not road:
            print(f"没有道路号的小区【{index_position}.{item}】")
            seen_roads.add(item['department'])
            array.append({
                'id': index_position,
                'city': item['city'],
                'region': item['region'],
                'road': item['department'],
                'department': item['department'],
            })
            continue
        for fragment in _road_fragments(road):
            # trim_road may leave parentheses behind, so strip them again.
            road_name = trim_road(fragment).translate(_ROAD_PAREN_STRIPPER)
            if not is_road_type(road_name) or road_name in seen_roads:
                continue
            if len(road_name) < 11:
                seen_roads.add(road_name)
                array.append({
                    'id': index_position,
                    'city': item['city'],
                    'region': item['region'],
                    'road': road_name,
                    'department': item['department'],
                })

    path = 'dict_department_road_street.csv'
    keys = csv_util.get_head_from_arr(array)
    print(f"【main().keys={keys}】")
    csv_util.create_csv(path, keys, force=True)
    csv_util.append_csv(path, array)
Exemplo n.º 7
0
 # Derive a table of road names from dict_department_road_street.csv and
 # write it to dict_department_road_name.csv.
 # NOTE(review): the enclosing definition and the initialisation of `count`
 # are not part of the visible lines; `count` must be defined earlier.
 array = csv_util.read_csv2array("dict_department_road_street.csv")
 road_strip_array = []  # road names already emitted
 road_array = []  # output rows
 for index, item in enumerate(array):
     road_strip = item['road']
     # Look for text followed by digits; presumably the helper returns the
     # captured text before the digits -- confirm against re_util.
     searchs = re_util.find_texts_by_reg(r'(.*?)\d+', road_strip)
     if searchs:
         # Use the first result as the road name and emit it only once.
         road_strip = searchs[0]
         # res = trim_road_name(keywords, road_strip)
         if not road_strip in road_strip_array:
             count += 1
             road_strip_array.append(road_strip)
             # The row is reused in place: 'id' becomes the running counter,
             # 'road_name' is added, 'road' and 'department' are removed.
             item['id'] = count
             item['road_name'] = road_strip
             del item['road']
             del item['department']
             road_array.append(item)
     else:
         # No result from the pattern: the road value is kept unchanged.
         # This branch does not check road_strip_array, so repeated values
         # are emitted each time they occur.
         count += 1
         road_strip_array.append(road_strip)
         item['id'] = count
         item['road_name'] = road_strip
         del item['road']
         del item['department']
         road_array.append(item)
         print(f"【({count}).road_strip={road_strip}】")
 path = 'dict_department_road_name.csv'
 keys = csv_util.get_head_from_arr(road_array)
 print(f"【main().keys={keys}】")
 csv_util.create_csv(path, keys, force=True)
 csv_util.append_csv(path, road_array)