import os
import re

import setting  # project-local config module holding the regex patterns
import utils    # project-local helpers (URL building, link cleanup, ...)

# get_url_content and print_log are defined elsewhere in this project.


def fetch_post_image_links(url, post_pages):
    print_log('Start collecting all image links in the post')
    post_content_reg = re.compile(setting.post_content_pattern, re.S)
    link_reg = re.compile(setting.img_link_pattern)
    link_reg2 = re.compile(setting.img_link_with_third_site_pattern)
    img_links = []
    for page in range(1, post_pages + 1):
        print('Current page: %s\r' % page, end='', flush=True)
        curl_page_url = utils.make_url_with_page_num(url, page)
        whole_content = get_url_content(curl_page_url)
        if whole_content:
            # Guard against pages where the post body pattern does not match,
            # which would otherwise raise an IndexError on [0].
            matches = re.findall(post_content_reg, whole_content)
            if not matches:
                continue
            content = matches[0]
            origin_img_links = re.findall(link_reg, content) + re.findall(link_reg2, content)
            img_links += [utils.make_real_img_link(link) for link in origin_img_links]
    print_log('Finished collecting links')
    return sorted(utils.clean_str_list(img_links))
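
# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal example of calling fetch_post_image_links; the thread URL and
# page count below are hypothetical placeholders, not values from this
# project, and the real URL format depends on how
# utils.make_url_with_page_num builds per-page URLs.
#
# links = fetch_post_image_links('http://example.com/thread-12345-1-1.html', 3)
# print('collected %d image links' % len(links))
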
def remove_repeat_img_links(record_file_name, img_links):
    if os.path.exists(record_file_name):
        # Links of images that have already been downloaded.
        with open(record_file_name, 'r') as download_records_file:
            existed_links = utils.clean_str_list(download_records_file.readlines())
        # New links: all collected links minus the already-downloaded ones.
        new_links = sorted(set(img_links) - set(existed_links))
        # Appending the new links to the record file here is deprecated;
        # each link is now recorded right after its image is downloaded.
        # download_records_file.writelines([link + '\n' for link in new_links])
        return new_links
    else:
        # No record file yet, so every collected link is new.
        # download_records_file = open(record_file_name, 'w')
        # download_records_file.writelines([link + '\n' for link in img_links])
        # download_records_file.close()
        return img_links
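
# The deprecated writelines above was replaced by recording each link right
# after its image finishes downloading. A minimal sketch of that pattern,
# assuming a hypothetical download_img(link) helper (not defined in this
# file) that returns True on success:
def download_and_record(record_file_name, new_links):
    with open(record_file_name, 'a') as records:
        for link in new_links:
            # Append the link only once the download succeeded, so an
            # interrupted run never marks undownloaded images as done.
            if download_img(link):
                records.write(link + '\n')
                records.flush()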