def get_urls(url, selector):
    """Return the normalized image URLs found in the page at *url*.

    The markup returned by ``get_html(url, selector)`` is parsed with
    ``fromstring`` and every ``<img>`` element is inspected. For URLs
    containing ``weixin.qq.com`` the ``data-src`` attribute is read,
    otherwise the regular ``src`` attribute is used.

    Args:
        url: Address of the page; also passed to ``normalize_image_url``
            together with each extracted value.
        selector: Forwarded unchanged to ``get_html``.

    Returns:
        A list with one ``normalize_image_url(url, ...)`` result per
        extracted attribute value, in document order.
    """
    document = fromstring(get_html(url, selector))

    # NOTE(review): presumably WeChat pages keep the real image address in
    # data-src (lazy loading) -- confirm against a live page.
    xpath_query = '//img/@data-src' if 'weixin.qq.com' in url else '//img/@src'
    raw_sources = document.xpath(xpath_query)

    return [normalize_image_url(url, raw) for raw in raw_sources]
# Example #2
0
def get_urls(url, selector):
    """Extract image addresses from the selected page content.

    Parses the result of ``get_html(url, selector)`` with ``fromstring``,
    pulls one attribute from every ``<img>`` element and passes each value
    through ``normalize_image_url`` along with the page *url*.

    Args:
        url: Page address. When it contains ``weixin.qq.com`` the
            ``data-src`` attribute is read instead of ``src``.
        selector: Handed to ``get_html`` as-is.

    Returns:
        List of normalized image URLs, one per matching attribute.
    """
    tree = fromstring(get_html(url, selector))

    # Guard on the common case first; only weixin.qq.com URLs use data-src.
    if 'weixin.qq.com' not in url:
        sources = tree.xpath('//img/@src')
    else:
        sources = tree.xpath('//img/@data-src')

    normalized = []
    for source in sources:
        normalized.append(normalize_image_url(url, source))
    return normalized
# Example #3
0
from upload2cos import upload_image
from watermark import watermark_text, watermark_overlay

# Command-line interface: two positional arguments, the page URL and the
# selector, both read into module-level names used by the steps below.

parser = argparse.ArgumentParser()
parser.add_argument('url', help='target url page')
parser.add_argument('selector', help='target selector')

args = parser.parse_args()
url = args.url
selector = args.selector

# Get the HTML for the given url/selector, convert it with html2md and hand
# the result to save_md.

html = get_html(url, selector)
md = html2md(html)
save_md(md)

# Collect the image URLs for the same url/selector.
# NOTE(review): if get_urls fetches the page itself, the page is requested a
# second time here -- confirm whether the HTML above could be reused.

image_urls = get_urls(url, selector)

# For each image: download it, then pass the downloaded path to upload_image
# (imported from upload2cos), printing the source URL, the download result
# and the upload result along the way.
# NOTE(review): the original comment also promised replacing the image links
# with the COS access URLs, but no replacement happens in the lines visible
# here and save_md has already run -- confirm whether that step exists
# further down or is still missing.

for image_url in image_urls:
    print(image_url)
    image_path = download_image(image_url)
    print(image_path)
    new_url = upload_image(image_path)
    print(new_url)