Пример #1
0
def all_properties():
    """Index the links found on the properties reference page.

    Returns an ``OrderedDict`` keyed by ``property_name_from_href(href)``.
    Each value is a dict holding the link text with whitespace collapsed
    (``title``) and the link's ``href`` (``description``).
    """
    page = get_pq(PROPERTIES_REFERENCE)
    catalogue = OrderedDict()
    for anchor in page('#divContent li a'):
        href = q(anchor).attr("href")
        name = property_name_from_href(href)
        catalogue[name] = {
            "title": " ".join(anchor.text.split()),
            "description": href,
        }
    return catalogue
Пример #2
0
    async def on_callback_query(self, msg):
        """Reply to an inline-keyboard press with the chosen Appmedia ranking.

        The callback payload selects the ranking page. The page is fetched,
        the seventh table under ``.post-content`` is read, and every row
        with exactly five children becomes one Markdown line in the reply.
        Unknown payloads are ignored; a non-200 response is reported to the
        chat instead of raising.
        """
        query_id, from_id, data = telepot.glance(msg, flavor='callback_query')
        print('Callback query:', query_id, from_id, data)
        if not self._message_with_inline_keyboard:
            return

        rank_urls = {
            "appmedia_ssr": "https://appmedia.jp/fategrandorder/96261",
            "appmedia_sr": "https://appmedia.jp/fategrandorder/93558",
        }
        rank_url = rank_urls.get(data)
        if rank_url is None:
            # Any other payload used to fall through to an unbound rank_url.
            return

        r = requests.get(rank_url)
        if r.status_code != 200:
            # Previously `reply` was never assigned on this path.
            await self.sender.sendMessage("connection timeout {}".format(r.status_code))
            return

        rq = q(r.content.decode("utf-8"))
        tables = rq(".post-content > table")
        table_rank = q(tables[6])
        # remove thead
        table_trs = table_rank("tr")[1:]
        lines = [rank_url, "サーヴァント 総合 高難 周回"]
        for row in table_trs:
            qi = q(row)
            if len(qi.children()) != 5:
                continue
            qic = qi("td")
            qia = qi("a")
            qimg = qi("img")[2]
            sougou = q(qimg).attr("src").split('/')[-1]
            # Strip the extension as a suffix; rstrip('.png') would also eat
            # trailing 'p', 'n', 'g' or '.' characters of the name itself.
            if sougou.endswith('.png'):
                sougou = sougou[:-len('.png')]
            if sougou == "A-1":
                sougou = "A+"
            lines.append("[{}]({}) {} {} {}".format(
                q(qic[0]).text(), qia.attr("href"), sougou,
                q(qic[3]).text(), q(qic[4]).text()))
        reply = "\n".join(lines) + "\n"
        await self.sender.sendMessage(reply, parse_mode="Markdown")
def parse_properties_from_href(href):
    """Extract property schemas and required property names from a page.

    Loads ``href`` through ``tools.get_pq``, keeps the definition lists
    whose text contains ``'Type :'`` and pairs their ``dt``/``dd`` children.

    Returns a ``(properties, required)`` tuple: an ``OrderedDict`` mapping
    the first word of each ``dt`` to ``get_type(dt, dd)``, and the list of
    ``dt`` texts whose ``dd`` has a paragraph marked ``Required : Yes``.
    """
    def _mentions_type(i):
        # ``this`` is supplied by the query library while filtering.
        return 'Type :' in q(this).text()

    def _is_required(i):
        text = q(this).text()
        return ('Required : Yes' in text and
                'Yes, for VPC security groups' not in text)

    page = tools.get_pq(href)
    dl = page('#main-col-body .variablelist dl').filter(_mentions_type)
    pairs = [
        (q(dt), q(dd))
        for dt, dd in zip(dl.children('dt'), dl.children('dd'))
    ]

    properties = OrderedDict()
    for term, definition in pairs:
        name = term.text().split()[0]
        properties[name] = get_type(term, definition)

    required = []
    for term, definition in pairs:
        if definition('p').filter(_is_required):
            required.append(term.text())

    return properties, required
Пример #4
0
def get_page_info(url):
    """Fetch a news page and return its title, metadata and body as text.

    The result starts with the page title and ``url``, followed by the
    ``media``, ``pubtime`` and ``tags`` fields read from a JSON blob in the
    page head, then every article paragraph separated by blank lines.

    If the JSON blob cannot be decoded the error is printed and the
    metadata fields are emitted empty, instead of failing on an unbound
    variable.
    """
    # Fetch the page and parse it.
    response = requests.get(url)
    info = q(response.text)

    # Title.
    title = info('body > div.qq_conent.clearfix > div.LEFT > h1').text()
    parts = [title + '\n\n' + url + '\n']

    # The last <script> in <head> holds a JSON string with the article's
    # basic information; drop the 14-character prefix before parsing.
    raw_json = info('head > script:last').text()[14:]
    try:
        # No ``encoding`` argument: it was ignored on Python 3 and is
        # rejected with TypeError from Python 3.9 on.
        meta = json.loads(raw_json)
    except JSONDecodeError as e:
        print(e)
        meta = {}

    # Publishing media.
    parts.append('作者:' + meta.get('media', '') + '\n')
    # Publication time.
    parts.append('发布时间:' + meta.get('pubtime', '') + '\n')
    # Tags.
    parts.append('关键字:' + meta.get('tags', '') + '\n\n')

    # Article body, one paragraph at a time.
    paragraphs = info(
        'body > div.qq_conent.clearfix > div.LEFT > div.content.clearfix > div.content-article > p'
    )
    for p in paragraphs:
        parts.append(q(p).text())
        parts.append('\n\n')
    return ''.join(parts)
Пример #5
0
def fit2(url):
    """Download the videos linked from a course page, grouped by chapter.

    For each ``.chapter`` block the chapter name is taken from its markup.
    Each ``.J-media-item`` link with a video id is resolved through the
    ``ajaxmediainfo`` endpoint and the last entry of its ``mpath`` list is
    downloaded to ``<course>/<chapter>/<video>.mp4``.
    """
    page = q(url, headers=headers)
    chapters = page('div').filter('.chapter ')
    course_name = page('h2').text()
    for chapter in chapters:
        chapter_pq = q(chapter)
        chapter_names = re.findall(r'第\d.+?(?=&)', str(chapter_pq))
        for item in chapter_pq('a').filter('.J-media-item'):
            item_pq = q(item)
            media_ids = re.findall(r'(?<=video/).+?(?=")', str(item_pq))
            if not media_ids:
                continue
            video_names = re.findall(r'\d-\d.+(?=\r)', str(item_pq.text()))
            info_url = ('http://www.imooc.com/course/ajaxmediainfo/?mid='
                        + media_ids[-1] + '&mode=flash')
            mpath = requests.get(info_url).json()['data']['result']['mpath']
            # Only the last entry is used (the original labelled it "BD").
            # The unused [-2]/[-3] lookups were dropped because they raised
            # IndexError when fewer than three entries were returned.
            best = mpath[-1]
            # os.path.join yields the same backslash paths on Windows and
            # valid paths elsewhere.
            target_dir = os.path.join('.', course_name, chapter_names[-1])
            os.makedirs(target_dir, exist_ok=True)
            name = os.path.join(course_name, chapter_names[-1],
                                video_names[-1] + '.mp4')
            thre.download(best, name, blocks=3, proxies={})
Пример #6
0
def fit3_1(url):
    """Process every course linked from one course-list page.

    Each ``.moco-course-wrap`` block is searched for ``learn/<id>`` links;
    the last match is turned into a course URL, printed and passed to
    ``fit2``.
    """
    listing = q(url, headers=headers)
    for card in listing('div').filter('.moco-course-wrap'):
        matches = re.findall(r'learn/\d{1,6}(?=")', str(q(card)))
        if not matches:
            continue
        course_url = 'http://www.imooc.com/' + matches[-1]
        print(course_url)
        fit2(course_url)
def all_properties():
    """Index the links found on the properties reference page.

    Returns an ``OrderedDict`` keyed by ``property_name_from_href(href)``.
    Each value is a dict holding the link text with whitespace collapsed
    (``title``) and the link's ``href`` (``description``).
    """
    page = get_pq(PROPERTIES_REFERENCE)
    catalogue = OrderedDict()
    for anchor in page('#divContent li a'):
        href = q(anchor).attr("href")
        name = property_name_from_href(href)
        catalogue[name] = {
            "title": " ".join(anchor.text.split()),
            "description": href,
        }
    return catalogue
Пример #8
0
def page_get(root_url):
    """Return the URLs of all ``.hsbn`` links found on ``root_url``.

    Each href is resolved against ``root_url`` with ``futaba.htm`` removed.
    Elements without an href are skipped.
    """
    # Local import so the block does not depend on the file's import list.
    from urllib.parse import urljoin

    resp = requests.get(root_url)
    query = q(resp.text)

    # Loop-invariant base. The trailing slash keeps relative hrefs resolving
    # underneath the base, as os.path.join did on POSIX.
    base = root_url.replace('futaba.htm', '')
    if base and not base.endswith('/'):
        base += '/'

    url_list = []
    for res_page in query.find('.hsbn'):
        href = q(res_page).attr('href')
        if href is None:
            continue
        # urljoin is separator-safe on every platform and also handles
        # absolute hrefs, unlike os.path.join.
        url_list.append(urljoin(base, href))

    return url_list
Пример #9
0
def parse_parameters():
    """Build a schema fragment for each entry of the parameters page.

    Reads the first definition list on ``parameters-section-structure.html``.
    ``Type`` is converted by ``parse_paremeter_types``; every other entry
    starts as ``{'type': 'string'}``, after which ``AllowedValues`` and
    ``NoEcho`` get their types overridden.
    """
    parameters_href = tools.BASE + 'parameters-section-structure.html'
    page = tools.get_pq(parameters_href)
    first_dl = q(page('#main-col-body .variablelist dl').eq(0))

    sections = OrderedDict()
    for dt, dd in zip(first_dl.children('dt'), first_dl.children('dd')):
        name = q(dt).text()
        sections[name] = q(dd)

    result = OrderedDict()
    result['Type'] = parse_paremeter_types(sections.pop('Type'))
    for name in sections:
        result[name] = {'type': 'string'}
    result['AllowedValues']['type'] = 'array'
    result['NoEcho']['type'] = ['string', 'boolean']
    return result
Пример #10
0
 async def auto_join(self, response):
     """Register and join every stateful component found in ``response``.

     Each element carrying both ``id`` and ``state`` attributes is added
     via ``add_component`` (named by its ``is`` attribute, or by its tag
     when that is missing) and then joined with ``send_join``.
     """
     document = q(response.content)
     for node in document('[id][state]'):
         name = node.get('is') or node.tag
         initial_state = json.loads(node.get('state'))
         new_id = self.add_component(name, initial_state)
         await self.send_join(new_id)
Пример #11
0
def all_resource_properties_hrefs():
    """Map each link href on the property reference page to its link text."""
    page = get_pq(
        'http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-product-property-reference.html'
    )
    links = OrderedDict()
    for element in page('#divContent li a'):
        anchor = q(element)
        href = anchor.attr('href')
        links[href] = anchor.text()
    return links
def get_pq(uri=BASE + 'aws-template-resource-type-ref.html'):
    """Load ``uri`` into a query document with absolute links.

    The request carries the project URL as its ``user-agent`` header.
    """
    request_headers = {
        'user-agent':
        'https://github.com/fungusakafungus/cloudformation-jsonschema',
    }
    document = q(uri, headers=request_headers)
    document.make_links_absolute()
    return document
Пример #13
0
def parse_paremeter_types(dd):
    """Return a string-enum schema listing the parameter types under ``dd``.

    The enum holds the text of every ``dl dt`` element inside ``dd`` plus
    the extra value ``List<String>``.
    """
    type_names = []
    for dt in dd('dl dt'):
        type_names.append(q(dt).text())
    type_names.append('List<String>')  # undocumented
    return OrderedDict([('type', 'string'), ('enum', type_names)])
Пример #14
0
def get_pq(uri=BASE + 'aws-template-resource-type-ref.html'):
    """Load ``uri`` into a query document with absolute links.

    The request carries the project URL as its ``user-agent`` header.
    """
    request_headers = {
        'user-agent':
        'https://github.com/fungusakafungus/cloudformation-jsonschema',
    }
    document = q(uri, headers=request_headers)
    document.make_links_absolute()
    return document
Пример #15
0
def getUserUploads(username):
	"""Notify about a user's uploads and remember the first listed one.

	Scrapes the user's videos page and calls ``notifyVideo`` for each
	entry, stopping at the id equal to the module-level ``latest``. The
	first listed id is then written to ``latest.sav``.

	When the page yields no usable first entry the file is left untouched;
	it used to be truncated just before an IndexError was raised.
	"""
	doc = q("https://www.youtube.com/user/%s/videos" % username)
	vids = doc('#channels-browse-content-grid .yt-lockup-title a[href^="/watch"]')
	vids = vids.map(lambda i, e: {
		'id':videoIdPattern.findall(q(e).attr('href')),
		'title':q(e).attr('title')
	})

	for v in vids:
		if len(v['id']) and v['title']:
			if v['id'][0]==latest:
				break
			notifyVideo(v['id'][0],v['title'])

	# Only overwrite the marker when there is an id to store.
	if len(vids) and vids[0]['id']:
		with open('latest.sav','w') as f:
			f.write(vids[0]['id'][0])
Пример #16
0
def fit4(url):
    """Follow every category query found in the ``.item`` blocks of ``url``.

    Each ``c=...`` match becomes a course-list URL that is printed and
    passed to ``fit3``.
    """
    page = q(url, headers=headers)
    items = page('div').filter('.item')
    for query in re.findall(r'c=.+?(?=")', str(items)):
        list_url = 'http://www.imooc.com/course/list?' + query
        print(list_url)
        fit3(list_url)
Пример #17
0
def all_resource_hrefs():
    """Map each link text on the resource type reference page to its href."""
    page = get_pq(
        'http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-template-resource-type-ref.html'
    )
    hrefs_by_title = OrderedDict()
    for element in page('#divContent li a'):
        anchor = q(element)
        title = anchor.text()
        hrefs_by_title[title] = anchor.attr('href')
    return hrefs_by_title
Пример #18
0
def fit3(url):
    """Process a course list, following its pager when one is present.

    Without a ``.page`` block the URL itself is passed to ``fit3_1``.
    Otherwise every distinct page number found in the pager text is
    appended as ``&page=<n>`` and processed in order of appearance.
    """
    pager = q(url, headers=headers)('div').filter('.page')
    if not pager:
        fit3_1(url)
        return

    # r'\d+' keeps multi-digit page numbers whole; the former r'\d' split
    # "12" into "1" and "2" and so re-fetched pages.
    seen = set()
    for page in re.findall(r'\d+', str(pager.text())):
        if page in seen:
            continue
        seen.add(page)
        fit3_1(url + '&page=' + page)
def all_resource_properties_hrefs():
    """Map each link href on the property reference page to its link text."""
    page = get_pq(BASE + 'aws-product-property-reference.html')
    links = OD()
    for element in page('#main-col-body li a'):
        anchor = q(element)
        href = anchor.attr('href')
        links[href] = anchor.text()
    return links
Пример #20
0
def set_resource_properties(res_type):
    """Fill in the ``Properties`` schema of ``res_type`` and return the schema.

    The resource's documentation page is looked up through
    ``all_resource_hrefs``. The first definition list on it supplies the
    property names (``dt``) and their types (``get_type(dd)``). Properties
    whose ``dd`` has a paragraph containing ``Required : Yes`` are listed
    as required.
    """
    def _marked_required(i):
        # ``this`` is supplied by the query library while filtering.
        return 'Required : Yes' in q(this).text()

    hrefs = all_resource_hrefs()
    page = get_pq(hrefs[res_type])
    schema = load()
    dl = page('#divContent .variablelist dl').eq(0)
    resources = resources_dict(schema)
    pairs = [(q(dt), q(dd)) for dt, dd in zip(dl('dt'), dl('dd'))]

    node = resources[res_type]['properties']
    node['Properties'] = OrderedDict()
    prop_schemas = OrderedDict()
    for term, definition in pairs:
        name = term.text()
        prop_schemas[name] = get_type(definition)
    node['Properties']['properties'] = prop_schemas

    required = []
    for term, definition in pairs:
        if definition('p').filter(_marked_required):
            required.append(term.text())
    if required:
        node['Properties']['required'] = required
        resources[res_type]['required'] = ['Properties']
    return schema
def all_res_properties():
    """Describe every property type linked from the properties reference.

    Returns an ``OrderedDict`` keyed by ``property_name_from_href(href)``.
    Each value records the link text with whitespace collapsed (``title``),
    the link's href (``descriptionURL``) and ``"type": "object"``.
    """
    page = tools.get_pq(PROPERTIES_REFERENCE)
    res = OrderedDict()
    for anchor in page('#main-col-body li a'):
        href = q(anchor).attr("href")
        entry = OD()
        entry["title"] = " ".join(anchor.text.split())
        entry["descriptionURL"] = href
        entry["type"] = "object"
        res[property_name_from_href(href)] = entry
    return res
Пример #22
0
def getPageText(base_url, next_page):
    """Save a paginated text, following "next page" links until none is left.

    Starting at ``base_url + next_page``, each page's title (``div.zhong``)
    and body (``article#nr``) are passed to ``serializeToFile``. The first
    ``a.dise`` link whose text is ``下一页`` gives the next page.

    Pages are walked in a loop rather than by recursion, so long chains
    cannot exhaust the recursion limit.
    """
    while True:
        result = requests.get(base_url + next_page)
        result.encoding = 'UTF-8'
        t = q(result.text)
        title = t('div.zhong').text()
        print(title)
        text = t('article#nr').text()
        # TODO: persist here
        # Raw string: same path value, without invalid escape sequences.
        serializeToFile(r'D:\myCode\python\crawler\gyzz.txt', title, text)

        found_next = False
        for aa in t('a.dise'):
            if aa.text == '下一页':
                next_page = q(aa).attr.href
                print(next_page)
                found_next = True
                break

        # Stop when there is no link, or the link carries no href.
        if not found_next or next_page is None:
            return
Пример #23
0
def fit1(url):
    """Download the single video behind a video page URL.

    The media id is the last path segment of ``url``. The file name comes
    from the last ``<digit>-...`` match in the page's ``em`` text, and the
    download source is the last entry of the ``mpath`` list returned by
    the ``ajaxmediainfo`` endpoint.
    """
    media_id = url.rsplit('/', 1)[-1]
    info_url = ('http://www.imooc.com/course/ajaxmediainfo/?mid='
                + media_id + '&mode=flash')
    page = q(url, headers=headers)
    caption = page('em').text()
    video = re.findall(r'\d-.+?(?=\d)', caption)
    name = video[-1] + '.mp4'
    mpath = requests.get(info_url).json()['data']['result']['mpath']
    # Only the last entry is used (the original labelled it "BD"). The
    # unused [-2]/[-3] lookups were dropped because they raised IndexError
    # when fewer than three entries were returned.
    thre.download(mpath[-1], name, blocks=4, proxies={})
def set_resource_properties(res_type):
    """Fill in the ``Properties`` schema of ``res_type`` and return the schema.

    The resource's documentation page is looked up through
    ``all_resource_hrefs``. The first definition list on it supplies the
    property names (``dt``) and their types (``get_type(dd)``). Properties
    whose ``dd`` has a paragraph containing ``Required : Yes`` are listed
    as required.
    """
    def _marked_required(i):
        # ``this`` is supplied by the query library while filtering.
        return 'Required : Yes' in q(this).text()

    hrefs = all_resource_hrefs()
    page = get_pq(hrefs[res_type])
    schema = load()
    dl = page('#divContent .variablelist dl').eq(0)
    resources = resources_dict(schema)
    pairs = [(q(dt), q(dd)) for dt, dd in zip(dl('dt'), dl('dd'))]

    node = resources[res_type]['properties']
    node['Properties'] = OrderedDict()
    prop_schemas = OrderedDict()
    for term, definition in pairs:
        name = term.text()
        prop_schemas[name] = get_type(definition)
    node['Properties']['properties'] = prop_schemas

    required = []
    for term, definition in pairs:
        if definition('p').filter(_marked_required):
            required.append(term.text())
    if required:
        node['Properties']['required'] = required
        resources[res_type]['required'] = ['Properties']
    return schema
def get_type(dt, dd_):
    """Derive a schema fragment for one property from its definition node.

    The lower-cased text of the paragraphs starting with ``Type`` is
    matched against ``type_patterns`` first. Next, a link inside those
    paragraphs is resolved through ``property_ref``. Finally, a "list of"
    ``AWS::EC2::SecurityGroup`` is mapped to a list-of-strings reference.
    Anything else is logged and returned as a bare description.
    """
    def _starts_with_type(x):
        # ``this`` is supplied by the query library while filtering.
        return q(this).text().startswith('Type')

    dd = dd_('p').filter(_starts_with_type)
    t = dd.text().lower()

    for pattern, schema_fragment in type_patterns:
        if pattern in t:
            return schema_fragment

    if dd('a'):
        return property_ref(dt, dd_, dd, t)

    type_nodes = dd_('.type')
    if (type_nodes
            and type_nodes.text() == 'AWS::EC2::SecurityGroup'
            and 'list of' in t):
        return {"$ref": "basic_types.json#/definitions/list<string>"}

    start = t.find('type :')
    snippet = t[start:start + 50]
    markup = dd_.html()
    log.warning('Could not parse resource property type: "%s"\n"%s"', snippet, markup)
    return {'description': markup}
Пример #26
0
    def apply_diff(self, html_diff):
        """Rebuild the current HTML from ``html_diff`` and re-parse it.

        String entries are inserted verbatim. A negative number skips that
        many characters of the previous HTML. Any other number copies that
        many characters from the previous HTML at the cursor. If the parsed
        document has a ``state`` attribute, it is decoded into
        ``self.state``.
        """
        pieces = []
        position = 0
        for op in html_diff:
            if isinstance(op, str):
                pieces.append(op)
                continue
            if op < 0:
                # Negative counts skip old characters.
                position -= op
                continue
            end = position + op
            pieces.append(self.last_received_html[position:end])
            position = end

        self.last_received_html = ''.join(pieces)
        self.doc = q(self.last_received_html)

        state = self.doc.attr['state']
        if state:
            self.state = json.loads(state)
def get_type(dd_):
    """Derive a JSON-schema fragment for one property definition node.

    The lower-cased text of the paragraphs starting with ``Type`` is
    checked for the simple types in order. Next, a link inside those
    paragraphs is resolved through ``property_ref_from_href``. Finally, a
    "list of" ``AWS::EC2::SecurityGroup`` is mapped to an array of strings.
    Anything else triggers a warning and yields ``{}``.
    """
    def _starts_with_type(x):
        # ``this`` is supplied by the query library while filtering.
        return q(this).text().startswith('Type')

    dd = dd_('p').filter(_starts_with_type)
    t = dd.text().lower()

    # Built on every call so callers always receive fresh dicts.
    simple_types = (
        ('type : string', {'type': 'string'}),
        ('list of strings', {'type': 'array', 'items': {'type': 'string'}}),
        ('type : integer', {'type': 'integer'}),
        ('type : boolean', {'type': 'boolean'}),
    )
    for marker, fragment in simple_types:
        if marker in t:
            return fragment

    link = dd('a')
    if link:
        return property_ref_from_href(link.attr('href'))

    type_nodes = dd_('.type')
    if (type_nodes
            and type_nodes.text() == 'AWS::EC2::SecurityGroup'
            and 'list of' in t):
        return {'type': 'array', 'items': {'type': 'string'}}

    warn('Could not parse resource property type: "%s"', dd_.html())
    return {}
Пример #28
0
def get_type(dd_):
    """Derive a JSON-schema fragment for one property definition node.

    The lower-cased text of the paragraphs starting with ``Type`` is
    checked for the simple types in order. Next, a link inside those
    paragraphs is resolved through ``property_ref_from_href``. Finally, a
    "list of" ``AWS::EC2::SecurityGroup`` is mapped to an array of strings.
    Anything else triggers a warning and yields ``{}``.
    """
    def _starts_with_type(x):
        # ``this`` is supplied by the query library while filtering.
        return q(this).text().startswith('Type')

    dd = dd_('p').filter(_starts_with_type)
    t = dd.text().lower()

    # Built on every call so callers always receive fresh dicts.
    simple_types = (
        ('type : string', {'type': 'string'}),
        ('list of strings', {'type': 'array', 'items': {'type': 'string'}}),
        ('type : integer', {'type': 'integer'}),
        ('type : boolean', {'type': 'boolean'}),
    )
    for marker, fragment in simple_types:
        if marker in t:
            return fragment

    link = dd('a')
    if link:
        return property_ref_from_href(link.attr('href'))

    type_nodes = dd_('.type')
    if (type_nodes
            and type_nodes.text() == 'AWS::EC2::SecurityGroup'
            and 'list of' in t):
        return {'type': 'array', 'items': {'type': 'string'}}

    warn('Could not parse resource property type: "%s"', dd_.html())
    return {}
Пример #29
0
def find_route(src, dst, dt, options):
    """Query the transit search for ``src`` -> ``dst`` at ``dt`` and print it.

    A header line is printed before and after the formatted result.
    ``options['shinkansen']`` and ``options['limited_express']`` are sent
    as ``'1'``/``'0'`` flags, and ``options`` is also handed to
    ``RouteDisplay``.
    """
    def _flag(name):
        if options[name]:
            return '1'
        return '0'

    head_info = '{} ==> {} at {}'.format(src, dst, dt.strftime('%Y-%m-%d %H:%M'))
    print(head_info)

    # YYYYMMDDHHMM, sliced into the individual form fields below.
    stamp = dt.strftime('%Y%m%d%H%M')
    search_params = {
        'flatlon': '',
        'from': src,
        'tlatlon': '',
        'to': dst,
        'viacode': '',
        'via': '',
        'y': stamp[:4],
        'm': stamp[4:6],
        'd': stamp[6:8],
        'hh': stamp[8:10],
        'm1': stamp[10:11],
        'm2': stamp[11:12],
        'ticket': 'ic',
        'expkind': '1',
        'ws': '3',
        's': '0',
        'al': '1',
        'shin': _flag('shinkansen'),
        'ex': _flag('limited_express'),
        'hb': '1',
        'lb': '1',
        'sr': '1',
        'kw': '',
    }
    response = requests.get(
        'https://transit.yahoo.co.jp/search/result', params=search_params)
    root = q(str(response.content, 'utf-8'))

    RouteDisplay(options).format_root(root)

    print('=' * 80)
    print(head_info)
Пример #30
0
 def __init__(self, *args, **kwargs):
     """Initialise the bot and load the servant table from the wiki.

     ``self._servants`` maps the text of each row's first cell to an
     ``OrderedDict``. That inner mapping goes from the index of each
     non-empty following cell to a string listing the cell's links as
     ``<text>\\nhttps:<href>\\n``.
     """
     super(Fgobot, self).__init__(*args, **kwargs)
     self._count = 0
     self._servants = OrderedDict()
     self._message_with_inline_keyboard = None

     response = requests.get("https://www9.atwiki.jp/f_go/pages/671.html")
     table = q(response.content.decode("utf-8"))("#wikibody table")
     for row in list(table('tr').items()):
         class_name = q(row('td')[0]).text()
         if not class_name:
             continue
         self._servants[class_name] = OrderedDict()
         for column, cell in enumerate(row('td')[1:]):
             cell_pq = q(cell)
             if not cell_pq.text():
                 continue
             self._servants[class_name][column] = "".join(
                 "%s\nhttps:%s\n" % (q(link).text(), q(link).attr("href"))
                 for link in cell_pq('a')
             )
Пример #31
0
def all_resource_hrefs():
    """Map each stripped link text on the resource type reference to its href."""
    page = get_pq(BASE + 'aws-template-resource-type-ref.html')
    hrefs_by_title = OD()
    for element in page('#main-col-body li a'):
        anchor = q(element)
        title = anchor.text().strip()
        hrefs_by_title[title] = anchor.attr('href')
    return hrefs_by_title
Пример #32
0
def all_resource_hrefs():
    """Map each link text on the resource type reference page to its href."""
    page = get_pq(
        'http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-template-resource-type-ref.html'
    )
    hrefs_by_title = OrderedDict()
    for element in page('#divContent li a'):
        anchor = q(element)
        title = anchor.text()
        hrefs_by_title[title] = anchor.attr('href')
    return hrefs_by_title
Пример #33
0
def parse(target_url_list):
    """Fetch each URL in ``target_url_list`` and pass its parsed HTML to ``dig_image``."""
    for page_url in target_url_list:
        document = q(requests.get(page_url).text)
        dig_image(document)
Пример #34
0
def _no_comment(i):
    """Yield each item of ``i`` wrapped with ``q``, skipping ``HtmlComment`` items."""
    for node in i:
        if not isinstance(node, HtmlComment):
            yield q(node)
Пример #35
0
    def reply_text(self, text):
        reply = "meow"
        markup = ReplyKeyboardRemove()
        if text.startswith("/appmedia"):
            reply = "Which ranking?"
            markup = InlineKeyboardMarkup(inline_keyboard=[
                [InlineKeyboardButton(text='Appmedia SSR Ranking', callback_data='appmedia_ssr')],
                [InlineKeyboardButton(text='Appmedia SR Ranking', callback_data='appmedia_sr')]
            ])
        if text.startswith("/servant"):
            keyboards = []
            for i in self._servants.keys():
                keyboards.append([KeyboardButton(text="★{} {}".format(j, i)) for j in self._servants[i].keys()])
            markup = ReplyKeyboardMarkup(keyboard=keyboards, one_time_keyboard=True)

            # "Saber", "Archer", "Lancer", "Rider", "Caster", "Assassin", "Berserker", "Shielder", "Ruler", "Avenger", "MoonCancer", "AlterEgo", "Foreigner"
            reply = "Please choose class and rare"

        if text.startswith("★"):
            fgorare, fgoclass = text.split()
            fgorare = int(fgorare[1])
            reply = self._servants[fgoclass][fgorare]
        if text.startswith("/hougu"):
            reply = """
早见表
https://docs.google.com/spreadsheets/d/1ru35rHQ9DMsQcBXHPgUD5XDO-mSvR_j1fFTB_V507zw/htmlview

fc2 宝具計算
fgotouka.web.fc2.com
国人宝具計算
https://xianlechuanshuo.github.io/fgo2/calc4.html
            """
        if text.startswith("/drop"):
            reply = "FGO効率劇場\nhttps://docs.google.com/spreadsheets/d/1TrfSDteVZnjUPz68rKzuZWZdZZBLqw03FlvEToOvqH0/htmlview?sle=true#"
        if text.startswith("/wiki"):
            servant_name = " ".join(text.split()[1:])
            if servant_name:
                query_page_url = "https://www9.atwiki.jp/f_go/?cmd=wikisearch&keyword={}".format(servant_name)
                r = requests.get(query_page_url)
                if r.status_code == 200:
                    rq = q(r.content.decode("utf-8"))
                    links = rq("#wikibody li a")
                    filtered_links = [q(x) for x in links if not any(("コメント" in q(x).text(), "ボイス" in q(x).text(), "性能" in q(x).text()))][:10]
                    reply = ""
                    for i in filtered_links:
                        reply += "{}\nhttps:{}\n\n".format(i.html(), i.attr("href"))
                else:
                    reply = "connection timeout {}".format(r.status_code)
            else:
                reply = "https://www9.atwiki.jp/f_go/pages/671.html"

        if text.startswith("/price"):
            reply = "google: 9800 JPY = "
            google_finance_url = "https://finance.google.com/finance/converter?a={}&from={}&to={}".format(9800, "JPY", "CNY")
            result = requests.get(google_finance_url)
            if result.status_code == 200:
                rcontent = q(result.content)
                rcontent = rcontent("#currency_converter_result .bld").text()
                reply += rcontent
            jeanne_h5_url = "http://h5.m.taobao.com/awp/core/detail.htm?id=553971150031"
            reply += "\nJeanne {}".format(jeanne_h5_url)

            tu_jihua_url = "https://item.taobao.com/item.htm?spm=2013.1.w4023-16844942798.13.5692e503t594AU&id=558505049792"
            reply += "\n秃计划 {}".format(tu_jihua_url)

        if text.startswith("/gamewith"):
            reply = "https://gamewith.jp/fgo/article/show/62409"

        if text.startswith("/summon"):
            simulator_url = "https://konatasick.github.io/test_simulator/pool.html?"
            simulator_js = "https://konatasick.github.io/test_simulator/js/index.js"
            reply = "Summon list\n%s\n\n" % simulator_url
            r = requests.get(simulator_js)
            if r.status_code == 200:
                rcontent = r.content.decode("utf-8")
                rcontent = rcontent.replace("\n", "")
                summon_json = re.findall('"sites"\:(.*)\/\*', rcontent)[0].rstrip('}')
                summon_json = json.loads(summon_json)
                summon_json_last_ten = summon_json[::-1][:10]
                for i in summon_json_last_ten:
                    reply += "{} {}{}\n".format(i["name"], simulator_url, i["info"])

        if text.startswith("/help") or text.startswith("/start"):
            reply = """
Author: @fdb713
/appmedia - appmedia ranking
/drop - drop statistics
/gamewith - gamewith ranking link
/hougu - hougu damage quick reference
/price - compare JPY to CNY and 3rd-party charge
/servant - send link of servants by rare and class from atwiki
/summon - simulate summon
/wiki - search and send link of servant or other keywords on atwiki page
/help or /start - show this message
            """

        return reply, markup
Пример #36
0
def readTextFromXmlFile():
    """Load ``assets/picf.xml`` and replace each ``p xref`` by its padded text.

    The file is read from the ``assets`` directory under the current
    working directory. Every ``xref`` inside a ``p`` is replaced by its
    text surrounded by single spaces.
    """
    path = os.path.join(os.getcwd(), 'assets', 'picf.xml')
    with open(path, 'r', encoding='utf8') as handle:
        qxml = q(handle.read())

    def _padded_text(i, e):
        return ' ' + qxml(e).text() + ' '

    qxml("p xref").replaceWith(_padded_text)
    # NOTE(review): this selection is bound locally and never returned, so
    # the function yields None. Confirm whether a return is missing.
    text = qxml("p:not(caption)")
Пример #37
0
def get_pq(
    uri='http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-template-resource-type-ref.html'
):
    """Load ``uri`` into a query document and make its links absolute."""
    document = q(uri)
    document.make_links_absolute()
    return document
Пример #38
0
def all_resource_properties_hrefs():
    """Map each link href on the property reference page to its link text."""
    page = get_pq(BASE + 'aws-product-property-reference.html')
    links = OD()
    for element in page('#main-col-body li a'):
        anchor = q(element)
        href = anchor.attr('href')
        links[href] = anchor.text()
    return links
def all_resource_hrefs():
    """Map each stripped link text on the resource type reference to its href."""
    page = get_pq(BASE + 'aws-template-resource-type-ref.html')
    hrefs_by_title = OD()
    for element in page('#main-col-body li a'):
        anchor = q(element)
        title = anchor.text().strip()
        hrefs_by_title[title] = anchor.attr('href')
    return hrefs_by_title
Пример #40
0
def get_pq(uri='http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-template-resource-type-ref.html'):
    """Load ``uri`` into a query document and make its links absolute."""
    document = q(uri)
    document.make_links_absolute()
    return document
Пример #41
0
def all_resource_properties_hrefs():
    """Map each link href on the property reference page to its link text."""
    page = get_pq(
        'http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-product-property-reference.html'
    )
    links = OrderedDict()
    for element in page('#divContent li a'):
        anchor = q(element)
        href = anchor.attr('href')
        links[href] = anchor.text()
    return links
Пример #42
0
def dig_image(query):
    """Pass every ``.jpg`` link under a ``/src/`` path to ``save_image``.

    ``query`` is a parsed document. Each ``a`` element's href is tested,
    and anchors without an href are skipped.
    """
    # Raw string: same pattern, without invalid escape sequences.
    pattern = re.compile(r'.*\/src\/.*\.jpg$')
    for link in query.find('a'):
        href = q(link).attr('href')
        # attr() yields None for anchors without an href, and
        # pattern.match(None) would raise TypeError.
        if href is not None and pattern.match(href):
            save_image(href)