def Parse_lib_m(self, input_text):
    html = PyQuery(common.getUrl(input_text))
    """
    album_items = html('div.clearfix').children('li.album_item')
    title = html('h1.main_title').children('a').text()
    i = 0
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": i,
        "type": "list"
    }
    for album_item in album_items:
        no = '第' + str(i + 1) + '集'
        name = title + '(' + no + ')'
        url = PyQuery(album_item).children('a').attr('href')
        subtitle = ''
        info = {
            "name": name,
            "no": no,
            "subtitle": subtitle,
            "url": url
        }
        data["data"].append(info)
        i = i + 1
    total = i
    data["total"] = total
    """
    data = {
        "data": [],
        "more": False,
        "title": '',
        "total": 0,
        "type": "list",
        "caption": "271视频全集"
    }
    data_doc_id = html('span.play_source').attr('data-doc-id')
    ejson_url = 'http://rq.video.iqiyi.com/aries/e.json?site=iqiyi&docId=' + data_doc_id + '&count=100000'
    ejson = json.loads(common.getUrl(ejson_url))
    ejson_datas = ejson["data"]["objs"]
    data["total"] = ejson_datas["info"]["total_video_number"]
    data["title"] = ejson_datas["info"]["album_title"]
    album_items = ejson_datas["episode"]["data"]
    for album_item in album_items:
        no = '第' + str(album_item["play_order"]) + '集'
        name = album_item["title"]
        url = album_item["play_url"]
        subtitle = album_item["desciption"]
        info = {
            "name": name,
            "no": no,
            "subtitle": subtitle,
            "url": url
        }
        data["data"].append(info)
    #print(ejson)
    return data
def _close_handwich_bridge():
    c = HANDWICH_BRIDGE_CONFIG
    if IsOpen(c['ip'], c['port']):
        url = _make_handwich_base_url() + 'exit'
        if c['key'] is not None:
            url += '?key=' + c['key']
        # just send the exit command
        try:
            getUrl(url, allowCache=False, usePool=False)
        except Exception as e:
            logging.error(e)
            # ignore error
def urlHandle(self, input_text):
    html = PyQuery(common.getUrl(input_text))
    a = html.children('a')
    a = PyQuery(a)
    url = a.attr("href")
    print('urlHandle:"' + input_text + '"-->"' + url + '"')
    return url
def Parse_le(self, input_text):
    html = PyQuery(common.getUrl(input_text))
    items = html('dt.d_tit')
    title = "LETV"
    i = 0
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": i,
        "type": "collection"
    }
    for item in items:
        a = PyQuery(item).children('a')
        name = a.text()
        no = a.text()
        subtitle = a.text()
        url = a.attr('href')
        if url is None:
            continue
        if not re.match(r'^http://www\.le\.com/.+\.html', url):
            continue
        info = {
            "name": name,
            "no": no,
            "subtitle": subtitle,
            "url": url,
            "caption": "首页地址列表"
        }
        data["data"].append(info)
        i = i + 1
    total = i
    data["total"] = total
    return data
def ParseURL(self, input_text, label, min=None, max=None):
    data = {
        "protocol": "http",
        "urls": [""],
        #"args" : {},
        #"duration" : 1111,
        #"length" : 222222,
        #"decrypt" : "KanKan",
        #"decryptData" : {},
        #"adjust" : "KanKan",
        #"adjustData" : { },
        #"segmentSize": 1024,
        #"maxDown" : 5,
        #"convert" : "",
        #"convertData" : "",
    }
    id = re.match(r'^http://[^\s]+/[^\s]+/([^\s]+)\.html', input_text).group(1)
    ejson_url = 'http://v.api.mgtv.com/player/video?retry=1&video_id=' + id
    ejson = common.getUrl(ejson_url)
    ejson = json.loads(ejson)
    if ejson["status"] != 200:
        return
    edata = ejson["data"]
    estream = edata["stream"]
    estream_domain = edata["stream_domain"]
    i = int(label) - 1
    stream = estream[i]
    stream_domain = estream_domain[i]
    host = str(stream_domain)
    url = str(stream["url"])
    aurl = url.split('?')
    # str.strip() removes a character set, not a suffix; drop the trailing
    # '/playlist.m3u8' and the leading '/' explicitly instead
    a = aurl[0].rsplit('/playlist.m3u8', 1)[0].lstrip('/')
    b = aurl[1].split('&')
    u = host + '/' + a + '?pno=1031&' + b[3] + '&' + b[4]
    op1 = common.getUrl(u)
    data1 = json.loads(op1)
    eurl = data1['info']
    data["urls"] = eurl
    info = {
        "label": i,
        "code": i,
        #"ext" : "",
        #"size" : "",
        #"type" : "",
    }
    return [data]
def Parse_v(self, input_text):
    print(input_text)
    html = PyQuery(common.getUrl(input_text))
    datainfo_navlist = PyQuery(html("#datainfo-navlist"))
    for a in datainfo_navlist.children('a'):
        a = PyQuery(a)
        url = a.attr("href")
        if re.search('www.iqiyi.com/(a_|lib/m)', url):
            return self.Parse(url)
def Parse(self, input_text, types=None):
    if (types is None) or ("formats" in types):
        data = {
            "type": "formats",
            "name": "",
            "icon": "http://xxx.cn/xxx.jpg",
            "provider": "芒果TV",
            "caption": "芒果TV解析",
            #"warning" : "提示信息",
            "sorted": 1,
            "data": []
        }
        id = re.match(r'^http://[^\s]+/[^\s]+/([^\s]+)\.html', input_text).group(1)
        ejson_url = 'http://v.api.mgtv.com/player/video?retry=1&video_id=' + id
        ejson = common.getUrl(ejson_url)
        #print(ejson)
        ejson = json.loads(ejson)
        if ejson["status"] != 200:
            return
        edata = ejson["data"]
        # don't parse vip
        if JUDGE_VIP and (edata["user"]["isvip"] != "0"):
            return
        einfo = edata["info"]
        estream = edata["stream"]
        estream_domain = edata["stream_domain"]
        data["name"] = einfo["title"]
        data["icon"] = einfo["thumb"]
        length = len(estream)
        # 1=标清, 2=高清, 3=超清
        if length >= 3:
            data["data"].append({
                "label": "超清",
                "code": 3,
                #"ext" : "",
                #"size" : "",
                #"type" : "",
            })
        if length >= 2:
            data["data"].append({
                "label": "高清",
                "code": 2,
                #"ext" : "",
                #"size" : "",
                #"type" : "",
            })
        if length >= 1:
            data["data"].append({
                "label": "标清",
                "code": 1,
                #"ext" : "",
                #"size" : "",
                #"type" : "",
            })
        return data
def _init_handwich_bridge():
    c = HANDWICH_BRIDGE_CONFIG
    ip = c['ip']
    port = c['port']
    key = c['key']
    # TODO start handwich_bridge
    if not IsOpen(ip, port):
        argv = [
            _get_rel_path(BIN_ADL),
            _get_rel_path(HANDWICH_BRIDGE_BIN),
            '--'
        ]
        argv += ['--ip', str(ip), '--port', str(port)]
        if key is not None:
            argv += ['--key', str(key)]
        logging.debug(' start handwich_bridge --> ' + str(argv))
        subprocess.Popen(argv, shell=False, close_fds=True)
    # wait and check bridge started successfully
    init_ok = False
    for i in range(3):
        if not IsOpen(ip, port):
            time.sleep(i + 1)
            continue
        url = _make_handwich_base_url() + 'version'
        if key is not None:
            url += '?key=' + str(key)
        try:
            info = getUrl(url, allowCache=False, usePool=False)
            logging.debug('handwich_bridge version: ' + info)
            init_ok = True
            break
        except Exception as e:
            logging.warning(e)
            time.sleep(i + 1)
    if not init_ok:
        raise Exception('start handwich_bridge failed')
    # check core loaded and load core
    l = LOAD_CORE
    c_id = l['id']
    c_path = os.path.abspath(_get_rel_path(l['path']))

    def check_core_loaded():
        core_about_url = _make_call_core_url(c_id, 'about')
        # info = json.loads(getUrl(core_about_url, allowCache=False))  # FIXME
        text = getUrl(core_about_url, allowCache=False, usePool=False)
        logging.debug("core_about raw return:" + text)
        # print('DEBUG: core_about raw return')
        # print(text)
        info = json.loads(text)
        if info[0] != 'ret':
            logging.debug('core not loaded, ' + str(info))
            return False
        logging.debug('core ' + str(c_id) + ', ' + str(info[1]))
        return True

    if not check_core_loaded():
        load_core_url = _make_handwich_base_url() + 'load_core?id=' + str(c_id)
        if c['key'] is not None:
            load_core_url += '&key=' + str(c['key'])
        load_core_url += '&path=' + urllib.parse.quote(c_path)
        info = json.loads(
            getUrl(load_core_url, allowCache=False, usePool=False))
        if info[0] == 'done':
            logging.debug('core loaded, ' + str(info))
        else:
            raise Exception('can not load core', info)
    if not check_core_loaded():
        raise Exception('core not loaded')
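# IsOpen() is called above (and in _close_handwich_bridge) but is not defined
# in this section.  The function below is only a minimal sketch of what such a
# helper could look like, assuming all it needs to do is test whether a TCP
# port on the given host currently accepts connections.
import socket

def IsOpen(ip, port):
    # try a short TCP connect; connect_ex() returning 0 means the port is reachable
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(1)
        return s.connect_ex((ip, int(port))) == 0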
def Parse_a(self, input_text):
    # modified from sceext2's list271.py
    def get_list_info_api1(html_text):
        RE_GET_AID = ' albumId: ([0-9]+),'  # albumId: 202340701,
        # http://cache.video.qiyi.com/jp/avlist/202340701/2/
        URL_JS_API_PORT = 'http://cache.video.qiyi.com/jp/avlist/'

        # get info from 271 javascript API port
        def get_info_from_js_port(html_text):
            # get album id
            aid = get_aid(html_text)
            # get info list
            vlist = get_vinfo_list(aid)
            # done
            return vlist

        # get album id
        def get_aid(html_text):
            m = re.findall(RE_GET_AID, html_text)
            return m[0]

        # make js API port URL
        def make_port_url(aid, page_n):
            url = URL_JS_API_PORT + str(aid) + '/' + str(page_n) + '/'
            #print(url)
            return url

        # get vinfo list, get full list from js API port
        def get_vinfo_list(aid):
            vlist = []
            # request each page
            page_n = 0
            urls = []
            while True:
                # make request url
                page_n += 1
                url = make_port_url(aid, page_n)
                # get text
                raw_text = common.getUrl(url)
                # get list
                sub_list = parse_one_page(raw_text)
                for sub in sub_list:
                    url = sub['url']
                    if url in urls:
                        sub_list = []
                    else:
                        urls.append(url)
                if len(sub_list) > 0:
                    vlist += sub_list
                else:
                    # no more data
                    break
            # get full vinfo list done
            return vlist

        # parse one page info, parse raw info
        def parse_one_page(raw_text):
            # remove 'var tvInfoJs={' before json text, and json just ended with '}'
            json_text = '{' + raw_text.split('{', 1)[1]
            # load as json text
            info = json.loads(json_text)
            # check code, '"code":"A00000"' is OK, and '"code":"A00004"' is out of index
            if info['code'] == 'A00004':
                return []  # just return null result
            # get and parse video info items
            vlist = info['data']['vlist']
            out = []  # output info
            for v in vlist:
                one = {}
                one['no'] = v['pd']
                one['title'] = v['vn']
                one['subtitle'] = v['vt']
                one['url'] = v['vurl']
                # get more info
                one['vid'] = v['vid']
                one['time_s'] = v['timeLength']
                one['tvid'] = v['id']
                out.append(one)
            # get video info done
            return out

        # get info from js API port
        info2 = get_info_from_js_port(html_text)
        # replace vlist with js port data
        vlist = []
        for i in info2:
            one = {}
            one['no'] = "第" + str(i['no']) + "集 " + str(i['subtitle'])
            one['subtitle'] = i['subtitle']
            one['url'] = i['url']
            vlist.append(one)
        # done
        return vlist

    def get_list_info_api2(html_text):
        RE_GET_AID = ' albumId: ([0-9]+),'  # albumId: 203342201,
        # http://cache.video.qiyi.com/jp/sdvlst/6/203342201/
        URL_JS_API_PORT = 'http://cache.video.qiyi.com/jp/sdvlst/6/'

        # get info from 271 javascript API port
        def get_info_from_js_port(html_text):
            # get album id
            aid = get_aid(html_text)
            # get info list
            vlist = get_vinfo_list(aid)
            # done
            return vlist

        # get album id
        def get_aid(html_text):
            m = re.findall(RE_GET_AID, html_text)
            return m[0]

        # make js API port URL
        def make_port_url(aid):
            url = URL_JS_API_PORT + str(aid) + '/'
            #print(url)
            return url

        # get vinfo list, get full list from js API port
        def get_vinfo_list(aid):
            vlist = []
            # make request url
            url = make_port_url(aid)
            # get text
            raw_text = common.getUrl(url)
            # get list
            vlist = parse_one_page(raw_text)
            # get full vinfo list done
            return vlist

        # parse one page info, parse raw info
        def parse_one_page(raw_text):
            # remove 'var tvInfoJs={' before json text, and json just ended with '}'
            json_text = '{' + raw_text.split('{', 1)[1]
            # load as json text
            info = json.loads(json_text)
            # check code, '"code":"A00000"' is OK, and '"code":"A00004"' is out of index
            if info['code'] == 'A00004':
                return []  # just return null result
            # get and parse video info items
            vlist = info['data']
            out = []  # output info
            for v in vlist:
                one = {}
                one['no'] = v['desc']
                one['title'] = v['desc']
                one['subtitle'] = v['shortTitle']
                one['url'] = v['vUrl']
                # get more info
                one['vid'] = v['vid']
                one['time_s'] = v['timeLength']
                one['tvid'] = v['tvId']
                out.append(one)
            # get video info done
            return out

        # get info from js API port
        info2 = get_info_from_js_port(html_text)
        # replace vlist with js port data
        vlist = []
        for i in info2:
            one = {}
            one['no'] = i['no']
            one['subtitle'] = i['subtitle']
            one['url'] = i['url']
            vlist.append(one)
        # done
        return vlist

    def get_list_info_html(html):
        #print("get_list_info_html")
        data = []
        album_items = html('ul.site-piclist').children('li')
        for album_item in album_items:
            album_item = PyQuery(album_item)
            site_piclist_info = PyQuery(album_item.children('div.site-piclist_info'))
            site_piclist_info_title = PyQuery(site_piclist_info.children('p.site-piclist_info_title'))
            site_piclist_info_title_a = PyQuery(site_piclist_info_title.children('a'))
            site_piclist_info_title_fs12 = PyQuery(site_piclist_info.children('p.fs12'))
            site_piclist_info_title_fs12_a = PyQuery(site_piclist_info_title_fs12.children('a'))
            no = site_piclist_info_title_a.text()
            #if re.search("预告", no):
            #    continue
            name = site_piclist_info_title_fs12_a.text()
            url = site_piclist_info_title_fs12_a.attr('href')
            if url is None:
                continue
            subtitle = site_piclist_info_title_fs12_a.text()
            info = {
                "name": name,
                "no": no,
                "subtitle": subtitle,
                "url": url
            }
            data.append(info)
        return data

    #print("2" + input_text)
    def run(queue, get_list_info, html_text):
        try:
            result = get_list_info(html_text)
            if result != []:
                queue.put(result)
        except Exception as e:
            #import traceback
            #traceback.print_exc()
            print(e)

    html_text = common.getUrl(input_text)
    html = PyQuery(html_text)
    title = html('h1.main_title').children('a').text()
    for a in html('div.crumb-item').children('a'):
        a = PyQuery(a)
        if a.attr('href') in input_text:
            title = a.text()
    i = 0
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": i,
        "type": "list",
        "caption": "271视频全集"
    }
    results = []
    parser_threads = []
    q_results = queue.Queue()
    parser_threads.append(threading.Thread(target=run, args=(q_results, get_list_info_api1, html_text)))
    parser_threads.append(threading.Thread(target=run, args=(q_results, get_list_info_api2, html_text)))
    for parser_thread in parser_threads:
        parser_thread.start()
    for parser_thread in parser_threads:
        parser_thread.join()
    while not q_results.empty():
        data["data"] = q_results.get()
        break
    if data["data"] == []:
        try:
            data["data"] = get_list_info_html(html)
        except Exception as e:
            #import traceback
            #traceback.print_exc()
            print(e)
    data["total"] = len(data["data"])
    return data
def getUrl(queue, url):
    queue.put(common.getUrl(url))
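# A minimal usage sketch (an assumption, not part of the original module)
# showing how the queue-based getUrl() wrapper above can fetch several pages
# concurrently, following the same thread + queue pattern that Parse_a and
# Parse use for their sub-parsers.  The fetch_all() helper name is made up
# for illustration.
import queue
import threading

def fetch_all(urls):
    q = queue.Queue()
    threads = [threading.Thread(target=getUrl, args=(q, u)) for u in urls]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # collect whatever the worker threads managed to download
    results = []
    while not q.empty():
        results.append(q.get())
    return results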
def Parse(self, input_text, types=None):
    if (types is not None) and ("collection" not in types):
        return
    html = PyQuery(common.getUrl(input_text))
    items = html('a')
    title = html('title').text()
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": 0,
        "type": "collection"
    }
    urls = []
    for item in items:
        a = PyQuery(item)
        name = a.attr('title')
        if name is None:
            name = a.text()
        no = name
        subtitle = name
        url = a.attr('href')
        if url is None:
            continue
        if name is None or name == "":
            continue
        if re.match(r'^(http|https|ftp)://.+\.(mp4|mkv|ts|avi)', url):
            url = 'direct:' + url
        if not re.match(r'(^(http|https)://.+\.(shtml|html|mp4|mkv|ts|avi))|(^(http|https)://.+/video/)', url):
            continue
        if re.search(r'(list|mall|about|help|shop|map|vip|faq|support|download|copyright|contract|product|tencent|upload|common|index.html|v.qq.com/u/|open.baidu.com|www.iqiyi.com/lib/s_|www.iqiyi.com/dv/|top.iqiyi.com)', url):
            continue
        if re.search(r'(下载|播 放|播放|投诉|评论|(\d{1,2}:\d{1,2}))', no):
            continue
        unsure = False
        for temp in urls:
            if temp == str(url):
                #print("remove:" + url)
                url = None
                break
        if url is None:
            continue
        urls.append(url)
        if re.search(r'(www.iqiyi.com/a_)|(www.le.com/comic)', url):
            unsure = True
        info = {
            "name": name,
            "no": no,
            "subtitle": subtitle,
            "url": url,
            "unsure": unsure
        }
        data["data"].append(info)
    if self.TWICE_PARSE:
        try:
            from . import listparser
        except Exception as e:
            import listparser
        try:
            from .. import run
        except Exception as e:
            import run

        def runlist_parser(queue, parser, url):
            url2 = urlHandle(url)
            try:
                result = parser.Parse(url2)
                if (result is not None) and (result != []) and (result["data"] is not None) and (result["data"] != []):
                    queue.put({"result": result, "url": url})
            except Exception as e:
                #continue
                print(e)
                #import traceback
                #traceback.print_exc()

        list_parser = listparser.ListParser()
        urlHandle = run.urlHandle
        parser_threads = []
        parse_urls = []
        t_results = []
        q_results = queue.Queue()
        for url in urls:
            for filter in list_parser.getfilters():
                if re.search(filter, url):
                    parser_threads.append(threading.Thread(target=runlist_parser, args=(q_results, list_parser, url)))
        for parser_thread in parser_threads:
            parser_thread.start()
        for parser_thread in parser_threads:
            parser_thread.join()
        while not q_results.empty():
            t_results.append(q_results.get())
        oldddata = data["data"]
        data["data"] = []
        for t_result in t_results:
            parse_urls.append(t_result["url"])
            for tdata in t_result["result"]["data"]:
                tdata["no"] = t_result["result"]["title"] + " " + tdata["no"]
            data["data"].extend(t_result["result"]["data"])
        for ddata in oldddata:
            if ddata["url"] not in parse_urls:
                #print(ddata["url"])
                data["data"].append(ddata)
    data["total"] = len(data["data"])
    data["caption"] = "全页地址列表"
    return data