Пример #1
0
def url():
    """Download the music.taihe.com home page and return its body as text.

    The request headers come from ``sj_User_Agent()``. No errors are
    handled here, so any failure raised while fetching propagates to the
    caller.
    """
    home_page = "http://music.taihe.com/"
    reply = requests.get(home_page, headers=sj_User_Agent())
    return reply.text


# print(url())
# with open("音乐.html", "w", encoding="utf-8")as file:
#     file = file.write(response.text)
Пример #2
0
def wai():
    """Fetch the 58.com rental listing page, save it and return its HTML.

    The page is requested with headers from ``sj_User_Agent()``. On
    success the decoded body is written to ``租房.html`` (UTF-8) and
    returned. Any exception raised while fetching or saving is printed
    and the whole attempt is repeated; there is no retry limit, so the
    call only returns once an attempt succeeds.

    Returns:
        The response body as text.
    """
    url = "https://erds.58.com/zufang/"
    while True:
        try:
            response = requests.get(url, headers=sj_User_Agent())
            print(response)
            html = response.text
            # Actually persist the page: opening the file in "w" mode
            # without writing would only leave an empty snapshot behind.
            with open("租房.html", "w", encoding="utf-8") as file:
                file.write(html)
            return html
        except Exception as e:
            # Best-effort retry: report the failure and try again.
            print("这个请求头不可用!!!%s" % (e))
Пример #3
0
def w():
    """Return the sc.chinaz.com home page decoded as UTF-8.

    Each attempt asks ``sj_User_Agent()`` for a fresh set of headers.
    When an attempt raises, a fixed notice is printed and the request is
    repeated; there is no retry limit, so the function only returns after
    a successful fetch.
    """
    home_page = "http://sc.chinaz.com/"
    while True:
        try:
            agent_headers = sj_User_Agent()
            page = requests.get(home_page, headers=agent_headers)
            # Force the decoding instead of relying on requests' guess.
            page.encoding = "utf-8"
            return page.text
        except Exception:
            print("这个请求头不可用!!!")
Пример #4
0
# Crawl script: fetch the rental listing page, follow every house link on it
# and hand each downloaded detail page to z().
# `db`, `filename` and `z` are not defined in this excerpt.
cursor = db.cursor()
# NOTE(review): this name shadows the built-in `list` for the rest of the module.
list = []
# Running count of detail pages passed to z().
t = 0
# NOTE(review): wai() takes no page argument, so all 29 passes request the
# same URL — confirm whether real pagination was intended.
for j in range(1,30):
    print("第%s页开始"%(j))
    # Parse the listing HTML, then collect the href of every house-cell link.
    yifile = etree.HTML(wai())
    yifile = yifile.xpath('//li[@class="house-cell"]/div[@class="img-list"]/a/@href')
    print(yifile,len(yifile))
    for i in yifile:
        # print(i)
        # Repeat the detail-page request until one attempt succeeds
        # (there is no retry limit).
        while True:
            # Random 1-5 second pause before every attempt.
            sj = random.randint(1,5)
            time.sleep(sj)
            try:
                murl = i
                erresponse = requests.get(murl, headers=sj_User_Agent())
                erresponse.encoding = "utf-8"
                print(erresponse)
                # From here on `erresponse` is the decoded page text, not
                # the response object.
                erresponse = erresponse.text
                # with open("3.html", "w", encoding="utf-8")as filel:
                # filel = filel.write(erresponse)
                break
            except Exception as e:
                print("这个<二级>请求头不可用!!!%s"%(e))
        # with open("3.html" , "w" , encoding="utf-8")as filel:
        #     filel = filel.write(erresponse)
        t += 1
        # Disabled guard around z(); the disabled message translates to
        # "requests too frequent".
        # try:
        z(filename, erresponse, t)
        # except Exception as e:
        #     print("requests too frequent %s"%(e))
Пример #5
0
# XPath expressions that pull the preview URLs out of each category page.
# The position in the tuple is the position of the category link in
# ``url_l(w())[0]``.
_SC_PREVIEW_XPATHS = (
    # vector graphics
    '//div//div[@class="text_left"]//div[@class="box picblock col3"]/div/a/img/@src2',
    # high-resolution pictures
    '//div[@id="container"]/div[@class="box picblock col3"]/div/a/img/@src2',
    # icons
    '//ul[@class="pngblock imgload"]/li/p/a/img/@src2',
    # PSD assets
    '//div[@class="box col3 ws_block"]/a/img/@src',
    # fonts
    '//div//div[@class="index_font_list clearfix"]//li[@class="font"]/div/a/img/@src',
    # English fonts
    '//li[@class="font"]/div/a/img/@src',
    # sound effects
    '//div[@class="music_block"]/p[@class="n1"]/@thumb',
    # PPT templates
    '//div[@class="sc_warp  mt20"]/div[@id="main"]/div/div/a/img/@src',
    # resume templates (same markup as the PPT template page)
    '//div[@class="sc_warp  mt20"]/div[@id="main"]/div/div/a/img/@src',
)

# Column names shared by the CSV row, the JSON record and the SQL insert.
_SC_COLUMNS = (
    '矢量',
    '高清图片',
    '图标',
    'PSD素材',
    '字体',
    '英文字体',
    '音效',
    'PPT模板',
    '简历模板',
)


def we(j):
    """Scrape every sc.chinaz.com category page and store the preview URLs.

    For each category link in ``url_l(w())[0]`` the page is downloaded
    after a random 1-3 second pause, the category's XPath is evaluated,
    and the resulting URL list is:

    * appended (as a dict) to the module-level ``list``,
    * passed to ``bccsv`` together with the file name ``素材.csv``,
    * inserted into table ``sc`` through the module-level ``cursor``,
    * dumped, with everything collected so far, to ``素材.json``.

    Args:
        j: multiplier applied to the running count when writing the
            ``"total"`` field of the JSON file (presumably a page number;
            confirm against the caller).

    Returns:
        None.
    """
    filename = "素材.csv"
    # Resolve the category links once: every url_l(w()) call downloads the
    # home page again, so repeating it inside the loop only adds requests.
    category_urls = url_l(w())[0]
    t = 0
    for m, urll in enumerate(category_urls):
        t += 1
        time.sleep(random.randint(1, 3))
        mresponse = requests.get(urll, headers=sj_User_Agent())
        mresponse.encoding = "utf-8"
        mfile = etree.HTML(mresponse.text)

        # Categories beyond the known ones have no extractor; treat them as
        # "no previews" rather than failing on a None result further down.
        if m < len(_SC_PREVIEW_XPATHS):
            preview_urls = mfile.xpath(_SC_PREVIEW_XPATHS[m])
        else:
            preview_urls = []

        # NOTE(review): every column receives the list of the *current*
        # category, exactly as before — confirm this is the intended schema.
        dic = {column: preview_urls for column in _SC_COLUMNS}
        # print(dic)
        list.append(dic)
        bccsv(filename, dic)

        joined = ",".join(preview_urls)
        print(joined, type(joined))
        # NOTE(review): the statement is assembled from scraped text, so a
        # quote in a URL breaks (or alters) the SQL. Switch to the driver's
        # placeholders once the DB-API paramstyle behind `cursor` is known.
        sql = "insert into sc (矢量,高清图片,图标,PSD素材,字体,英文字体,音效,PPT模板,简历模板) values ('%s','%s','%s','%s','%s','%s','%s','%s','%s')" % (
            (joined,) * len(_SC_COLUMNS))
        cursor.execute(sql)

        # The JSON snapshot is rewritten after every category so that it
        # always reflects everything collected so far.
        with open("素材.json", "w", encoding="utf-8") as json_file:
            json.dump({
                "total": t * j,
                "data": list
            },
                      json_file,
                      ensure_ascii=False,
                      indent=4)
        print("爬取%s条" % (t))
Пример #6
0
def url():
    """Return the HTML text of the music.taihe.com landing page.

    Headers for the request are obtained from ``sj_User_Agent()``.
    Exceptions raised by the request are not caught.
    """
    landing_page = "http://music.taihe.com/"
    fetched = requests.get(landing_page, headers=sj_User_Agent())
    return fetched.text
Пример #7
0
# Module-level accumulator; the code that fills it is outside this block.
# NOTE(review): the name shadows the built-in `list`.
list = []

# Output CSV file and its column order: five "hottest" columns followed by
# the matching five "newest" columns (image URL, popularity, song title,
# source, type).
filename = "音乐.csv"
header_csv = [
    '最热图片地址',
    '最热热度',
    '最热歌曲名',
    '最热来自',
    '最热类型',
    '最新图片地址',
    '最新热度',
    '最新歌曲名',
    '最新来自',
    '最新类型',
]

# Start the CSV from scratch with just the header row. "utf-8-sig" prefixes
# the file with a BOM; newline="" leaves line endings to the csv module.
with open(filename, mode="w", encoding="utf-8-sig", newline="") as csv_file:
    # The handle name is reused for the writer, so after this block
    # `csv_file` refers to the DictWriter.
    csv_file = csv.DictWriter(csv_file, fieldnames=header_csv)
    csv_file.writeheader()
for i in range(1, 180):
    print("第%s页开始" % (i))
    if i == 1:
        sj = random.randint(1, 10)
        yiurl = "http://music.taihe.com%s" % (r)
        try:
            yiheader = sj_User_Agent()
            time.sleep(sj)
            yiresponse = requests.get(yiurl, headers=yiheader)
            yiresponse.encoding = "utf-8"
            print(yiresponse)
            cs(yiresponse.text, i)
        except Exception as e:
            print("这个请求头不可用!!!%s" % (e))
    else:
        sj = random.randint(1, 5)
        s = 20 * (i - 1)
        yiurl = "http://music.taihe.com/songlist/tag/%E5%85%A8%E9%83%A8?orderType=1&third_type="
        page = {"offset": s}
        try:
            yiheader = sj_User_Agent()
            time.sleep(sj)