Пример #1
0
def pconedetect():
    """Report Pcone products that are listed online but missing from the sheet.

    Fetches the merchant's product list from the Pcone API, reads column O of
    the ``商品ID`` sheet through ``getsheet`` and inserts every ``display_id``
    that is not present in the sheet at index ``1.0`` of the module-level
    ``output`` object (presumably a Tk text widget -- confirm), followed by a
    completion message.
    """
    url = "https://www.pcone.com.tw/api/merchant/products?items_per_page=1000&merchant_id=2945567&page=1"
    my_headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
    gotrq = requests.get(url, headers=my_headers)
    data = json.loads(gotrq.text)  # parse the JSON body into a Python dict
    listed_ids = [product["display_id"] for product in data["products"]]

    # First cell of every sheet row; rows that come back without any cell are
    # treated as an empty string.  A set gives O(1) membership tests instead
    # of the exception-driven list.index() scan.
    sheet_rows = getsheet('商品ID!O:O') or []
    known_ids = {row[0] if row else "" for row in sheet_rows}

    # IDs that are on the web shop but not in the sheet, in listing order.
    missing_ids = [pid for pid in listed_ids if pid not in known_ids]

    # Every insert goes to the top of the output, so the completion message
    # ends up above the reported IDs.
    for pid in missing_ids:
        output.insert(1.0, str(pid) + "\n")
    output.insert(1.0, "偵測完成\n")
Пример #2
0
def getALLpcd():
    """Refresh column V of the ``商品ID`` sheet with PChome store stock values.

    For every row of ``商品ID!K:L`` (product ID, style ID) the product page
    data is fetched with ``getpcd`` and the quantity of the matching style is
    collected.  Rows that cannot be resolved receive a status text instead:

    * ``商品ID錯誤``  -- the product ID cell is blank
    * ``款式ID錯誤``  -- no style of the product matches the sheet's style ID
    * ``錯誤或下架``  -- fetching or reading the product data failed

    The first row is overwritten with a dated header, the old column is
    cleared with ``delsheet`` and the new values are written with
    ``writesheet``.
    """
    prdidData = getsheet('商品ID!K:L') or []  # all rows of the sheet range
    total = len(prdidData)
    wrval = []  # one single-cell row per sheet row

    for row_no, row in enumerate(prdidData):
        print(str(row_no) + "/" + str(total))  # progress
        if not row:
            # Row without any cell: keep the output cell blank.
            cell = [""]
        elif not row[0]:
            cell = ["商品ID錯誤"]
        else:
            try:
                time.sleep(0.5)  # throttle: the store blocks bursts of requests
                gotstock = getpcd(row[0])  # data scraped from the product page
                cell = ["款式ID錯誤"]  # stays unless a style matches below
                for style in gotstock["prd"]:
                    # style[0] is the style ID, style[1] the quantity
                    if style[0] == row[1]:
                        cell = [style[1]]
            except Exception:
                # Best effort: any failure for this row (network, missing
                # style ID cell, unexpected data) is reported in the sheet
                # instead of aborting the whole run.
                cell = ["錯誤或下架"]
        wrval.append(cell)

    if not wrval:
        # Empty sheet range: still provide a row for the header.
        wrval.append([""])
    wrval[0][0] = "PC梓原" + time.strftime("%m/%d", time.localtime())
    delsheet("商品ID!V:V")
    writesheet("商品ID!V1", wrval)
    print("OK")
Пример #3
0
def getALLpcone():
    """Refresh column Q of the ``商品ID`` sheet with Pcone stock values.

    For every row of ``商品ID!O:P`` (product ID, style ID) the product data is
    fetched with ``getpcone`` and the quantity of the matching style is
    collected.  Rows that cannot be resolved receive a status text instead:

    * blank cell     -- the row has no product ID
    * ``款式ID錯誤``  -- no style of the product matches the sheet's style ID
    * ``錯誤或下架``  -- fetching or reading the product data failed

    The first row is overwritten with a dated header, the old column is
    cleared with ``delsheet`` and the new values are written with
    ``writesheet``.
    """
    prdidData = getsheet('商品ID!O:P') or []  # all rows of the sheet range
    total = len(prdidData)
    wrval = []  # one single-cell row per sheet row

    for row_no, row in enumerate(prdidData):
        print(str(row_no) + "/" + str(total))  # progress
        # Every entry is a one-cell list, including rows with a blank product
        # ID, so the header assignment and the sheet write below always see a
        # uniform list-of-lists.
        cell = [""]
        if row and row[0]:
            try:
                gotstock = getpcone(row[0])
                cell = ["款式ID錯誤"]  # stays unless a style matches below
                for style in gotstock["prd"]:
                    # style[0] is the style ID, style[1] the quantity
                    if style[0] == row[1]:
                        cell = [style[1]]
            except Exception:
                # Best effort: any failure for this row is reported in the
                # sheet instead of aborting the whole run.
                cell = ["錯誤或下架"]
        wrval.append(cell)

    if not wrval:
        # Empty sheet range: still provide a row for the header.
        wrval.append([""])
    wrval[0][0] = "Pcone" + time.strftime("%m/%d", time.localtime())
    delsheet("商品ID!Q:Q")
    writesheet("商品ID!Q1", wrval)
    print("OK")
Пример #4
0
def pcddetect():
    """Report PChome store products that are listed online but not in the sheet.

    Walks the seller's product list pages (50 items per page, at most 199
    pages), collects the product ID from every ``a.hotProdList`` link, then
    inserts each ID that is absent from column M of the ``商品ID`` sheet at
    index ``1.0`` of the module-level ``output`` object, followed by a
    completion message.
    """
    # The listing is only reachable when logged in, so a cookie string copied
    # from a logged-in browser is sent along.
    # NOTE(review): hard-coded session cookie -- it will expire and should not
    # live in source control; the original author also marked this as needing
    # more investigation.
    cookie_str = r'cbj=IqhLsP6..LOKMq6kVcXtnWgBVcXtnqojj'
    my_headers = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }

    # Turn "k1=v1; k2=v2" into a dict once, outside the page loop.
    cookies = {}
    for pair in cookie_str.split(';'):
        if not pair.strip():
            continue  # tolerate a trailing ';'
        key, value = pair.split('=', 1)
        cookies[key.strip()] = value

    allid = []
    # One session for all pages; the context manager closes its connections.
    with requests.Session() as session:
        for page in range(1, 200):
            url = ('http://seller.pcstore.com.tw/S188431702/plist_dt.htm?s=S188431702&c=&skw=&pg='
                   + str(page) + '&sr=1&pp=50')
            res = session.get(url, cookies=cookies, headers=my_headers)
            soup = BeautifulSoup(res.text, "html.parser")
            links = soup.find_all('a', 'hotProdList')
            for link in links:
                # "/S188431702/<id>.htm" -> "<id>"
                allid.append(link['href'].replace('/S188431702/', '').replace('.htm', ''))

            if len(links) < 50:  # a short page is the last page
                break

    output.insert(1.0, "\n")

    # IDs already recorded in the sheet; rows without a cell are skipped.
    sheet_rows = getsheet('商品ID!M:M') or []
    takeid = {row[0] for row in sheet_rows if row}

    for product_id in allid:
        if product_id not in takeid:  # listed online but unknown to the sheet
            output.insert(1.0, str(product_id) + "\n")
    output.insert(1.0, "偵測完成\n")
Пример #5
0
def yahoodetect():
    """Report Yahoo auction booth items that are listed online but not in the sheet.

    Walks the booth pages (at most 199) until a page has no ``div.item-wrap``
    element, collecting each item's ``data-mid`` attribute.  Every collected
    ID that is absent from column E of the ``商品ID`` sheet is inserted at
    index ``1.0`` of the module-level ``output`` object, followed by a
    completion message.  A ``.`` is inserted per page as a progress marker.
    """
    my_headers = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    allid = []
    for page in range(1, 200):
        try:
            output.insert(1.0, ".")
            url = "https://tw.bid.yahoo.com/booth/Green-Forest-Y3489416698?bfe=1&page=" + str(
                page)
            gotrq = requests.get(url, headers=my_headers)
            soup = BeautifulSoup(gotrq.text, "html.parser")
            items = soup.find_all('div', 'item-wrap')
            if not items:  # no more items: crawl finished
                break
            for item in items:
                allid.append(item['data-mid'])
        except Exception as exc:
            # Best effort: stop crawling and compare what was collected so
            # far, but say why the crawl stopped instead of hiding it.
            print("end")
            print(repr(exc))
            break
    output.insert(1.0, "\n")

    # IDs already recorded in the sheet; rows without a cell are skipped.
    sheet_rows = getsheet('商品ID!E:E') or []
    takeid = {row[0] for row in sheet_rows if row}

    for item_id in allid:
        if item_id not in takeid:  # listed online but unknown to the sheet
            output.insert(1.0, str(item_id) + "\n")
    output.insert(1.0, "偵測完成\n")
Пример #6
0
def rutendetect():
    """Report Ruten shop items that are listed online but not in the sheet.

    Walks the seller's listing pages (at most 199), collecting the ``name``
    attribute of every ``div.rt-product-tag-container.tagging-class`` element
    without duplicates, and stops after the first page that has no
    ``div.item-img-wrap`` element.  Every collected ID that is absent from
    column G of the ``商品ID`` sheet is inserted at index ``1.0`` of the
    module-level ``output`` object, followed by a completion message.  A
    ``.`` is inserted per page as a progress marker.
    """
    my_headers = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    allid = []      # unique IDs in first-seen order
    seen = set()    # same IDs, for O(1) duplicate checks
    for page in range(1, 200):
        output.insert(1.0, ".")
        url = "http://class.ruten.com.tw/user/index00.php?s=ting865290&p=" + str(
            page)
        gotrq = requests.get(url, headers=my_headers)
        soup = BeautifulSoup(gotrq.text, "html.parser")
        tags = soup.find_all('div', 'rt-product-tag-container tagging-class')
        end = soup.find_all("div", class_="item-img-wrap")
        for tag in tags:
            item_id = tag['name']
            if item_id not in seen:  # only add IDs not collected yet
                seen.add(item_id)
                allid.append(item_id)
        if len(end) == 0:  # page without item images: past the last page
            break

    output.insert(1.0, "\n")

    # IDs already recorded in the sheet; rows without a cell are skipped.
    sheet_rows = getsheet('商品ID!G:G') or []
    takeid = {row[0] for row in sheet_rows if row}

    for item_id in allid:
        if item_id not in takeid:  # listed online but unknown to the sheet
            output.insert(1.0, str(item_id) + "\n")
    output.insert(1.0, "偵測完成\n")
Пример #7
0
def getALLRuten():
    """Refresh column T of the ``商品ID`` sheet with Ruten stock values.

    For every row of ``商品ID!G:H`` (product ID, style ID) the product data is
    fetched with ``getruten``.  When ``gotstock['prd']`` is a string the
    product has no styles and that string is used directly; otherwise the
    quantity of the style matching the sheet's style ID is used.  Status
    texts written instead of a quantity:

    * blank cell              -- the row has no product ID
    * ``款式關閉``             -- the matching style is flagged ``"N"``
    * ``款式ID錯誤``           -- no style matches the sheet's style ID
    * ``下架``                 -- ``gotstock['state']`` equals ``True``
    * ``商品ID錯誤或無款式ID``  -- fetching or reading the data failed

    The first row is overwritten with a dated header, the old column is
    cleared with ``delsheet`` and the new values are written with
    ``writesheet``.

    Returns:
        The list of single-cell rows that was written to the sheet.
    """
    prdidData = getsheet('商品ID!G:H') or []  # all rows of the sheet range
    total = len(prdidData)
    wrval = []  # rows to write, one single-cell list per sheet row

    for row_no, row in enumerate(prdidData):
        print(str(row_no) + "/" + str(total))  # progress
        cell = [""]
        if row and row[0]:
            try:
                gotstock = getruten(row[0])  # scrape the product page
                styles = gotstock['prd']
                if isinstance(styles, str):  # text means "no styles"
                    cell = [styles]
                else:
                    # Default to the error text; the original compared the
                    # row to "" here, which never matched a [""] row, so the
                    # error was never reported.
                    cell = ["款式ID錯誤"]
                    for style in styles:
                        if style[0] == row[1]:  # style ID found
                            cell = [style[1]]  # quantity
                            if style[3] == "N":  # style is switched off
                                cell = ["款式關閉"]
                # Equality (not identity) is kept on purpose so a value such
                # as 1 still counts as "taken down", as in the original.
                if gotstock["state"] == True:  # noqa: E712
                    cell = ["下架"]
            except Exception:
                # Wrong product ID or missing style ID cell; report it in the
                # sheet instead of aborting the whole run.
                cell = ["商品ID錯誤或無款式ID"]
        wrval.append(cell)

    if not wrval:
        # Empty sheet range: still provide a row for the header.
        wrval.append([""])
    wrval[0][0] = "Ruten" + time.strftime("%m/%d", time.localtime())  # header row
    delsheet("商品ID!T:T")  # clear the old column
    writesheet("商品ID!T1", wrval)  # write the new values
    print("OK")  # was unreachable after the return in the original
    return wrval
Пример #8
0
def getALLYahoo():
    """Refresh column S of the ``商品ID`` sheet with Yahoo auction stock values.

    For every row of ``商品ID!E:F`` (product ID, style ID) the product data is
    fetched with ``getyahoo``.  When ``gotstock['prd']`` is a string the
    product has no styles and that string is used directly; otherwise the
    quantity of the style matching the sheet's style ID is used.  Status
    texts written instead of a quantity:

    * blank cell              -- the row has no product ID
    * ``款式ID錯誤``           -- no style matches the sheet's style ID
    * ``下架``                 -- ``gotstock['state']`` equals ``'3'``
    * ``商品ID錯誤或無款式ID``  -- fetching or reading the data failed

    The first row is overwritten with a dated header, the old column is
    cleared with ``delsheet`` and the new values are written with
    ``writesheet``.

    Returns:
        The list of single-cell rows that was written to the sheet.
    """
    prdidData = getsheet('商品ID!E:F') or []  # all rows of the sheet range
    total = len(prdidData)
    wrval = []  # rows to write, one single-cell list per sheet row

    for row_no, row in enumerate(prdidData):
        print(str(row_no) + "/" + str(total))  # progress
        cell = [""]
        if row and row[0]:
            try:
                gotstock = getyahoo(row[0])
                styles = gotstock['prd']
                if isinstance(styles, str):  # text means "no styles"
                    cell = [styles]
                else:
                    # Default to the error text; the original compared the
                    # row to "" here, which never matched a [""] row, so the
                    # error was never reported.
                    cell = ["款式ID錯誤"]
                    for style in styles:
                        if style[0] == row[1]:  # style ID found
                            cell = [style[1]]  # quantity
                if gotstock["state"] == '3':
                    cell = ["下架"]
            except Exception:
                # Wrong product ID or missing style ID cell; report it in the
                # sheet instead of aborting the whole run.
                cell = ["商品ID錯誤或無款式ID"]
        wrval.append(cell)

    if not wrval:
        # Empty sheet range: still provide a row for the header.
        wrval.append([""])
    wrval[0][0] = "Yahoo" + time.strftime("%m/%d", time.localtime())  # header row
    delsheet("商品ID!S:S")
    writesheet("商品ID!S1", wrval)
    print("OK")  # was unreachable after the return in the original
    return wrval