def PostNewPicturesUrl(driver, itemUrl):
    """Record the picture URLs of one item page and mark the item as saved.

    The picture URLs come from ``getPicturesUrl(driver, itemUrl)``. Each URL
    that the database does not report as uploaded is stored through
    ``addPictureUrl``; the real upload call (``saveImgToBmob``) is disabled
    here, so only a placeholder message is printed for it.

    Processing stops at the first URL the database reports as already
    uploaded: the function returns right away, leaving any later URLs
    unprocessed and the item itself unmarked.

    Args:
        driver: Browser driver, forwarded unchanged to ``getPicturesUrl``.
        itemUrl: URL of the item page whose pictures are collected.

    Returns:
        None.
    """
    myDB = itemUrlDb('itemUrlDb')
    logging.debug('try to get pictures from URL: %s ', itemUrl)

    picturesUrlList = getPicturesUrl(driver, itemUrl)
    print(picturesUrlList)
    if picturesUrlList is None:
        return

    for picturesUrl in picturesUrlList:
        if myDB.queryIfPicUrlUpload(picturesUrl):
            logging.debug('had been post to bmob')
            print('had been post to bmob')
            # NOTE(review): this abandons the remaining URLs and skips the
            # addItem call below -- confirm `continue` was not intended.
            return
        # Upload is stubbed out; only the database record is written.
        print('faked post to bmob')
        myDB.addPictureUrl(picturesUrl)

    myDB.addItem(itemUrl, 'saved')
def findAllShopNewItemUrl():
    """Walk the paginated shop list and store every '查看更多' link target.

    A browser is obtained from ``getCookies()``. For up to 100 pages the
    window is scrolled to the bottom three times (3 s pause each), every
    link whose text is '查看更多' has its ``href`` saved through
    ``saveLookMoreUrl``, and the next-page control is clicked. The walk ends
    when an element lookup raises ``NoSuchElementException`` (in practice the
    missing next-page control) or after 100 pages.

    The collected hrefs are only used for the final debug count; the
    function returns None.
    """
    allShopNewItemUrlList = []
    myDB = itemUrlDb('itemUrlDb')
    # getCookies() yields a (browser, cookies) pair; only the browser is used.
    browser, _cookies = getCookies()

    # The finally clause guarantees the browser is closed even when an
    # unexpected error (driver failure, database error) aborts the walk.
    try:
        for page in range(100):
            # Scroll to the bottom repeatedly before reading the links.
            for _ in range(3):
                browser.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(3)  # seconds
                print('scroll down to get more shop')
            try:
                lookMoreClasses = browser.find_elements_by_link_text('查看更多')
                # Both placeholders need a conversion type; a bare trailing
                # '%' makes the record fail to format and never get logged.
                logging.debug('found %s lookMoreUrl in page %s',
                              len(lookMoreClasses), page)
                print('found ' + str(len(lookMoreClasses)) +
                      ' lookMoreUrl in page ' + str(page))
                for lookMore in lookMoreClasses:
                    href = lookMore.get_attribute('href')
                    myDB.saveLookMoreUrl(href)
                    # TODO: skip URLs that have already been saved
                    allShopNewItemUrlList.append(href)
                # Click through to the next page.
                browser.find_element_by_xpath(
                    "//a[@class='dpl-paginator-next J_NextPage J_HotPoint']"
                ).click()
                time.sleep(3)
            except NoSuchElementException as e:
                print('except:', e)
                logging.debug('get the last page %s, find %s shop item URL',
                              page, len(allShopNewItemUrlList))
                break
    finally:
        browser.quit()
Пример #3
0
    def setUp(self):
        """Fetch cookies, round-trip them through pickle, and load saved URLs.

        The cookies returned by ``getShouChangCookies()`` are printed, written
        to ``cookies.pickle`` and read back; the reloaded value must equal the
        original. Afterwards ``self.myDB`` is opened and ``self.lookMoreUrlList``
        is filled with the first column of every row returned by
        ``queryAllLookMoreUrl()``.
        """
        import pickle

        cookies = getShouChangCookies()
        print('cookies: ')
        print(cookies)

        # Write the cookies out, then read them back and compare.
        with open('cookies.pickle', 'wb') as sink:
            pickle.dump(cookies, sink)
        with open('cookies.pickle', 'rb') as source:
            restored = pickle.load(source)
            self.assertEqual(restored, cookies)

        self.myDB = itemUrlDb('itemUrlDb')
        self.lookMoreUrlList = [
            row[0] for row in self.myDB.queryAllLookMoreUrl()
        ]
Пример #4
0
 def setUp(self):
     """Set ``myDB`` to an ``itemUrlDb`` handle and ``picUrl`` to a fixed URL."""
     self.myDB = itemUrlDb('itemUrlDb')
     self.picUrl = 'https://gd1.alicdn.com/imgextra/i3/0/TB10lxmNVXXXXbiXFXXXXXXXXXX_!!0-item_pic.jpg_400x400.jpg'
Пример #5
0
 def setUp(self):
     """Attach an ``itemUrlDb('itemUrlDb')`` instance to ``self.myDB``."""
     database = itemUrlDb('itemUrlDb')
     self.myDB = database
def saveTodayNewItemUrl(lookMoreUrl, cookies):
    """Open a shop page, switch to its 上新 tab and record the listed items.

    A Firefox session is started, ``lookMoreUrl`` is loaded with ``cookies``
    applied, and the first ``<li>`` of the album menu (the 上新 tab) is
    clicked. For each listed item box the link URL and thumbnail URL are
    read. When the item URL is not yet in the database and a thumbnail URL
    exists, the item is stored with status ``'saved'``, the thumbnail is
    handed to ``savePictureUrlToBmob``, and the thumbnail URL is stored with
    status ``"upload"`` or ``"notUpload"`` according to that call's result.

    The browser is closed on every exit path.

    Args:
        lookMoreUrl: URL of the shop page to open.
        cookies: Iterable of cookies, each passed to ``browser.add_cookie``.

    Returns:
        None. ``NoSuchElementException`` and ``IndexError`` raised while
        reading the item list are printed and swallowed; other exceptions
        propagate after the browser has been closed.
    """
    myDB = itemUrlDb('itemUrlDb')
    print('lookMoreUrl:')
    print(lookMoreUrl)
    browser = webdriver.Firefox()
    # One try/finally around everything that uses the browser, so a failure
    # while loading the page or adding cookies cannot leak the Firefox process.
    try:
        # Load the page first, add the cookies, then load it again; adding
        # cookies before the first get raises an error.
        browser.get(lookMoreUrl)
        for cookie in cookies:
            browser.add_cookie(cookie)
        browser.get(lookMoreUrl)

        try:
            # Find the 上新 button: the first <li> of the album menu.
            shangXin = browser.find_element_by_xpath(
                "//ul[@class='gallery-album-menu-list clearfix']"
            ).find_element_by_tag_name('li')
            shangXin.click()
        except NoSuchElementException:
            print('shangXin url not matched')
            return

        time.sleep(2)
        try:
            # Find the 上新 date header.
            shangxinDate = browser.find_element_by_xpath(
                "//li[@class='gallery-album-title clearfix']")
            print(shangxinDate.text)

            newItemUrlClassList = browser.find_elements_by_xpath(
                "//div[@class='img-controller-img-box']")
            # TODO: save new items by date; for now every listed item is saved.
            if not newItemUrlClassList:
                print('Today no new item.')
                return

            # The last whitespace-separated token of the tab label is parsed
            # as the item count (presumably the number of new items).
            newItemCnt = int(shangXin.text.split()[-1], 10)
            print('begint to find new itemUrl')
            print(newItemCnt)
            # Indexing (rather than slicing) is deliberate: a count larger
            # than the number of boxes found surfaces as the IndexError
            # reported below.
            for i in range(newItemCnt):
                print('find new itemUrl')
                link = newItemUrlClassList[i].find_element_by_tag_name('a')
                newItemUrl = link.get_attribute('href')
                print(newItemUrl)

                newItemPicUrl = link.find_element_by_tag_name(
                    'img').get_attribute('src')
                print(newItemPicUrl)

                # Explicit comparison kept on purpose: the return type of
                # queryIfItemUrlSaved is not visible from this function.
                alreadySaved = myDB.queryIfItemUrlSaved(newItemUrl) == True  # noqa: E712
                if alreadySaved or newItemPicUrl is None:
                    print('had been saved')
                    continue

                myDB.addItem(newItemUrl, 'saved')
                print('save item url')
                # Upload to bmob, then record the thumbnail with the outcome.
                uploaded = savePictureUrlToBmob('JLkR444G', newItemPicUrl,
                                                newItemUrl)
                myDB.addPictureUrl(newItemPicUrl,
                                   "upload" if uploaded else "notUpload")
        except NoSuchElementException as e:
            print('no shangxin')
            print('except:', e)
        except IndexError as e:
            print('except:', e)
    finally:
        print('finally quit browser.')
        browser.quit()