Example #1
    def __init__(self, debug=False):
        self._api = 'https://api.bitkub.com/api'
        self._key = os.getenv('BITKUB_API_KEY', '')
        self._secret = os.getenv('BITKUB_API_SECRET', '').encode()

        self._debug = debug
        self._browser = Browser(debug=self._debug)
Example #2
    def __init__(self, host):
        """Init object with forum url (host) and Browser object."""
        self.host = host
        try:
            self.browser = Browser()
        except HTTPError as e:
            print(e)
            sys.exit(1)
Example #3
    def __init__(self, debug=False):
        self._api = 'https://api.satang.pro/api'
        self._uid = os.getenv('SATANG_USER_ID', '')
        self._key = os.getenv('SATANG_API_KEY', '')
        self._secret = os.getenv('SATANG_API_SECRET', '').encode('utf-8')

        self._debug = debug
        self._browser = Browser(debug=self._debug)
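Examples #1 and #3 share a pattern: read API credentials from environment variables at construction time and hand a debug flag to the Browser helper. A minimal self-contained sketch of that pattern (ApiClient and its prefix parameter are illustrative names, not part of either project):

import os

class ApiClient:
    """Sketch of the env-var credential pattern from Examples #1 and #3."""

    def __init__(self, prefix, base_url, debug=False):
        self._api = base_url
        # Missing variables fall back to empty strings, so a bad setup
        # fails at request time rather than at construction time.
        self._key = os.getenv(prefix + '_API_KEY', '')
        self._secret = os.getenv(prefix + '_API_SECRET', '').encode()
        self._debug = debug

client = ApiClient('BITKUB', 'https://api.bitkub.com/api', debug=True)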
Example #4
def runCrawl(limitNum=0, queryList=(), is_all_comments=False):
    # A tuple default avoids the mutable-default-argument pitfall.
    browser = Browser("driver/chromedriver")
    for query in queryList:
        browser.clearLink()
        makeDir("data")
        makeDir("data/" + query)
        mUrl = ""
        if query[0] == "#":
            mUrl = "https://www.instagram.com/explore/tags/" + query[
                1:] + "/?hl=en"
        else:
            mUrl = "https://www.instagram.com/" + query + "/?hl=en"
        browser.goToPage(mUrl)
        print("collecting url of " + query + "...")
        browser.scrollPageToBottomUntilEnd(browser.collectDpageUrl, limitNum)
        print("finish scoll collecting!")

        print("collecting data...")
        slist = list(set(browser.urlList))
        for url in tqdm(slist):
            dirName = url.split("/")[4]
            # skip if already crawled
            if not makeDir("data/" + query + "/" + dirName):
                continue
            browser.goToPage(url)
            if is_all_comments:
                browser.expandComments()
            cur = browser.getPageSource()
            writeToFile("data/" + query + "/" + dirName + "/raw.html", [cur])
            infoData = cur.split("<meta content=")[1].split(" ")
            # extract data
            lang = extractLang(cur)
            # likes = extractLikes(infoData, lang)
            likes = extractLikes_cur(cur)
            comments = extractComments(infoData, lang)
            caption = extractCaption(cur)
            dateTime = extractDateTime(cur)
            commentMessages = extractCommentsMessage(cur)
            # print("likes:",likes," comments:", comments," caption:", caption,
            #     "commentMessages:", commentMessages, "dateTime:", dateTime)
            writeToFile("data/" + query + "/" + dirName + "/info.txt", [
                "likes: ", likes, "", "comments: ", comments, "", "caption: ",
                caption, "", "commentMessages: ", commentMessages, "",
                "dateTime: ", dateTime, ""
            ])
            # download image
            imageUrl = html.unescape(
                cur.split('meta property="og:image" content="')[1].split('"')[0])
            downloadImage(imageUrl,
                          "data/" + query + "/" + dirName + "/image.jpg")
            time.sleep(1)
        print("query " + query + " collecting finish")

    time.sleep(2)
    browser.driver.quit()
    print("FINISH!")
Example #5
    def setUp(self) -> None:
        self.driver = Browser("chrome", r".\tools\chromedriver.exe")
        self.driver.open_browser("http://www.baidu.com")
        logging.info("打开浏览器")
        logging.info(
            f"浏览器名称:{self.driver.browser_name},浏览器版本:{self.driver.browser_version}"
        )

        self.homepage = HomePage(self.driver)
        self.newspage = NewsPage(self.driver)
Example #6
	def setUp(self):
		self.driver = Browser().get_browserdriver()
		self.login_page = LoginPage(self.driver)
		self.config = Config().get('ZPC')
		self.login_page.url = self.config.get('url')
		self.login_page.visit()
		self.login_page.wait(5)
		self.login_page.set_value(element=self.login_page.rec_user_input(), text=self.config.get('user'))
		self.login_page.set_value(element=self.login_page.rec_passwd_input(), text=self.config.get('pwd'))
		self.main_page = self.login_page.click_login_btn()
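Examples #5 and #6 both wire page objects to a single shared driver inside setUp. A minimal self-contained sketch of that wiring (FakeDriver and DemoPage are stand-ins, not the projects' classes):

import unittest

class FakeDriver:
    """Stand-in for the Browser-backed driver used above."""

    def open_browser(self, url):
        print("open", url)

class DemoPage:
    def __init__(self, driver):
        self.driver = driver  # every page object shares the one driver

class DemoTest(unittest.TestCase):
    def setUp(self):
        self.driver = FakeDriver()
        self.driver.open_browser("https://example.com")
        self.page = DemoPage(self.driver)

    def test_page_shares_driver(self):
        self.assertIs(self.page.driver, self.driver)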
Example #7
File: rename.py Project: Blamo27/Rename
    def setPath(self):
        """Define current path with Browser (utils)."""
        file = Browser()
        self.get('dirname').set(file.get())
Example #8
def runCrawl(limitNum=0, queryList=(), is_all_comments=False, userinfo=None):
    # Immutable/None defaults avoid the mutable-default-argument pitfall.
    browser = Browser("driver/chromedriver")
    if userinfo:
        print('Start logging in')
        browser.goToPage('https://www.instagram.com/accounts/login/?hl=en')
        if browser.log_in(userinfo):
            print('Logged in successfully')
        else:
            print('Failed to log in')
            return
    else:
        print('Continuing without logging in')
    for query in queryList:
        browser.clearLink()
        makeDir("data")
        makeDir("data/" + query)
        mUrl = ""
        if query[0] == "#":
            mUrl = "https://www.instagram.com/explore/tags/" + query[
                1:] + "/?hl=en"
        else:
            mUrl = "https://www.instagram.com/" + query + "/?hl=en"
        browser.goToPage(mUrl)
        print("collecting url of " + query + "...")
        browser.scrollPageToBottomUntilEnd(browser.collectDpageUrl, limitNum)
        print("finish scoll collecting!")

        print("collecting data...")
        slist = list(set(browser.urlList))
        for url in tqdm(slist):
            dirName = url.split("/")[4]
            # skip if already crawled
            if not makeDir("data/" + query + "/" + dirName):
                continue
            browser.goToPage(url)
            if is_all_comments:
                browser.expandComments()
            cur = browser.getPageSource()
            writeToFile("data/" + query + "/" + dirName + "/raw.html", [cur])
            infoData = BeautifulSoup(cur, "lxml")
            imageData = infoData.find("img", class_="FFVAD")
            # extract data
            likes = extractLikes(infoData)
            comments_list = extractComments(infoData)
            comments = len(comments_list)
            caption = extractCaption(imageData)
            dateTime = extractDateTime(infoData)
            commentMessages = extractCommentsMessage(comments_list)
            # print("likes:",likes," comments:", comments," caption:", caption,
            #     "commentMessages:", commentMessages, "dateTime:", dateTime)
            writeToFile("data/" + query + "/" + dirName + "/info.txt", [
                "likes: ", likes, "", "comments: ", comments, "", "caption: ",
                caption, "", "commentMessages: ", commentMessages, "",
                "dateTime: ", dateTime, ""
            ])
            # download image (note: srcset may list several candidates)
            imageUrl = imageData.get("srcset")
            downloadImage(imageUrl,
                          "data/" + query + "/" + dirName + "/image.jpg")
            time.sleep(1)
        print("query " + query + " collecting finish")

    time.sleep(2)
    browser.driver.quit()
    print("FINISH!")
Example #9
    def __init__(self, platform):
        self.browser = Browser()
        self.config = Config(platform)
        self.email_alert = EmailAlert()
        self.By = By