Example #1
def __init__(self, username):
    self.driver = config_driver()
    self.username = str(username)
    self.name = 'douban_' + self.username
    config_log(self.name)
    load_cookies(self.driver, self.name)
    self.posts = douban['posts'].get(self.username)
    self.up_posts()
Example #2
File: hupu.py Project: broholens/up-posts
def __init__(self, username):
    self.driver = config_driver()
    self.post_count = hupu['posts_count']
    self.user_id = hupu['user_id']
    self.commentaries = hupu['commentaries']
    self.comment_count = 0
    self.posts = Queue()
    self.max_error_num = 5
    self.username = str(username)
    self.name = 'hupu_' + self.username
    config_log(self.name)
    load_cookies(self.driver, self.name)
    self.comment_posts()
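In Examples #1 and #2, load_cookies takes the Selenium driver plus a per-account name, which suggests a pickled cookie list restored into the browser. A minimal sketch of that variant, assuming a '<name>.cookies' pickle file (the filename convention and pickle format are assumptions, not taken from these projects):

import pickle

def load_cookies(driver, name):
    """Sketch: restore cookies pickled by a matching save step.
    The '<name>.cookies' filename is a hypothetical convention; Selenium
    accepts add_cookie() only after a page on the cookies' domain is loaded.
    """
    with open(name + '.cookies', 'rb') as f:
        for cookie in pickle.load(f):
            driver.add_cookie(cookie)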
Example #3
def analysis_share_page(detail_url: str) -> str:
    rid = detail_url.split('/')[-1]
    logging.info("rid is %s", rid)

    res = s.post(SHARE_URL, data={"rid": rid}, cookies=load_cookies()).json()
    share_code = res['data'].split('/')[-1]
    logging.info("Share code is %s", share_code)
    share_url = SHARE_WEB.format(code=share_code)
    logging.info("Share url %s", share_url)
    return share_url
Example #4
def analysis_share_page(detail_url: str) -> Tuple[str, dict]:  # Tuple via: from typing import Tuple
    rid = detail_url.split('/')[-1]

    res = s.post(SHARE_URL, data={"rid": rid}, cookies=load_cookies()).json()
    share_code = res['data'].split('/')[-1]
    share_url = SHARE_WEB.format(code=share_code)
    logging.info("Share url is %s", share_url)

    # get api response
    api_response = s.get(SHARE_API.format(code=share_code)).json()
    return share_url, api_response
Example #5
def get_search_html(kw: str) -> str:
    if not os.path.exists(cookie_file):
        logging.warning("Cookie file not found")
        login()
    if not is_cookie_valid():
        login()
    cookie = load_cookies()
    logging.info("searching for %s", kw)
    r = s.get(SEARCH_URL.format(kw=kw), cookies=cookie)

    r.close()
    return r.text
Example #6
def __init__(self, hunter):
    post_data.update({
        'fulltext': hunter['keyword'],
        'exparea': hunter['area']
    })
    self.post_data = post_data
    self.case_id = hunter['case_id']
    self.username = hunter['username']
    filename = generate_filename_by_username(self.username)
    if not os.path.exists(filename):
        get_cookies()
    self.cookie = load_cookies(filename)
Example #7
    logging.info('Total tasks: %s' % len(local_args))
    success, failure = [], []

    for provider, arg in local_args:
        sources = download_sources(provider, arg)
        if arg['no_upload']:
            logger.warning('Not uploading - no_upload specified on this resource')
        else:
            result, dirty = upload_sources(sources, arg, report_progress if global_args['show_progress'] else lambda current, max: None)
            if not dirty:
                success.append((arg, result))
            else:
                failure.append((arg, None))
    if not failure:
        sys.exit(0)
    logging.warning('Dirty flag set, not all tasks were done properly')
    sys.exit(1)

if __name__ == "__main__":
    setup_logging()
    # Parse args
    global_args, local_args = prase_args(sys.argv)
    # Save / load cookies
    save_cookies(global_args['cookies'])
    if not setup_session(load_cookies()):
        logging.fatal('Unable to set working directory, quitting')
        sys.exit(2)
    else:
        self_info = sess.Self
        if 'uname' not in self_info['data']:
            logger.error('Invalid cookies: %s' % self_info['message'])
            sys.exit(2)
        logger.warning('Bilibili-toolman - operating as %s' % self_info['data']['uname'])
        __tasks__()
Example #8
def is_cookie_valid() -> bool:
    cookie = load_cookies()
    r = s.get(GET_USER, cookies=cookie)
    logging.info("cookie valid? %s", r.json())
    return r.json()['status'] == 1
Example #9
def is_cookie_valid() -> bool:
    cookie = load_cookies()
    r = s.get(GET_USER, cookies=cookie)
    return r.json()['status'] == 1
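Examples #3 through #9 all pass the return value of load_cookies() straight to requests as the cookies= argument, which implies a plain dict persisted to disk. The helper itself is not shown on this page; a minimal sketch of what it might look like, assuming a JSON file (COOKIE_FILE is a hypothetical name, not taken from these projects):

import json
import os

COOKIE_FILE = 'cookies.json'  # hypothetical path; each project defines its own

def save_cookies(cookies: dict) -> None:
    # Persist the cookie dict as JSON.
    with open(COOKIE_FILE, 'w') as f:
        json.dump(cookies, f)

def load_cookies() -> dict:
    # Return the saved cookie dict, or an empty dict before first login.
    if not os.path.exists(COOKIE_FILE):
        return {}
    with open(COOKIE_FILE) as f:
        return json.load(f)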
Example #10
    def __init__(self,
                 directory=None,
                 profile=None,
                 hashtag=None,
                 add_metadata=False,
                 get_videos=False,
                 videos_only=False,
                 jobs=16,
                 template="{id}",
                 url_generator=default,
                 dump_json=False,
                 dump_only=False,
                 extended_dump=False):
        """Create a new looter instance.

        Keyword Arguments:
            directory (`str`): where downloaded medias will be stored
                **[default: None]**
            profile (`str`): a profile to download media from
                **[default: None]**
            hashtag (`str`): a hashtag to download media from
                **[default: None]**
            add_metadata (`bool`): Add date and comment metadata to
                the downloaded pictures. **[default: False]**
            get_videos (`bool`): Also get the videos from the given
                target **[default: False]**
            videos_only (`bool`): Only download videos (implies
                ``get_videos=True``). **[default: False]**
            jobs (`int`): the number of parallel threads to use to
                download media (12 or more is advised to have a true parallel
                download of media files) **[default: 16]**
            template (`str`): a filename format, in Python new-style-formatting
                format. See the Template page of the documentation
                for available keys. **[default: {id}]**
            url_generator (`function`): a callable that takes a media
                dictionary as argument and returns the URL it should
                download the picture from. The default tries to get
                the best available size. **[default: `urlgen.default`]**
            dump_json (`bool`): Save each resource metadata to a
                JSON file next to the actual image/video. **[default: False]**
            dump_only (`bool`): Only save metadata and discard the actual
                resource. **[default: False]**
            extended_dump (`bool`): Attempt to fetch as much metadata as
                possible, at the cost of more time. Set to `True` if, for
                instance, you always want the top comments to be downloaded
                in the dump. **[default: False]**
        """
        if profile is not None and hashtag is not None:
            raise ValueError("Give only a profile or an hashtag, not both !")

        if profile is not None:
            self.target = profile
            self._page_name = 'ProfilePage'
            self._section_name = 'user'
            self._base_url = "https://www.instagram.com/{}/"
        elif hashtag is not None:
            self.target = hashtag
            self._page_name = 'TagPage'
            self._section_name = 'tag'
            self._base_url = "https://www.instagram.com/explore/tags/{}/"
        else:
            self.target = None
        # Create self.directory if it doesn't exist.
        if directory is not None and not os.path.exists(directory):
            os.makedirs(directory)

        self.template = template
        self._required_template_keys = self._RX_TEMPLATE.findall(template)

        self.url_generator = url_generator
        if not callable(url_generator):
            raise ValueError("url_generator must be a callable !")

        self.directory = directory
        self.add_metadata = add_metadata
        self.get_videos = get_videos or videos_only
        self.videos_only = videos_only
        self.dump_json = dump_json or dump_only
        self.dump_only = dump_only
        self.extended_dump = extended_dump
        self.jobs = jobs

        self.session = requests.Session()
        self.session.cookies = six.moves.http_cookiejar.LWPCookieJar(
            self.COOKIE_FILE)
        load_cookies(self.session)

        self.user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; WOW64; "  # seems legit
            "rv:50.0) Gecko/20100101 Firefox/50.0")

        self.dl_count = 0
        self.metadata = {}
        self._workers = []
        self.dl_count_lock = threading.Lock()

        self.session.headers.update({
            'User-Agent': self.user_agent,
            'Accept': 'text/html',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Host': 'www.instagram.com',
            'DNT': '1',
            'Upgrade-Insecure-Requests': '1',
        })

        atexit.register(self.__del__)
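Example #10 differs from the rest: load_cookies receives the requests.Session itself, and the session's cookie jar is an LWPCookieJar bound to a file. A plausible sketch of that variant (an assumption; the real helper lives elsewhere in the project):

def load_cookies(session):
    # The jar was created as LWPCookieJar(COOKIE_FILE) in __init__, so
    # load() reads that file; tolerate a missing file on first run.
    try:
        session.cookies.load(ignore_discard=True)
    except IOError:
        pass  # no cookie file yet; it will be written after login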
Example #11
def login_get_cookies():
    cookies = utils.load_cookies()
    if cookies:
        session = requests.Session()
        session.headers['User-Agent'] = User_Agent
        session.cookies = requests.utils.cookiejar_from_dict(cookies)
        return session, cookies
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    driver = webdriver.Chrome(executable_path='./chromedriver_win32/chromedriver.exe', chrome_options=chrome_options)  # apply the headless options configured above
    url = 'https://accounts.douban.com/passport/login'
    while True:
        driver.get(url)
        link = driver.find_element_by_xpath("//ul[@class='tab-start']/li[@class='account-tab-account']")
        link.click()

        name_input = driver.find_element_by_xpath("//div[@class='account-form-field']/input[@id='username']")
        pass_input = driver.find_element_by_xpath("//div[@class='account-form-field']/input[@id='password']")
        remember_input = driver.find_element_by_xpath("//div[@class='account-form-ft']/p[@class='account-form-remember']/input[@id='account-form-remember']")
        login_button = driver.find_element_by_xpath("//div[@class='account-form-field-submit ']/a[@class='btn btn-account']")
        name_input.clear()
        name_input.send_keys(username)
        pass_input.clear()
        pass_input.send_keys(password)
        remember_input.click()
        login_button.click()

        start_ts = time.time()
        print("start..",start_ts)

        try:
            WebDriverWait(driver, 15).until_not(lambda x: x.find_element_by_xpath("//div[@class='account-form-field-submit ']/a[@class='btn btn-account btn-active']").is_displayed())
            WebDriverWait(driver, 15).until_not(lambda x: x.find_element_by_xpath("//div[@class='account-form-field-submit ']/a[@class='btn btn-account']").is_displayed())  # wait until login completes
        except:
            import traceback
            print(traceback.format_exc())
            utils.save_html('exc_inex.html', driver.page_source)
            import os
            os._exit(-1)

        print('end..', time.time() - start_ts)
        driver.save_screenshot('submit.png')
        utils.save_html('index.html',driver.page_source)

        if u'douban' in driver.page_source:
            selenium_cookies = driver.get_cookies()
            print ("selenium_cookies:",selenium_cookies)
            driver.close()
            break
        else:
            driver.close()

    # handle cookies
    session = requests.Session()
    session.headers['User-Agent'] = User_Agent
    for i in selenium_cookies:
        requests.utils.add_dict_to_cookiejar(session.cookies, {i['name']: i['value']})
    cookies = requests.utils.dict_from_cookiejar(session.cookies)
    utils.save_cookies(cookies)
    return session, cookies