Example #1
import sys

from requestium import Session


def main():
    try:
        start = sys.argv[1]
    except IndexError:
        print('ERROR: Requires URL as the first argument.')
        sys.exit(1)

    # Constants
    ALLDROPDOWN = '//*[@id="selectReadType"]/option[2]'
    ACTUALIMAGES = '//*[@id="divImage"]//img'
    IMGGROUPS = '.listing a'
    TITLE = '.bigChar'
    NEXT = '//*[@id="btnNext"]/@src'

    s = Session(
        webdriver_path='C:\\Webdrivers\\chromedriver', browser='chrome'
    )  # webdriver_options={'arguments': ['headless', 'disable-gpu']} runs it headlessly

    s.driver.get(start)
    s.driver.ensure_element_by_css_selector(TITLE)
    title = s.driver.find_element_by_css_selector(TITLE).text
    groups = s.driver.find_elements_by_css_selector(IMGGROUPS)
    s.transfer_driver_cookies_to_session()
    # to_attribute_list is assumed to be a helper defined elsewhere that
    # collects the given attribute from each element in the list
    begin = to_attribute_list(groups, 'href').pop()
    response = s.get(begin).xpath(ACTUALIMAGES)
    print(response)
    s.close()
    sys.exit(2)
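
A minimal entry point for the script above; the __main__ guard is an addition, not part of the original snippet:

if __name__ == '__main__':
    main()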
Example #2
import unittest

from requestium import Session
from selenium.webdriver.common.keys import Keys


class ChromeTestCase(unittest.TestCase):
    def setUp(self):
        self.s = Session(
            'chromedriver',
            browser='chrome',
            default_timeout=15,
            webdriver_options={'arguments': ['headless', 'disable-gpu']})

    def test_cookie_transfer_to_requests(self):
        """Tested on http://testing-ground.scraping.pro/login"""

        self.s.driver.get('http://testing-ground.scraping.pro/login')
        self.s.driver.find_element_by_id('usr').send_keys('admin')
        self.s.driver.ensure_element_by_id('pwd').send_keys(
            '12345', Keys.ENTER)
        self.s.driver.ensure_element_by_xpath(
            '//div[@id="case_login"]/h3[@class="success"]')

        self.s.transfer_driver_cookies_to_session()
        response = self.s.get(
            'http://testing-ground.scraping.pro/login?mode=welcome')
        success_message = response.xpath(
            '//div[@id="case_login"]/h3[@class="success"]/text()'
        ).extract_first()

        self.assertEqual(
            success_message, 'WELCOME :)',
            'Failed to transfer cookies from Selenium to Requests')

    def test_cookie_transfer_to_selenium(self):
        self.s.get('http://testing-ground.scraping.pro/login')
        self.s.cookies.set('tdsess',
                           'TEST_DRIVE_SESSION',
                           domain='testing-ground.scraping.pro')

        self.s.transfer_session_cookies_to_driver()
        self.s.driver.get(
            'http://testing-ground.scraping.pro/login?mode=welcome')
        success_message = self.s.driver.xpath(
            '//div[@id="case_login"]/h3[@class="success"]/text()'
        ).extract_first()

        self.assertEqual(
            success_message, 'WELCOME :)',
            'Failed to transfer cookies from Requests to Selenium')

    def tearDown(self):
        self.s.driver.close()
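
These tests run under unittest's standard command-line runner; a sketch, assuming the test case lives in a module executed directly:

if __name__ == '__main__':
    unittest.main()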
Example #3
from bs4 import BeautifulSoup
from requestium import Session
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def main(url):
    session = Session(
        webdriver_path='../Chrome Canary/chromedriver.exe',
        browser='chrome',
        default_timeout=6,
        webdriver_options={'arguments': ['disable-logging', 'headless']})

    session.driver.get(url)
    div_content = WebDriverWait(session.driver, 5).until(
        EC.presence_of_element_located((By.XPATH, "//div[@id='content']")))
    print('######## FROM SELENIUM ########')
    print(div_content.text)

    print('######## COPYING SESSION FROM SELENIUM TO REQUESTS ########')
    session.transfer_driver_cookies_to_session()
    final_response = session.get(url,
                                 headers={'user-agent': 'custom requestium'})

    soup = BeautifulSoup(final_response.text, 'html.parser')
    print('######## FROM REQUESTS ########')
    body_text = soup.find(id="content")
    print(body_text.text)
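
A hedged usage sketch for main(url); the URL below is a placeholder, not taken from the original:

if __name__ == '__main__':
    main('https://example.com')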
Example #4
from requestium import Session


def login_Getcookie(myaccount, mypassword):
    # webdriver_options determines whether the browser is shown or logs in headlessly
    rq = Session(
        webdriver_path=r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver',
        browser='chrome',
        default_timeout=15,
        #webdriver_options={'arguments': ['headless']}
    )
    rq.driver.get("https://passport.weibo.cn/signin/login")

    inputname = rq.driver.find_element("xpath", '//*[@id="loginName"]')
    password = rq.driver.find_element_by_xpath('//*[@id="loginPassword"]')
    login_button = rq.driver.find_element_by_xpath('//*[@id="loginAction"]')
    rq.driver.implicitly_wait(10)
    inputname.send_keys(myaccount)
    password.send_keys(mypassword)
    login_button.click()
    rq.driver.implicitly_wait(15)
    # ver_button = rq.driver.find_element_by_xpath('//*[@id="embed-captcha"]/div/div[2]/div[1]/div[3]')
    # ver_button.click()
    # A captcha may appear unpredictably; just retry the login a few times if it does
    rq.transfer_driver_cookies_to_session()
    return rq
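
A usage sketch under stated assumptions: the account name, password, and follow-up URL are placeholders, and the returned Session can issue plain requests calls with the transferred Weibo cookies:

session = login_Getcookie('my_account', 'my_password')
print(session.get('https://m.weibo.cn').status_code)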
Example #5
import json
import random
import shutil
import string
import time

from bs4 import BeautifulSoup
from requestium import Session
from selenium.webdriver.common.keys import Keys


class AuM(object):
    def __init__(self):
        # Create a session and authenticate
        self._s = Session(
            webdriver_path='/usr/lib/chromium-browser/chromedriver',
            browser='chrome',
            webdriver_options={"arguments": ["--headless"]})
        self._s.headers.update({
            'User-Agent':
            'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0'
        })
        self._s.get('https://www.adopteunmec.com')  # Maybe not needed
        # Register a new account
        rand_s = ''.join(
            random.choice(string.ascii_lowercase + string.digits)
            for _ in range(6))
        print('email used: francois_%[email protected]' % rand_s)
        r = self._s.post('https://www.adopteunmec.com/register/index',
                         data={
                             'sex': '1',
                             'day': '03',
                             'month': '4',
                             'year': '1997',
                             'email': '*****@*****.**' % rand_s,
                             'password': '******',
                             'password_check': 'Adottami1',
                             'country': 'fr',
                             'zipcode': '06000',
                             'city': 'Nice',
                             'confirm_city': '0',
                             'pseudo': 'RedoAA',
                             'cgu': '1',
                             'reg_submit': '',
                             'by_popup': '1',
                             'PreventChromeAutocomplete': ''
                         },
                         headers={
                             "X-Requested-With": "XMLHttpRequest",
                             "Origin": "https://www.adopteunmec.com/",
                             "Referer": "https://www.adopteunmec.com/"
                         })
        status = r.json()
        # If registration was successful, go to the redirect page to confirm the account
        if (status['success'] == 1):
            self._s.get(status['redirect'])
        else:
            print('Something went wrong....')

        self._common_names = (
            'loic', 'marc', 'anthony', 'tom', 'jordan', 'florian', 'jean',
            'manu', 'seb', 'alex', 'lilian', 'angelo', 'fred', 'valent',
            'fabrice', 'fabien', 'nico', 'thomas', 'sylvain', 'tim', 'karim',
            'robin', 'pierre', 'arnaud', 'max', 'luc', 'mike', 'yann', 'oliv',
            'yvan', 'jerem', 'michel', 'mat', 'kev', 'damien', 'vinc', 'eric',
            'gilles', 'jona', 'bruno', 'simon', 'adri', 'serge', 'tony', 'jul',
            'quentin', 'leo', 'step', 'gab', 'david', 'paul', 'killian',
            'alvaro', 'ronan', 'anto', 'jb', 'jp', 'jon', 'patrick', 'virgile',
            'juju', 'stef', 'franck', 'alan', 'alain', 'albin', 'alban',
            'fran', 'cyril', 'laure', 'phil', 'jacques', 'jack', 'ludo',
            'chris', 'vic', 'jo', 'charles', 'geoffrey', 'igor', 'ciro',
            'erwan', 'fabio', 'guillaume', 'thibaut', 'romain', 'rafa',
            'lionel', 'cedric', 'xavier')

    def _common_name(self, name):
        # True if the lowercased name contains any of the common first names above
        return any(n in name.lower() for n in self._common_names)

    def search_by_region(self, age_min=20, age_max=30, region=1, sex=0):
        return self.search({
            'age[min]': age_min,
            'age[max]': age_max,
            'by': 'region',
            'region': region,
            "sex": sex
        })

    def search_by_distance(self, age_min=20, age_max=30, distance=40, sex=0):
        return self.search({
            'age[min]': age_min,
            'age[max]': age_max,
            'by': 'distance',
            'distance[max]': distance,
            "sex": sex
        })

    def search(self, criteria=None):
        if criteria is None:
            return []

        # Go to search page
        self._s.get('https://www.adopteunmec.com/mySearch')
        # POST a request
        r = self._s.post('https://www.adopteunmec.com/mySearch/save',
                         data=criteria)

        time.sleep(3)  # Wait a bit...
        # Transfer cookies to selenium, refresh the page, scroll to the end 10 times, and get profiles
        self._s.transfer_session_cookies_to_driver()
        self._s.driver.get('https://www.adopteunmec.com/mySearch/results')
        for i in range(10):
            self._s.driver.find_element_by_tag_name('html').send_keys(Keys.END)
            time.sleep(.1)
        html = BeautifulSoup(
            self._s.driver.execute_script("return document.body.innerHTML"),
            'lxml')
        self._s.transfer_driver_cookies_to_session()
        self._s.driver.close()  # Might be done before ?

        # Look for <div> tags containing user info
        blocks = html.find_all('div', {'class': 'user-infos'})
        # Get all <a> tags in a same list
        all_a = [a for sl in [b.find_all('a') for b in blocks] for a in sl]
        # Extract profile IDs, filtering out common names to avoid visiting too many profiles later
        profiles = [
            l.get('href').split('/')[-1] for l in all_a
            if isinstance(l.get('href'), str)
            and l.get('href').find('profile') > 0 and len(l.get_text()) > 2
            and not self._common_name(l.get_text())
        ]
        return profiles

    def update_db(self,
                  profiles=(),
                  max_p=None,
                  filename='data/justemenemoi.json'):
        db = {}
        try:
            with open(filename, 'r') as in_f:
                db = json.load(in_f)
        except (IOError, ValueError):
            pass

        visited = 0
        for uid in profiles:
            # Check if profile already in db
            if uid not in db:
                if max_p is not None and visited >= max_p:
                    break
                visited += 1

                url = "https://www.adopteunmec.com/profile/" + uid
                page = self._s.get(url)
                html = BeautifulSoup(
                    page.content.decode('utf-8', 'replace'), 'lxml')

                name = html.find('div', {'class': 'username'}).get_text()
                desc = html.find(text='Description').find_parent('div').find(
                    'p').get_text()
                shop = html.find(text='Shopping List').find_parent('div').find(
                    'p').get_text()
                # Profile Filtering
                if desc.find("non renseign") >= 0 or shop.find(
                        "non renseign") >= 0 or len(desc) < 20 or len(
                            shop) < 20:
                    continue

                img_url = html.find(id='img-current-pic')['src']
                img_name = img_url.split('/')[-1]
                db[uid] = {
                    "profile": url,
                    "name": name,
                    "img": img_name,
                    "age": html.find('span', {
                        'class': 'age'
                    }).get_text(),
                    "city": html.find('span', {
                        'class': 'city'
                    }).get_text(),
                    "desc": desc,
                    "shop": shop
                }

                # Download and save profile pic
                pic = self._s.get(img_url, stream=True)
                pic.raw.decode_content = True
                with open("data/pics/" + img_name, 'wb') as f:
                    shutil.copyfileobj(pic.raw, f)

                time.sleep(.5)  # Bit of rest...

        # Write back json
        json_s = json.dumps(
            db)  # Dump as a string, to write to file and as JS var
        with open(filename, 'w') as out_f:
            out_f.write(json_s)
        with open(filename + '.js', 'w') as out_f:
            out_f.write("data = ")
            out_f.write(json_s)
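
A hedged driving sketch for the class above, reusing the parameter defaults it defines:

aum = AuM()
found = aum.search_by_region(age_min=20, age_max=30, region=1)
aum.update_db(found, max_p=5)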
Example #6
import datetime
import logging
import os
import sys
import time
from time import sleep

import pandas as pd
from bs4 import BeautifulSoup
from requestium import Session
from selenium import webdriver


# CustomException is assumed to be defined elsewhere in the original project;
# a minimal stub so the example runs
class CustomException(Exception):
    pass


class Downloader:
    def __init__(self,
                 username,
                 password,
                 driver_path=None,
                 download_path=None,
                 headless=True,
                 logger=None):
        if not logger:
            logging.basicConfig(level=logging.DEBUG)
            self.logger = logging.getLogger('odigo_downloader.downloader')
            self.logger.setLevel('DEBUG')
        else:
            self.logger = logger
        self._username = username
        self._password = password
        self.driver_path = driver_path
        self.download_path = download_path
        self.url = 'https://enregistreur.prosodie.com/odigo4isRecorder/EntryPoint?serviceName=LoginHandler'
        self.headless = headless
        self.validated = False
        self.active = False

    def __str__(self):
        return f"\nDOWNLOAD PATH: {self.download_path}\nHEADLESS: {self.headless}\n" \
            f"DRIVER PATH: {self.driver_path}\nUSERNAME: {self._username}\nURL: {self.url}"

    def setup_selenium_browser(self):
        if self.active:
            return "Session/Browser already active. Cannot have two concurrent sessions/browsers"
        options = webdriver.ChromeOptions()
        prefs = {
            'download.default_directory': self.download_path,
            'download.prompt_for_download': False,
            'download.directory_upgrade': True,
            'safebrowsing.enabled': False,
            'safebrowsing.disable_download_protection': True
        }
        options.add_experimental_option('prefs', prefs)

        if self.headless:
            options.add_argument('--headless')

        self.browser = webdriver.Chrome(self.driver_path, options=options)

        if self.headless:
            self.browser.command_executor._commands["send_command"] = (
                "POST", '/session/$sessionId/chromium/send_command')
            params = {
                'cmd': 'Page.setDownloadBehavior',
                'params': {
                    'behavior': 'allow',
                    'downloadPath': self.download_path
                }
            }
            command_result = self.browser.execute("send_command", params)
            for key in command_result:
                self.logger.debug("result:" + key + ":" +
                                  str(command_result[key]))

        self.active = True

    def setup_requestium_session(self):
        if self.active:
            return "Session/Browser already active. Cannot have two concurrent sessions/browsers"
        if self.headless:
            webdriver_options = {'arguments': ['headless']}
        else:
            webdriver_options = {}
        self.logger.debug(
            f"Creating Session object with values: {webdriver_options}")
        self.session = Session(webdriver_path=self.driver_path,
                               browser='chrome',
                               default_timeout=15,
                               webdriver_options=webdriver_options)
        self.active = True

    def login_requestium(self):
        if self.active:
            raise CustomException("Cannot have two active sessions/browsers")
        self.setup_requestium_session()
        self.logger.debug(f"Going to URL: {self.url}")
        self.session.driver.get(self.url)
        self.logger.debug("Entering credentials")
        self.session.driver.ensure_element_by_name('mail').send_keys(
            self._username)
        self.session.driver.ensure_element_by_name('password').send_keys(
            self._password)
        self.session.driver.ensure_element_by_name('valider').click()
        self.validated = True

    def login_selenium(self):
        if self.active:
            raise CustomException("Cannot have two active sessions/browsers")
        self.setup_selenium_browser()
        self.browser.get(self.url)
        self.browser.find_element_by_name('mail').send_keys(self._username)
        self.browser.find_element_by_name('password').send_keys(self._password)
        self.browser.find_element_by_name('valider').click()
        self.validated = True
        return

    def download_mp3(self, path=None, ref=None, xpath=None):
        self.logger.info(
            f"\ndownload_mp3 called with:\nPATH: {path},\nREF: {ref},\nXPATH: {xpath}"
        )
        if ref is not None and xpath is None:
            self.session.driver.ensure_element_by_class_name(
                'x-action-col-icon').click()
        elif xpath is not None and ref is None:
            self.session.driver.ensure_element_by_xpath(xpath).click()
        else:
            self.logger.error(
                "Provide exactly one of ref or xpath (not both, not neither)")
            return
        self.session.driver.switch_to.frame('result_frame')
        time.sleep(1)
        # Get URL of mp3 file
        src = self.session.driver.ensure_element_by_id(
            'messagePlayer').get_attribute('src')
        # Selenium --> Requests
        self.session.transfer_driver_cookies_to_session()
        # Download
        r = self.session.get(src, stream=True)
        if path is None:
            if ref is None:
                # Get ref number
                soup = BeautifulSoup(self.session.driver.page_source, 'lxml')
                ref = soup.findAll('div', class_='x-grid-cell-inner')[1].text
            path = '%s.mp3' % ref
        if r.status_code == 200:
            with open(path, 'wb') as f:
                for chunk in r.iter_content(1024 * 2014):
                    f.write(chunk)
        else:
            return 1
        # Requests --> Selenium
        self.session.transfer_session_cookies_to_driver()
        self.session.driver.switch_to.default_content()
        return

    def download_mp3_by_ref(self, ref, path=None):
        self.login_requestium()
        self.search_by_ref(ref)
        result = self.download_mp3(path, ref)
        if result == 1:
            return 1
        self.session.driver.close()

    def download_mp3_by_csv(self, csv_path, download_dir=None):
        if download_dir is None:
            download_dir = self.download_path
        self.login_requestium()
        refs = pd.read_csv(csv_path, sep=';').Name
        length = len(refs)
        for i, ref in enumerate(refs):
            sys.stdout.write('\r')
            sys.stdout.write('downloading: %s/%s' % (i + 1, length))
            sys.stdout.flush()
            self.search_by_ref(ref)
            mp3_path = None
            if download_dir is not None:
                file_name = '%s.mp3' % ref
                mp3_path = os.path.join(download_dir, file_name)
            result = self.download_mp3(path=mp3_path, ref=ref)
            if result == 1:
                return 1
        sys.stdout.write('\n')
        sys.stdout.flush()
        self.session.driver.close()
        return "Finished"

    def search_by_ref(self, ref):
        self.session.driver.get(self.url)
        self.session.driver.ensure_element_by_name('refEr').send_keys(ref)
        self.session.driver.ensure_element_by_id('button-1009').click()

    def change_date_format(self, date):
        """Format a datetime as 'dd-mm-yyyy'."""
        return date.strftime('%d-%m-%Y')

    def change_time_format(self, date):
        """Format a datetime as e.g. '9:30 AM' (no leading zero on the hour)."""
        correct_string = date.strftime("%I:%M %p")
        if correct_string[0] == "0":
            return correct_string[1:]
        return correct_string

    def ceil_dt(self, dt, delta):
        """Round dt up to the nearest multiple of delta (e.g. half an hour)."""
        # (datetime.min - dt) % delta is the gap to the next multiple of delta;
        # e.g. 10:07 with a 30-minute delta rounds up to 10:30
        return dt + (datetime.datetime.min - dt) % delta

    def set_range(self, now):
        """
        Takes current datetime and finds the nearest, previous half hour.
        Returns the appropriate start and end times and date
        """
        # Dates come out as e.g. '19-10-2018', times as e.g. '12:00 AM'
        hour_ago = now - datetime.timedelta(minutes=60)
        rounded = self.ceil_dt(hour_ago, datetime.timedelta(minutes=30))

        start_date = self.change_date_format(rounded)
        start_time = self.change_time_format(rounded)
        thirty_mins = datetime.timedelta(minutes=30)
        end_date = start_date
        end_time = self.change_time_format(rounded + thirty_mins)
        return (start_date, start_time, end_date, end_time)

    def search_by_range(self, start_date, start_time, end_date, end_time):
        """Doesn't work correctly. Date seems to work but time not so much.

        Search records on www.prosodie.com by date range, using the Selenium
        browser.
        Input:
            start_date -- start date (not required | type: str). Format:
                        'dd-mm-yyyy'. Example: '03-05-1991';
            start_time -- start time (not required | type: str). Example:
                        '12:00 AM';
            end_date -- end date (not required | type: str). Format:
                        'dd-mm-yyyy'. Example: '03-05-1991';
            end_time -- end time (not required | type: str). Example: '12:00 PM'.
        """
        if start_date:
            self.browser.find_element_by_name('dateDebut').send_keys(
                start_date)
        if start_time:
            self.browser.find_element_by_name('heureDebut').send_keys(
                start_time)
        if end_date:
            self.browser.find_element_by_name('dateFin').send_keys(end_date)
        if end_time:
            self.browser.find_element_by_name('heureFin').send_keys(end_time)
        self.browser.find_element_by_id('button-1009').click()
        return

    def download_all_half_hour(self):
        self.logger.debug("Downloading calls from the last half hour")
        self.logger.debug("Login check...")
        if not self.validated:
            self.logger.debug("Not logged in. Validating")
            self.login_selenium()
        self.logger.debug("Logged in.")
        self.logger.debug("Getting search range")
        search_range = self.set_range(datetime.datetime.now())
        sleep(2)
        self.logger.debug("Applying filters")
        self.browser.find_element_by_id("criteres-inputEl").send_keys('_EN')
        self.search_by_range(*search_range)
        sleep(5)
        self.logger.debug(f"Downloading results to {self.download_path}")
        csvB = self.browser.find_element_by_id("csvButton")
        csvB.click()
        self.browser.find_element_by_id("button-1006").click()
        self.browser.switch_to.window(self.browser.window_handles[1])
        sleep(5)
        self.logger.debug("Ending session")
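
A hedged usage sketch for the downloader; the credentials, paths, and reference number are placeholders:

d = Downloader('user@example.com', 'secret',
               driver_path='./chromedriver', download_path='/tmp/recordings')
d.download_mp3_by_ref('REF123')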
Example #7
from requestium import Session
from selenium.webdriver.common.keys import Keys

# Assumed setup, not part of the original snippet: a requestium Session and an
# optional pre-filled Reddit user name (left empty so it is read from the page)
s = Session(webdriver_path='./chromedriver', browser='chrome',
            default_timeout=15)
reddit_user_name = ''

print('Waiting for elements to load...')
s.driver.ensure_element_by_class_name(
    "desktop-onboarding-sign-up__form-toggler", state='visible').click()

if reddit_user_name:
    s.driver.ensure_element_by_id('user_login').send_keys(reddit_user_name)
    s.driver.ensure_element_by_id('passwd_login').send_keys(Keys.BACKSPACE)
print('Please log-in in the chrome browser')

s.driver.ensure_element_by_class_name("desktop-onboarding__title",
                                      timeout=60,
                                      state='invisible')
print('Thanks!')

if not reddit_user_name:
    reddit_user_name = s.driver.xpath(
        "//span[@class='user']//text()").extract_first()

if reddit_user_name:
    s.transfer_driver_cookies_to_session()
    response = s.get(
        "https://www.reddit.com/user/{}/".format(reddit_user_name))
    cmnt_karma = response.xpath(
        "//span[@class='karma comment-karma']//text()").extract_first()
    reddit_golds_given = response.re_first(r"(\d+) gildings given out")
    print("Comment karma: {}".format(cmnt_karma))
    print("Reddit golds given: {}".format(reddit_golds_given))
else:
    print("Couldn't get user name")
Example #8
import time

import requests
from requestium import Session

# config (a dict with privacy.com credentials) and genName (a card-label
# generator) are assumed to be defined elsewhere in the original project


def gen(num, limit):
    s = Session(webdriver_path='chromedriver.exe', browser='chrome')
    s.driver.get("https://privacy.com/login")
    time.sleep(3)
    s.driver.find_element_by_xpath(
        '//*[@id="steps"]/div/form/div[2]/label[1]/input').send_keys(
            config['username'])
    s.driver.find_element_by_xpath(
        '//*[@id="steps"]/div/form/div[2]/label[2]/input').send_keys(
            config['password'])
    time.sleep(1)
    s.driver.find_element_by_xpath(
        '//*[@id="steps"]/div/form/div[3]/button').click()
    time.sleep(2)
    s.transfer_driver_cookies_to_session()
    s.driver.quit()
    url1 = "https://privacy.com/api/v1/card"

    for i in range(int(num)):
        h1 = {
            'Accept':
            'application/json, text/plain, */*',
            'Accept-Encoding':
            'gzip, deflate, br',
            'Accept-Language':
            'en-US,en;q=0.9',
            'Authorization':
            'Bearer {}'.format(s.cookies['token']),
            'Connection':
            'keep-alive',
            'Content-Type':
            'application/json;charset=UTF-8',
            'Cookie':
            'sessionID={}; ETag="ps26i5unssI="; waitlist_cashback=%7B%22refByCode%22%3A%22favicon.ico%22%2C%22isPromotional%22%3Afalse%7D; landing_page=extension-rewards-landing; token={}'
            .format(s.cookies['sessionID'], s.cookies['token']),
            'Host':
            'privacy.com',
            'Origin':
            'https://privacy.com',
            'Pragma':
            'no-cache',
            'Referer':
            'https://privacy.com/home',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
        }

        pay = {
            "type": "MERCHANT_LOCKED",
            "spendLimitDuration": "MONTHLY",
            "memo": genName(),
            "meta": {
                "hostname": ""
            },
            "style": 'null',
            "spendLimit": int(limit),
            "reloadable": 'true'
        }

        r = s.post(url1, json=pay, headers=h1)

        if r.status_code == requests.codes.ok:
            print("[{}] !~Created Card~!".format(r.json()['card']['cardID']))
            with open("cards.txt", "a+") as file:
                file.write("{}:{}/{}:{}\n".format(r.json()['card']['pan'],
                                                  r.json()['card']['expMonth'],
                                                  r.json()['card']['expYear'],
                                                  r.json()['card']['cvv']))
        else:
            print("Error Creating Card")
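
A hedged example call, assuming config holds valid privacy.com credentials and genName is available:

gen(5, 50)  # create five merchant-locked cards with a monthly spend limit of 50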
Example #9
import os
import random
import time

import ujson
from requestium import Session
from selenium.webdriver.common.keys import Keys

# USERNAME, PASSWORD, FILENAME and sign() are assumed to be defined elsewhere
# in the original project


class Driver(object):
    def __init__(self):
        # requestium's Session combines requests and Selenium; the headless
        # option is left commented out
        self.s = Session(
            webdriver_path='./chromedriver',
            browser='chrome',
            default_timeout=15,
            # webdriver_options={'arguments': ['headless']}
        )
        self.category_mapping = None

        path = os.path.join(os.getcwd(), FILENAME)
        if os.path.exists(path):
            self.category_mapping = ujson.load(open(path))
            #pprint(self.category_mapping)

    def close(self):
        if self.s.driver is not None:
            self.s.driver.quit()
        if self.s is not None:
            self.s.close()

    def login(self):
        """
        Log in to Qixinbao (qixin.com) with the driver
        """
        login_url = 'http://www.qixin.com/auth/login?return_url=%2F'
        self.s.driver.get(login_url)

        # Locate elements with requestium's ensure_* methods
        username_xpath = '//input[@class="form-control input-lg input-flat input-flat-user"]'
        user_element = self.s.driver.ensure_element_by_xpath(username_xpath)
        for c in USERNAME:
            # Type the username and password with random pauses between keys
            user_element.send_keys(c)
            time.sleep(random.randint(0, 2))

        password_xpath = '//input[@class="form-control input-lg input-flat input-flat-lock"]'
        password_element = self.s.driver.ensure_element_by_xpath(
            password_xpath)
        for c in PASSWORD:
            password_element.send_keys(c)
            time.sleep(random.random())
        password_element.send_keys(Keys.ENTER)
        self.s.driver.implicitly_wait(10)

    def process_cookies(self):
        """
        Scrape pages with requests
        """
        # Hand the driver's cookies over to the requests session
        tmp_url = 'http://www.qixin.com/search?area.province=12&page=1&scope[]=1'
        self.s.driver.get(tmp_url)
        self.s.transfer_driver_cookies_to_session()
        self.s.copy_user_agent_from_driver()

        # Check whether the category mapping already exists
        if self.category_mapping is None:
            req = self.s.get('http://www.qixin.com')
            self.category_mapping = {}
            for element in req.xpath('//div[@class="grid-item"]'):
                category_l1 = element.xpath(
                    './div/text()').extract_first().strip()
                category_l2 = element.xpath('./a/text()').extract()
                self.category_mapping[category_l1] = category_l2
            # Write the mapping once, after the loop, instead of on every iteration
            with open(os.path.join(os.getcwd(), FILENAME), 'w') as out_f:
                ujson.dump(self.category_mapping, out_f)

    def fetch_page(self):
        # After obtaining cookies, scrape the data with the requests session
        result = []
        self.s.proxies.update({
            'http': 'http://forward.xdaili.cn:80',
            'https': 'https://forward.xdaili.cn:80'
        })
        for page in range(1, 11):
            url = 'http://www.qixin.com/search?area.province=12&page=%s&scope[]=1&sorter=4' % page
            self.s.headers.update({'Proxy-Authorization': sign()})
            req = self.s.get(url)
            for element in req.xpath(
                    "//div[contains(@class, 'company-item')]"):
                result.append({
                    'title':
                    element.xpath(".//div[@class='company-title']/a/text()"
                                  ).extract_first().strip(),
                    'legal_owner':
                    element.xpath(".//div[@class='legal-person'][1]/text()"
                                  ).re_first(r'法定代表人:(\w*)').strip(),
                    'status':
                    element.xpath(
                        ".//div[@class='company-tags']/span[1]/text()").
                    extract_first().strip(),
                    'capital':
                    element.xpath(".//div[contains(@class, 'col-3-1')]/text()"
                                  ).extract_first().strip(),
                    'date':
                    element.xpath(".//div[contains(@class, 'col-3-2')]/text()"
                                  ).extract_first().strip(),
                    'url':
                    element.xpath(".//div[@class='company-title']/a/@href"
                                  ).extract_first().strip()
                })
            time.sleep(10)
        return result

    def process_search_condition(self):
        """
        Build the search criteria
        * URL: http://www.qixin.com/search?
        * param region: area.province=12, area.district=120101-120119
        * param search scope: scope[]=1
        * param sort order: sorter=3 | 4
        * param registered capital: capital: 1-5
        * param industry: industry.l1 (level 1), industry.l2 (level 2)
        * param registration year: year: 1-5
        * param page: page number, at most 500; only 5,000 results are viewable
        http://www.qixin.com/search?area.district=120101&area.province=12&capital=2&industry.l1=%E5%86%9C%E3%80%81%E6%9E%97%E3%80%81%E7%89%A7%E3%80%81%E6%B8%94%E4%B8%9A&industry.l2=%E5%86%9C%E4%B8%9A&page=1&scope[]=1&sorter=4&year=5
        """
        pass
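
A hedged end-to-end sketch of the workflow above, assuming USERNAME, PASSWORD and FILENAME are configured:

d = Driver()
d.login()
d.process_cookies()
rows = d.fetch_page()
d.close()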
Example #10
import os
import random
import time

import ujson
from requestium import Session
from selenium.webdriver.common.keys import Keys

# USERNAME, PASSWORD, FILENAME, LOGIN_XPATH, CATEGORY_XPATH, sign() and
# parse_list() are assumed to be defined elsewhere in the original project


class Driver(object):
    def __init__(self):
        # requestium's Session combines requests and Selenium; the headless
        # option is left commented out
        self.s = Session(
            webdriver_path='./chromedriver',
            browser='chrome',
            default_timeout=15,
            # webdriver_options={'arguments': ['headless']}
        )
        self.category_mapping = None

        # path = os.path.join(os.getcwd(), FILENAME)
        # if os.path.exists(path):
        #     self.category_mapping = ujson.load(open(path))
        #     pprint(self.category_mapping)

    def close(self):
        if self.s.driver is not None:
            self.s.driver.quit()
        if self.s is not None:
            self.s.close()

    def login(self):
        """
        Log in to Qixinbao (qixin.com) with the driver
        """
        login_url = 'http://www.qixin.com/auth/login?return_url=%2F'
        self.s.driver.get(login_url)

        # Locate elements with requestium's ensure_* methods
        user_element = self.s.driver.ensure_element_by_xpath(
            LOGIN_XPATH['username'])
        for c in USERNAME:
            # Type the username and password with random pauses between keys
            user_element.send_keys(c)
            time.sleep(random.randint(0, 2))

        password_element = self.s.driver.ensure_element_by_xpath(
            LOGIN_XPATH['password'])
        for c in PASSWORD:
            password_element.send_keys(c)
            time.sleep(random.random())
        password_element.send_keys(Keys.ENTER)
        self.s.driver.implicitly_wait(20)

    def process_cookies(self):
        """
        Scrape pages with requests
        """
        # Hand the driver's cookies over to the requests session
        tmp_url = 'http://www.qixin.com/search?area.province=12&page=1&scope[]=1'
        self.s.driver.get(tmp_url)
        self.s.transfer_driver_cookies_to_session()
        self.s.copy_user_agent_from_driver()

        # Check whether the category mapping already exists
        if self.category_mapping is None:
            req = self.s.get('http://www.qixin.com')
            self.category_mapping = {}
            for element in req.xpath(CATEGORY_XPATH['info']):
                category_l1 = element.xpath(
                    CATEGORY_XPATH['l1']).extract_first().strip()
                category_l2 = element.xpath(CATEGORY_XPATH['l2']).extract()
                self.category_mapping[category_l1] = category_l2
            # Write the mapping once, after the loop, instead of on every iteration
            with open(os.path.join(os.getcwd(), FILENAME), 'w') as out_f:
                ujson.dump(self.category_mapping, out_f)

    def fetch_page_with_chrome(self, url):
        self.s.transfer_session_cookies_to_driver()
        self.s.driver.get(url)

    def fetch_page_with_requests(self, url):
        """
        e.g. url = 'http://www.qixin.com/search?area.province=12&page=%s&scope[]=1&sorter=4' % page
        :param url: the URL to request
        :return: a list of parsed results
        """
        # After obtaining cookies, scrape the data with the requests session
        self.s.proxies.update({
            'http': 'http://forward.xdaili.cn:80',
            'https': 'https://forward.xdaili.cn:80'
        })
        self.s.headers.update({'Proxy-Authorization': sign()})
        req = self.s.get(url)
        result = parse_list(req)
        return result
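
A hedged sketch of the requests-based fetch path, reusing the sample URL from the docstring above:

d = Driver()
d.login()
d.process_cookies()
rows = d.fetch_page_with_requests(
    'http://www.qixin.com/search?area.province=12&page=1&scope[]=1&sorter=4')
d.close()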
Example #11
import getpass
import os
import pickle

import requests
from bs4 import BeautifulSoup
from requestium import Session
from termcolor import colored  # assumed source of colored()

# GITHUB_LOGIN, GITHUB_SESSION and the gprint/rprint/Tprint print helpers are
# assumed to be defined elsewhere in the original project


class Github:
    def __init__(self, proxy=None):
        self.cookies = None
        self.proxy = proxy
        self.sess = Session(webdriver_path='/usr/local/bin/chromedriver',
                            browser='chrome',
                            default_timeout=15,
                            webdriver_options={'arguments': ['headless']})
        if proxy:
            self.sess.proxies['http'] = proxy
            self.sess.proxies['https'] = proxy
        self.user = None

    def save_session(self, name, password, cookie):
        gprint("save cred and session")
        with open(GITHUB_LOGIN, "wb") as fp:
            u = {"user": name, "pass": password}
            pickle.dump(u, fp)

        with open(GITHUB_SESSION, 'wb') as fp:
            pickle.dump(cookie, fp)

    def load_session(self):
        gprint("load session from github")
        if os.path.exists(GITHUB_SESSION):
            with open(GITHUB_SESSION, 'rb') as fp:
                self.cookies = pickle.load(fp)
                self.sess.cookies.update(self.cookies)
                self.sess.get("https://github.com")
                self.sess.transfer_session_cookies_to_driver()

            with open(GITHUB_LOGIN, 'rb') as fp:
                u = pickle.load(fp)
                self.user = u['user']

        elif os.path.exists(GITHUB_LOGIN):
            with open(GITHUB_LOGIN, 'rb') as fp:
                u = pickle.load(fp)
                self.login(name=u['user'], password=u['pass'])
        else:
            name = input('Github name:')
            passwd = getpass.getpass("Github pass:")
            self.login(name=name, password=passwd)

    def login(self, name, password):
        self.sess.driver.get("https://github.com/login")
        self.sess.driver.find_element_by_css_selector(
            "input[name=login]").send_keys(name)
        self.sess.driver.find_element_by_css_selector(
            "input[name=password]").send_keys(password)
        self.sess.driver.find_element_by_css_selector(
            "input[name=commit]").click()

        self.sess.transfer_driver_cookies_to_session()
        self.cookies = self.sess.cookies.get_dict()
        gprint(str(self.cookies))
        self.save_session(name, password, self.cookies)

    def weak_search(self, key):
        self.load_session()
        self.search(key, "smtp")
        self.search(key, "ssh")
        # with ThreadPoolExecutor(max_workers=10) as exe:
        #     for k in ['smtp', 'ssh', 'email']:
        #         s1 = exe.submit(self.search, key, k)
        #         s1.add_done_callback(print)

    def search(self, *key):
        gprint(key[-1])
        if not self.cookies:
            self.load_session()

        res = requests.get("https://github.com/{}/product".format(self.user))
        self.cookies = res.cookies.get_dict()
        gprint(str(self.cookies))
        url = "https://github.com/search?q={}&type=code".format("+".join(key))
        self.sess.driver.get(url)
        res = self.sess.driver.page_source
        b = BeautifulSoup(res, 'lxml')

        codes = b.select(".code-list-item")
        if len(codes) > 0:
            gprint("Found : %d" % len(codes))
        else:
            gprint("Not found:")
            rprint(b.text.replace("\n", ""))
            # for i in b.select("a"):
            # gprint(str(i))
        ss = {}
        for code in codes:
            k = code.select(".text-bold")[0].text
            v = {
                colored(str(n), 'green'): i.text.replace("\n", "")
                for n, i in enumerate(code.select("td.blob-code"))
            }
            gprint(colored(k, "blue"))
            Tprint(v)
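
A hedged usage sketch; the search key is a placeholder:

gh = Github()
gh.weak_search('example.org')  # searches GitHub code for the key plus 'smtp' and 'ssh'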
Example #12
import datetime
import json
import shutil
import time

from bs4 import BeautifulSoup
from requestium import Session
from selenium.webdriver.common.keys import Keys


class Charme(object):
    def __init__(self):
        # Create a session and authenticate
        self._s = Session(
            webdriver_path='/usr/lib/chromium-browser/chromedriver',
            browser='chrome')
        # Pass webdriver_options={"arguments": ["--headless"]} above to run headlessly
        self._s.headers.update({
            'User-Agent':
            'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0'
        })

        # Login
        r = self._s.post('https://www.adopteunmec.com/auth/login',
                         data={
                             'username': '******',
                             'password': '******'
                         })
        if not r.ok:
            raise RuntimeError('Something went wrong during login')
        else:
            time.sleep(2)

    def search_by_distance(self, age_min=20, age_max=30, distance=40, sex=1):
        return self.search({
            'age[min]': age_min,
            'age[max]': age_max,
            'by': 'distance',
            'distance[max]': distance,
            "sex": sex
        })

    def search(self, criteria=None):
        if criteria is None:
            return []

        # Go to search page
        self._s.get('https://www.adopteunmec.com/mySearch')
        time.sleep(1)
        # POST a request
        r = self._s.post('https://www.adopteunmec.com/mySearch/save',
                         data=criteria)

        time.sleep(3)  # Wait a bit...
        # Transfer cookies to selenium, refresh the page, scroll to the end 10 times, and get profiles
        self._s.transfer_session_cookies_to_driver()
        self._s.driver.get('https://www.adopteunmec.com/mySearch/results')
        for i in range(10):
            self._s.driver.find_element_by_tag_name('html').send_keys(Keys.END)
            time.sleep(.1)
        html = BeautifulSoup(
            self._s.driver.execute_script("return document.body.innerHTML"),
            'lxml')
        self._s.transfer_driver_cookies_to_session()
        self._s.driver.close()  # Might be done before ?

        # Look for <div> tags containing user info
        blocks = html.find_all('div', {'class': 'user-infos'})
        # Get all <a> tags in a same list
        all_a = [a for sl in [b.find_all('a') for b in blocks] for a in sl]
        # Extract profile IDs, keeping only profile links with a plausible display name
        profiles = [
            l.get('href').split('/')[-1] for l in all_a
            if isinstance(l.get('href'), str)
            and l.get('href').find('profile') > 0 and len(l.get_text()) > 2
        ]
        return profiles

    def charme(self, profiles=(), max_p=10, filename='data/charme.json'):
        db = {}
        try:
            with open(filename, 'r') as in_f:
                db = json.load(in_f)
        except (IOError, ValueError):
            pass

        visited = 0
        for uid in profiles:
            # Check if profile already in db
            if uid not in db:
                if max_p is not None and visited >= max_p:
                    break
                visited += 1

                url = "https://www.adopteunmec.com/profile/" + uid
                print("Visiting", url)
                page = self._s.get(url)
                html = BeautifulSoup(
                    page.content.decode('utf-8', 'replace'), 'lxml')

                img_url = html.find(id='img-current-pic')['src']
                img_name = img_url.split('/')[-1]
                date = datetime.datetime.now().strftime("%m-%d %H:%M")
                db[uid] = {
                    "profile":
                    url,
                    "name":
                    html.find('div', {
                        'class': 'username'
                    }).get_text(),
                    "img":
                    img_name,
                    "age":
                    html.find('span', {
                        'class': 'age'
                    }).get_text(),
                    "city":
                    html.find('span', {
                        'class': 'city'
                    }).get_text(),
                    "desc":
                    html.find(text='Description').find_parent('div').find(
                        'p').get_text(),
                    "shop":
                    html.find(text='Shopping List').find_parent('div').find(
                        'p').get_text(),
                    "charmed":
                    date
                }

                # Download and save profile pic
                pic = self._s.get(img_url, stream=True)
                pic.raw.decode_content = True
                with open("data/pics/" + img_name, 'wb') as f:
                    shutil.copyfileobj(pic.raw, f)

                time.sleep(20)  # Bit of rest...

                # Send a charme
                url = "https://www.adopteunmec.com/events/charm?id=" + uid
                r = self._s.get(url)
                if r.json()['member']['id'] != uid:
                    raise RuntimeError('Something wrong in response')

        # Write back json
        json_s = json.dumps(
            db)  # Dump as a string, to write to file and as JS var
        with open(filename, 'w') as out_f:
            out_f.write(json_s)
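
A hedged driving sketch for the class above, mirroring its parameter defaults:

c = Charme()
found = c.search_by_distance(age_min=20, age_max=30, distance=40)
c.charme(found, max_p=10)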