def get_token(self):
        """Fetch the token that the case detail API requires.

        Steps performed:

        1. Read the Google reCAPTCHA site key from ``self.SITE_KEY_URL``
           (surrounding double quotes and whitespace are stripped).
        2. Obtain a captcha answer for ``self.SEARCH_URL`` through
           ``get_recaptcha_answer``.
        3. Replace ``self.GLOBAL_SESSION`` with a new ``InitializedSession``
           that sends the answer in a ``recaptcha`` header.
        4. Request ``self.TOKEN_URL`` and read ``token`` from the JSON body.

        Returns:
            dict: ``{'token': <value>}`` when the response carries a token,
            ``{'token': ''}`` when the JSON body has no ``token`` entry (or
            is not a JSON object), and ``{'error': <message>}`` when a
            connection fails or the body is not valid JSON.
        """
        # Step 1: site key for the Google reCAPTCHA widget.
        try:
            site_key = self.GLOBAL_SESSION.get(self.SITE_KEY_URL).text.replace(
                '"', '').strip()
        except requests.ConnectionError as e:
            print("Connection failure : " + str(e))
            # NOTE(review): this message looks copied from unrelated code;
            # kept unchanged in case something greps the output for it.
            print("Verification with InsightFinder credentials Failed")
            return {'error': str(e)}
        print(site_key)

        # Step 2: captcha answer for the page that hosts the CAPTCHA.
        recaptcha_answer = get_recaptcha_answer(site_key, self.SEARCH_URL)
        print(recaptcha_answer)

        # Step 3: every later request must carry the answer as a header, so
        # the shared session is rebuilt around it.
        self.GLOBAL_SESSION = InitializedSession(
            headers={'recaptcha': recaptcha_answer})

        # Step 4: exchange the captcha answer for the case token.
        try:
            r = self.GLOBAL_SESSION.get(self.TOKEN_URL)
        except requests.ConnectionError as e:
            print("Connection failure : " + str(e))
            print("Verification with InsightFinder credentials Failed")
            return {'error': str(e)}

        # Parse the body once; an HTML error page or an empty body would
        # otherwise escape as an uncaught ValueError.
        try:
            payload = json.loads(r.text)
        except ValueError as e:
            print("Invalid token response : " + str(e))
            return {'error': str(e)}

        # Only a JSON object can hold the token; lists/strings/numbers are
        # treated the same as "no token".
        if isinstance(payload, dict) and 'token' in payload:
            return {'token': payload['token']}
        return {'token': ''}
    def get_cookie(self, input_string):
        """Submit the search form in Chrome and return the browser cookies.

        ``input_string`` (firstName + lastName, per the original notes) is
        written into the search input of ``self.HOME_URL``. The page's
        reCAPTCHA is answered through ``get_recaptcha_answer`` using the
        ``data-sitekey`` found on the page, and the form is submitted by
        clicking ``#btnSSSubmit``. According to the original author, the
        site does not receive the search text as form data or parameters but
        reads it back from a cookie set on submit, which is why the cookies
        are what this method collects.

        Args:
            input_string: Text to place in the search input.

        Returns:
            dict: Cookie name -> cookie value, as reported by the driver
            right after the submit click.
        """
        # The chromedriver path is relative to the current working directory.
        # Alternative used previously:
        #   webdriver.Chrome(ChromeDriverManager().install())
        driver = webdriver.Chrome('./chromedriver.exe')
        try:
            driver.get(self.HOME_URL)

            # Fill the search input. The value is passed as a script argument
            # instead of being formatted into the JavaScript source, so names
            # containing quotes or backslashes cannot break the script.
            search_form = driver.find_element_by_css_selector(
                '#SearchCriteriaContainer input.form-control')
            driver.execute_script(
                "arguments[0].value = arguments[1]", search_form, input_string)

            # Site key of the Google reCAPTCHA widget on this page.
            site_key = driver.find_element_by_class_name(
                'g-recaptcha').get_attribute('data-sitekey')
            # Answer comes from the external solving service.
            recaptcha_answer = get_recaptcha_answer(site_key, self.HOME_URL)
            print(recaptcha_answer)

            # Write the answer into the hidden response textarea so the
            # form passes the captcha check.
            recaptcha_response = driver.find_element_by_class_name(
                'g-recaptcha-response')
            driver.execute_script(
                "arguments[0].innerHTML = arguments[1]",
                recaptcha_response, recaptcha_answer)

            # Submitting navigates to the search results page.
            driver.find_element_by_css_selector('#btnSSSubmit').click()

            # Flatten the driver's cookie records into a name -> value map.
            cookies = {
                cookie['name']: cookie['value']
                for cookie in driver.get_cookies()
            }
        finally:
            # Always shut the browser down; otherwise every call (and every
            # failure) leaves a Chrome/chromedriver process running.
            driver.quit()
        print(cookies)
        return cookies
示例#3
0
    def get_case_detail(self, case_number, page_number, last_name, first_name):
        """Render one case's detail page through Splash and parse it.

        A captcha answer is requested for ``self.CASE_DETAIL_URL`` and handed
        to a Lua script executed by Splash. The script opens
        ``self.SEARCH_URL``, puts ``last_name/first_name`` into the
        ``txtDKTNAME`` field of ``#frmDefault``, clicks ``#btnGO``, follows
        the pager link for the requested page when it is greater than 1,
        clicks the view button of the row whose case number matches, then
        fills ``g-recaptcha-response`` in ``#Form1`` and submits it.

        Args:
            case_number: Case number text to look for in the docket table.
            page_number: Page index; the script receives ``page_number + 1``.
            last_name: Last name used for the search field.
            first_name: First name used for the search field.

        Returns:
            The result of ``self.parse_case_detail`` for the rendered HTML,
            ``{}`` when the Splash response has no ``html`` entry, or
            ``{'error': <message>}`` on a connection failure.
        """
        captcha_token = get_recaptcha_answer(
            self.SITE_KEY, self.CASE_DETAIL_URL)
        print(captcha_token)

        # Lua program run inside Splash; its text is sent verbatim.
        lua_source = '''
            function main(splash)
                assert(splash:autoload("https://code.jquery.com/jquery-2.1.3.min.js"))
                treat = require("treat")
                local url = splash.args.url
                local case_number = splash.args.case_number
                local page = splash.args.page
                local captcha_response = splash.args.captcha_response

                assert(splash:go(url))
                assert(splash:wait(5))
                local form = splash:select('#frmDefault')
                local values = assert(form:form_values())
                values.txtDKTNAME = splash.args.name
                assert(form:fill(values))
                local element = splash:select('#btnGO')
                local bounds = element:bounds()
                assert(element:mouse_click{x=bounds.width/3, y=bounds.height/3})
                assert(splash:wait(1))
                local page_links = splash:select_all('tr.mypager:nth-child(1) td')
                local cases = {}
                local case_count = 0

                if page > 1 then
                    local element = splash:select('tr.mypager:nth-child(1) td:nth-child('..page..') a')
                    if element then
                        local bounds = element:bounds()
                        assert(element:mouse_click{x=bounds.width/3, y=bounds.height/3})
                        assert(splash:wait(1))
                    end
                end
                local rows = splash:select_all('table#gvDocket > tbody > tr')
                for j, row in ipairs(rows) do
                    local case_number_element = splash:select('table#gvDocket > tbody > tr:nth-child('..j..')  > td:nth-child(2) span')
                    if case_number_element then
                        if case_number_element:text() == case_number then
                            local view_button = splash:select('table#gvDocket > tbody > tr:nth-child('..j..')  > td:nth-child(1) input')
                            local bounds = view_button:bounds()
                            assert(view_button:mouse_click{x=bounds.width/3, y=bounds.height/3})
                            assert(splash:wait(1))
                        end
                    end
                end
                local sitekey = splash:select('div.g-recaptcha'):getAttribute('data-sitekey')
                assert(splash:wait(1))
                local form1 = splash:select('#Form1')
                local values1 = form1:form_values()
                values1['g-recaptcha-response'] = captcha_response
                assert(form1:fill(values1))
                assert(form1:submit())
                assert(splash:wait(1))
                return {
                    url = splash:url(),
                    html = splash:html(),
                    sitekey=sitekey,
                    values=values1
                }
            end
            '''

        # Arguments exposed to the Lua script as splash.args.*
        splash_args = {
            'url': self.SEARCH_URL,
            'lua_source': lua_source,
            'case_number': case_number,
            'page': page_number + 1,
            'captcha_response': captcha_token,
            'name': last_name + '/' + first_name,
        }
        try:
            response = self.GLOBAL_SESSION.post(
                SPLASH_URL,
                auth=(SPLASH_USERNAME, SPLASH_PASSWORD),
                json=splash_args)
            rendered = json.loads(response.text)
            # Without an 'html' entry there is nothing to parse.
            if 'html' not in rendered:
                return {}
            detail_soup = BeautifulSoup(
                rendered['html'], features="html.parser")
            return self.parse_case_detail(detail_soup)
        except requests.ConnectionError as err:
            print("Connection failure : " + str(err))
            print("Verification with InsightFinder credentials Failed")
            return {'error': str(err)}
    def search_in_orange_fl(self, first_name, last_name, dob):
        """Search the site for a person and collect the matching cases.

        The names are normalized with ``NameNormalizer`` and, together with
        the stripped ``dob``, stored on the instance as ``FIRST_NAME``,
        ``LAST_NAME`` and ``DOB``. The search page is fetched to read the
        reCAPTCHA site key (stored as ``SITE_KEY``), a captcha answer is
        obtained, and a Lua script is run through Splash that fills in the
        name fields and captcha response, submits the ``.form-horizontal``
        form and types ``dob`` into the ``input[type=search]`` box. Each case
        parsed from the rendered page gets a ``case_detail`` entry from
        ``self.get_case_detail``.

        Args:
            first_name: First name to search for.
            last_name: Last name to search for.
            dob: Date-of-birth text typed into the search box; may be falsy.

        Returns:
            dict: ``{'result': [...]}`` with the list of cases, or
            ``{'error': '...'}`` with an error string.
        """
        self.FIRST_NAME = NameNormalizer(first_name).normalized()
        self.LAST_NAME = NameNormalizer(last_name).normalized()
        # Falsy values (None, '') are stored as they were given.
        self.DOB = dob.strip() if dob else dob

        # Lua program run inside Splash; its text is sent verbatim.
        lua_source = '''
                function main(splash)
                    assert(splash:autoload("https://code.jquery.com/jquery-2.1.3.min.js"))
                    treat = require("treat")

                    assert(splash:go(splash.args.url))
                    assert(splash:wait(2))
                    assert(splash:runjs('$("input[name=FirstName]").val("'..splash.args.first_name..'")'))
                    assert(splash:runjs('$("input[name=LastName]").val("'..splash.args.last_name..'")'))
                    assert(splash:runjs('$("textarea[name=g-recaptcha-response]").val("'..splash.args.captcha_response..'")'))
                    assert(splash:wait(1))
                
                    local form = splash:select('.form-horizontal')
                    local values = assert(form:form_values())
                    assert(form:submit())
                    assert(splash:wait(5))
                    local search_input = splash:select('input[type=search]')
                    search_input:send_text(splash.args.dob)
                    assert(splash:wait(3))
                    return {
                        url = splash:url(),
                        html = splash:html(),
                        values = values
                    }
                end
            '''

        try:
            landing = self.GLOBAL_SESSION.get(self.SEARCH_URL)
            landing_soup = BeautifulSoup(landing.text, features="html.parser")
            # No search form means the site did not serve the expected page.
            if not landing_soup.find('form', class_='form-horizontal'):
                return {'error': 'Server Error'}

            recaptcha_div = landing_soup.find('div', class_='g-recaptcha')
            self.SITE_KEY = recaptcha_div.attrs['data-sitekey']
            captcha_token = get_recaptcha_answer(
                self.SITE_KEY, self.SEARCH_URL)

            # Arguments exposed to the Lua script as splash.args.*
            splash_args = {
                'url': self.SEARCH_URL,
                'lua_source': lua_source,
                'first_name': self.FIRST_NAME,
                'last_name': self.LAST_NAME,
                'dob': self.DOB,
                'captcha_response': captcha_token,
            }
            rendered = self.GLOBAL_SESSION.post(
                SPLASH_URL,
                auth=(SPLASH_USERNAME, SPLASH_PASSWORD),
                json=splash_args)
            print(rendered.text)

            body = json.loads(rendered.text)
            if 'html' not in body:
                return {'error': "Internal Server Error"}

            results_soup = BeautifulSoup(body['html'], features="html.parser")
            cases = self.parse_search_results(results_soup)
            # One extra Splash round-trip per case to pull its detail page.
            for case in cases:
                case['case_detail'] = self.get_case_detail(case['case_number'])
        except requests.ConnectionError as err:
            print("Connection failure : " + str(err))
            print("Verification with InsightFinder credentials Failed")
            return {'error': str(err)}
        return {'result': cases}
    def get_case_detail(self, case_number):
        """Render the detail page of one case through Splash and parse it.

        Uses ``self.SITE_KEY``, ``self.FIRST_NAME``, ``self.LAST_NAME`` and
        ``self.DOB`` already stored on the instance. The Lua script repeats
        the name search (fill names and captcha response, submit
        ``.form-horizontal``, type ``dob`` into ``input[type=search]``), then
        walks the rows of ``table#caseList`` and clicks the ``a.caseLink``
        whose text equals ``case_number``.

        Args:
            case_number: Case number text identifying the link to click.

        Returns:
            The result of ``self.parse_case_detail`` for the rendered HTML,
            or ``{}`` when the Splash response has no ``html`` entry or a
            connection failure occurs.
        """
        # Lua program run inside Splash; its text is sent verbatim.
        lua_source = '''
                    function main(splash)
                        assert(splash:autoload("https://code.jquery.com/jquery-2.1.3.min.js"))
                        treat = require("treat")

                        assert(splash:go(splash.args.url))
                        assert(splash:wait(2))
                        assert(splash:runjs('$("input[name=FirstName]").val("'..splash.args.first_name..'")'))
                        assert(splash:runjs('$("input[name=LastName]").val("'..splash.args.last_name..'")'))
                        assert(splash:runjs('$("textarea[name=g-recaptcha-response]").val("'..splash.args.captcha_response..'")'))
                        assert(splash:wait(1))

                        local form = splash:select('.form-horizontal')
                        local values = assert(form:form_values())
                        assert(form:submit())
                        assert(splash:wait(5))
                        local search_input = splash:select('input[type=search]')
                        search_input:send_text(splash.args.dob)
                        assert(splash:wait(3))

                        local rows = splash:select_all('table#caseList tbody tr')
                        local case_number = ''
                        for j, row in ipairs(rows) do
                            local case_number_element = splash:select('table#caseList tbody tr:nth-child('..j..')  a.caseLink')
                            if case_number_element then
                                if case_number_element:text() == splash.args.case_number then
                                    case_number = case_number_element:text()
                                    local bounds = case_number_element:bounds()
                                    assert(case_number_element:mouse_click{x=bounds.width/3, y=bounds.height/3})
                                    assert(splash:wait(5))
                                end
                            end
                        end
                        return {
                            url = splash:url(),
                            html = splash:html(),
                            case_number = case_number
                        }
                    end
                '''
        try:
            # The captcha call stays inside the try so that a connection
            # failure while solving it also yields {}.
            captcha_token = get_recaptcha_answer(self.SITE_KEY,
                                                 self.SEARCH_URL)
            # Arguments exposed to the Lua script as splash.args.*
            splash_args = {
                'url': self.SEARCH_URL,
                'lua_source': lua_source,
                'first_name': self.FIRST_NAME,
                'last_name': self.LAST_NAME,
                'dob': self.DOB,
                'captcha_response': captcha_token,
                'case_number': case_number,
            }
            response = self.GLOBAL_SESSION.post(
                SPLASH_URL,
                auth=(SPLASH_USERNAME, SPLASH_PASSWORD),
                json=splash_args)
            print(case_number)

            rendered = json.loads(response.text)
            if 'html' not in rendered:
                print(response.text)
                return {}

            print(rendered['html'])
            # Case number echoed back by the script ('' if no row matched).
            print(rendered['case_number'])
            detail_soup = BeautifulSoup(rendered['html'],
                                        features="html.parser")
            return self.parse_case_detail(detail_soup)
        except requests.ConnectionError as err:
            print("Connection failure : " + str(err))
            print("Verification with InsightFinder credentials Failed")
            return {}