Пример #1
0
    def search (self, row_h):
        """Search for one workplace and write the outcome via csv_hitta_eniro.

        Every term produced by ``self.search_term_gen(row_h)`` is submitted
        with ``self._do_search`` until the response page no longer contains
        the "no hit" marker text. If every term misses, or no term was
        produced at all, one ``NOT FOUND`` row is written. When at least one
        search was performed, every row yielded by ``self._scrape_result()``
        is then written, tagged with the last search term that was used.

        Args:
            row_h: Mapping for the input row; ``'PARENT_WORKPLACE_NAME'`` is
                read from it, and it is passed on to ``search_term_gen``.
        """
        searched = False
        no_hit = True
        search_term = None
        for search_term in self.search_term_gen (row_h):
            self._do_search (search_term)
            searched = True
            # Read and decode the page once per search and remember the
            # verdict, instead of fetching the body again after the loop.
            page = cp1252(self.mech.response().read().decode('utf-8'))
            no_hit = re.search("gav ingen träff", page) is not None
            if not no_hit:
                break

        today = datetime.datetime.today().strftime('%Y%m%d')

        if no_hit:
            not_found_h = {}
            not_found_h ['SEARCH_NAME'] = row_h ['PARENT_WORKPLACE_NAME']
            not_found_h ['URL'] = "NOT FOUND"
            not_found_h ['DATE'] = today
            csv_hitta_eniro.write_row_h(not_found_h)

        if not searched:
            # No term was generated, so no page was fetched for this row:
            # scraping would read a page left over from an earlier call and
            # there is no search term to report.
            return

        for result_h in self._scrape_result():
            result_h ['SEARCH_NAME'] = row_h ['PARENT_WORKPLACE_NAME']
            result_h ['MATCHING_CRITERIA'] = search_term
            result_h ['DATE'] = today
            csv_hitta_eniro.write_row_h(result_h)
Пример #2
0
    def search (self, vrow_h, max_open_attempts=5):
        """Search for one workplace and write the scraped rows via csv_hitta_eniro.

        The first search uses "<name>,<address>". While the response page
        contains the "no result" marker, progressively looser terms are tried
        in this order: name only; "address, zip city"; "address, city". If
        all of them miss, a ``NOT FOUND`` row is written and the method
        returns.

        Otherwise each link from ``self._company_links()`` is opened and the
        result of ``self._scrape_result()`` is written with that link as the
        URL. If there are no links at all, the current page is scraped once
        and written with ``self.url`` as the URL.

        Args:
            vrow_h: Mapping for the input row. ``PARENT_WORKPLACE_NAME`` and
                ``PARENT_WORKPLACE_ADDRESS`` are always read;
                ``PARENT_WORKPLACE_ZIP`` and ``PARENT_WORKPLACE_CITY`` only
                when the corresponding fallback is reached. NOTE: the name
                entry is rewritten in place for the special-cased company.
            max_open_attempts: Maximum number of attempts to open each result
                link. A link that still fails is logged and skipped, so a
                permanently failing link can no longer retry forever.
        """
        def no_result():
            # True when the current response page carries the "no result" marker.
            return re.search('inget resultat', self.mech.response().read()) is not None

        def write_result(url):
            # Scrape the currently loaded page and write it as one output row.
            row_h = self._scrape_result()
            row_h['URL'] = url
            row_h['SEARCH_NAME'] = vrow_h['PARENT_WORKPLACE_NAME']
            row_h['MATCHING_CRITERIA'] = search_term
            row_h['DATE'] = datetime.datetime.today().strftime('%Y%m%d')
            csv_hitta_eniro.write_row_h(row_h)

        search_term = vrow_h['PARENT_WORKPLACE_NAME'] + ',' + vrow_h['PARENT_WORKPLACE_ADDRESS']
        self._do_search (search_term)

        #-------------------------------------------------------------------------------
        # Special Case: shorten this company name. This happens after the
        # first search, so it only affects the name-only fallback and the
        # SEARCH_NAME written to the output.
        if vrow_h['PARENT_WORKPLACE_NAME'] == 'Familjeläkarna i Sverige AB':
            vrow_h['PARENT_WORKPLACE_NAME'] = 'Familjeläkarna'
        #-------------------------------------------------------------------------------

        # (extra log line, term builder) per fallback, tried in order. The
        # builders are lazy so a key is only required when its fallback is
        # actually reached.
        fallbacks = (
            (None,
             lambda: vrow_h['PARENT_WORKPLACE_NAME']),
            ("Searching with address, zip city!!!!",
             lambda: "%(PARENT_WORKPLACE_ADDRESS)s, %(PARENT_WORKPLACE_ZIP)s %(PARENT_WORKPLACE_CITY)s" % vrow_h),
            ("Searching with address, city!!!!",
             lambda: "%(PARENT_WORKPLACE_ADDRESS)s, %(PARENT_WORKPLACE_CITY)s" % vrow_h),
        )

        missed = no_result()
        for announcement, build_term in fallbacks:
            if not missed:
                break
            logger.dump ("NO RESULT FOUND!!")
            if announcement is not None:
                logger.dump (announcement)
            search_term = build_term()
            self._do_search (search_term)
            missed = no_result()

        if missed:
            logger.dump ("NO RESULT FOUND!! Skipping...")
            row_h = {}
            row_h['URL'] = 'NOT FOUND'
            row_h['SEARCH_NAME'] = vrow_h['PARENT_WORKPLACE_NAME']
            # Stamp the date here too, like every other row that is written.
            row_h['DATE'] = datetime.datetime.today().strftime('%Y%m%d')
            csv_hitta_eniro.write_row_h(row_h)
            return

        link_found = False
        for link in self._company_links():
            link_found = True

            opened = False
            for _ in range(max_open_attempts):
                logger.dump ("Result Get: " + link)
                try:
                    self.mech.open(link)
                except Exception as error:
                    # Broad on purpose: the exception types raised by
                    # self.mech.open are not visible here. Log and retry.
                    logger.dump (error)
                else:
                    opened = True
                    break

            if not opened:
                logger.dump ("Giving up on link: " + link)
                continue

            write_result(link)

        if not link_found:
            # No company links on the result page: scrape the page itself.
            write_result(self.url)