def scrap_cmp(doctor):
    """Query the 'cmp' service for ``doctor`` and persist the result.

    A proxy is requested for the ``'cmp'`` service; when none is available
    the condition is logged and the function returns without touching
    ``doctor``. Otherwise a single scraping attempt is made: the dict
    returned by ``scrap_and_recognize`` is copied field by field onto
    ``doctor``, the doctor is saved, and one ``DoctorSpecialty`` row is
    created per entry in ``record['specialties']``.

    Any ``Exception`` raised during the attempt is logged and swallowed.
    The browser driver, if one was created, is always closed.

    Args:
        doctor: Record to update. Must expose ``id``, ``save()`` and the
            attributes assigned below.

    Returns:
        None.
    """
    logger.info('\tStarting')
    logger.info('\tQuerying {}'.format(doctor.id))

    proxy_server = get_proxy(service='cmp')
    if not proxy_server:
        logger.info('\tError: Out of proxies!')
        logger.info('\tFinished')
        return

    # Bound before the ``try`` so the cleanup below never reads an undefined
    # name when ``get_driver`` itself fails.
    driver = None
    try:
        logger.info('\tUsing proxy {}'.format(proxy_server))
        driver = get_driver(proxy_server)
        record = scrap_and_recognize(driver, doctor)
        if record['cmp'] != doctor.id:
            record['status'] = 2  # Invalid

        # Update doctor fields and create specialties if applicable
        doctor.name = record['name']
        doctor.surname = record['surname']
        doctor.state = record['state']
        doctor.email = record['email']
        doctor.region = record['region']
        doctor.notes = record['notes']
        doctor.image_path = record['image_path']
        doctor.status = record['status']
        doctor.save()

        if record['specialties']:
            for specialty in record['specialties']:
                DoctorSpecialty.create(
                    doctor=doctor,
                    name=specialty['name'],
                    type=specialty['type'],
                    code=specialty['code'],
                    end_date=specialty['end_date'],
                )

        logger.info('\tProcessed')
    except Exception as e:
        # NOTE(review): the doctor's status is left untouched on failure --
        # confirm this is intended (scrap_movistar_lines marks its record
        # as Error in the equivalent handler).
        logger.info('\tError: {}'.format(e))
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Best-effort cleanup: a failure to close the browser must
                # not mask the outcome of the scrape, but it is reported.
                logger.info('\tError: Could not close driver: {}'.format(e))

    logger.info('\tFinished')
def scrap_movistar_lines(rrll):
    """Query the 'movistar_line' service for ``rrll`` and persist the result.

    A proxy is requested for the ``'movistar_line'`` service; when none is
    available the condition is logged and the function returns without
    touching ``rrll``. Otherwise a single scraping attempt is made: the
    status returned by ``scrap_and_recognize`` is stored on ``rrll`` and one
    ``TelephoneLine`` row is created per entry in ``v_json['records']``.

    Any ``Exception`` raised during the attempt is logged and ``rrll`` is
    saved with status 3 (Error). The browser driver, if one was created, is
    always closed.

    Args:
        rrll: Record to update. Must expose ``ruc``, ``dni``, ``status`` and
            ``save()``.

    Returns:
        None.
    """
    logger.info('\tStarting')
    logger.info('\tQuerying {}'.format(rrll.ruc))

    proxy_server = get_proxy(service='movistar_line')
    if not proxy_server:
        logger.info('\tError: Out of proxies!')
        logger.info('\tFinished')
        return

    # Bound before the ``try`` so the cleanup below never reads an undefined
    # name when ``get_driver`` itself fails.
    driver = None
    try:
        logger.info('\tUsing proxy {}'.format(proxy_server))
        driver = get_driver(proxy_server)
        v_json = scrap_and_recognize(driver, rrll)
        # NOTE(review): the result is flagged invalid only when BOTH
        # identifiers differ -- confirm `and` (rather than `or`) is intended.
        if v_json['ruc'] != rrll.ruc and v_json['dni'] != rrll.dni:
            v_json['status'] = 2  # Invalid

        # Update rrll fields and create rrll records
        rrll.status = v_json['status']
        rrll.save()

        if v_json['records']:
            for record in v_json['records']:
                TelephoneLine.create(
                    rrll=rrll,
                    modality=record['modality'],
                    telephone=record['telephone'],
                )

        logger.info('\tProcessed')
    except Exception as e:
        logger.info('\tError: {}'.format(e))
        rrll.status = 3  # Error
        rrll.save()
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Best-effort cleanup: a failure to close the browser must
                # not mask the outcome of the scrape, but it is reported.
                logger.info('\tError: Could not close driver: {}'.format(e))

    logger.info('\tFinished')
def scrap_document_number(graduate):
    """Query the 'sunedu' service for ``graduate`` and persist the result.

    A proxy is requested for the ``'sunedu'`` service; when none is
    available the condition is logged and the function returns without
    touching ``graduate``. Otherwise a single scraping attempt is made: the
    status returned by ``scrap_and_recognize`` is stored on ``graduate`` and
    one ``GraduateRecord`` row is created per entry in ``record['records']``.

    Any ``Exception`` raised during the attempt is logged and swallowed.
    The browser driver, if one was created, is always closed.

    Args:
        graduate: Record to update. Must expose ``id``, ``status`` and
            ``save()``.

    Returns:
        None.
    """
    logger.info('\tStarting')
    logger.info('\tQuerying {}'.format(graduate.id))

    proxy_server = get_proxy(service='sunedu')
    if not proxy_server:
        logger.info('\tError: Out of proxies!')
        logger.info('\tFinished')
        return

    # Bound before the ``try`` so the cleanup below never reads an undefined
    # name when ``get_driver`` itself fails.
    driver = None
    try:
        logger.info('\tUsing proxy {}'.format(proxy_server))
        driver = get_driver(proxy_server)
        record = scrap_and_recognize(driver, graduate)
        if record['id'] != graduate.id:
            record['status'] = 2  # Invalid

        # Update graduate fields and create graduate records
        graduate.status = record['status']
        graduate.save()

        if record['records']:
            # A distinct loop name keeps the response dict `record` intact
            # instead of rebinding it to each of its own entries.
            for entry in record['records']:
                GraduateRecord.create(
                    graduate=graduate,
                    name=entry['name'],
                    grade=entry['grade'],
                    institution=entry['institution'],
                )

        logger.info('\tProcessed')
    except Exception as e:
        # NOTE(review): the graduate's status is left untouched on failure --
        # confirm this is intended (scrap_movistar_lines marks its record
        # as Error in the equivalent handler).
        logger.info('\tError: {}'.format(e))
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Best-effort cleanup: a failure to close the browser must
                # not mask the outcome of the scrape, but it is reported.
                logger.info('\tError: Could not close driver: {}'.format(e))

    logger.info('\tFinished')
def scrap_plate_number(vehicle):
    """Query the 'sunarp' service for ``vehicle`` and persist the result.

    A proxy is requested for the ``'sunarp'`` service; when none is
    available the condition is logged and the function returns without
    touching ``vehicle``. Otherwise scraping is attempted until it succeeds,
    a non-recoverable condition is hit, or ``max_retries`` failed attempts
    have accumulated, in which case ``vehicle`` is saved with status 3
    (Error).

    Failure handling per attempt:

    * ``NoSuchElementException``: the page is checked for the warning label.
      If the label is missing, the vehicle is saved with status 3 and the
      loop stops. If the label reports the query limit, the current proxy
      is recorded via ``Proxy.create`` and a new one is requested; running
      out of proxies stops the loop. In every other case the attempt is
      retried.
    * ``JavascriptException`` / ``AttributeError``: the attempt is retried.

    Every failed attempt counts exactly once towards ``max_retries``. Other
    exception types are not handled here and propagate to the caller. The
    browser driver created for an attempt, if any, is always closed.

    Args:
        vehicle: Record to update. Must expose ``id``, ``save()`` and the
            attributes assigned below.

    Returns:
        None.
    """
    max_retries = 3

    logger.info('\tStarting')
    logger.info('\tQuerying {}'.format(vehicle.id))

    proxy_server = get_proxy(service='sunarp')
    if not proxy_server:
        logger.info('\tError: Out of proxies!')
        logger.info('\tFinished')
        return

    processed = False
    retries = 0
    while not processed:
        # `>=` rather than `==` so the limit can never be stepped over.
        if retries >= max_retries:
            logger.info('\tGiving up!')
            # Update vehicle status record
            vehicle.status = 3  # Error
            vehicle.save()
            break

        # Reset on every attempt so the cleanup below never reads an
        # undefined name or re-closes the driver of a previous attempt.
        driver = None
        try:
            logger.info('\tUsing proxy {}'.format(proxy_server))
            driver = get_driver(proxy_server)
            record = scrap_and_recognize(driver, vehicle)
            if record['plate_number'] != vehicle.id:
                record['status'] = 2  # Invalid

            # Update vehicle fields
            vehicle.plate_number = record['plate_number']
            vehicle.serial_number = record['serial_number']
            vehicle.vin_number = record['vin_number']
            vehicle.engine_number = record['engine_number']
            vehicle.color = record['color']
            vehicle.make = record['make']
            vehicle.model = record['model']
            vehicle.valid_plate_number = record['valid_plate_number']
            vehicle.previous_plate_number = record['previous_plate_number']
            vehicle.state = record['state']
            vehicle.notes = record['notes']
            vehicle.branch = record['branch']
            vehicle.owners = record['owners']
            vehicle.image_path = record['image_path']
            vehicle.status = record['status']
            vehicle.save()

            processed = True
            logger.info('\tProcessed')
        except NoSuchElementException:
            logger.info('\tError: Element not found')
            try:
                label = driver.find_element_by_xpath(
                    '//span[contains(@id, "MainContent_lblWarning")]')
            except NoSuchElementException:
                # Neither the expected content nor the warning label is
                # present: treat the page as unscrapable and stop.
                logger.info('\tError: Scraping problem')
                vehicle.status = 3  # Error
                vehicle.save()
                processed = True
            else:
                if 'número máximo' in label.text:
                    logger.info('\tError: Max queries reached for {}'.format(
                        proxy_server))
                    # Save this invalid proxy in table and ask for another one
                    Proxy.create(service='sunarp', ip=proxy_server)
                    proxy_server = get_proxy(service='sunarp')
                    if not proxy_server:
                        logger.info('\tError: Out of proxies!')
                        processed = True
                if not processed:
                    # One increment per failed attempt, whichever branch
                    # was taken above.
                    retries += 1
                    logger.info('\tRetrying...')
        except JavascriptException:
            logger.info('\tError: Javascript')
            logger.info('\tRetrying...')
            retries += 1
        except AttributeError:
            # Probably something wrong with the license image
            logger.info('\tError: Invalid image')
            logger.info('\tRetrying...')
            retries += 1
        finally:
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    # Best-effort cleanup: a failure to close the browser
                    # must not mask the outcome of the attempt, but it is
                    # reported.
                    logger.info(
                        '\tError: Could not close driver: {}'.format(e))

    logger.info('\tFinished')