def runRoutine(routine):
    """Run ``fly`` on *routine*, optionally supplying a headless Chrome browser.

    When ``routine["options"]["browser"]`` equals ``True``, a headless Chrome
    WebDriver is created and stored back into ``options["browser"]`` before
    ``fly`` is called.  The Chrome binary and driver locations are read from
    the ``GOOGLE_CHROME_BIN`` and ``CHROMEDRIVER_PATH`` environment variables.

    Any ``Exception`` raised by ``fly`` is printed between separator lines and
    swallowed, so one failing routine does not stop the caller.  A browser
    created here is always quit, even when ``fly`` is interrupted by a
    non-``Exception`` error such as ``KeyboardInterrupt``.

    Args:
        routine: Mapping describing the routine.  Only the optional
            ``"options"`` entry is inspected here; the rest is passed to
            ``fly`` untouched.

    Returns:
        The same ``routine`` object that was passed in.
    """
    pprint(routine)
    browser = None
    if "options" in routine:
        options = routine["options"]
        # Deliberate equality test: a plain truthiness check would also match
        # a driver object that a caller has already placed under "browser".
        if "browser" in options and options["browser"] == True:  # noqa: E712
            chrome_options = webdriver.ChromeOptions()
            chrome_options.binary_location = os.environ.get(
                "GOOGLE_CHROME_BIN")
            chrome_options.add_argument("--headless")
            chrome_options.add_argument("--disable-dev-shm-usage")
            chrome_options.add_argument("--no-sandbox")
            # NOTE(review): `executable_path` / `chrome_options` keyword names
            # are kept as in the original; confirm they match the installed
            # Selenium version.
            browser = webdriver.Chrome(
                executable_path=os.environ.get("CHROMEDRIVER_PATH"),
                chrome_options=chrome_options)
            options["browser"] = browser
    try:
        fly(routine)
    except Exception as e:
        print('-------------------------------------------')
        print(e)
        print('-------------------------------------------')
    finally:
        # Release the Chrome process on every exit path, not only on the
        # paths where `fly` returned or raised an ordinary Exception.
        if browser is not None:
            browser.quit()
    return routine
def test():
    """Run every entry of ``test_cases`` through ``fly`` and print a report.

    For each case, ``fly`` is called on ``case["itinerary"]`` and the
    itinerary's ``"results"`` entry is compared with ``case["correct"]``.
    Failing cases print the expected value, the received value and the whole
    itinerary.  A pass/fail summary is printed at the end.
    """
    separator = "---------------------------------------------------"
    total_cases = len(test_cases)
    failed = 0

    for scenario in test_cases:
        plan = scenario["itinerary"]
        fly(plan)
        matched = plan["results"] == scenario["correct"]
        if matched:
            print(" - (#{}) Case passed.".format(scenario["id"]))
            continue

        # Mismatch: dump everything needed to diagnose the case.
        print(" - (#{}) Case failed.".format(scenario["id"]))
        for heading, payload in (
            ("---- Expected ----", lambda: scenario["correct"]),
            ("---- Received ----", lambda: plan["results"]),
            ("---- Log ---------", lambda: plan),
        ):
            print(heading)
            pprint(payload())
        failed += 1

    print(separator)
    if failed != 0:
        print("Failure: {} out of {} cases failed.".format(failed, total_cases))
    else:
        print("Success: All cases passed.")
    print(separator)
#Based on from ghettobird import fly bird = { "url": "https://www.sherdog.com/fighter/Jon-Jones-27944", "flightpath": { "birth_date": "//span[@itemprop='birthDate']", "age": "//span[@class='item birthday']//strong", "weight_class": "//h6[@class='item wclass']//strong//a", "nationality": "//strong[@itemprop='nationality']", "camp_team": "//span[@itemprop='memberOf']//a//span", "locality": "//span[@itemprop='addressLocality']", "height_ft": "//span[@class='item height']//strong", "weight_lb": "//span[@class='item weight']//strong", "wins": "//span[./span[contains(text(),'Wins')]]//span[2]", "losses": "//span[./span[contains(text(),'Losses')]]//span[2]", "draws": "//span[./span[contains(text(),'Draws')]]//span[2]", "last_fight": "//span[@class='sub_line']" } } scraped = fly(bird) print(scraped['results'])
def _split_name(raw_name):
    """Return ``{'first', 'last'}`` from the first two words of *raw_name*.

    Splits on any run of whitespace, so leading/trailing/double spaces do not
    produce empty name parts.  Returns ``None`` when fewer than two words are
    present.
    """
    parts = raw_name.split()
    if len(parts) < 2:
        return None
    return {'first': parts[0], 'last': parts[1]}


def _copy_race_counts(races, analysis, race_list):
    """Copy into *races* every entry of *analysis* whose key is in *race_list*."""
    for race in race_list:
        if race in analysis:
            races[race] = analysis[race]


def scrape():
    """Build one "Diversity" sheet row per firm from the input spreadsheet.

    Reads module-level state: ``browser``, ``inputAlias``, ``settings``,
    ``li_roadmap_ROADMAP`` (its ``"url"`` entry is overwritten per firm) and
    ``diversity_header``.

    For each input row, ``li_link`` is treated either as a URL (it contains
    ``http://`` or ``https://``), in which case the people page is fetched
    with ``fly``, or as a comma-separated list of names.  The first two words
    of each usable name are passed to ``analyzeRace``; the per-race values it
    returns are appended to the row in ``settings["race_list"]`` order.

    Errors while processing one firm are printed and that firm's row is still
    written with whatever was gathered so far.  Names with fewer than two
    words are skipped (a marker line is printed) instead of aborting the
    firm.

    Returns:
        None.  Rows are written with ``writeToSheet`` and the run is recorded
        with ``logExecution``.
    """
    handleSettings()
    login(browser)
    inputSpreadsheet = getSheetData(inputAlias)
    firms = []
    for firmInput in inputSpreadsheet:
        races = {
            "Asian,GreaterEastAsian,EastAsian": 0,
            "Asian,GreaterEastAsian,Japanese": 0,
            "Asian,IndianSubContinent": 0,
            "GreaterAfrican,Africans": 0,
            "GreaterAfrican,Muslim": 0,
            "GreaterEuropean,British": 0,
            "GreaterEuropean,EastEuropean": 0,
            "GreaterEuropean,Jewish": 0,
            "GreaterEuropean,WestEuropean,French": 0,
            "GreaterEuropean,WestEuropean,Germanic": 0,
            "GreaterEuropean,WestEuropean,Hispanic": 0,
            "GreaterEuropean,WestEuropean,Italian": 0,
            "GreaterEuropean,WestEuropean,Nordic": 0
        }
        f = {
            "company": firmInput["company"],
            "li_link": firmInput["li_link"],
            "private": 0,
            "public": 0,
            "names": "",
            "formattedNames": [],
            "li_allstaff": 0,
            "foreign": 0,
            "non-foreign": 0
        }
        link_or_names = f["li_link"]
        if "http://" in link_or_names or "https://" in link_or_names:
            try:
                url = link_or_names + "/people?facetGeoRegion=de%3A0"
                li_roadmap_ROADMAP["url"] = url
                results = fly(li_roadmap_ROADMAP)["results"]
                # Retry once when the staff count came back empty.
                if results["li_allstaff"] in (None, ""):
                    results = fly(li_roadmap_ROADMAP)["results"]
                f["li_allstaff"] = results['li_allstaff']
                for entry in results["names"]:
                    try:
                        name = entry["name"]
                        if name == "LinkedIn Member":
                            # Placeholder shown for hidden profiles.
                            f["private"] += 1
                            continue
                        f["public"] += 1
                        f["names"] += name + ", "
                        parsed = _split_name(name)
                        if parsed is None:
                            print("~~~~~~~~~~~~~~~~~~~~~~")
                            continue
                        f["formattedNames"].append(parsed)
                    except Exception:
                        # Best effort per name: a malformed entry must not
                        # abort the rest of the firm.
                        print("~~~~~~~~~~~~~~~~~~~~~~")
                        continue
                pprint(f["formattedNames"])
                analysis = analyzeRace(f["formattedNames"])
                pprint(analysis)
                _copy_race_counts(races, analysis, settings["race_list"])
            except Exception as e:
                print(url)
                print(e)
        else:
            try:
                names = link_or_names.split(",")
                f["li_allstaff"] = len(names)
                f["public"] = len(names)
                f["names"] += link_or_names
                for name in names:
                    # Whitespace-tolerant split: "A B, C D" yields " C D" for
                    # the second entry, which a split on a single space would
                    # parse as first="" / last="C".
                    parsed = _split_name(name)
                    if parsed is None:
                        print("~~~~~~~~~~~~~~~~~~~~~~")
                        continue
                    f["formattedNames"].append(parsed)

                pprint(f["formattedNames"])
                analysis = analyzeRace(f["formattedNames"])
                _copy_race_counts(races, analysis, settings["race_list"])
            except Exception as e:
                print(link_or_names)
                print(e)
        row = [
            f["company"], f["li_link"], f["li_allstaff"], f["private"],
            f["public"], f["names"], ""
        ]
        row.extend(races[race] for race in settings["race_list"])
        firms.append(row)
    writeToSheet("Diversity", diversity_header, firms)
    logExecution()