示例#1
0
def runRoutine(routine):
    """Run ``fly`` on *routine*, supplying a headless Chrome driver on request.

    If ``routine["options"]["browser"]`` equals ``True``, a headless Chrome
    WebDriver is created (binary location read from the ``GOOGLE_CHROME_BIN``
    environment variable, driver path from ``CHROMEDRIVER_PATH``) and stored
    back into ``routine["options"]["browser"]`` before ``fly`` is called.

    Any ``Exception`` raised by ``fly`` is printed between separator lines and
    not propagated. A browser created here is always quit, even when ``fly``
    is interrupted by something that is not an ``Exception`` subclass.

    Args:
        routine: Mapping handed to ``fly``; may contain an ``"options"``
            mapping with a ``"browser"`` entry.

    Returns:
        The same *routine* object that was passed in. Note that
        ``routine["options"]["browser"]`` still refers to the (already quit)
        driver when one was created.
    """
    pprint(routine)
    browser = None
    if "options" in routine:
        options = routine["options"]
        # Equality with True (not plain truthiness) is deliberate: any other
        # value stored under "browser" must not trigger creating a driver.
        if "browser" in options and options["browser"] == True:  # noqa: E712
            chrome_options = webdriver.ChromeOptions()
            chrome_options.binary_location = os.environ.get(
                "GOOGLE_CHROME_BIN")
            chrome_options.add_argument("--headless")
            chrome_options.add_argument("--disable-dev-shm-usage")
            chrome_options.add_argument("--no-sandbox")
            # NOTE(review): `executable_path` / `chrome_options` are the
            # legacy Selenium keyword names -- confirm the pinned Selenium
            # version still accepts them.
            browser = webdriver.Chrome(
                executable_path=os.environ.get("CHROMEDRIVER_PATH"),
                chrome_options=chrome_options)
            options["browser"] = browser
    try:
        fly(routine)
    except Exception as e:
        print('-------------------------------------------')
        print(e)
        print('-------------------------------------------')
    finally:
        # Release the Chrome process no matter how fly() ended.
        if browser is not None:
            browser.quit()
    return routine
示例#2
0
def test():
    """Run ``fly`` on every entry of ``test_cases`` and print a report.

    Each case supplies an ``"itinerary"`` that is passed to ``fly``, the
    expected value under ``"correct"`` and an ``"id"`` used in the output.
    A case passes when ``itinerary["results"]`` equals the expected value
    after the call; for a failing case the expected value, the received
    value and the whole itinerary are pretty-printed. A summary framed by
    separator lines is printed at the end.
    """
    separator = "---------------------------------------------------"
    total_cases = len(test_cases)
    failed = 0
    for case in test_cases:
        itinerary = case["itinerary"]
        fly(itinerary)
        received = itinerary["results"]
        expected = case["correct"]
        if received == expected:
            print(" - (#{}) Case passed.".format(case["id"]))
            continue
        # Failure path: dump everything needed to diagnose the mismatch.
        print(" - (#{}) Case failed.".format(case["id"]))
        print("---- Expected ----")
        pprint(expected)
        print("---- Received ----")
        pprint(received)
        print("---- Log ---------")
        pprint(itinerary)
        failed += 1
    print(separator)
    if failed:
        print("Failure: {} out of {} cases failed.".format(failed, total_cases))
    else:
        print("Success: All cases passed.")
    print(separator)
示例#3
0
#Based on

from ghettobird import fly

# Page to scrape: a fighter profile on sherdog.com.
FIGHTER_URL = "https://www.sherdog.com/fighter/Jon-Jones-27944"

# Output field name -> XPath expression locating that field on the page.
FIELD_XPATHS = {
    "birth_date": "//span[@itemprop='birthDate']",
    "age": "//span[@class='item birthday']//strong",
    "weight_class": "//h6[@class='item wclass']//strong//a",
    "nationality": "//strong[@itemprop='nationality']",
    "camp_team": "//span[@itemprop='memberOf']//a//span",
    "locality": "//span[@itemprop='addressLocality']",
    "height_ft": "//span[@class='item height']//strong",
    "weight_lb": "//span[@class='item weight']//strong",
    "wins": "//span[./span[contains(text(),'Wins')]]//span[2]",
    "losses": "//span[./span[contains(text(),'Losses')]]//span[2]",
    "draws": "//span[./span[contains(text(),'Draws')]]//span[2]",
    "last_fight": "//span[@class='sub_line']",
}

# The routine handed to fly(): the target URL plus the fields to extract.
bird = {"url": FIGHTER_URL, "flightpath": FIELD_XPATHS}

# fly() returns a mapping; the scraped values are read from its "results" key.
scraped = fly(bird)
print(scraped["results"])
def scrape():
    """Build one "Diversity" sheet row per firm listed in the input sheet.

    For each input row the ``li_link`` cell is either

    * an http(s) URL, in which case ``fly`` is run on ``li_roadmap_ROADMAP``
      (with its ``"url"`` pointed at the firm's ``/people`` page) to obtain
      the staff count and the list of names, or
    * a comma-separated list of full names that is used directly.

    The collected names are passed once per firm to ``analyzeRace`` and the
    counts for every category in ``settings["race_list"]`` are appended to the
    firm's row. Errors while processing a firm are printed and the firm is
    still written with whatever was gathered up to that point. Finally all
    rows are written with ``writeToSheet`` and ``logExecution`` is called.
    """
    handleSettings()
    login(browser)
    inputSpreadsheet = getSheetData(inputAlias)
    firms = []
    for firmInput in inputSpreadsheet:
        # Per-firm counters, pre-seeded with 0 so a category missing from the
        # analysis still yields a value in the output row.
        races = {
            "Asian,GreaterEastAsian,EastAsian": 0,
            "Asian,GreaterEastAsian,Japanese": 0,
            "Asian,IndianSubContinent": 0,
            "GreaterAfrican,Africans": 0,
            "GreaterAfrican,Muslim": 0,
            "GreaterEuropean,British": 0,
            "GreaterEuropean,EastEuropean": 0,
            "GreaterEuropean,Jewish": 0,
            "GreaterEuropean,WestEuropean,French": 0,
            "GreaterEuropean,WestEuropean,Germanic": 0,
            "GreaterEuropean,WestEuropean,Hispanic": 0,
            "GreaterEuropean,WestEuropean,Italian": 0,
            "GreaterEuropean,WestEuropean,Nordic": 0
        }
        f = {
            "company": firmInput["company"],
            "li_link": firmInput["li_link"],
            "private": 0,
            "public": 0,
            "names": "",
            "formattedNames": [],
            "li_allstaff": 0,
            "foreign": 0,
            "non-foreign": 0
        }
        link_or_names = f["li_link"]
        if "http://" in link_or_names or "https://" in link_or_names:
            try:
                url = link_or_names + "/people?facetGeoRegion=de%3A0"
                li_roadmap_ROADMAP["url"] = url
                results = fly(li_roadmap_ROADMAP)["results"]
                # Retry once when the staff count came back empty.
                if (results["li_allstaff"] is None
                        or results["li_allstaff"] == ""):
                    results = fly(li_roadmap_ROADMAP)["results"]
                f["li_allstaff"] = results['li_allstaff']
                for entry in results["names"]:
                    try:
                        name = entry["name"]
                        if name == "LinkedIn Member":
                            # Placeholder shown instead of a real name.
                            f["private"] += 1
                            continue
                        f["public"] += 1
                        f["names"] += name + ", "
                        f["formattedNames"].append(_splitName(name))
                    except Exception:
                        # Malformed entry: flag it and move on to the next.
                        print("~~~~~~~~~~~~~~~~~~~~~~")
                        continue
                pprint(f["formattedNames"])
                analysis = analyzeRace(f["formattedNames"])
                pprint(analysis)
                _mergeRaceCounts(races, analysis)
            except Exception as e:
                print(url)
                print(e)
        else:
            try:
                names = link_or_names.split(",")
                f["li_allstaff"] = len(names)
                f["public"] = len(names)
                f["names"] += link_or_names
                for name in names:
                    try:
                        f["formattedNames"].append(_splitName(name))
                    except IndexError:
                        # Fewer than two name tokens: skip this entry only,
                        # mirroring how the URL branch treats bad names.
                        print("~~~~~~~~~~~~~~~~~~~~~~")
                # Analyse once, after every name has been collected, instead
                # of re-running the analysis on a growing list for each name.
                pprint(f["formattedNames"])
                analysis = analyzeRace(f["formattedNames"])
                _mergeRaceCounts(races, analysis)
            except Exception as e:
                print(link_or_names)
                print(e)
        row = [
            f["company"], f["li_link"], f["li_allstaff"], f["private"],
            f["public"], f["names"], ""
        ]
        # .get(): a configured category unknown to both the defaults and the
        # analysis counts as 0 instead of aborting the run with a KeyError.
        row.extend(races.get(race, 0) for race in settings["race_list"])
        firms.append(row)
    writeToSheet("Diversity", diversity_header, firms)
    logExecution()


def _splitName(name):
    """Return ``{'first': ..., 'last': ...}`` from the first two name tokens.

    Splits on runs of whitespace, so padding left over from comma-separated
    input (``"A B, C D"``) or doubled spaces does not shift the tokens.
    As before, the second token is taken as the last name. Raises
    ``IndexError`` when *name* has fewer than two tokens.
    """
    parts = name.split()
    return {'first': parts[0], 'last': parts[1]}


def _mergeRaceCounts(races, analysis):
    """Copy into *races* the value of every configured race in *analysis*."""
    for race in settings["race_list"]:
        if race in analysis.keys():
            races[race] = analysis[race]