def _parse_and_persist_settings(
    afterdate: dt.date,
    beforedate: dt.date,
    outfile: str = "",
    showbrowser: bool = False,
    db: bool = True,
    gs: bool = True,
    county: str = "travis",
):
    """
    Pull settings via `parse_settings()` and send them to the enabled sinks.

    Args:
        afterdate: one bound of the date range, forwarded to `parse_settings()`.
        beforedate: other bound of the date range, forwarded to `parse_settings()`.
        outfile: destination for a JSON dump of the results. May be either a
            filesystem path or an already-open writable text file object.
            A falsy value (the default `""`) skips the dump.
        showbrowser: forwarded unchanged to `parse_settings()`.
        db: when true, every setting is handed to `persist.rest_setting()`.
        gs: when true, the settings are handed to
            `gsheet.write_pulled_settings()`.
        county: forwarded to `parse_settings()` as the `county` keyword.

    Returns:
        The value returned by `parse_settings()`.
    """
    pulled_settings = parse_settings(afterdate,
                                     beforedate,
                                     outfile,
                                     showbrowser,
                                     county=county)
    if db:
        # Imported lazily so the module is only required when this sink is on.
        import persist

        for setting in pulled_settings:
            persist.rest_setting(setting)
    if gs:
        # Imported lazily so the module is only required when this sink is on.
        import gsheet

        gsheet.write_pulled_settings(pulled_settings)

    if outfile:
        if hasattr(outfile, "write"):
            # Caller supplied an open file object: write to it and leave
            # closing it to the caller.
            json.dump(pulled_settings, outfile)
        else:
            # Caller supplied a path (as the `str` annotation advertises);
            # json.dump() needs a file object, so open one here.
            with open(outfile, "w", encoding="utf-8") as handle:
                json.dump(pulled_settings, handle)
    return pulled_settings
Exemplo n.º 2
0
def parse_settings_on_cloud(afterdate: str,
                            beforedate: str,
                            write_to_sheets=True):
    """
    Scrape settings for the given date range, persist each one, and
    optionally publish a filtered view to a Google Sheet.

    Every scraped setting is passed to `persist.rest_setting()`. When
    `write_to_sheets` is true, the settings are loaded into a DataFrame,
    filtered on `setting_type` / `hearing_type`, have `case_number` and
    `setting_style` combined into `case_dets`, are de-duplicated on
    `case_number` (keeping the last row), and are written to the
    "eviction_scheduler" tab of "Court_scraper_eviction_scheduler".

    Returns nothing.
    """
    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")

    dates = get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    scraped = make_setting_list(dates)
    for record in scraped:
        persist.rest_setting(record)

    # maybe make this cleaner in sql? future work
    if not write_to_sheets:
        return

    # The sheet is opened before the DataFrame is built, matching the
    # argument evaluation order of a single nested call.
    sheet = gsheet.open_sheet(gsheet.init_sheets(),
                              "Court_scraper_eviction_scheduler",
                              "eviction_scheduler")
    evictions = gsheet.filter_df(pd.DataFrame(scraped),
                                 'setting_type', 'Eviction')
    hearings = gsheet.filter_df(evictions,
                                'hearing_type', '(Hearing)|(Trial)')
    combined = gsheet.combine_cols(hearings,
                                   ['case_number', 'setting_style'],
                                   'case_dets')
    latest_per_case = combined.drop_duplicates("case_number", keep="last")
    gsheet.write_data(sheet, gsheet.morning_afternoon(latest_per_case))
def parse_settings_on_cloud(
    afterdate: str,
    beforedate: str,
    write_to_sheets=True,
    scraper: Optional[scrapers.FakeScraper] = None,
):
    """
    Scrape settings for the given date range and send them to the enabled sinks.

    Args:
        afterdate: one bound of the date range, passed to
            `get_days_between_dates()`.
        beforedate: other bound of the date range, passed to
            `get_days_between_dates()`.
        write_to_sheets: when true, the scraped settings are handed to
            `gsheet.write_pulled_settings()`.
        scraper: object whose `make_setting_list()` performs the scrape.
            Defaults to a fresh `scrapers.TravisScraper()`.

    Each setting is passed to `persist.rest_setting()` only when the scraper
    is a `scrapers.TravisScraper` instance. Returns nothing.
    """
    if scraper is None:
        scraper = scrapers.TravisScraper()
    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")

    days_to_pull = get_days_between_dates(afterdate=afterdate,
                                          beforedate=beforedate)
    pulled_settings = scraper.make_setting_list(days_to_pull)
    # Only TravisScraper results are persisted; any other scraper type
    # skips this step.
    if isinstance(scraper, scrapers.TravisScraper):
        # Imported lazily so the module is only required on this path.
        import persist

        for setting in pulled_settings:
            persist.rest_setting(setting)
    # maybe make this cleaner in sql? future work
    if write_to_sheets:
        # Imported lazily so the module is only required on this path.
        import gsheet

        gsheet.write_pulled_settings(pulled_settings)
Exemplo n.º 4
0
def parse_settings(afterdate, beforedate, outfile, showbrowser=False):
    """
    Scrape settings for the given date range, persist each one, and dump
    the full list to `outfile` as JSON.

    `outfile` is handed directly to `json.dump()`, so it must be a writable
    file object. When `showbrowser` is true, `fetch_page.driver` is replaced
    with a default Selenium Firefox driver before scraping.

    Returns nothing.
    """
    if showbrowser:
        # Swap in the default selenium driver.
        from selenium import webdriver
        fetch_page.driver = webdriver.Firefox()

    scraped = make_setting_list(
        get_days_between_dates(afterdate=afterdate, beforedate=beforedate))
    for record in scraped:
        persist.rest_setting(record)
    json.dump(scraped, outfile)
Exemplo n.º 5
0
def parse_settings(afterdate, beforedate, outfile, showbrowser=False):
    """
    Scrape all settings between `afterdate` and `beforedate`, persist each
    one, publish a filtered view to a Google Sheet, and dump the full list
    to `outfile` as JSON.

    `outfile` is handed directly to `json.dump()`, so it must be a writable
    file object. When `showbrowser` is true, `fetch_page.driver` is replaced
    with a Selenium Chrome driver loaded from "./chromedriver".

    Returns nothing.
    """
    if showbrowser:
        # Swap in the default selenium driver.
        from selenium import webdriver
        fetch_page.driver = webdriver.Chrome("./chromedriver")

    scraped = make_setting_list(
        get_days_between_dates(afterdate=afterdate, beforedate=beforedate))
    for record in scraped:
        persist.rest_setting(record)

    # The sheet is opened before the DataFrame is built, matching the
    # argument evaluation order of a single nested call.
    sheet = gsheet.open_sheet(gsheet.init_sheets(),
                              "Court_scraper_eviction_scheduler",
                              "eviction_scheduler")
    evictions = gsheet.filter_df(pd.DataFrame(scraped),
                                 'setting_type', 'Eviction')
    hearings = gsheet.filter_df(evictions,
                                'hearing_type', '(Hearing)|(Trial)')
    combined = gsheet.combine_cols(hearings,
                                   ['case_number', 'setting_style'],
                                   'case_dets')
    latest_per_case = combined.drop_duplicates("case_number", keep="last")
    gsheet.write_data(sheet, gsheet.morning_afternoon(latest_per_case))

    json.dump(scraped, outfile)
def parse_settings_on_cloud(afterdate, beforedate):
    """
    Scrape settings for the given date range, persist each one, and publish
    a filtered view to a Google Sheet.

    The settings are loaded into a DataFrame, filtered on `setting_type` /
    `hearing_type`, have `case_number` and `setting_style` combined into
    `case_dets`, and are written to the "eviction_scheduler" tab of
    "Court_scraper_eviction_scheduler".

    Returns nothing.
    """
    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")

    dates = get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    scraped = make_setting_list(dates)
    for record in scraped:
        persist.rest_setting(record)

    # The sheet is opened before the DataFrame is built, matching the
    # argument evaluation order of a single nested call.
    sheet = gsheet.open_sheet(gsheet.init_sheets(),
                              "Court_scraper_eviction_scheduler",
                              "eviction_scheduler")
    evictions = gsheet.filter_df(pd.DataFrame(scraped),
                                 'setting_type', 'Eviction')
    hearings = gsheet.filter_df(evictions,
                                'hearing_type', '(Hearing)|(Trial)')
    combined = gsheet.combine_cols(hearings,
                                   ['case_number', 'setting_style'],
                                   'case_dets')
    gsheet.write_data(sheet, combined)
def parse_and_persist_settings(afterdate: str,
                               beforedate: str,
                               outfile: str,
                               showbrowser=False):
    """
    Obtain settings from `parse_settings()`, persist each one, publish a
    filtered view to a Google Sheet, and dump the full list as JSON.

    All four arguments are forwarded positionally to `parse_settings()`.
    `outfile` is also handed directly to `json.dump()`.

    NOTE(review): `outfile` is annotated `str`, but `json.dump()` needs a
    writable file object -- confirm what callers actually pass.

    Returns nothing.
    """
    scraped = parse_settings(afterdate, beforedate, outfile, showbrowser)
    for record in scraped:
        persist.rest_setting(record)

    # The sheet is opened before the DataFrame is built, matching the
    # argument evaluation order of a single nested call.
    sheet = gsheet.open_sheet(gsheet.init_sheets(),
                              "Court_scraper_eviction_scheduler",
                              "eviction_scheduler")
    evictions = gsheet.filter_df(pd.DataFrame(scraped),
                                 'setting_type', 'Eviction')
    hearings = gsheet.filter_df(evictions,
                                'hearing_type', '(Hearing)|(Trial)')
    combined = gsheet.combine_cols(hearings,
                                   ['case_number', 'setting_style'],
                                   'case_dets')
    latest_per_case = combined.drop_duplicates("case_number", keep="last")
    gsheet.write_data(sheet, gsheet.morning_afternoon(latest_per_case))

    json.dump(scraped, outfile)