예제 #1
0
def _parse_pitcher_details(page_content, game, pitcher_id):
    """Extract a pitcher's name and team ids from a game log page.

    Args:
        page_content: Parsed page object exposing an ``xpath`` method.
        game: Passed through unchanged to ``_parse_team_ids``.
        pitcher_id: Value substituted for ``id`` in ``T_PITCHER_NAME_XPATH``.

    Returns:
        ``Result.Ok`` wrapping a dict with ``name``, ``team_id`` and
        ``opponent_id`` keys. ``Result.Fail`` when the xpath query matches
        nothing or the matched text holds fewer than two ``-`` characters.
        A failed result from ``_parse_team_ids`` is returned unchanged.
    """
    query = Template(T_PITCHER_NAME_XPATH).substitute(id=pitcher_id)
    parsed = page_content.xpath(query)
    if not parsed:
        error = "Failed to parse pitcher name from game log page."
        return Result.Fail(error)

    selected_pitcher = parsed[0]
    # The name is everything that precedes the second-to-last "-", so at
    # least two dashes must be present in the matched text.
    if selected_pitcher.count("-") < 2:
        error = "Failed to parse pitcher name from game log page."
        return Result.Fail(error)

    # Splitting twice from the right leaves the text before the
    # second-to-last dash in the first element (earlier dashes stay intact).
    name = selected_pitcher.rsplit("-", 2)[0].strip()
    result = _parse_team_ids(game, selected_pitcher)
    if result.failure:
        return result
    id_dict = result.value
    pitcher_dict = {
        "name": name,
        "team_id": id_dict["team_id"],
        "opponent_id": id_dict["opponent_id"],
    }
    return Result.Ok(pitcher_dict)
예제 #2
0
 def validate_new_url_delay_setting(self, new_value):
     """Validate a proposed URL delay setting.

     Args:
         new_value: Five-item sequence unpacked as ``(is_enabled, is_random,
             delay_uniform, delay_min, delay_max)``.

     Returns:
         ``Result.Fail`` when the delay is disabled or when the applicable
         delay (``delay_min`` for a random delay, ``delay_uniform``
         otherwise) is below 3; ``Result.Ok()`` otherwise.
     """
     is_enabled, is_random, delay_uniform, delay_min, _delay_max = new_value
     if not is_enabled:
         return Result.Fail("URL delay cannot be disabled!")
     # Only one of the two values is relevant, depending on the delay mode.
     shortest_delay = delay_min if is_random else delay_uniform
     if shortest_delay < 3:
         return Result.Fail("URL delay min value must be greater than 2 seconds!")
     return Result.Ok()
예제 #3
0
 def execute(self, year):
     """Patch every game of ``year`` that the audit report lists under "invalid_pfx".

     Args:
         year: Key used to look up the season in the audit report.

     Returns:
         ``Result.Ok`` wrapping a dict with ``all_patch_results`` (patch
         result per game id), ``successful_change`` and
         ``invalid_pfx_change`` (from ``calculate_games_changed``).
         ``Result.Fail`` when the report has no entry for ``year`` or no
         invalid pitchfx games; the first failed patch result is returned
         unchanged.
     """
     self.subscribe_to_events()
     # try/finally guarantees the event subscriptions are released on every
     # exit path, including the early failure returns below.
     try:
         all_patch_results = {}
         audit_report = self.scraped_data.get_audit_report()
         self.audit_report_before = deepcopy(audit_report)
         if year not in self.audit_report_before:
             return Result.Fail(f"No games for MLB {year} season have been scraped.")
         game_ids = self.audit_report_before[year].get("invalid_pfx", [])
         if not game_ids:
             return Result.Fail(f"No games for MLB {year} season have invalid pitchfx data.")
         self.events.patch_all_invalid_pitchfx_started()
         self.initialize_spinner(game_ids)
         # The spinner is stopped exactly once, whether the loop finishes,
         # a patch fails, or a patch raises.
         try:
             for num, game_id in enumerate(game_ids, start=1):
                 self.spinner.text = self.get_spinner_text(game_id, num, len(game_ids))
                 result = self.patch_invalid_pfx.execute(game_id, no_prompts=True)
                 if result.failure:
                     return result
                 all_patch_results[game_id] = result.value
                 self.spinner.text = self.get_spinner_text(game_id, num + 1, len(game_ids))
         finally:
             self.spinner.stop()
         # Snapshot the report again so before/after can be compared.
         audit_report = self.scraped_data.get_audit_report()
         self.audit_report_after = deepcopy(audit_report)
         (successful_change, invalid_pfx_change) = self.calculate_games_changed(year)
         self.events.patch_all_invalid_pitchfx_complete()
         return Result.Ok(
             {
                 "all_patch_results": all_patch_results,
                 "successful_change": successful_change,
                 "invalid_pfx_change": invalid_pfx_change,
             }
         )
     finally:
         self.unsubscribe_from_events()
예제 #4
0
 def validate_date_range(cls, db_session, start, end):
     """Check that ``start`` and ``end`` form a valid range within one season.

     Args:
         db_session: Passed to ``find_by_year`` and ``is_date_in_season``.
         start: First date of the range.
         end: Last date of the range.

     Returns:
         ``Result.Ok`` wrapping the season found for ``start.year`` when both
         dates pass ``is_date_in_season``. Otherwise ``Result.Fail`` with a
         list of message lines describing the problem.
     """
     if start.year != end.year:
         error = [
             "Start and end dates must both be in the same year and within "
             "the scope of that year's MLB Regular Season."
         ]
         return Result.Fail(error)
     if start > end:
         start_str = start.strftime(DATE_ONLY)
         end_str = end.strftime(DATE_ONLY)
         error = [
             '"start" must be a date before (or the same date as) "end":',
             f"start..: {start_str}",
             f"end....: {end_str}",
         ]
         return Result.Fail(error)
     season = cls.find_by_year(db_session, start.year)
     if not season:
         # Without a season record the message below cannot be built
         # (it reads season.name), so report the missing season directly.
         error = [f"Database does not contain info for the MLB {start.year} season."]
         return Result.Fail(error)
     start_date_valid = cls.is_date_in_season(db_session, start).success
     end_date_valid = cls.is_date_in_season(db_session, end).success
     if not start_date_valid or not end_date_valid:
         error = [
             f"Start and end date must both be within the {season.name}:",
             f"{season.name} Start Date..: {season.start_date_str}",
             f"{season.name} End Date....: {season.end_date_str}",
         ]
         return Result.Fail(error)
     return Result.Ok(season)
예제 #5
0
 def check_for_exact_match(self, pfx):
     """Find the single game event that exactly matches the invalid pfx data.

     Args:
         pfx: Mapping read for the keys ``inning_id``, ``batter_id``,
             ``pitcher_id``, ``pitch_count`` and ``invalid_pfx``.

     Returns:
         ``Result.Ok`` wrapping ``get_event_dict(event)`` when exactly one
         event in ``self.game_events`` meets every criterion; otherwise
         ``Result.Fail("")``.
     """

     def meets_all_criteria(event):
         audit = event["at_bat_pitchfx_audit"]
         # the game event must be missing pitchfx data
         if not audit["missing_pitchfx_count"] > 0:
             return False
         # game event and invalid pfx must have taken place in the same inning
         if event["inning_id"][-5:] != pfx["inning_id"]:
             return False
         # game event and invalid pfx must share the batter OR the pitcher
         if not (
             event["batter_id_mlb"] == pfx["batter_id"]
             or event["pitcher_id_mlb"] == pfx["pitcher_id"]
         ):
             return False
         # number of missing pitches must equal the number of invalid pfx
         if audit["missing_pitchfx_count"] != pfx["pitch_count"]:
             return False
         # every missing pitch number must appear among the invalid pfx numbers
         return all(
             pitch_number in pfx["invalid_pfx"]
             for pitch_number in audit["missing_pitch_numbers"]
         )

     candidates = [
         self.get_event_dict(event)
         for event in self.game_events
         if meets_all_criteria(event)
     ]
     # zero matches and multiple matches are both treated as NO EXACT MATCH
     if len(candidates) != 1:
         return Result.Fail("")
     return Result.Ok(candidates[0])
예제 #6
0
 def apply(self, data):
     """Apply this patch by replacing one game event's pitch sequence.

     The target event is found by matching ``self.inning_id`` against
     ``data.innings_list`` and then ``self.pbp_table_row_number`` against
     that inning's ``game_events``.

     Args:
         data: Object exposing ``innings_list``; it is modified in place.

     Returns:
         ``Result.Ok(data)`` after setting ``pitch_sequence`` on the matched
         event, or ``Result.Fail`` when zero or more than one inning or
         event matches.
     """
     inning_matches = [
         inning for inning in data.innings_list
         if inning.inning_id == self.inning_id
     ]
     if not inning_matches:
         error = f"Unable to locate the inning identified in this patch: {self.inning_id}"
         return Result.Fail(error)
     if len(inning_matches) > 1:
         # f-string so the inning id is interpolated rather than shown
         # as a literal placeholder.
         error = (
             "More than one inning was found that matches the inning identified "
             f"in this patch: {self.inning_id}")
         return Result.Fail(error)
     inning = inning_matches[0]
     event_matches = [
         event for event in inning.game_events
         if event.pbp_table_row_number == self.pbp_table_row_number
     ]
     if not event_matches:
         error = (
             "Unable to locate the game event identified by pbp_table_row_number in this "
             f"patch: {self.pbp_table_row_number}")
         return Result.Fail(error)
     if len(event_matches) > 1:
         error = (
             "More than one game event was found that matches the pbp_table_row_number "
             f"identified in this patch: {self.pbp_table_row_number}")
         return Result.Fail(error)
     event_matches[0].pitch_sequence = self.new_pitch_sequence
     return Result.Ok(data)
예제 #7
0
 def decode_json_response(self, response):
     """Decode a player search response, retrying once with an alternate URL.

     Args:
         response: HTTP response object exposing ``json()`` and ``text``.

     Returns:
         ``Result.Ok`` wrapping ``(query_results, num_results)`` when the
         search returned at least one result. ``Result.Fail`` when both URLs
         return zero results, when the body cannot be decoded or lacks the
         expected keys, or when ``totalSize`` is not an integer. A failed
         result from the alternate-URL request is returned unchanged.
     """
     query_results = ""
     try:
         resp_json = response.json()
         query_results = resp_json["search_player_all"]["queryResults"]
         num_results = int(query_results["totalSize"])
         if not num_results:
             # No results for the first URL: request the alternate URL and
             # decode its response the same way.
             result = self.try_alternate_url().on_success(
                 request_url_with_retries)
             if result.failure:
                 return result
             response = result.value
             resp_json = response.json()
             query_results = resp_json["search_player_all"]["queryResults"]
             num_results = int(query_results["totalSize"])
         if not num_results:
             return Result.Fail(
                 f"Failed to retrieve any results for player name: {self.name} (Tried 2 URLs)"
             )
         return Result.Ok((query_results, num_results))
     except (JSONDecodeError, KeyError) as e:
         # Neither exception type carries the HTTP response, so the body is
         # read from the most recent response object instead of from ``e``.
         error = f"Failed to decode HTTP response as JSON: {repr(e)}\n{response.text}"
         return Result.Fail(error)
     except ValueError:  # pragma: no cover
         error = f"Failed to parse number of results from search response: {query_results}"
         return Result.Fail(error)
예제 #8
0
def execute_nodejs_script(script_file_path, script_args):  # pragma: no cover
    """Run a Node.js script with whichever node executable is installed.

    Args:
        script_file_path: Path of the script; checked by ``validate_file_path``.
        script_args: Arguments forwarded to the script.

    Returns:
        The failed result from ``validate_file_path`` if the path is invalid,
        ``Result.Fail`` if neither ``node`` nor ``nodejs`` is installed or
        the run reports failure, and ``Result.Ok()`` otherwise.
    """
    path_check = validate_file_path(script_file_path)
    if path_check.failure:
        return path_check
    script_path = path_check.value
    # "node" is preferred; "nodejs" is only probed when "node" is absent.
    if program_is_installed("node"):
        succeeded = execute_js(str(script_path), arguments=script_args)
    elif program_is_installed("nodejs"):
        command = f"nodejs {script_path} {script_args}"
        succeeded = execute_shell_command(command)
    else:
        return Result.Fail("Node.js is NOT installed!")
    if succeeded:
        return Result.Ok()
    return Result.Fail("nodejs script failed")
예제 #9
0
    def parse_player_data_v2(self, player_data, bbref_id, debut_limit=None):
        """Convert a player-data mapping into the dict used to create a player.

        Args:
            player_data: Mapping of player attributes; ``id`` is required,
                every other key read here has a fallback.
            bbref_id: Stored as-is under ``bbref_id``.
            debut_limit: Optional year; players whose debut year is lower
                are rejected.

        Returns:
            ``Result.Ok`` wrapping the player dict, or ``Result.Fail`` when
            the debut precedes ``debut_limit`` or the height text does not
            match ``HEIGHT_REGEX``.
        """

        def read_date(key):
            # Missing or unparseable date strings fall back to date.min.
            try:
                return datetime.strptime(player_data.get(key, ""),
                                         DATE_ONLY).date()
            except ValueError:  # pragma: no cover
                return date.min

        debut = read_date("mlbDebutDate")
        if debut_limit and debut.year < debut_limit:
            return Result.Fail("Player debuted before the debut limit")

        birth_date = read_date("birthDate")

        height_match = HEIGHT_REGEX.search(player_data.get("height", r"0' 0\""))
        if not height_match:
            return Result.Fail("Response JSON was not in the expected format")
        height_parts = height_match.groupdict()
        height_total_inches = int(height_parts["feet"]) * 12 + int(height_parts["inches"])

        fallback_first_name = player_data.get("firstName", "")
        # The given name appends the middle name only when one is present.
        name_given = fallback_first_name
        if "middleName" in player_data:
            name_given = f'{fallback_first_name} {player_data["middleName"]}'
        # "useName" takes precedence over "firstName" whenever the key exists.
        first_name = player_data.get("useName", fallback_first_name)
        bat_side = player_data.get("batSide", {})
        pitch_hand = player_data.get("pitchHand", {})

        return Result.Ok({
            "name_first": first_name,
            "name_last": player_data.get("lastName", ""),
            "name_given": name_given,
            "bats": bat_side.get("code", ""),
            "throws": pitch_hand.get("code", ""),
            "weight": player_data.get("weight"),
            "height": height_total_inches,
            "debut": debut,
            "birth_year": birth_date.year,
            "birth_month": birth_date.month,
            "birth_day": birth_date.day,
            "birth_country": player_data.get("birthCountry", ""),
            "birth_state": player_data.get("birthStateProvince", ""),
            "birth_city": player_data.get("birthCity", ""),
            "bbref_id": bbref_id,
            "mlb_id": player_data["id"],
            "add_to_db_backup": True,
        })
예제 #10
0
 def add_data_for_year(self, year):
     """Import data for every game listed as "successful" in the audit report.

     Args:
         year: Key used to look up the season in ``self.app.audit_report``.

     Returns:
         ``Result.Fail`` when the report has no (or an empty) entry for
         ``year`` or no "successful" game ids; otherwise ``Result.Ok()``
         after the games were processed and the start/complete events fired.
     """
     season_report = self.app.audit_report.get(year)
     if not season_report:
         return Result.Fail(
             f"Audit report could not be generated for MLB Season {year}")
     eligible_game_ids = season_report.get("successful")
     if not eligible_game_ids:
         return Result.Fail(
             f"No games for MLB Season {year} qualify to have PitchFx data imported.")
     self.events.add_data_to_db_start(year, eligible_game_ids)
     self.add_data_for_games(year, eligible_game_ids)
     self.events.add_data_to_db_complete(year)
     return Result.Ok()
예제 #11
0
 def is_date_in_season(cls, db_session, check_date, season_type=SeasonType.REGULAR_SEASON):
     """Report whether ``check_date`` lies within the season for its year.

     Args:
         db_session: Passed to ``find_by_year``.
         check_date: Date to test (inclusive of the season start/end dates).
         season_type: Only used to word the "season not found" message.
             NOTE(review): assumes the value supports ``str.replace`` -
             confirm ``SeasonType`` members are string-like.

     Returns:
         ``Result.Ok`` wrapping the season when the date is in range,
         otherwise ``Result.Fail`` with an explanatory message.
     """
     season = cls.find_by_year(db_session, check_date.year)
     if not season:
         season_label = season_type.replace("_", " ").title()
         return Result.Fail(
             f"Database does not contain info for the MLB {check_date.year} "
             f"{season_label}"
         )
     date_str = check_date.strftime(DATE_ONLY)
     if season.start_date <= check_date <= season.end_date:
         return Result.Ok(season)
     return Result.Fail(f"{date_str} is not within the scope of the {season.name}")
예제 #12
0
def _validate_single_date(db_session, game_date):
    """Look up the scrape-status record for a date within an MLB season.

    Args:
        db_session: Passed to the ``db.Season`` and ``db.DateScrapeStatus``
            lookups.
        game_date: Date to validate.

    Returns:
        ``Result.Ok`` wrapping the ``DateScrapeStatus`` record, or
        ``Result.Fail`` when no season exists for the year, the date fails
        ``is_date_in_season``, or no status record exists for the date.
    """
    season = db.Season.find_by_year(db_session, game_date.year)
    if not season:
        # Without a season record the range message below cannot be built
        # (it reads season.name), so report the missing season directly.
        error = f"Database does not contain info for the MLB {game_date.year} season"
        return Result.Fail(error)
    date_is_valid = db.Season.is_date_in_season(db_session, game_date).success
    date_str = game_date.strftime(DATE_ONLY)
    if not date_is_valid:
        error = (f"'{date_str}' is not within the {season.name}:\n"
                 f"season_start_date: {season.start_date_str}\n"
                 f"season_end_date: {season.end_date_str}")
        return Result.Fail(error)
    date_status = db.DateScrapeStatus.find_by_date(db_session, game_date)
    if not date_status:
        error = f"scrape_status_date does not contain an entry for date: {date_str}"
        return Result.Fail(error)
    return Result.Ok(date_status)
예제 #13
0
 def get_sync_parameters(self):
     """Prompt for the season, file types and direction of a sync operation.

     Stores the answers on ``self.year``, ``self.file_types``,
     ``self.sync_tasks`` and ``self.sync_direction``.

     Returns:
         ``Result.Ok()`` once every value is collected, or
         ``Result.Fail("")`` when the season or direction prompt fails.
     """
     season_choice = season_prompt(
         self.db_session, "Select a season to synchronize scraped data:")
     if season_choice.failure:
         return Result.Fail("")
     self.year = season_choice.value.year

     self.file_types = file_types_prompt(
         "Select one or multiple file types to synchronize:")
     for selected_type in self.file_types:
         data_sets = self.get_data_sets_to_sync(selected_type)
         self.sync_tasks[selected_type] = data_sets

     direction_choice = self.sync_direction_prompt()
     if direction_choice.failure:
         return Result.Fail("")
     self.sync_direction = direction_choice.value
     return Result.Ok()
예제 #14
0
def validate_file_path(input_path: Union[Path, str]):
    """Confirm that ``input_path`` refers to an existing file.

    Args:
        input_path: Path to check, as a ``str`` or ``Path``.

    Returns:
        ``Result.Ok`` wrapping the path as a ``Path`` object, or
        ``Result.Fail`` when the value is falsy, of another type, does not
        exist, or is not a file.
    """
    if not input_path:
        return Result.Fail("NoneType or empty string is not a valid file path.")
    if not isinstance(input_path, (str, Path)):
        return Result.Fail(
            f'"input_path" parameter must be str or Path value (not "{type(input_path)}").'
        )
    # Strings are converted; Path objects are used as given.
    filepath = Path(input_path) if isinstance(input_path, str) else input_path
    if not filepath.exists():
        return Result.Fail(f'File does not exist: "{filepath}"')
    if filepath.is_file():
        return Result.Ok(filepath)
    return Result.Fail(f'The provided path is NOT a file: "{filepath}"')
예제 #15
0
 def initialize(self):
     """Prepare the URL set for this scrape task.

     Registers a SIGINT handler, starts the spinner and builds the URL set
     for ``self.start_date`` through ``self.end_date``.

     Returns:
         ``Result.Fail("skip")`` when the scrape condition is ``NEVER`` or
         no URLs were produced, the failed result from ``create_url_set``
         if it fails, and ``Result.Ok()`` otherwise.
     """
     if self.scrape_condition == ScrapeCondition.NEVER:
         return Result.Fail("skip")
     cancel_handler = partial(
         user_cancelled, self.db_session, self.db_job, self.spinner)
     signal(SIGINT, cancel_handler)
     self.spinner.text = "Building URL List..."
     self.spinner.start()
     self.url_tracker = UrlTracker(
         self.db_job, self.data_set, self.scraped_data)
     result = self.url_tracker.create_url_set(self.start_date, self.end_date)
     if result.failure:
         return result
     if self.url_tracker.total_urls:
         return Result.Ok()
     return Result.Fail("skip")
예제 #16
0
파일: util.py 프로젝트: a-luna/vig-api
def download_file(url: str, local_folder: Path):
    """Download ``url`` into ``local_folder``, resuming a partial download.

    The expected size is read from the ``content-length`` header of a HEAD
    request. A local file of that exact size is treated as complete; a file
    of any other size is resumed with a ``Range`` request and appended to.

    Args:
        url: Address of the file to download.
        local_folder: Directory that receives the file.

    Returns:
        ``Result.Ok`` wrapping the local file path when the final size
        equals the expected size, otherwise ``Result.Fail`` with details.
    """
    file_name = get_file_name_from_url(url)
    local_file_path = local_folder.joinpath(file_name)
    head_response = requests.head(url)
    remote_file_size = int(head_response.headers.get("content-length", 0))
    if not remote_file_size:
        return Result.Fail(
            f'Request for "{file_name}" did not return a response containing the file size.'
        )
    local_file_size = 0
    resume_header = None
    fopen_mode = "wb"
    if not local_file_path.exists():
        print(f'"{file_name}" does not exist. Downloading...')
    else:
        local_file_size = local_file_path.stat().st_size
        if local_file_size == remote_file_size:
            print(f'"{file_name}" is complete. Skipping...')
            return Result.Ok(local_file_path)
        print(f'"{file_name}" is incomplete. Resuming...')
        resume_header = {"Range": f"bytes={local_file_size}-"}
        fopen_mode = "ab"

    # A streamed response holds its connection until it is closed, so it is
    # used as a context manager to release the connection in every case.
    # NOTE(review): the status code is not checked, so an error body would
    # be written to the file - confirm whether that needs handling.
    with requests.get(url, stream=True, headers=resume_header) as response:
        with open(local_file_path, fopen_mode) as f:
            with tqdm(
                    total=remote_file_size,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    desc=local_file_path.name,
                    initial=local_file_size,
                    ascii=True,
                    miniters=1,
            ) as pbar:
                for chunk in response.iter_content(32 * CHUNK_SIZE):
                    f.write(chunk)
                    pbar.update(len(chunk))

    local_file_size = local_file_path.stat().st_size
    if local_file_size == remote_file_size:
        return Result.Ok(local_file_path)
    more_or_fewer = "more" if local_file_size > remote_file_size else "fewer"
    error = (
        f'Received {more_or_fewer} bytes than expected for "{file_name}"!\n'
        f"Expected File Size: {remote_file_size:,} bytes\n"
        f"Received File Size: {local_file_size:,} bytes")
    return Result.Fail(error)
예제 #17
0
def validate_brooks_game_id(input_str):
    """Parse a bb game id string into its date, team and game-number parts.

    Args:
        input_str: String searched with ``BB_GAME_ID_REGEX``.

    Returns:
        ``Result.Ok`` wrapping a dict with ``game_id``, ``game_date``,
        ``away_team_id``, ``home_team_id`` and ``game_number``, or
        ``Result.Fail`` when the captured year/month/day do not form a
        valid date.

    Raises:
        ValueError: If ``input_str`` does not match ``BB_GAME_ID_REGEX``.
    """
    match = BB_GAME_ID_REGEX.search(input_str)
    if not match:
        raise ValueError(f"String is not a valid bb game id: {input_str}")
    captured = match.groupdict()
    year, month, day, game_number = (
        int(captured[group]) for group in ("year", "month", "day", "game_num")
    )

    try:
        game_date = datetime(year, month, day)
    except Exception as e:
        return Result.Fail(f"Failed to parse game_date from game_id:\n{repr(e)}")

    # NOTE(review): the away id is read from the "home_team" group and the
    # home id from the "away_team" group. This may be a bug or may offset
    # the group naming in BB_GAME_ID_REGEX - confirm against the regex.
    return Result.Ok({
        "game_id": input_str,
        "game_date": game_date,
        "away_team_id": captured["home_team"].upper(),
        "home_team_id": captured["away_team"].upper(),
        "game_number": game_number,
    })
예제 #18
0
def create_urls_for_brooks_pitch_logs_for_date(db_job, scraped_data,
                                               game_date):
    """Build the list of pitch log URLs for every game on ``game_date``.

    Args:
        db_job: Passed to ``get_scraped_html_folderpath``.
        scraped_data: Source of the games for the date; also passed to the
            file name and cached-folder helpers.
        game_date: Date whose games are processed.

    Returns:
        ``Result.Ok`` wrapping a list of ``UrlDetails``, one per pitcher
        appearance of each game not flagged ``might_be_postponed``, or
        ``Result.Fail`` when no games data exists for the date.
    """
    data_set = DataSet.BROOKS_PITCH_LOGS
    req_data_set = DataSet.BROOKS_GAMES_FOR_DATE
    games_for_date = scraped_data.get_brooks_games_for_date(game_date)
    if not games_for_date:
        return Result.Fail(
            get_unscraped_data_error(data_set, req_data_set, game_date))

    def to_url_details(game, pitcher_id, pitch_log_url):
        # The URL id joins the game id and the pitcher id.
        pitch_app_id = f"{game.bbref_game_id}_{pitcher_id}"
        return from_dict(
            data_class=UrlDetails,
            data={
                "url": pitch_log_url,
                "url_id": pitch_app_id,
                "fileName": get_filename(scraped_data, data_set, pitch_app_id),
                "cachedHtmlFolderPath": get_cached_html_folderpath(
                    scraped_data, data_set, game_date),
                "scrapedHtmlFolderpath": get_scraped_html_folderpath(
                    db_job, data_set),
            },
        )

    urls = []
    for game in games_for_date.games:
        if not game.might_be_postponed:
            urls.extend(
                to_url_details(game, pitcher_id, pitch_log_url)
                for pitcher_id, pitch_log_url in game.pitcher_appearance_dict.items()
            )
    return Result.Ok(urls)
예제 #19
0
def status_date_range(app, start, end, verbosity):
    """Report status for each date in a specified range.

    Dates can be provided in any format that is recognized by dateutil.parser.
    For example, all of the following strings are valid ways to represent the same date:
    "2018-5-13" -or- "05/13/2018" -or- "May 13 2018"
    """
    if verbosity <= 0:
        error = f"Invalid value for verbosity: {verbosity}. Value must be greater than zero."
        return exit_app(app, Result.Fail(error))
    # Levels 1-4 each select a report type; any other positive value selects
    # the missing-pitchfx detail report.
    report_types_by_verbosity = {
        1: StatusReport.DATE_SUMMARY_MISSING_DATA,
        2: StatusReport.DATE_SUMMARY_ALL_DATES,
        3: StatusReport.DATE_DETAIL_MISSING_DATA,
        4: StatusReport.DATE_DETAIL_ALL_DATES,
    }
    report_type = report_types_by_verbosity.get(
        verbosity, StatusReport.DATE_DETAIL_MISSING_PITCHFX)
    result = report_date_range_status(app.db_session, start, end, report_type)
    if result.success:
        # A successful result carries the report viewer to display.
        result.value.launch()
    return exit_app(app, result)
예제 #20
0
def ui(app):  # pragma: no cover
    """Menu-driven UI powered by Bullet."""
    # Any exception raised while the menu runs (or while exiting) is turned
    # into a failed result and handed to exit_app.
    try:
        return exit_app(app, MainMenu(app).launch())
    except Exception as err:
        failure = Result.Fail(f"Error: {repr(err)}")
        return exit_app(app, failure)
예제 #21
0
def import_id_map_csv(app, csv_folder):
    """Add a ``db.PlayerId`` row to the session for each entry of the id map CSV.

    Args:
        app: Application object exposing ``db_session``.
        csv_folder: Folder containing the ``PLAYER_ID_MAP_CSV`` file.

    Returns:
        ``Result.Ok()`` when every row was added to the session;
        ``Result.Fail`` (after rolling the session back) if anything raises.
        NOTE(review): nothing is committed here - presumably the caller
        commits; confirm.
    """
    try:
        csv_filepath = csv_folder.joinpath(PLAYER_ID_MAP_CSV)
        id_map_task = tasks.UpdatePlayerIdMapTask(app, csv_filepath)
        player_id_map = id_map_task.read_bbref_player_id_map_from_file()
        progress_bar_settings = {
            "total": len(player_id_map),
            "desc": "Populating player_id table.....",
            "unit": "row",
            "mininterval": 0.12,
            "maxinterval": 5,
            "unit_scale": True,
            "ncols": 90,
        }
        with tqdm(**progress_bar_settings) as pbar:
            for id_map in player_id_map:
                player_id = db.PlayerId(
                    mlb_id=int(id_map.mlb_ID),
                    mlb_name=id_map.name_common,
                    bbref_id=id_map.player_ID,
                    bbref_name=None,
                )
                app.db_session.add(player_id)
                pbar.update()
        return Result.Ok()
    except Exception as e:
        error = f"Error: {repr(e)}"
        app.db_session.rollback()
        return Result.Fail(error)
예제 #22
0
파일: util.py 프로젝트: a-luna/vigorish
def get_pitch_app_status_record(db_session, pitch_app_id):
    """Fetch the scrape-status record for a pitch appearance.

    Args:
        db_session: Passed to ``find_by_pitch_app_id``.
        pitch_app_id: Identifier of the pitch appearance to look up.

    Returns:
        ``Result.Ok`` wrapping the record, or ``Result.Fail`` when the
        lookup returns a falsy value.
    """
    status_record = db.PitchAppScrapeStatus.find_by_pitch_app_id(
        db_session, pitch_app_id)
    if not status_record:
        return Result.Fail(
            f"scrape_status_pitch_app does not contain an entry for pitch_app_id: {pitch_app_id}"
        )
    return Result.Ok(status_record)
예제 #23
0
 def check_current_status(self, game_date):
     """Decide whether ``game_date`` should be processed or skipped.

     Returns:
         ``Result.Ok()`` when the scrape condition is ``ALWAYS`` or when
         ``verify_all_bbref_boxscores_scraped_for_date`` returns a falsy
         value; ``Result.Fail("skip")`` otherwise.
     """
     if self.scrape_condition == ScrapeCondition.ALWAYS:
         return Result.Ok()
     already_scraped = db.DateScrapeStatus.verify_all_bbref_boxscores_scraped_for_date(
         self.db_session, game_date)
     if already_scraped:
         return Result.Fail("skip")
     return Result.Ok()
예제 #24
0
 def execute(self, trim_data_sets=True):
     """Collect pitch timing samples from all eligible games and summarize them.

     Args:
         trim_data_sets: Forwarded as ``trim`` to ``process_data_set``.

     Returns:
         ``Result.Ok`` wrapping a dict with the processed
         ``time_between_pitches``, ``time_between_at_bats`` and
         ``time_between_innings`` data sets, or ``Result.Fail`` when no game
         ids are found.
     """
     self.events.find_eligible_games_start()
     game_ids = db.Season_Game_PitchApp_View.get_all_bbref_game_ids_combined_no_missing_pfx(
         self.db_engine)
     if not game_ids:
         return Result.Fail(
             "No games meet the requirements for this process.")
     self.events.find_eligible_games_complete(game_ids)
     self.events.calculate_pitch_metrics_start()
     # Each metric is paired with the index of its samples in the sequence
     # returned by calc_pitch_metrics (indices 1 and 3 are not used here).
     sample_sources = (
         ("time_between_pitches", 0),
         ("time_between_at_bats", 2),
         ("time_between_innings", 4),
     )
     samples = {metric_name: [] for metric_name, _ in sample_sources}
     for num, game_id in enumerate(game_ids, start=1):
         combined_data = self.scraped_data.get_combined_game_data(game_id)
         if not combined_data:
             # Games without combined data do not report progress either.
             continue
         game_metrics = self.calc_pitch_metrics(combined_data)
         for metric_name, position in sample_sources:
             samples[metric_name].extend(game_metrics[position])
         self.events.calculate_pitch_metrics_progress(num)
     self.events.calculate_pitch_metrics_complete()
     metrics = {
         metric_name: self.process_data_set(metric_samples, trim=trim_data_sets)
         for metric_name, metric_samples in samples.items()
     }
     return Result.Ok(metrics)
예제 #25
0
 def check_current_status(self, game_date):
     """Decide whether ``game_date`` should be processed or skipped.

     Returns:
         ``Result.Ok()`` when the scrape condition is ``ALWAYS`` or when
         ``verify_brooks_daily_dashboard_scraped_for_date`` returns a falsy
         value; ``Result.Fail("skip")`` otherwise.
     """
     if self.scrape_condition == ScrapeCondition.ALWAYS:
         return Result.Ok()
     already_scraped = db.DateScrapeStatus.verify_brooks_daily_dashboard_scraped_for_date(
         self.db_session, game_date)
     if already_scraped:
         return Result.Fail("skip")
     return Result.Ok()
예제 #26
0
 def delete_from_s3(self, s3_key):  # pragma: no cover
     """Delete the object stored under ``s3_key`` in ``self.bucket_name``.

     Returns:
         ``Result.Ok()`` on success, or ``Result.Fail`` with the exception
         repr and error code when a ``ClientError`` is raised.
     """
     try:
         s3_object = self.s3_resource.Object(self.bucket_name, s3_key)
         s3_object.delete()
     except botocore.exceptions.ClientError as ex:
         error_code = ex.response["Error"]["Code"]
         return Result.Fail(f"{repr(ex)} (Error Code: {error_code})")
     else:
         return Result.Ok()
예제 #27
0
파일: util.py 프로젝트: a-luna/vig-api
def validate_file(local_file_path: Path, hash_file_path: Path) -> Result:
    """Compute the MD5 digest of ``local_file_path``.

    Returns ``Result.Fail`` when the file does not exist.

    NOTE(review): this definition appears to be truncated. ``hash_file_path``
    is never read and nothing is returned after hashing, despite the
    ``-> Result`` annotation. Presumably the digest is compared with the
    contents of ``hash_file_path`` - confirm against the complete source.
    """
    if not local_file_path.exists():
        return Result.Fail(f"Unable to locate file: {local_file_path}")
    md5 = hashlib.md5()
    # Feed the file to the hash in CHUNK_SIZE pieces instead of reading it
    # in a single call; the loop ends when read() returns empty bytes.
    with open(local_file_path, "rb") as f:
        while chunk := f.read(CHUNK_SIZE):
            md5.update(chunk)
예제 #28
0
def validate_folder_path(input_path: Union[Path, str]):
    """Confirm that ``input_path`` refers to an existing directory.

    Args:
        input_path: Path to check, as a ``str`` or ``Path``.

    Returns:
        ``Result.Ok`` wrapping the path as a ``Path`` object, or
        ``Result.Fail`` when the value is falsy, of another type, does not
        exist, is not a directory, or (on Windows) is a reserved path.
    """
    if not input_path:
        return Result.Fail("NoneType or empty string is not a valid folder path.")
    if not isinstance(input_path, (str, Path)):
        return Result.Fail(
            f'"input_path" parameter must be str or Path value (not "{type(input_path)}").'
        )
    # Strings are converted; Path objects are used as given.
    folderpath = Path(input_path) if isinstance(input_path, str) else input_path
    if not folderpath.exists():
        return Result.Fail(f'Directory does NOT exist: "{folderpath}"')
    if not folderpath.is_dir():
        return Result.Fail(f'The provided path is NOT a directory: "{folderpath}"')
    if is_windows() and folderpath.is_reserved():
        return Result.Fail(f'The provided path is reserved under Windows: "{folderpath}"')
    return Result.Ok(folderpath)
예제 #29
0
 def change_value(self, env_var_name, new_value):
     """Set an environment variable and rewrite/reload the dotenv file.

     Args:
         env_var_name: Name that must appear in ``ENV_VAR_NAMES``.
         new_value: Value stored in ``self.env_var_dict``.

     Returns:
         ``Result.Ok()`` after the file was written and read back, or
         ``Result.Fail`` when the name is not recognized.
     """
     if env_var_name in ENV_VAR_NAMES:
         self.env_var_dict[env_var_name] = new_value
         # Write the change, then re-read the file.
         self.write_dotenv_file()
         self.read_dotenv_file()
         return Result.Ok()
     return Result.Fail(
         f"{env_var_name} is not a recognized environment variable.")
예제 #30
0
 def write_config_file(self):
     """Serialize ``self.config_json`` and write it to ``self.config_filepath``.

     Returns:
         ``Result.Ok()`` on success, or ``Result.Fail`` carrying the repr of
         any exception raised while serializing or writing.
     """
     try:
         serialized = json.dumps(self.config_json, indent=2, sort_keys=False)
         self.config_filepath.write_text(serialized)
     except Exception as e:
         return Result.Fail(f"Error: {repr(e)}")
     else:
         return Result.Ok()