Пример #1
0
 def __extract_matches_list_of_series(self):
     """Scrape the series page and collect its valid matches.

     The series formats string is the text of the navigation header's
     first ``div`` up to the first ".". Every match row is parsed for its
     title, venue and outcome, checked with ``__validate_match`` and, when
     valid, wrapped in a ``Match`` and appended to ``self.__matches_list``.
     """
     page = Common.get_soup_object(self.__series_link)
     nav_header = page.find(
         'div', class_='cb-col-100 cb-col cb-nav-main cb-bg-white')
     series_formats = nav_header.find('div').text.split(".")[0]
     rows = page.find_all('div', class_='cb-col-60 cb-col cb-srs-mtchs-tm')
     for row in rows:
         title_tag = row.find('a', class_='text-hvr-underline')
         venue_tag = row.find('div')
         outcome_text, outcome = self.__extract_match_outcome(
             row.find('a', class_='cb-text-link'))
         is_valid, match_id, match_link, match_format = self.__validate_match(
             title_tag, venue_tag, outcome, series_formats)
         if not is_valid:
             # Rejected by validation: nothing to record for this row.
             continue
         winner, margin = Common.get_match_winning_team_and_margin(
             outcome, outcome_text)
         self.__matches_list.append(
             Match(match_id, title_tag.text, match_format, venue_tag.text,
                   outcome, Common.home_page + match_link, winner, margin))
Пример #2
0
 def __extract_series_list_in_calender_year(self):
     """Collect the valid series listed on the archive page for ``self.year``.

     Every ``a.text-hvr-underline`` anchor on the archive page is
     inspected. Anchors whose href contains "cricket-series" and passes
     ``Common.is_series_valid`` become ``Series`` objects appended to
     ``self.series_list``.
     """
     link = Common.home_page + "/cricket-scorecard-archives/" + str(self.year)
     soup = Common.get_soup_object(link)
     for series_block in soup.find_all('a', class_='text-hvr-underline'):
         series_link = series_block.get('href')
         # Tag.get returns None for an anchor without an href; skip it
         # rather than failing on the substring test.
         if not series_link or "cricket-series" not in series_link:
             continue
         if not Common.is_series_valid(series_link):
             continue
         # The series id is the third "/"-separated piece of the href.
         series_id = series_link.split("/")[2]
         series_title = series_block.text
         self.series_list.append(
             Series(series_id, series_title, self.year,
                    Common.home_page + series_link))
Пример #3
0
 def __extract_match_data(self, category):
     """Load the scorecard page and fill in this match's details.

     Resolves the series when none was supplied, derives the match format
     and, only for a recognised format and a match that passes
     ``__is_valid``, extracts teams, short names and the match time before
     setting ``self.is_valid`` to True.
     """
     scorecard_link = self.link.replace("live-cricket-scores",
                                        "live-cricket-scorecard")
     page = Common.get_soup_object(scorecard_link)
     if self.series is None:
         self.series = self.__extract_series_object(page, category)
     self.format = Common.get_match_format(self.title, self.series.format)
     if self.format is None:
         # Unrecognised format: leave the match flagged as invalid.
         return
     self.__extract_match_info(page)
     if self.__is_valid() is not True:
         return
     self.__extract_teams(page)
     self.__extract_teams_short_names()
     self.time = self.__get_match_time()
     self.is_valid = True
Пример #4
0
    def __init__(self, match_id, title, format, venue, result, match_link, winning_team, margin):
        """Create a match record from values scraped off a series page.

        ``title`` is split on the first comma and then on " vs " to obtain
        the two team names. ``winning_team`` is only consulted when
        ``result`` equals 'WIN'; otherwise the winning team stays None.
        """
        self.__logger = logging.getLogger(__name__)

        self.__id = match_id
        self.__title = title
        self.__format = format
        self.__venue = venue
        self.__result = result
        self.__match_link = match_link
        self.__win_margin = margin
        self.__date = 0  # epoch time

        # Maps team name -> short name; both start out as the full name:
        # {'team_1_name': 'team_1_short_name', 'team_2_name': 'team_2_short_name'}
        playing_teams = title.split(",")[0].split(" vs ")
        first_team, second_team = playing_teams[0], playing_teams[1]
        self.__playing_teams = {first_team: first_team, second_team: second_team}

        # The scraped winner text can differ from the team names in the title,
        # hence the close-match lookup. Examples:
        #   India Women Red vs India Women Blue, India Red Won by 7 Wickets
        #     https://www.cricbuzz.com/cricket-scores/20732
        #   India Women Blue vs India Women Green, India Green Won by 7 Wickets
        #     https://www.cricbuzz.com/cricket-scores/20733
        if result == 'WIN':
            self.__winning_team = Common.get_close_match(winning_team, playing_teams)
        else:
            self.__winning_team = None

        self.__match_info = {}
        self.__match_squad = {}
        self.__innings_scores = []
        self.__per_innings_head_to_head_data = []
Пример #5
0
 def insert(self, bowler_id, match_id, innings_num, wickets_taken,
            overs_bowled, runs_given, economy, team_id):
     """Insert one bowling-figures row into ``bowling_stats``.

     ``overs_bowled`` is converted with ``Common.convert_overs_to_balls``
     before being stored; every other argument is passed through
     unchanged as a query parameter.
     """
     row = (
         bowler_id,
         match_id,
         innings_num,
         wickets_taken,
         Common.convert_overs_to_balls(overs_bowled),
         runs_given,
         economy,
         team_id,
     )
     self.cursor.execute(
         """INSERT INTO bowling_stats VALUES (%s, %s, %s, %s, %s, %s, %s, %s)""",
         row)
Пример #6
0
    def __init__(self, title, venue, link, series_object, category):
        """Create a scheduled match and immediately scrape its details.

        ``title`` is split on the first comma and then on " vs " to obtain
        the two team names. Construction ends by calling
        ``__extract_match_data(category)``.
        """
        self.id = Common.get_id_from_link(link)
        self.title = title
        self.venue = venue
        self.link = link
        self.series = series_object
        self.format = None
        self.time = None
        self.is_valid = False
        self.match_info = {}

        # teams maps each team name to its details:
        # {'team-1': {'short_name': ..., 'squad': [...]}, 'team-2': {...}}
        # The short name starts out equal to the full name.
        playing_teams = title.split(",")[0].split(" vs ")
        self.teams = {}
        for team_name in (playing_teams[0], playing_teams[1]):
            self.teams[team_name] = {'short_name': team_name, 'squad': []}

        self.__extract_match_data(category)
Пример #7
0
 def __extract_team_squad(self, squad_block):
     """Return a list of ``Player`` objects for the player links in ``squad_block``."""

     def to_player(anchor):
         # The player id is the third "/"-separated piece of the link.
         player_id = anchor.get('href').split("/")[2]
         corrected_name = Common.correct_player_name(anchor.text)
         return Player(corrected_name, player_id)

     anchors = squad_block.find_all(
         'a', class_='margin0 text-black text-hvr-underline')
     return [to_player(anchor) for anchor in anchors]
Пример #8
0
    def __extract_schedule(self):
        """Scrape the upcoming-series page for matches listed under ``self.date``.

        For each day block whose leading text equals ``self.date``, every
        series block is scanned for match entries. Each entry becomes a
        ``Match``; valid matches are added to their series, and a series
        not yet present in ``self.series_data`` is taken from the first
        valid match and stored there under the series title.
        """
        page = Common.get_soup_object(
            "https://www.cricbuzz.com/cricket-schedule/upcoming-series/")
        day_blocks = page.find_all('div', {
            'class': 'cb-col-100 cb-col',
            'ng-show': True
        })
        for day_block in day_blocks:
            if day_block.next_element.text != self.date:
                continue
            category_type = Common.get_category_type(day_block.get('ng-show'))
            for series_block in day_block.find_all(
                    'div', class_='cb-col-100 cb-col'):
                series_title = series_block.next_element.text
                if series_title in self.series_data:
                    series_object = self.series_data[series_title]
                else:
                    series_object = None

                match_blocks = series_block.find_all(
                    'div', 'cb-ovr-flo cb-col-60 cb-col cb-mtchs-dy-vnu ')
                if not match_blocks:
                    # A series with several matches on the same day uses a
                    # different class list for its match rows.
                    match_blocks = series_block.find_all(
                        'div',
                        'cb-ovr-flo cb-col-60 cb-col cb-mtchs-dy-vnu cb-adjst-lst'
                    )

                for match_block in match_blocks:
                    title_anchor = match_block.find('a', href=True)
                    match_title = title_anchor.text
                    match_link = Common.home_page + title_anchor.get('href')
                    match_venue = match_block.find('div').text

                    match_object = Match(match_title, match_venue, match_link,
                                         series_object, category_type)
                    if not match_object.is_valid:
                        continue
                    if series_object is None:
                        # First valid match of an unseen series supplies
                        # the series object for the remaining matches.
                        series_object = match_object.get_series_object()
                        self.series_data[series_title] = series_object
                    series_object.add_match(match_object)
Пример #9
0
    def __init__(self, link, match_squad_ref):
        """Load the commentary page at ``link`` and index the match squad.

        ``match_squad_ref`` is iterated as ``{team: players}``; it is
        flattened into a ``{player: team}`` lookup. Commentary paragraphs
        are stored in reverse page order, each split on commas.
        """
        self.__short_name_to_full_name_map = {}
        # One independent cache dict per slot (four slots).
        self.__per_innings_head_to_head_object_cache = [{} for _ in range(4)]

        # {"player_name" : "team_name", ......}
        self.__local_squad = {
            player: team
            for team in match_squad_ref
            for player in match_squad_ref[team]
        }

        page = Common.get_soup_object(link)
        paragraphs = page.find_all('p', class_='cb-col cb-col-90 cb-com-ln')
        self.__full_match_commentary = [
            paragraph.text.split(',') for paragraph in reversed(paragraphs)
        ]
Пример #10
0
    def __extract_innings_batting_scores(self, innings_batting_block):
        """Return a ``BatsmanScore`` for each batsman row of an innings.

        Rows without a player-name cell are skipped. Runs come from the
        bold right-aligned cell; balls, fours and sixes are the first three
        plain right-aligned cells, in that order.
        """
        scores = []
        rows = innings_batting_block.find_all('div', class_='cb-col cb-col-100 cb-scrd-itms')
        for row in rows:
            name_cell = row.find('div', class_='cb-col cb-col-27 ')
            if name_cell is None:
                continue
            batsman = Common.correct_player_name(name_cell.text)
            runs_cell = row.find('div', class_='cb-col cb-col-8 text-right text-bold')
            runs = runs_cell.text.strip()
            # Cells hold (balls, fours, sixes, strikeRate); strike rate is unused.
            stat_cells = row.find_all('div', class_='cb-col cb-col-8 text-right')
            balls = stat_cells[0].text.strip()
            fours = stat_cells[1].text.strip()
            sixes = stat_cells[2].text.strip()
            scores.append(BatsmanScore(batsman, runs, balls, fours, sixes))
        return scores
Пример #11
0
 def __extract_match_info_squad_and_scores(self, series_squad_ref):
     """Fetch the scorecard page and extract info, squad and innings scores.

     Match info and squad are extracted first. Then every ``div`` carrying
     an ``id`` attribute is treated as one innings: its first header block
     supplies the total and batting scores, its second the bowling scores.
     Each resulting score object is appended to ``self.__innings_scores``.
     """
     page = Common.get_soup_object(
         Common.home_page + "/api/html/cricket-scorecard/" + str(self.__id))
     self.__extract_match_info(page)
     self.__extract_match_squad(page, series_squad_ref)

     for innings_num, innings_data in enumerate(page.find_all('div', id=True)):
         sections = innings_data.find_all('div', class_='cb-col cb-col-100 cb-ltst-wgt-hdr')
         batting_block, bowling_block = sections[0], sections[1]
         innings_score = self.__extract_innings_total_score(
             batting_block, innings_num, self.__playing_teams)
         innings_score.set_batting_scores(
             self.__extract_innings_batting_scores(batting_block))
         innings_score.set_bowling_scores(
             self.__extract_innings_bowling_scores(bowling_block))
         self.__innings_scores.append(innings_score)
Пример #12
0
 def __extract_player_profile(self):
     """Read role, batting style and bowling style from the player's profile page.

     Label and value cells are paired positionally. Any of the three
     fields not found on the page keeps the placeholder '--'.
     """
     profile = {'Role': '--', 'Batting Style': '--', 'Bowling Style': '--'}
     page = Common.get_soup_object(
         "http://www.cricbuzz.com/profiles/" + str(self.__id))
     label_tags = page.find_all(
         'div', class_="cb-col cb-col-40 text-bold cb-lst-itm-sm")
     value_tags = page.find_all('div', "cb-col cb-col-60 cb-lst-itm-sm")
     for label_tag, value_tag in zip(label_tags, value_tags):
         label = label_tag.text.strip()
         if label in profile:
             # Only the three known fields are kept; other labels are ignored.
             profile[label] = value_tag.text.strip()
     self.__role = profile['Role']
     self.__batting_style = profile['Batting Style']
     self.__bowling_style = profile['Bowling Style']
Пример #13
0
 def __validate_match(self, match_title_block, match_venue_block,
                      match_outcome, series_formats):
     """Decide whether a scraped match row should be processed.

     Returns ``[is_valid, match_id, match_link, match_format]``. The id,
     link and format are None unless the row has a "cricket-scores" title
     link, a venue, an outcome and a recognised format. A match whose id
     is already in the DB is reported with ``is_valid`` False but with its
     id, link and format filled in.
     """
     has_scores_link = (match_title_block is not None
                        and "cricket-scores" in match_title_block.get('href'))
     if not has_scores_link or match_venue_block is None or match_outcome is None:
         return [False, None, None, None]

     match_format = Common.get_match_format(match_title_block.text,
                                            series_formats)
     if match_format is None:
         return [False, None, None, None]

     match_link = match_title_block.get('href')
     # The match id is the third "/"-separated piece of the link.
     match_id = match_link.split("/")[2]
     already_stored = self.__db_match_table.check_match_id(match_id)
     if already_stored:
         self.__logger.info(
             "Skipping {}. Available in DB".format(match_id))
     return [not already_stored, match_id, match_link, match_format]
Пример #14
0
    def __extract_innings_bowling_scores(self, innings_bowling_block):
        """Return a ``BowlerScore`` for each bowler row of an innings.

        Rows without a player-name cell are skipped, as are rows whose
        economy cell is empty.
        """
        scores = []
        rows = innings_bowling_block.find_all('div', class_='cb-col cb-col-100 cb-scrd-itms ')
        for row in rows:
            name_cell = row.find('div', class_='cb-col cb-col-40')
            if name_cell is None:
                continue
            bowler = Common.correct_player_name(name_cell.text)
            wickets_cell = row.find('div', class_='cb-col cb-col-8 text-right text-bold')
            wickets = wickets_cell.text.strip()
            # Runs given and economy share one cell class.
            wide_cells = row.find_all('div', class_='cb-col cb-col-10 text-right')
            runs_given = wide_cells[0].text.strip()
            economy = wide_cells[1].text.strip()
            # Overs bowled, maiden overs, no balls, wide balls; only overs is used.
            narrow_cells = row.find_all('div', class_='cb-col cb-col-8 text-right')
            overs = narrow_cells[0].text.strip()

            if not economy:
                # Reason : Wasim Jaffer : https://www.cricbuzz.com/live-cricket-scorecard/19085/vidarbha-vs-chhattisgarh-group-d-ranji-trophy-2017-18
                continue
            scores.append(BowlerScore(bowler, overs, wickets, runs_given, economy))
        return scores
Пример #15
0
 def __extract_match_squad(self, soup, series_squad_ref):
     """Populate ``self.__match_squad`` from the squad rows of a scorecard page.

     Rows without player links are headings: a heading containing "Squad"
     starts a new team (the text before "Squad", stripped). Rows with
     player links add those players to the current team. ``Player``
     objects are shared through ``series_squad_ref`` (team -> name ->
     Player), which is updated in place with any team or player not yet
     present.

     Raises:
         Exception: if player links appear before any "Squad" heading;
             the message carries the match link.
     """
     squad_tags = soup.find_all('div',
                                {"class" : ["cb-col cb-col-100 cb-minfo-tm-nm",
                                            "cb-col cb-col-100 cb-minfo-tm-nm cb-minfo-tm2-nm"]})
     team_title = ""
     for squad_tag in squad_tags:
         player_blocks = squad_tag.find_all('a', class_='margin0 text-black text-hvr-underline')
         if not player_blocks:
             heading = squad_tag.text
             # Only a "... Squad" row starts a new team. Any other link-less
             # row must not overwrite the current team name, otherwise the
             # next player row is looked up under a key that was never
             # created and fails with KeyError.
             if "Squad" in heading:
                 team_title = heading.split("Squad")[0].strip()
                 self.__match_squad[team_title] = {}
                 series_squad_ref.setdefault(team_title, {})
             continue

         if not team_title:
             raise Exception("match_link : {}".format(self.__match_link))

         series_players = series_squad_ref[team_title]
         match_players = self.__match_squad[team_title]
         for player_block in player_blocks:
             # The player id is the third "/"-separated piece of the link.
             player_id = player_block.get('href').split("/")[2]
             player_name = Common.correct_player_name(player_block.text)
             if player_name not in series_players:
                 series_players[player_name] = Player(player_name, player_id)
             match_players[player_name] = series_players[player_name]
Пример #16
0
 def __get_player_full_name_from_short_name(self, name):
     """Return the squad name that best matches ``name``, caching each lookup."""
     cache = self.__short_name_to_full_name_map
     if name in cache:
         return cache[name]
     full_name = Common.get_close_match(name, self.__local_squad.keys())
     cache[name] = full_name
     return full_name
Пример #17
0
 def __extract_match_outcome(self, match_outcome_block):
     if match_outcome_block is not None:
         match_outcome = Common.get_match_outcome(match_outcome_block.text)
     else:
         match_outcome = None
     return [match_outcome_block.text, match_outcome]
Пример #18
0
 def get_schedule(self):
     """Return the list of series from a ``Schedule`` built for the current date."""
     return Schedule(Common.get_date_now()).get_list_of_series()
Пример #19
0
 def __get_match_time(self):
     """Return the match time as computed by ``Common.get_epoch_time_from_gmt``.

     The input string is the part of ``match_info['Date']`` before the
     first " - " (stripped), a space, and ``match_info['Time']``.
     """
     info = self.match_info
     start_date = info['Date'].split(" - ")[0].strip()
     start_time = info['Time']
     return Common.get_epoch_time_from_gmt(' '.join((start_date, start_time)))