def create_association(self, bhv_id):
    """Fetch the association page for *bhv_id* and store the association.

    The page is downloaded and its name is parsed from the DOM. The
    function returns early without storing anything when

    * ``Association.get_association_abbreviation`` raises ``KeyError``
      for the parsed name (a warning is logged), or
    * the ``associations`` option is non-empty and does not contain
      *bhv_id*.

    Otherwise the association is fetched or created and
    ``self.create_district`` is called for every ``orgID`` ``<option>``
    except the first one.
    """
    source_url = Association.build_source_url(bhv_id)
    dom = logic.get_html(source_url)
    name = parsing.parse_league_name(dom)

    try:
        abbreviation = Association.get_association_abbreviation(name)
    except KeyError:
        LOGGER.warning("No abbreviation for association '%s'", name)
        return

    selected = self.options['associations']
    if selected and bhv_id not in selected:
        LOGGER.debug('SKIPPING Association (options): %s %s', bhv_id, name)
        return

    association, created = Association.objects.get_or_create(
        name=name, abbreviation=abbreviation, bhv_id=bhv_id)
    LOGGER.info(
        'CREATED Association: %s' if created
        else 'EXISTING Association: %s',
        association)

    for item in dom.xpath('//select[@name="orgID"]/option[position()>1]'):
        try:
            self.create_district(item, association)
        except Exception:
            # A failing district is reported through the 'mail' logger and
            # the loop carries on with the remaining options.
            logging.getLogger('mail').exception("Could not create District")
def update_league_names(*_):
    """Re-read every league's source page and save its name if it changed.

    Any positional arguments are accepted and ignored.
    """
    for league in League.objects.all():
        dom = logic.get_html(league.source_url())
        parsed_name = parsing.parse_league_name(dom)

        # Nothing to do when the stored name already matches the page.
        if parsed_name == league.name:
            continue

        league.name = parsed_name
        league.save()
        LOGGER.info('RENAMED LEAGUE: %s', league)
def update_league_names(*_):
    """Re-read every league's source page and save its name if it changed.

    The page text is fetched with ``http.get_text`` and turned into a DOM
    with ``parsing.html_dom`` before the name is parsed. Any positional
    arguments are accepted and ignored.
    """
    for league in League.objects.all():
        page_text = http.get_text(league.source_url())
        parsed_name = parsing.parse_league_name(parsing.html_dom(page_text))

        # Nothing to do when the stored name already matches the page.
        if parsed_name == league.name:
            continue

        league.name = parsed_name
        league.save()
        LOGGER.info('RENAMED LEAGUE: %s', league)
def create_league(self, league_link, district, season):
    """Store the league behind *league_link* and create its teams.

    The league is skipped (a message is written to ``self.stdout``) when:

    * the ``leagues`` option is non-empty and does not contain its id,
    * the ``youth`` option is falsy and either the abbreviation starts
      with ``m``, ``w``, ``g`` or ``u`` or ``League.is_youth_league``
      accepts the parsed name,
    * the page has no team links or no game rows, or
    * there are fewer game rows than ``n * (n - 1)`` for ``n`` team links.

    Otherwise the league is fetched or created for *district* and
    *season*, and ``self.create_team`` is called for every team link.
    """
    abbreviation = league_link.text
    bhv_id = parsing.parse_league_bhv_id(league_link)

    selected = self.options['leagues']
    if selected and bhv_id not in selected:
        self.stdout.write(
            'SKIPPING League (options): {} {}'.format(bhv_id, abbreviation))
        return

    # Cheap check on the abbreviation first, before the page is fetched.
    if (abbreviation[:1] in ('m', 'w', 'g', 'u')
            and not self.options['youth']):
        self.stdout.write(
            'SKIPPING League (youth league): {} {}'.format(
                bhv_id, abbreviation))
        return

    dom = logic.get_html(League.build_source_url(bhv_id))
    name = parsing.parse_league_name(dom)

    if League.is_youth_league(name) and not self.options['youth']:
        self.stdout.write(
            'SKIPPING League (youth league): {} {}'.format(bhv_id, name))
        return

    # Team links are looked up in the third column, then in the second.
    team_links = dom.xpath(
        '//table[@class="scoretable"]/tr[position() > 1]/td[3]/a')
    if not team_links:
        team_links = dom.xpath(
            '//table[@class="scoretable"]/tr[position() > 1]/td[2]/a')
    if not team_links:
        self.stdout.write(
            'SKIPPING League: {} {} (no team table)'.format(bhv_id, name))
        return

    game_rows = parsing.parse_game_rows(dom)
    if not game_rows:
        self.stdout.write(
            'SKIPPING League (no games): {} {}'.format(bhv_id, name))
        return

    team_count = len(team_links)
    if len(game_rows) < team_count * (team_count - 1):
        self.stdout.write(
            'SKIPPING League (few games): {} {}'.format(bhv_id, abbreviation))
        return

    league, league_created = League.objects.get_or_create(
        name=name, abbreviation=abbreviation, district=district,
        season=season, bhv_id=bhv_id)
    status_template = (
        'CREATED League: {}' if league_created else 'EXISTING League: {}')
    self.stdout.write(status_template.format(league))

    for team_link in team_links:
        self.create_team(team_link, league)
def scrape_league(league_link, district, season, options):
    """Store the league behind *league_link* and scrape its teams.

    The league is skipped (with a debug log entry) when:

    * its id is in ``BUGGED_LEAGUES``,
    * ``options['leagues']`` is non-empty and does not contain its id,
    * its abbreviation is exactly ``'TEST'``,
    * its parsed name contains one of the irrelevant-league markers,
    * the page has no team links or no game rows, or
    * ``League.is_youth`` accepts it and ``options['youth']`` is falsy.

    A ``LeagueName`` entry for the id, if one exists, replaces the parsed
    name before the youth check. Unless ``options['skip_teams']`` is
    truthy, every team link is passed to ``scrape_team`` and the parsed
    retirements are handed to ``Team.check_retirements``.
    """
    abbreviation = league_link.text
    bhv_id = parsing.parse_league_bhv_id(league_link)

    if bhv_id in BUGGED_LEAGUES:
        LOGGER.debug('SKIPPING League (ignore list): %s %s',
                     bhv_id, abbreviation)
        return

    selected = options['leagues']
    if selected and bhv_id not in selected:
        LOGGER.debug('SKIPPING League (options): %s %s', bhv_id, abbreviation)
        return

    if abbreviation == 'TEST':
        LOGGER.debug('SKIPPING League (test league): %s %s',
                     bhv_id, abbreviation)
        return

    page_text = http.get_text(League.build_source_url(bhv_id))
    dom = parsing.html_dom(page_text)
    name = parsing.parse_league_name(dom)

    # Substrings of league names that mark a league as not worth storing.
    irrelevant_markers = (
        'Platzierungsrunde',
        'Kreisvergleichsspiele',
        'pokal',
        'Pokal',
        'Trophy',
        'Vorbereitung',
        'F-FS',
        'M-FS',
        'Quali',
        'Freiwurf',
        'Maxi',
        'turnier',
        'wettbewerb',
        'Test',
        'Planung',
        'planung',
    )
    if any(marker in name for marker in irrelevant_markers):
        LOGGER.debug('SKIPPING League (name): %s %s', bhv_id, name)
        return

    team_links = parsing.parse_team_links(dom)
    if not team_links:
        LOGGER.debug('SKIPPING League (no team table): %s %s', bhv_id, name)
        return

    game_rows = parsing.parse_game_rows(dom)
    if not game_rows:
        LOGGER.debug('SKIPPING League (no games): %s %s', bhv_id, name)
        return

    # A stored LeagueName for this id takes precedence over the parsed name.
    try:
        name = LeagueName.objects.get(bhv_id=bhv_id).name
    except LeagueName.DoesNotExist:
        pass

    if League.is_youth(abbreviation, name) and not options['youth']:
        LOGGER.debug('SKIPPING League (youth league): %s %s %s',
                     bhv_id, abbreviation, name)
        return

    league, league_created = League.objects.get_or_create(
        name=name, abbreviation=abbreviation, district=district,
        season=season, bhv_id=bhv_id)
    LOGGER.info(
        'CREATED League: %s' if league_created else 'EXISTING League: %s',
        league)

    if options['skip_teams']:
        return

    for team_link in team_links:
        scrape_team(team_link, league)

    retirements = parsing.parse_retirements(dom)
    Team.check_retirements(retirements, league, LOGGER)
def create_league(self, league_link, district, season):
    """Store the league behind *league_link* and create its teams.

    The league is skipped (with a debug log entry) when:

    * the ``leagues`` option is non-empty and does not contain its id,
    * its abbreviation is exactly ``'TEST'``,
    * its parsed name contains one of the excluded markers,
    * the page has no team links or no game rows,
    * there are fewer game rows than ``n * (n - 1)`` for ``n`` team
      links, or
    * ``League.is_youth`` accepts it and the ``youth`` option is falsy.

    For a handful of ids the parsed name is replaced by a fixed name
    before the youth check. Unless the ``skip_teams`` option is truthy,
    every team link is passed to ``create_team`` and the parsed
    retirements are handed to ``check_retirements``.
    """
    abbreviation = league_link.text
    bhv_id = parsing.parse_league_bhv_id(league_link)

    selected = self.options['leagues']
    if selected and bhv_id not in selected:
        LOGGER.debug('SKIPPING League (options): %s %s', bhv_id, abbreviation)
        return

    if abbreviation == 'TEST':
        LOGGER.debug('SKIPPING League (test league): %s %s',
                     bhv_id, abbreviation)
        return

    dom = logic.get_html(League.build_source_url(bhv_id))
    name = parsing.parse_league_name(dom)

    # Substrings of league names that mark a league as not worth storing.
    excluded_markers = (
        'Platzierungsrunde',
        'Meister',
        'Freiwurf',
        'Maxi',
        'turnier',
        'wettbewerb',
        'pokal',
        'Test',
    )
    if any(marker in name for marker in excluded_markers):
        LOGGER.debug('SKIPPING League (name): %s %s', bhv_id, name)
        return

    team_links = parsing.parse_team_links(dom)
    if not team_links:
        LOGGER.debug('SKIPPING League (no team table): %s %s', bhv_id, name)
        return

    game_rows = parsing.parse_game_rows(dom)
    if not game_rows:
        LOGGER.debug('SKIPPING League (no games): %s %s', bhv_id, name)
        return

    team_count = len(team_links)
    if len(game_rows) < team_count * (team_count - 1):
        LOGGER.debug('SKIPPING League (few games): %s %s',
                     bhv_id, abbreviation)
        return

    # Fixed names for specific league ids; all other ids keep the parsed name.
    name_overrides = {
        5380: "Männer Kreisliga 2-1",
        5381: "Männer Kreisliga 2-2",
        7424: "Männer Kreisliga C Staffel 3",
        50351: "gemischte Jugend D Kreisliga A Staffel 1",
        52853: "männliche Jugend C Bezirksliga Staffel 2",
        58111: "Frauen Oberliga Rheinland-Pfalz/Saar 1",
        58116: "Frauen Oberliga Rheinland-Pfalz/Saar 2",
    }
    name = name_overrides.get(bhv_id, name)

    if League.is_youth(abbreviation, name) and not self.options['youth']:
        LOGGER.debug('SKIPPING League (youth league): %s %s %s',
                     bhv_id, abbreviation, name)
        return

    league, league_created = League.objects.get_or_create(
        name=name, abbreviation=abbreviation, district=district,
        season=season, bhv_id=bhv_id)
    LOGGER.info(
        'CREATED League: %s' if league_created else 'EXISTING League: %s',
        league)

    if self.options['skip_teams']:
        return

    for team_link in team_links:
        create_team(team_link, league)

    retirements = parsing.parse_retirements(dom)
    check_retirements(retirements, league)