def ParsePersonalBestsTable( swimmer, table, course_code, output ):
  # NOTE: 'swims' is assumed to be a list defined at module scope in the
  # original source; it is not declared in this function.
  for row in TableRows( table, pbs_headers_of_interest ):
    # The event as text is in row[3]
    event = Event.create_from_str( str( row[3] ), course_code )
    if event is None:
      logging.error( "Failed to parse event: " + str( row[3] ) + " " + course_code )
    else:
      swims.append( _create_swim( swimmer, event, row, output ) )
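# For illustration only: a minimal sketch of what a TableRows helper could look
# like, assuming an lxml table element and that the headers-of-interest argument
# is a collection of column-header strings. This is an assumption, not the
# project's actual implementation, which may yield richer cell wrappers.
def _table_rows_sketch(table, headers_of_interest):
  rows = table.findall('.//tr')
  header_texts = [cell.text_content().strip() for cell in rows[0]]
  wanted = [i for i, text in enumerate(header_texts) if text in headers_of_interest]
  for tr in rows[1:]:
    cells = list(tr)
    if len(cells) == len(header_texts):
      yield [cells[i] for i in wanted]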
def __init__( self, line ):
  tokens = line.split( "|" )
  num_tokens = len( tokens )
  assert (num_tokens == 2) or (num_tokens == 3)
  self.event_code = Event.create_from_str( tokens[0], "S" ).get_short_course_event_code()
  self.time = float( RaceTime( tokens[1] ) )
  if num_tokens == 3:
    self.is_nt = (tokens[2] == 'nt\n')
  else:
    self.is_nt = False
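# As a concrete illustration, the constructor above expects a pipe-delimited
# line: event name, time, and an optional 'nt' marker. Lines are assumed to
# arrive with their trailing newline intact, which is why the third field is
# compared against 'nt\n'. The event name and time below are made up.
example_line = "50m Freestyle|28.45|nt\n"
print(example_line.split("|"))  # ['50m Freestyle', '28.45', 'nt\n']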
def _parse_spreadsheet_data( spreadsheet_data ):
  rows = spreadsheet_data.split( '\n' )
  num_events = len( short_course_events )
  nt_times_by_event = [ None ] * num_events
  for row in rows:
    columns = row.split( '\t' )
    # Parse the event name
    event_code = Event.create_from_str( columns[0], 'S' ).event_code
    if len( columns ) != 9:
      raise RuntimeError( "Unexpected number of columns in spreadsheet data" )
    nt_times_for_event = []
    nt_times_by_event[ event_code ] = nt_times_for_event
    for i in range( 1, 9 ):
      if len( columns[i] ) == 0:
        nt_times_for_event.append( None )
      else:
        nt_times_for_event.append( float( RaceTime( columns[i] ) ) )
  return nt_times_by_event
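# A minimal usage sketch, assuming the spreadsheet data arrives as
# tab-separated rows: an event name followed by eight time columns, where a
# blank column means no time is defined for that age. The events and times
# below are made up.
example_data = (
    "50m Freestyle\t45.0\t42.5\t40.0\t38.0\t36.0\t34.5\t33.0\t31.5\n"
    "100m Freestyle\t\t95.0\t90.0\t85.0\t80.0\t76.0\t72.5\t70.0"
)
nt_times_by_event = _parse_spreadsheet_data(example_data)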
def _parse_spreadsheet_data(spreadsheet_data):
  rows = spreadsheet_data.split('\n')
  # Assumed: num_events comes from the module-level short_course_events list,
  # as in the sibling parsers.
  num_events = len(short_course_events)
  seconds_per_point_by_event = [None] * num_events
  for row in rows:
    columns = row.split('\t')
    # Parse the event name
    event_code = Event.create_from_str(columns[0], 'S').event_code
    if len(columns) != 9:
      raise RuntimeError("Unexpected number of columns in spreadsheet data")
    seconds_per_point_for_event = []
    seconds_per_point_by_event[event_code] = seconds_per_point_for_event
    for i in range(1, 9):
      if len(columns[i]) == 0:
        seconds_per_point_for_event.append(None)
      else:
        seconds_per_point_for_event.append(float(columns[i]))
  return seconds_per_point_by_event
def _parse_spreadsheet_data( spreadsheet_data ):
  rows = spreadsheet_data.split( '\n' )
  num_events = len( short_course_events )
  qt_by_event = [ None ] * num_events
  expected_num_columns = _max_age - _min_age + 2
  for row in rows:
    columns = row.split( '\t' )
    # Parse the event name
    event_code = Event.create_from_str( columns[0], 'S' ).event_code
    if len( columns ) != expected_num_columns:
      raise RuntimeError( "Unexpected number of columns in spreadsheet data" )
    qt_for_event = []
    qt_by_event[ event_code ] = qt_for_event
    for i in range( 1, expected_num_columns ):
      if len( columns[i] ) == 0:
        qt_for_event.append( None )
      else:
        qt_for_event.append( float( RaceTime( columns[i] ) ) )
  return qt_by_event
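# For example, with _min_age = 9 and _max_age = 16 (illustrative values only),
# expected_num_columns works out to 9: one event-name column plus one
# qualifying time for each age from 9 to 16 inclusive.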
def parse_event(tokens, course_code):
  # Strip the fixed 12-character 'Male/Female ' prefix from the heading text.
  event_str = tokens[0][12:]
  return Event.create_from_str(event_str, course_code)
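# Hypothetical usage, assuming rankings headings of the form
# "Male/Female 50m Freestyle": the [12:] slice drops the twelve-character
# "Male/Female " prefix, leaving just the event text.
event = parse_event(["Male/Female 50m Freestyle"], "L")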
def scrape_meet( asa_meet_code, page_number, meet_name, date, course_code ):
  logging.info( "Attempting to parse meet " + meet_name + ", meet code: " +
                str( asa_meet_code ) + ", page: " + str( page_number ) )
  # Load a meet page from a URL like this...
  # https://www.swimmingresults.org/showmeetsbyclub/index.php?meetcode=19611&targetclub=WINNCHRN
  url = "https://www.swimmingresults.org/showmeetsbyclub/index.php?meetcode=" + \
        str( asa_meet_code ) + "&targetclub=WINNCHRN&page=" + str( page_number )
  page = helpers.FetchUrl( url )
  if page is None:
    logging.error( "Failed to get page " + url )
    return 503
  tree = html.fromstring( page )
  meet_has_been_parsed( asa_meet_code )
  try:
    table = tree.get_element_by_id( "rankTable" )
  except KeyError:
    logging.info( "No rankTable for " + url + ". Presuming no Winsford swimmers at that meet" )
    return

  if page_number == 1:
    # When scraping the first page, one of our jobs is to count how many other
    # pages there are and add tasks to scrape those pages
    num_pages = scrape_num_pages( tree )
    logging.info( "Meet contains " + str( num_pages ) + " pages" )
    date_str = date.strftime( "%d/%m/%y" )
    for i in range( 2, num_pages + 1 ):
      logging.info( "Queuing update of page " + str( i ) + " of " + meet_name )
      taskqueue.add( url='/admin/scrape_meet',
                     params={ 'asa_meet_code': str( asa_meet_code ),
                              'meet_name': meet_name,
                              'date': date_str,
                              'course_code': course_code,
                              'page': str( i ) } )

  swimmers_checked = set()
  update_swimmer_list = False
  for row in TableRows( table, _meet_headers_of_interest ):
    # First we look at the swimmer.
    # Is it one we've already seen while scraping this meet, or is it a new one?
    # If it's a new one, is it a swimmer that's in our database?
    # Perhaps it's a swimmer that's in our database as Cat 1 and needs upgrading.
    asa_number = int( row[0].text )
    if asa_number not in swimmers_checked:
      swimmers_checked.add( asa_number )
      swimmer = Swimmer.get( "Winsford", asa_number )
      if swimmer is None:
        swimmer = SwimmerCat1.get( "Winsford", asa_number )
        if swimmer is None:
          # This looks like a new Winsford swimmer that isn't in the database.
          # Add a task to add them.
          logging.info( "Found new Winsford swimmer: " + str( asa_number ) +
                        ". Adding task to scrape." )
          taskqueue.add( url='/admin/update_swimmers',
                         params={ 'name_search': str( asa_number ) } )
          #QueueUpdateSwimsForSwimmer( str(asa_number) )
          update_swimmer_list = True
        else:
          # This is a swimmer that's in our database as Cat1.
          # Add a task to upgrade them.
          logging.info( "Found new Cat 2 Winsford swimmer: " + str( asa_number ) +
                        ". Adding task to upgrade." )
          taskqueue.add( url='/admin/check_for_swimmer_upgrade',
                         params={ 'asa_number': str( asa_number ) } )
          update_swimmer_list = True
      else:
        logging.info( "Found existing Winsford swimmer: " + swimmer.full_name() )

  if update_swimmer_list:
    taskqueue.add( url='/admin/update_swimmer_list' )

  swims_for_swimmer = {}
  for row in TableRows( table, _meet_headers_of_interest ):
    # Now look at the actual swims.
    # If there's a swim link, then that means there are some splits. In those
    # cases we also add a task to parse the splits and add them to the Swim.
    asa_number = int( row[0].text )
    event_str = row[1].text
    date_of_birth = helpers.ParseDate_dmy( row[2].text )
    race_time = float( RaceTime( row[3].text ) )
    event = Event.create_from_str( event_str, course_code )
    asa_swim_id = get_asa_swim_id( row[3] )
    swim = Swim.create( asa_number, date_of_birth, event, date, meet_name,
                        race_time, asa_swim_id )
    if asa_swim_id is not None:
      # Swim link. Add a task to parse the splits.
      swim_key_str = swim.create_swim_key_str()
      logging.info( "Adding split scraping task for swim " + swim_key_str )
      taskqueue.add( url='/admin/scrape_splits', params={ 'swim': swim_key_str } )
    # Record this swim
    if asa_number not in swims_for_swimmer:
      swims_for_swimmer[ asa_number ] = []
    swims_for_swimmer[ asa_number ].append( swim )

  for asa_number, swims in swims_for_swimmer.iteritems():
    num_swims = len( swims )
    logging.info( "Putting " + str( num_swims ) + " swims for " + str( asa_number ) )
    put_new_swims( asa_number, swims )
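# For reference, a sketch of the imports this scraper relies on. The lxml and
# Google App Engine taskqueue calls above match those libraries' real Python 2
# APIs; the remaining names (helpers, Event, Swim, Swimmer, SwimmerCat1,
# RaceTime, TableRows, put_new_swims, get_asa_swim_id, scrape_num_pages,
# meet_has_been_parsed) are assumed to be project-local modules and helpers.
import logging

from google.appengine.api import taskqueue  # taskqueue.add(url=..., params=...)
from lxml import html                       # html.fromstring(page)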