# maybe there is better way? entry_datum = tree.xpath(xpath_start_item + '//table[3]//tr[' + str(j) + ']//td/span') entry_datum = [x.text.strip() if x.text is not None else '' for x in entry_datum] entry_data.append(entry_datum) # convert to data objects raw_data.append(RawRaceData(race_date, location, metadata, entry_headers, entry_data)) # return data for the race return raw_data # hard-code location location = Location(int(sys.argv[2]), None) # create the results data loader loader = HorseRacingResultsLoader(sys.argv[3]) # get html files in specified folder root = sys.argv[1] for f in os.listdir(root): if not f.endswith('.html') and not f.endswith('.htm'): continue # load results from this file filename = os.path.join(root, f) raw_data = parse_file(filename, location) loader.load(raw_data) loader.commit() print('Done parsing ' + filename) # close the loader
entry_data.append(entry_datum) # convert to data objects raw_data.append( RawRaceData(race_date, location, metadata, entry_headers, entry_data)) # return data for the race return raw_data # hard-code location location = Location(int(sys.argv[2]), None) # create the results data loader loader = HorseRacingResultsLoader(sys.argv[3]) # get html files in specified folder root = sys.argv[1] for f in os.listdir(root): if not f.endswith('.html') and not f.endswith('.htm'): continue # load results from this file filename = os.path.join(root, f) raw_data = parse_file(filename, location) loader.load(raw_data) loader.commit() print('Done parsing ' + filename) # close the loader
xpath_start_subitem = xpath_start_item + '//table[3]//tr[' + str(j) + ']//td' entry_datum = tree.xpath(xpath_start_subitem + '/span|' + xpath_start_subitem + '/p') entry_datum = [x.text.strip() if x.text is not None else '' for x in entry_datum] entry_data.append(entry_datum) # convert to data objects raw_data.append(RawRaceData(race_date, location, metadata, entry_headers, entry_data)) # return data for the race return raw_data # hard-code location location = Location(int(sys.argv[2]), None) # create the results data loader loader = HorseRacingResultsLoader(sys.argv[3]) loader.set_race_adapter_factory(PendingRaceAdapterFactory()) # get html files in specified folder root = sys.argv[1] for f in os.listdir(root): if not f.endswith('.html'): continue # load results from this file filename = os.path.join(root, f) raw_data = parse_file(filename, location) loader.load(raw_data) loader.commit() print('Done parsing ' + filename)