def insert_race(soup):
    """Create the ``Race`` rows for every race day listed on a schedule page.

    The year and month are read from the ``h3`` heading with class
    ``midashi3rd``.  Each ``tbody tr`` row that has more than two cells and a
    link in its first cell yields twelve ``Race.objects.get_or_create`` calls,
    one per round 1..12.

    Args:
        soup: Parsed page exposing ``find`` and ``select`` (presumably a
            BeautifulSoup document -- confirm with the caller).

    Returns:
        None.
    """
    # Extract year and month field
    year_month = soup.find("h3", {"class": "midashi3rd"}).string
    year = formatter(r"(\d+)年\d+月", year_month, "int")
    month = formatter(r"\d+年(\d+)月", year_month, "int")

    for row in soup.select("tbody tr"):
        cells = row.find_all("td")
        # Skip rows that are too short or whose first cell carries no link.
        if len(cells) <= 2 or cells[0].find("a") is None:
            continue

        # The digits taken from the first cell's link are prefixed with "20"
        # to form the race id minus its two-digit round suffix.
        race_id_without_round = "20" + formatter(
            r"\d+", get_from_a(cells[0]))
        day = formatter(r"(\d+)日([日月火水木金土])", str(cells[0]), "int")

        # Fixed-width fields are sliced out of the id prefix.
        place_id = race_id_without_round[4:6]
        place_name = to_place_name(place_id)
        days = int(race_id_without_round[6:8])
        times = int(race_id_without_round[8:10])

        # Same date for all twelve rounds, so build it once per row.
        race_dt = date(year, month, day)

        for r in range(1, 13):
            Race.objects.get_or_create(
                race_id=race_id_without_round + str(r).zfill(2),
                race_dt=race_dt,
                place_id=place_id,
                place_name=place_name,
                days=days,
                times=times,
                round=r,
            )
def parse_result(cells, race, tof):
    """Build a ``Result`` from the cells of one race-result table row.

    Args:
        cells: Row cells indexed by column position.  Each cell must expose
            ``.string``; cells 3, 6, 15 and 19 are also passed to
            ``get_from_a``.
        race: Race the result belongs to; ``race.race_id`` is used as the
            prefix of ``result.key``.
        tof: Reference finish time in seconds subtracted from this row's
            finish time to get ``time_lag`` (presumably the winner's time --
            confirm with the caller).  ``0`` forces ``time_lag`` to 0.

    Returns:
        The populated ``Result`` instance.
    """
    # Run of characters outside the ASCII '!'..'~' range and not a no-break
    # space; used to pull the display name out of a link.
    name_pattern = "[^!-~\xa0]+"

    result = Result()
    result.race = race
    result.rank = formatter(r"\d+", cells[0].string, "int")
    result.bracket = formatter(r"\d+", cells[1].string, "int")
    result.horse_num = formatter(r"\d+", cells[2].string, "int")

    result.horse_id = formatter(r"\d+", get_from_a(cells[3]))
    result.horse_name = formatter(name_pattern, get_from_a(cells[3], "name"))
    result.key = race.race_id + result.horse_id

    result.sex = formatter("[牡牝騸セ]", cells[4].string)
    result.age = formatter(r"\d+", cells[4].string, "int")
    result.burden = formatter(r"(\d+).?\d?", cells[5].string, "float")

    result.jockey_id = formatter(r"\d+", get_from_a(cells[6]))
    result.jockey_name = formatter(name_pattern, get_from_a(cells[6], "name"))

    if cells[7].string is not None:
        # Time text is split on ':' and '.' into minutes, seconds, tenths.
        time_parts = [int(part) for part in re.split(r"[:.]", cells[7].string)]
        if len(time_parts) == 2:
            # No minute component (seconds and tenths only): treat as 0 min
            # instead of failing with IndexError.
            time_parts = [0] + time_parts
        result.finish_time = (
            (time_parts[0] * 60 + time_parts[1]) + (time_parts[2] / 10))

    result.time_lag = round(
        result.finish_time - tof,
        2) if result.finish_time is not None and tof != 0 else 0

    result.last3f_time = formatter(r"\d+.\d+", cells[11].string, "float")
    result.odds = formatter(r"\d+.\d+", cells[12].string, "float")
    result.odor = formatter(r"\d+", cells[13].string, "int")

    # Cell 14 holds the weight followed by a parenthesised signed difference.
    result.weight = formatter(r"(\d+)\([+-]?\d*\)", cells[14].string, "int")
    result.weight_diff = formatter(
        r"\d+\(([+-]?\d+)\)", cells[14].string, "int")

    result.trainer_id = formatter(r"\d+", get_from_a(cells[15]))
    result.trainer_name = formatter(
        name_pattern, get_from_a(cells[15], "name"))
    result.owner_id = formatter(r"\d+", get_from_a(cells[19]))
    result.owner_name = formatter(name_pattern, get_from_a(cells[19], "name"))
    result.prize = formatter(r"\d*,?\d*.?\d+", cells[20].string, "float")
    return result
def parse_entry_8(cells, race):
    """Build a ``Result`` for one entry row in the shorter table layout.

    This layout carries no bracket, horse number, odds or popularity columns
    (the ``8`` in the name presumably refers to the column count -- confirm
    with the caller).  ``bracket`` and ``horse_num`` are left untouched, and
    ``odds`` and ``odor`` are set to 0.

    Args:
        cells: Row cells indexed by column position.  Cells 2 and 3 must
            expose ``.string``; cells 1, 4 and 5 are passed to ``get_from_a``.
        race: Race the entry belongs to; ``race.race_id`` is used as the
            prefix of ``result.key``.

    Returns:
        The populated ``Result`` instance.
    """
    # Run of characters outside the ASCII '!'..'~' range and not a no-break
    # space; used to pull the display name out of a link.
    name_pattern = "[^!-~\xa0]+"

    result = Result()
    result.race = race

    result.horse_id = formatter(r"\d+", get_from_a(cells[1]))
    result.horse_name = formatter(name_pattern, get_from_a(cells[1], "name"))
    result.key = race.race_id + result.horse_id

    result.sex = formatter("[牡牝騸セ]", cells[2].string)
    result.age = formatter(r"\d+", cells[2].string, "int")
    result.burden = formatter(r"(\d+).?\d?", cells[3].string, "float")

    result.jockey_id = formatter(r"\d+", get_from_a(cells[4]))
    result.jockey_name = formatter(name_pattern, get_from_a(cells[4], "name"))
    result.trainer_id = formatter(r"\d+", get_from_a(cells[5]))
    result.trainer_name = formatter(name_pattern, get_from_a(cells[5], "name"))

    # This layout has no odds / popularity columns.
    result.odds = 0
    result.odor = 0
    return result
def parse_entry_12(cells, race):
    """Build a ``Result`` for one entry row in the wider table layout.

    This layout includes bracket, horse number, odds and popularity columns
    (the ``12`` in the name presumably refers to the column count -- confirm
    with the caller).

    Args:
        cells: Row cells indexed by column position.  Cells 0, 1, 4, 5, 8 and
            9 must expose ``.string``; cells 3, 6 and 7 are passed to
            ``get_from_a``.
        race: Race the entry belongs to; ``race.race_id`` is used as the
            prefix of ``result.key``.

    Returns:
        The populated ``Result`` instance.
    """
    # Run of characters outside the ASCII '!'..'~' range and not a no-break
    # space; used to pull the display name out of a link.
    name_pattern = "[^!-~\xa0]+"

    result = Result()
    result.race = race
    result.bracket = formatter(r"\d+", cells[0].string, "int")
    result.horse_num = formatter(r"\d+", cells[1].string, "int")

    result.horse_id = formatter(r"\d+", get_from_a(cells[3]))
    result.horse_name = formatter(name_pattern, get_from_a(cells[3], "name"))
    result.key = race.race_id + result.horse_id

    result.sex = formatter("[牡牝騸セ]", cells[4].string)
    result.age = formatter(r"\d+", cells[4].string, "int")
    result.burden = formatter(r"(\d+).?\d?", cells[5].string, "float")

    result.jockey_id = formatter(r"\d+", get_from_a(cells[6]))
    result.jockey_name = formatter(name_pattern, get_from_a(cells[6], "name"))
    result.trainer_id = formatter(r"\d+", get_from_a(cells[7]))
    result.trainer_name = formatter(name_pattern, get_from_a(cells[7], "name"))

    result.odds = formatter(r"\d+.\d+", cells[8].string, "float")
    result.odor = formatter(r"\d+", cells[9].string, "int")
    return result