Пример #1
0
def scrape_cur_standings():
    """Check whether the online divisional standings differ from the database.

    Fetches the standings page, looks at every table that carries a section
    title row whose text is exactly 'Divisional', and for each body row
    compares wins + losses against the largest ``games_played`` stored in
    ``team_standings`` for that team and year.

    Returns:
        True if any team's wins + losses differs from the stored
        games-played value (a team/year with no stored rows is compared
        against 0); otherwise False.
    """
    table_url = 'http://thensbl.com/orgstand.htm'

    tables = get_tables(table_url)

    standings_changed = False
    for table in tables:
        titles = table.find_all('tr', class_=re.compile('dmrptsecttitle'))

        for title in titles:
            if title.get_text() != 'Divisional':
                continue

            rows = table.find_all('tr', class_=re.compile('dmrptbody'))

            for row in rows:
                # Collect the text of every child of the row; a bare
                # '&nbsp' placeholder is recorded as None.
                element = []
                for data in row:
                    text = data.get_text()
                    if text == '&nbsp':
                        element.append(None)
                    else:
                        # strip removes whitespace from both ends
                        element.append(text.strip())

                year = element[0]
                team_location_name = element[1]
                wins = element[2]
                losses = element[3]

                if team_location_name is None:
                    continue

                full_name = helper.get_team_name(team_location_name, year)

                # NOTE(review): the values are interpolated directly into the
                # SQL text, so a team name containing a quote would break the
                # statement. Switch to a parameterized query if db.query
                # supports one -- TODO confirm against the db helper.
                qry = """SELECT ts.year
                        , ts.team_name
                        , MAX(ts.games_played) AS gp
                        FROM team_standings ts
                        WHERE 1
                            AND ts.team_name = '%s'
                            AND ts.year = %s
                        GROUP BY ts.team_name, ts.year"""

                prev_rows = db.query(qry % (full_name, year))
                if not prev_rows:
                    # No stored row for this team/year yet.
                    print("\n\nNEW SEASON!!!!!\n\n")
                    # Previously written as `prev_gp == 0`, a no-op
                    # comparison that left the empty result in place.
                    prev_gp = 0
                else:
                    # Third selected column is MAX(ts.games_played).
                    prev_gp = prev_rows[0][2]

                if int(wins) + int(losses) != prev_gp:
                    standings_changed = True

    return standings_changed
Пример #2
0
def input_data(ratings, sql_table, cats, year):
    """Build row dicts from scraped ratings and write them to ``sql_table``.

    Each item of ``ratings`` is paired positionally with ``cats`` to form a
    dict that also carries ``year``. Columns labelled 'foo' are dropped, and
    '*' / '#' markers are removed from player names.

    A row is kept when both its ``player_name`` and ``team_abb`` are
    meaningful (not 'Total', None, '' or 'Other'). Failing that, it is kept
    when ``team_name`` is meaningful; in that case the team name is replaced
    by ``helper.get_team_name(...)`` and, for the 'team_standings' table,
    ``games_played`` is set to w + l.

    ``helper.input_name`` is called for every row that has a ``player_name``
    key, whether or not the row was kept. Kept rows are written with
    ``db.insertRowDict`` (replace=True) and the connection is committed.
    """
    print('\t' + sql_table)

    skip_labels = ('Total', None, '', 'Other')
    collected = []

    for rating_row in ratings:
        record = {'year': year}
        for column, value in zip(cats, rating_row):
            # 'foo' marks a category we are not interested in recording
            if column == 'foo':
                continue
            if column == 'player_name' and value is not None:
                value = value.replace('*', '').replace('#', '')
            record[column] = value

        has_player = record.get("player_name") not in skip_labels
        if has_player and record.get("team_abb") not in skip_labels:
            collected.append(record)
        elif record.get("team_name") not in skip_labels:
            record['team_name'] = helper.get_team_name(
                record.get("team_name"), year)
            if sql_table == 'team_standings':
                won = int(record.get('w'))
                lost = int(record.get('l'))
                record['games_played'] = won + lost
            collected.append(record)

        if 'player_name' in record:
            helper.input_name(record.get('player_name'))

    if collected:
        db.insertRowDict(collected,
                         sql_table,
                         insertMany=True,
                         rid=0,
                         replace=True)
    db.conn.commit()