示例#1
0
class TopBetEu:
    """Parser for an odds page whose games are laid out as ``div.event`` blocks.

    The page is obtained from ``self.website.soup()``; ``parse`` turns every
    event block into one ``(away, awayline, home, homeline, gametime)`` tuple.
    """

    # Raw-string patterns, compiled once at class level.  The non-raw
    # originals relied on '\w' / '\s' being passed through unchanged, which
    # newer Python versions warn about as invalid escape sequences.
    _TEAMS_RE = re.compile(r'(\w.+) at (\w.+) ')
    _GAMETIME_RE = re.compile(r'(....)-(..)-(..)\s+(..):(..)')
    _SUFFIX_RE = re.compile(r'-.+')

    def __init__(self, url, headers, params, cnx, cursor):
        """Store the connection details and build the ``Website`` helper.

        Note the argument order: ``headers`` comes *before* ``params`` in
        this signature, while ``Website`` is handed ``params`` first.
        """
        self.url = url
        self.params = params
        self.headers = headers
        self.cnx = cnx
        self.cursor = cursor
        self.website = Website(url, params, headers, cursor, cnx)

    @classmethod
    def _clean_team(cls, raw):
        """Normalise a team name captured from an event header.

        Drops the '-N' / '-A' markers, cuts everything from the first
        remaining '-' onwards, then swaps every '.' for '%'.
        """
        # NOTE(review): '%' looks like a SQL LIKE wildcard standing in for
        # abbreviation dots -- confirm against the code that consumes it.
        stripped = raw.replace('-N', '').replace('-A', '')
        return cls._SUFFIX_RE.sub('', stripped).replace('.', '%')

    def parse(self):
        """Read every game listed on the page.

        Returns:
            A list of ``(awayteamstr, awayline, hometeamstr, homeline,
            gametime)`` tuples, one per ``div.event`` element, in page
            order.  The lines are ``int`` values and ``gametime`` is a
            ``datetime`` at midnight of the game day.

        Raises:
            AttributeError: if an event header does not match the
                "<away> at <home> " or the date/time pattern.
            ValueError: if an event does not contain exactly two
                ``td.money`` cells, or a cell's text is not an integer.
        """
        # get the webpage soup
        soup = self.website.soup()

        games = []
        for eventdiv in soup.find_all('div', class_='event'):
            # the <h3> header carries both the matchup and the start time
            header = eventdiv.find('h3').text

            # read the teams
            teams = self._TEAMS_RE.search(header)
            awayteamstr = self._clean_team(teams.group(1))
            hometeamstr = self._clean_team(teams.group(2))

            # read the gametime
            # NOTE(review): the pattern also captures hour and minute
            # (groups 4 and 5) but only the calendar date has ever been
            # used; kept that way so returned values do not change --
            # confirm whether the time of day should be included.
            when = self._GAMETIME_RE.search(header)
            gametime = datetime(
                int(when.group(1)),
                int(when.group(2)),
                int(when.group(3))
            )

            # read the lines: first money cell is the away side, second
            # the home side (unpacking fails loudly on any other count)
            awaycell, homecell = eventdiv.find_all('td', class_='money')
            awayline = int(awaycell.text)
            homeline = int(homecell.text)

            games.append((
                awayteamstr,
                awayline,
                hometeamstr,
                homeline,
                gametime,
            ))

        return games

    def __repr__(self):
        """Return ``ClassName(url,params,headers)``."""
        return '{0}({1},{2},{3})'.format(self.__class__.__name__,
                                         self.url,
                                         self.params,
                                         self.headers)
示例#2
0
class Pinnacle:
    """Parser for an odds page built from ``table.linesTbl`` tables.

    Each table carries a "month/day" heading and rows that come in groups
    of three (side A, side B, draw).  The page is obtained from
    ``self.website.soup()``.
    """

    # At least one digit on each side of the slash, so a stray "/" in the
    # heading can no longer match and blow up in int('').
    _DATE_RE = re.compile(r'(\d{1,2})/(\d{1,2})')

    def __init__(self, url, params, headers, cnx, cursor):
        """Store the connection details and build the ``Website`` helper."""
        self.url = url
        self.params = params
        self.headers = headers
        self.cnx = cnx
        self.cursor = cursor
        self.website = Website(url, params, headers, cursor, cnx)

    @staticmethod
    def _moneyline(row):
        """Return the row's ``.linesMLine`` value as a float.

        An empty (or whitespace-only) cell yields -1.0.
        """
        text = row.select('.linesMLine')[0].text
        return float(text.strip() or -1)

    def parse(self, year=None):
        """Read every complete three-row game from the page.

        Args:
            year: calendar year to attach to the month/day headings, which
                carry no year themselves.  Defaults to the current year
                (this used to be hard-coded to 2015).

        Returns:
            A list of ``(lineaname, linealine, linebname, linebline,
            drawline, gametime)`` tuples in page order.  Lines are floats
            (-1.0 when the cell is empty) and ``gametime`` is a
            ``datetime.datetime`` at midnight of the table's date.

        Raises:
            AttributeError: if a table heading contains no "month/day".
            IndexError: if an expected element is missing from a table
                or row.
            ValueError: if the heading is not a valid date in ``year`` or
                a line cell is not numeric.
        """
        # Single-argument call: prints the same text on Python 2 and 3.
        print('parsing: ' + str(self))

        if year is None:
            year = datetime.date.today().year

        # get the webpage soup
        soup = self.website.soup()

        lines = []
        for table in soup.find_all('table', class_='linesTbl'):
            # get the date for this table
            datestr = table.select('.linesHeader')[0].find('h4').text
            match = self._DATE_RE.search(datestr)
            game_date = datetime.date(year,
                                      int(match.group(1)),
                                      int(match.group(2)))
            gametime = datetime.datetime.combine(game_date, datetime.time())

            # Rows are styled with one of two alternating classes; collect
            # all '.linesAlt1' rows first, then all '.linesAlt2' rows.
            rows = list(table.select('.linesAlt1'))
            rows.extend(table.select('.linesAlt2'))

            # Group into consecutive triples *per table*, so two tables
            # that share a date no longer overwrite each other; a trailing
            # incomplete group is ignored.
            complete = len(rows) - len(rows) % 3
            for start in range(0, complete, 3):
                linerowa, linerowb, draw = rows[start:start + 3]
                lines.append((
                    linerowa.select('.linesTeam')[0].text,
                    self._moneyline(linerowa),
                    linerowb.select('.linesTeam')[0].text,
                    self._moneyline(linerowb),
                    self._moneyline(draw),
                    gametime,
                ))

        return lines

    def __repr__(self):
        """Return ``ClassName(<website>)``."""
        return '{0}({1})'.format(self.__class__.__name__,
                                 self.website)