def scrape(week, year=2015):
    """
    Scrapes the DailyFantasyNerd data that was copied from the website and pasted to S3.
    Sends the processed projections back to S3.

    :param week (int): Week of the season
    :param year (int): Year of the season
    """

    print "Scraping DailyFantasyNerd Projections"
    client = boto3.client("s3")
    players_key = os.path.join("dailyfantasynerd", str(year), "week" + str(week), "players.txt")
    players_data = client.get_object(Bucket=SOURCES_BUCKET, Key=players_key)["Body"].read()
    ret = {}
    for line in players_data.splitlines():
        if "\t" in line:
            data = line.strip().split("\t")
            name = data[0].lower()
            if is_team(name):
                name = get_team(name)
            else:
                name = normalize_name(name)
            points = float(data[-1])  # projected points sit in the last column
            ret[name] = {"name": name, "points": points}
    send_json_to_s3(year, week, "dailyfantasynerd", ret)
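
Every example in this listing ends by calling a shared send_json_to_s3 helper that the excerpt never defines. Below is a minimal sketch of what it plausibly looks like, assuming a SCRAPED_BUCKET constant and a <year>/week<week>/<source>.json key layout inferred from what aggregate() in Example #4 reads back; the serialization details are assumptions, not the original code.

import json
import os

import boto3

SCRAPED_BUCKET = 'my-scraped-projections'  # assumed module-level constant


def send_json_to_s3(year, week, name, data):
    """Serialize data to JSON and upload it under a year/week prefix.

    Hypothetical reconstruction: the key layout is inferred from how
    aggregate() in Example #4 lists and parses the scraped files.
    """
    key = os.path.join(str(year), 'week' + str(week), name + '.json')
    boto3.client('s3').put_object(Bucket=SCRAPED_BUCKET, Key=key,
                                  Body=json.dumps(data))
    return key
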
Example #2
def scrape(week, year=2015, position=0):
    """
    Scrapes the NFL page at the given position.

    :param week (int): Week of the season.
    :param year (int): Year of the season.
    :param position (int): Position category to scrape (the code below uses 8 for defenses).
    :return: The S3 key the data was written to
    :rtype: str
    """
    print "Scraping NFL Projections"

    params = {
        'statWeek': week,
        'statType': 'weekProjectedStats',
        'statSeason': year,
        'statCategory': 'projectedStats',
        'position': position
    }
    # Get players for the requested position
    projections = crawl_nfl_projection_page(**params)

    # Grab defenses too (position 8), restarting pagination at offset 0
    params['offset'] = 0
    params['position'] = 8
    projections = crawl_nfl_projection_page(projections=projections, **params)
    return send_json_to_s3(year, week, 'nfl', projections)
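
crawl_nfl_projection_page is not shown either. The sketch below assumes it fetches NFL.com's fantasy projection page with the query params built above and scrapes a player table from the HTML; the URL and the row parsing are assumptions, not the original implementation.

import requests
from bs4 import BeautifulSoup

NFL_PROJECTIONS_URL = 'http://fantasy.nfl.com/research/projections'  # assumed


def crawl_nfl_projection_page(projections=None, **params):
    """Fetch one NFL projection page and fold its players into a dict.

    Hypothetical sketch: the table markup (and the position of the
    projected-points column) is assumed.
    """
    projections = projections if projections is not None else {}
    response = requests.get(NFL_PROJECTIONS_URL, params=params)
    soup = BeautifulSoup(response.text, 'html.parser')
    for row in soup.select('table tbody tr'):
        cells = [cell.get_text(strip=True) for cell in row.find_all('td')]
        if not cells:
            continue
        name = cells[0].lower()
        try:
            points = float(cells[-1])  # assumed: projected points in last column
        except ValueError:
            continue
        projections[name] = {'name': name, 'points': points}
    return projections
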
Example #3
def scrape(week, year):
    """
    Scrapes the pasted results from Numberfire.

    :param week (int): Week of the season.
    :param year (int): Year of the season.
    :return: The S3 key the data was written to
    :rtype: str
    """
    print "Scraping Numberfire Projections"
    projections = {}
    client = boto3.client('s3')

    players_key = os.path.join('numberfire', str(year), 'week' + str(week), 'players.txt')
    defense_key = os.path.join('numberfire', str(year), 'week' + str(week), 'defense.txt')

    print "Reading", players_key
    players_data = client.get_object(Bucket=SOURCES_BUCKET, Key=players_key)
    defense_data = client.get_object(Bucket=SOURCES_BUCKET, Key=defense_key)
    players = players_data['Body'].read().splitlines() + defense_data['Body'].read().splitlines()
    for line in players:
        player = _read_numberfire_line(line)
        if player is None:
            continue
        projections[player['name']] = player

    return send_json_to_s3(year, week, 'numberfire', projections)
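
_read_numberfire_line is private to this module and not shown. In the hypothetical reconstruction below, the tab-separated column order is a guess, but the output keys (name, opponent, position, salary, team, points) are grounded in what aggregate() in Example #4 reads from the numberfire source; normalize_name is the helper used in the DailyFantasyNerd example above.

def _read_numberfire_line(line):
    """Parse one tab-separated line pasted from Numberfire, or return None.

    Hypothetical sketch: only the returned keys are taken from the
    excerpt; the column order here is assumed.
    """
    if '\t' not in line:
        return None  # header rows and blank lines carry no tabs
    fields = line.strip().split('\t')
    try:
        return {
            'name': normalize_name(fields[0].lower()),
            'position': fields[1],
            'team': fields[2],
            'opponent': fields[3],
            'salary': fields[4],
            'points': float(fields[-1]),
        }
    except (IndexError, ValueError):
        return None
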
Example #4
def aggregate(week, year):
    """
    Aggregates all the projections from the different sources to a single dict and saves to s3.

    :param week (int): Week of the season.
    :param year (int): Year of the season.
    """
    print "aggregating data"
    # Build up a dict of all the projectsion keyed by the filename
    projections = {}
    client = boto3.client('s3')
    for key in _query_projections(year, week):
        print "Reading", key
        name, _ = os.path.basename(key).split('.')
        projections[name] = json.loads(client.get_object(Bucket=SCRAPED_BUCKET, Key=key)['Body'].read())

    # Keep only the players that appear in every projection source.
    players = set.intersection(*[set(items.keys()) for items in projections.values()])
    print "Players present in all sources:", len(players)

    # Build up data
    output = {}
    for name in players:
        ret = {'name': name}
        points = np.array([])
        for source_name, data in projections.items():
            if source_name == 'numberfire':
                # Use Numberfire as the source for player metadata
                ret['opponent'] = data[name]['opponent']
                ret['position'] = data[name]['position']
                ret['salary'] = data[name]['salary']
                ret['team'] = data[name]['team']
            ret[source_name] = data[name]['points']
            points = np.append(points, float(data[name]['points']))
        ret['std'] = np.std(points)
        ret['mean'] = np.mean(points)
        # Trimmed mean: drop the single lowest and highest projections
        # (assumes at least three sources contributed a number).
        trimmed_points = sorted(points)[1:-1]
        ret['trimmed_mean'] = np.mean(trimmed_points)
        output[name] = ret

    send_json_to_s3(year, week, 'projections', output)
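
_query_projections is also undefined in the excerpt. A sketch, assuming it lists every scraped JSON file for the week under the same <year>/week<week>/<source>.json layout as the hypothetical send_json_to_s3 above:

import os

import boto3


def _query_projections(year, week):
    """Yield the S3 key of every scraped projection file for one week.

    Sketch under the assumed key layout; SCRAPED_BUCKET is the same
    assumed constant used elsewhere in these examples.
    """
    prefix = os.path.join(str(year), 'week' + str(week)) + '/'
    paginator = boto3.client('s3').get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=SCRAPED_BUCKET, Prefix=prefix):
        for obj in page.get('Contents', []):
            if obj['Key'].endswith('.json'):
                yield obj['Key']
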
Example #5
def scrape(week, year=2015):
    """
    Scrapes the Rotogrinders page.

    :param week (int): Week of the season.
    :param year (int): Year of the season.
    :return: The S3 key the data was written to
    :rtype: str
    """
    print "Scraping RotoGrinders Projections"
    projections = crawl()
    return send_json_to_s3(year, week, 'rotogrinders', projections)
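
crawl() takes no arguments, so everything about it is internal to this module. The sketch below is purely illustrative of the expected return shape (a dict of {'name', 'points'} records keyed by normalized name, matching the other sources); the URL and page structure are assumptions.

import requests
from bs4 import BeautifulSoup

ROTOGRINDERS_URL = 'https://rotogrinders.com/projections/nfl'  # assumed


def crawl():
    """Return projections keyed by normalized player name.

    Hypothetical sketch; only the return shape is grounded in how the
    other sources are stored and aggregated.
    """
    soup = BeautifulSoup(requests.get(ROTOGRINDERS_URL).text, 'html.parser')
    projections = {}
    for row in soup.select('table tr'):
        cells = [c.get_text(strip=True) for c in row.find_all('td')]
        if len(cells) < 2:
            continue
        name = normalize_name(cells[0].lower())  # helper from the DailyFantasyNerd example
        try:
            projections[name] = {'name': name, 'points': float(cells[-1])}
        except ValueError:
            continue
    return projections
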
Example #6
def scrape(week, year=2015):
    """
    Scrapes the ESPN page.

    :param week (int): Week of the season.
    :param year (int): Year of the season.
    :return: The S3 key the data was written to
    :rtype: str
    """
    print "Scraping ESPN Projections"
    # &scoringPeriodId=5&seasonId=2015&startIndex=40
    params = {
        'scoringPeriodId': week,
        'seasonId': year
    }
    projections = crawl_espn_projection_page(**params)
    return send_json_to_s3(year, week, 'espn', projections)
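
Since each example exposes the same scrape(week, year) entry point from its own module, a small driver can fan out over the sources and then aggregate. The module names below are assumptions based on the source names used in the S3 keys.

# Hypothetical driver; module names are assumed from the S3 source names.
import dailyfantasynerd
import espn
import nfl
import numberfire
import rotogrinders

import projections  # the module containing aggregate()

WEEK, YEAR = 5, 2015

for source in (dailyfantasynerd, espn, nfl, numberfire, rotogrinders):
    source.scrape(WEEK, YEAR)
projections.aggregate(WEEK, YEAR)
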