Пример #1
0
    def test_1(self):
        '''
        Checks the machine-learning transforms of a Dataset:
            * inputs2ml with numeric and categorical inputs
            * target2ml with a numeric target and with a categorical target
        '''
        self.setUpData()
        dataset = copper.Dataset('transforms/ml/data.csv')

        # Inputs: numeric and categorical.
        # Both target columns are rejected, and the expected frame is the
        # stored solution with those two columns removed.
        dataset.type['Num.as.Cat'] = dataset.CATEGORY
        dataset.role['Target.Num'] = dataset.REJECTED
        dataset.role['Target.Cat'] = dataset.REJECTED
        expected_inputs = copper.read_csv('transforms/ml/ml.csv')
        del expected_inputs['Target.Num']
        del expected_inputs['Target.Cat']

        self.assertEqual(copper.transform.inputs2ml(dataset), expected_inputs)

        # Numeric target: only 'Target.Num' carries the TARGET role
        expected_numeric = copper.read_csv('transforms/ml/ml.csv')['Target.Num']
        dataset.role['Target.Num'] = dataset.TARGET
        dataset.role['Target.Cat'] = dataset.REJECTED
        self.assertEqual(copper.transform.target2ml(dataset), expected_numeric)

        # Categorical target: the roles of the two target columns are swapped
        expected_categorical = copper.read_csv('transforms/ml/ml.csv')['Target.Cat']
        dataset.role['Target.Num'] = dataset.REJECTED
        dataset.role['Target.Cat'] = dataset.TARGET
        self.assertEqual(copper.transform.target2ml(dataset), expected_categorical)
Пример #2
0
    def test_to_number(self):
        '''
        Applies transform.to_number to the 'Number.1' and 'Number.2' columns
        and compares each result with the stored solution
        '''
        self.setUpData()
        raw = copper.read_csv('transforms/1/data.csv')
        expected = copper.read_csv('transforms/1/transformed.csv')

        for column in ('Number.1', 'Number.2'):
            converted = raw[column].apply(copper.transform.to_number)
            self.assertEqual(converted, expected[column])
Пример #3
0
    def test_strptime(self):
        '''
        Parses the same dates written in three different formats and checks
        that, once the unparsed entries are dropped, the three columns agree
        '''
        self.setUpData()
        data = copper.read_csv('transforms/1/data.csv')

        # (column, format) pairs; every column is parsed with its own format
        formats = (
            ('Date.1', '%Y-%m-%d'),
            ('Date.2', '%Y/%m/%d'),
            ('Date.3', '%m/%d/%y'),
        )
        # `args` has to be a real tuple: a bare string (or a string wrapped in
        # parentheses without a trailing comma) is unpacked character by
        # character when pandas forwards it to the function.
        dates1, dates2, dates3 = [
            data[column].apply(copper.transform.strptime, args=(fmt,)).dropna()
            for column, fmt in formats
        ]

        self.assertEqual(len(dates1), 12)
        self.assertEqual(dates1.values, dates2.values)
        self.assertEqual(dates2.values, dates3.values)
        self.assertEqual(dates1.values, dates3.values)
Пример #4
0
    def test_join(self):
        """
        Splits a frame into four column slices, joins them back together and
        compares the result with the Dataset loaded from the whole file
        """
        self.setUpData()
        whole = copper.Dataset("dataset/1/data.csv")
        frame = copper.read_csv("dataset/1/data.csv")

        n_cols = len(frame.columns)
        # Column positions where one slice stops and the next one starts
        cuts = [
            0,
            int(n_cols / 4),
            int(2 * (n_cols / 4)),
            int(3 * (n_cols / 4)),
            int(4 * (n_cols / 4)),
        ]
        ds1, ds2, ds3, ds4 = [
            copper.Dataset(frame.ix[:, start:stop])
            for start, stop in zip(cuts[:-1], cuts[1:])
        ]

        # 1. The joined pieces equal the whole dataset
        joined = copper.join(ds1, ds2, others=[ds3, ds4])
        self.assertEqual(joined, whole)

        # 2. Change the type of a column in one piece; the change should be
        # reflected on the joined dataset.
        # Note: if data.csv gains columns, "Cat.1" may land in ds1 instead of
        # ds2 and the piece modified below has to change accordingly.
        ds2.type["Cat.1"] = joined.NUMBER
        ds2.update()
        whole.type["Cat.1"] = joined.NUMBER
        whole.update()

        joined = copper.join(ds1, ds2, others=[ds3, ds4])
        self.assertEqual(joined, whole)
Пример #5
0
    def test_costs(self):
        '''
        Compares profit, opportunity cost and cost of not using ML with the
        stored solutions, for a fixed costs matrix
        '''
        self.setup()

        self.ml.costs = [[0, 4], [12, 16]]

        # Every solution file is indexed by its 'Model' column
        expected_profit = copper.read_csv('ml/1/profit.csv').set_index('Model')
        self.assertEqual(self.ml.profit(), expected_profit)

        opportunity_table = copper.read_csv('ml/1/oportunity_cost.csv').set_index('Model')
        expected_opportunity = opportunity_table['Oportuniy cost']
        self.assertEqual(self.ml.oportunity_cost(), expected_opportunity)

        no_ml_table = copper.read_csv('ml/1/cost_no_ml.csv').set_index('Model')
        expected_no_ml = no_ml_table['Costs of not using ML']
        self.assertEqual(self.ml.cost_no_ml(), expected_no_ml)
Пример #6
0
    def test_pandas(self):
        """
        Checks that a Dataset mirrors the pandas behaviour of a DataFrame
        loaded from the same file:
            1. Get/Set columns
            2. Head/Tail
            3. Correlation matrix
        """
        self.setUpData()

        dataset = copper.Dataset("dataset/1/data.csv")
        frame = copper.read_csv("dataset/1/data.csv")

        # 1.1 Get columns
        for name in frame.columns:
            self.assertEqual(dataset[name], frame[name])

        # 1.2 Set columns - already existing columns only
        dataset["Number.1"] = dataset["Number.1"] - 10
        frame["Number.1"] = frame["Number.1"] - 10
        self.assertEqual(frame, dataset.frame)

        def encode(value):
            # Uses characters 0-3 and 4-5 of the string form of the value
            # (presumably year and month - confirm against data.csv)
            text = str(value)
            return 12 * (2007 - int(text[0:4])) - int(text[4:6]) + 2

        dataset["Date.Encoded"] = dataset["Date.Encoded"].apply(encode)
        frame["Date.Encoded"] = frame["Date.Encoded"].apply(encode)
        self.assertEqual(frame, dataset.frame)

        # 2. Head/Tail, with the default size and with an explicit one
        self.assertEqual(dataset.head(), frame.head())
        self.assertEqual(dataset.head(13), frame.head(13))
        self.assertEqual(dataset.tail(), frame.tail())
        self.assertEqual(dataset.tail(9), frame.tail(9))

        # 3. Correlation matrix
        self.assertEqual(dataset.corr(), frame.corr())
Пример #7
0
    def test_cat2num(self):
        """
        Checks the automatic transformation of a Category column to Number.
        More tests can be found in the transformation tests.
        """
        self.setUpData()
        dataset = copper.Dataset("dataset/1/data.csv")
        expected = copper.read_csv("dataset/1/transformed.csv")

        # Types detected when the file is imported
        imported_types = (
            ("Number.1", dataset.NUMBER),
            ("Number.2", dataset.NUMBER),
            ("Cat.1", dataset.CATEGORY),
            ("Cat.2", dataset.CATEGORY),
            ("Num.as.Cat", dataset.CATEGORY),
            ("Money", dataset.CATEGORY),
        )
        for column, column_type in imported_types:
            self.assertEqual(dataset.type[column], column_type)

        # Switch each column to NUMBER in turn; after update() its values
        # must match the stored solution
        for column in ("Num.as.Cat", "Money"):
            dataset.type[column] = dataset.NUMBER
            self.assertEqual(dataset.type[column], dataset.NUMBER)
            dataset.update()
            self.assertEqual(dataset[column], expected[column])
Пример #8
0
    def test_1_cat_2_num(self):
        '''
        Checks:
            * the metadata detected on import
            * the category to number transformation of 'Num.as.Cat':
                * the metadata reports NUMBER after the update
                * the values equal the stored solution
        '''
        self.setUpData()
        dataset = copper.Dataset('dataset/1/data.csv')
        expected = copper.read_csv('dataset/1/transform_filled.csv')

        # Metadata detected on import
        imported_types = (
            ('Number.1', dataset.NUMBER),
            ('Number.2', dataset.NUMBER),
            ('Cat.1', dataset.CATEGORY),
            ('Cat.2', dataset.CATEGORY),
            ('Num.as.Cat', dataset.CATEGORY),
        )
        for column, column_type in imported_types:
            self.assertEqual(dataset.type[column], column_type)

        dataset.type['Num.as.Cat'] = dataset.NUMBER
        dataset.update()

        # Metadata after the change
        self.assertEqual(dataset.type['Num.as.Cat'], dataset.NUMBER)

        # Values after the change
        self.assertEqual(dataset['Num.as.Cat'], expected['Num.as.Cat'])
Пример #9
0
    def test_fillna(self):
        """
        Tests:
            * Fill na in type=Number
            * Fill na in type=Category

        Each column is filled on its own and compared with the same column
        of the stored solution.
        """
        self.setUpData()
        ds = copper.Dataset("dataset/1/data.csv")
        sol = copper.read_csv("dataset/1/transform_filled.csv")

        # (column, fill method) pairs, filled and checked in this order
        cases = (
            ("Number.1", "mean"),  # does not have missing values
            ("Number.2", "mean"),  # does have missing values
            ("Cat.1", "mode"),     # does have missing values
            ("Cat.2", "mode"),     # does NOT have missing values
        )
        for column, method in cases:
            ds.fillna(cols=column, method=method)
            self.assertEqual(ds[column], sol[column])
Пример #10
0
    def test_predict(self):
        ''' Checks the predictions and the prediction probabilities
        * calling without arguments gives the same result as passing the test set
        * it is possible to predict on another dataset (the train set)
        '''
        self.setup()

        # Stored solutions; predictions are indexed by 'Model', probabilities by 'index'
        expected_train = copper.read_csv('ml/1/predict_train.csv').set_index('Model')
        expected_test = copper.read_csv('ml/1/predict_test.csv').set_index('Model')
        expected_proba_train = copper.read_csv('ml/1/predict_proba_train.csv').set_index('index')
        expected_proba_test = copper.read_csv('ml/1/predict_proba_test.csv').set_index('index')

        # Predictions: default, keyword and positional dataset argument
        self.assertEqual(self.mc.predict(), expected_test)
        self.assertEqual(self.mc.predict(ds=self.test), expected_test)
        self.assertEqual(self.mc.predict(self.train), expected_train)

        # Probabilities; the extra third argument is forwarded to assertEqual as is
        self.assertEqual(self.mc.predict_proba(), expected_proba_test, 1)
        self.assertEqual(self.mc.predict_proba(self.test), expected_proba_test, 1)
        self.assertEqual(self.mc.predict_proba(ds=self.train), expected_proba_train, 1)
Пример #11
0
    def test_date2number(self):
        '''
        Requires:
            transforms.strptime

        Parses three date columns (same dates, different formats), converts
        them to numbers and checks the results agree, first with the default
        start date and then with a custom one.
        '''
        from datetime import datetime

        self.setUpData()
        data = copper.read_csv('transforms/1/data.csv')

        # (column, format) pairs; `args` has to be a real tuple, otherwise the
        # format string is unpacked character by character by pandas.
        formats = (
            ('Date.1', '%Y-%m-%d'),
            ('Date.2', '%Y/%m/%d'),
            ('Date.3', '%m/%d/%y'),
        )
        dates1, dates2, dates3 = [
            data[column].apply(copper.transform.strptime, args=(fmt,))
            for column, fmt in formats
        ]

        # Default startdate
        nums1 = dates1.apply(copper.transform.date_to_number)
        nums2 = dates2.apply(copper.transform.date_to_number)
        nums3 = dates3.apply(copper.transform.date_to_number)
        # All three pairs are compared (the first pair used to be a duplicate
        # of the last one, so nums1 was never checked against nums2)
        self.assertEqual(nums1.values, nums2.values)
        self.assertEqual(nums2.values, nums3.values)
        self.assertEqual(nums1.values, nums3.values)
        ans_1 = 13879
        self.assertEqual(nums1[0], ans_1)
        self.assertEqual(nums2[0], ans_1)
        self.assertEqual(nums3[0], ans_1)

        # Custom startdate
        # start_date is module-level state: remember it and put it back so the
        # change does not leak into the tests that run afterwards.
        previous_start = copper.transform.start_date
        copper.transform.start_date = datetime(2000, 1, 1)
        try:
            nums1_2 = dates1.apply(copper.transform.date_to_number)
            nums2_2 = dates2.apply(copper.transform.date_to_number)
            nums3_2 = dates3.apply(copper.transform.date_to_number)

            self.assertEqual(nums1_2.values, nums2_2.values)
            self.assertEqual(nums2_2.values, nums3_2.values)
            self.assertEqual(nums1_2.values, nums3_2.values)
            ans_1_2 = 2922
            self.assertEqual(nums1_2[0], ans_1_2)
            self.assertEqual(nums2_2[0], ans_1_2)
            self.assertEqual(nums3_2[0], ans_1_2)
            # The custom start date must actually change the result
            self.assertNotEqual(nums1[0], nums1_2[0])
            self.assertNotEqual(nums2[0], nums1_2[0])
            self.assertNotEqual(nums3[0], nums1_2[0])
        finally:
            copper.transform.start_date = previous_start
Пример #12
0
    def test_create(self):
        """
        A Dataset built from a file path, from a DataFrame or through load()
        must compare equal to the others
        """
        self.setUpData()
        csv_path = "dataset/1/data.csv"

        # From a path given to the constructor
        from_path = copper.Dataset(csv_path)

        # From a DataFrame that was read beforehand
        frame = copper.read_csv(csv_path)
        from_frame = copper.Dataset(frame)

        # Empty Dataset filled afterwards with load()
        from_load = copper.Dataset()
        from_load.load(csv_path)

        self.assertEqual(from_path, from_frame)
        self.assertEqual(from_frame, from_load)
Пример #13
0
    def test_1_filter(self):
        '''
        Checks Dataset.filter for different combinations of role and type
        '''
        self.setUpData()
        dataset = copper.Dataset('dataset/1/data.csv')
        frame = copper.read_csv('dataset/1/data.csv')

        # 1. Initial frame
        self.assertEqual(dataset.frame, frame)

        # 2. No filters - everything is returned
        self.assertEqual(dataset.filter(), frame)
        # 2.1 A rejected column is still returned when no filter is given
        dataset.role['Number.2'] = dataset.REJECTED
        self.assertEqual(dataset.filter(), frame)

        # 3. Filter by inputs
        dataset.role['Number.2'] = dataset.REJECTED
        inputs_without_number2 = ['Number.1', 'Cat.1', 'Cat.2', 'Num.as.Cat']
        self.assertEqual(dataset.filter(role=dataset.INPUT), frame[inputs_without_number2])
        # 3.1 Put the column back
        dataset.role['Number.2'] = dataset.INPUT
        self.assertEqual(dataset.filter(role=dataset.INPUT), frame)

        # 4. Filter by target - the inputs change as well
        dataset.role['Cat.1'] = dataset.TARGET
        inputs_without_cat1 = ['Number.1', 'Number.2', 'Cat.2', 'Num.as.Cat']
        self.assertEqual(dataset.filter(role=dataset.TARGET), frame[['Cat.1']])
        self.assertEqual(dataset.filter(role=dataset.INPUT), frame[inputs_without_cat1])

        # 5. Filter by type
        number_columns = ['Number.1', 'Number.2']
        category_columns = ['Cat.1', 'Cat.2', 'Num.as.Cat']
        self.assertEqual(dataset.filter(type=dataset.NUMBER), frame[number_columns])
        self.assertEqual(dataset.filter(type=dataset.CATEGORY), frame[category_columns])

        # 6. Filter by role and type
        dataset.role['Cat.1'] = dataset.TARGET
        self.assertEqual(dataset.filter(role=dataset.INPUT, type=dataset.NUMBER),
                         frame[['Number.1', 'Number.2']])
        self.assertEqual(dataset.filter(role=dataset.INPUT, type=dataset.CATEGORY),
                         frame[['Cat.2', 'Num.as.Cat']])
        self.assertEqual(dataset.filter(role=dataset.TARGET, type=dataset.CATEGORY),
                         frame[['Cat.1']])

        # Lists of roles and/or types
        both_roles = [dataset.INPUT, dataset.TARGET]
        both_types = [dataset.NUMBER, dataset.CATEGORY]

        # Multiple roles
        self.assertEqual(dataset.filter(role=both_roles), frame)

        # Multiple types
        self.assertEqual(dataset.filter(type=both_types), frame)

        # Multiple roles and types
        self.assertEqual(dataset.filter(role=both_roles, type=both_types), frame)
Пример #14
0
    def test_fillna_2(self):
        """
        Fills all the columns at once

        'Num.as.Cat' and 'Money' are switched to NUMBER first, then the whole
        dataset is filled with method="mean" and the resulting frame is
        compared with the stored solution.
        """

        self.setUpData()
        dataset = copper.Dataset("dataset/1/data.csv")
        expected = copper.read_csv("dataset/1/transform_filled.csv")

        for column in ("Num.as.Cat", "Money"):
            dataset.type[column] = dataset.NUMBER
        dataset.update()

        dataset.fillna(method="mean")
        self.assertEqual(dataset.frame, expected)
Пример #15
0
    def test_create(self):
        '''  Tests the different ways of creating a Dataset

        A random 5x5 frame is written to a CSV file inside a private temporary
        directory; Datasets created from the path, from a DataFrame, through
        load() and by assigning .frame must all compare equal. The directory
        is removed when the test finishes, whether it passes or fails.
        '''
        import os
        import shutil
        import tempfile

        frame = pd.DataFrame(np.random.rand(5, 5), index=np.arange(5))
        frame.index.name = 'index'

        # A fresh directory per run instead of a fixed '/tmp/temp.csv': works
        # on every platform and cannot collide with a concurrent test run.
        tmp_dir = tempfile.mkdtemp()
        try:
            csv_path = os.path.join(tmp_dir, 'temp.csv')
            frame.to_csv(csv_path)

            ds1 = copper.Dataset(csv_path)
            df = copper.read_csv(csv_path)
            ds2 = copper.Dataset(df)
            ds3 = copper.Dataset()
            ds3.load(csv_path)
            ds4 = copper.Dataset()
            ds4.frame = df

            self.assertEqual(ds1, ds2)
            self.assertEqual(ds2, ds3)
            self.assertEqual(ds3, ds4)
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)
Пример #16
0
    def test_1_fillna(self):
        '''
        Checks:
            * filling a Number column (with and without missing values)
            * filling a Category column
        '''
        self.setUpData()
        dataset = copper.Dataset('dataset/1/data.csv')
        expected = copper.read_csv('dataset/1/transform_filled.csv')

        # Number.1 does not have missing values: it is compared with the
        # column as it was fetched before the fill
        before = dataset['Number.1']
        dataset.fillna(cols='Number.1', method='mean')
        self.assertEqual(dataset['Number.1'], before)

        # Number.2 and Cat.1 do have missing values: compared with the solution
        for column, method in (('Number.2', 'mean'), ('Cat.1', 'mode')):
            dataset.fillna(cols=column, method=method)
            self.assertEqual(dataset[column], expected[column])
Пример #17
0
    def test_cm(self):
        '''
        Checks the values of the confusion matrices and of the
        confusion matrix tables
        '''
        self.setup()

        raw_matrices = self.ml._cm()
        # Expected 2x2 matrix per classifier name
        expected_matrices = [
            ('GNB', np.array([[1196, 236], [406, 162]])),
            ('DT', np.array([[1365, 67], [506, 62]])),
            ('SVM', np.array([[1362, 70], [531, 37]])),
            ('GB', np.array([[1387, 45], [515, 53]])),
        ]

        # Raw matrices first, then the values of the objects returned by cm()
        for name, matrix in expected_matrices:
            self.assertEqual(raw_matrices[name], matrix)
        for name, matrix in expected_matrices:
            self.assertEqual(self.ml.cm(name).values, matrix)

        # Full table, then the per-class tables sorted by their rate column
        full_table = copper.read_csv('ml/1/cm.csv').set_index('Model')
        full_table.index.name = None
        self.assertEqual(self.ml.cm_table(), full_table)

        first_columns = full_table.columns[0:3]
        remaining_columns = full_table.columns[3:]
        table_0 = full_table.ix[:, first_columns].sort(['Rate 0\'s'], ascending=False)
        table_1 = full_table.ix[:, remaining_columns].sort(['Rate 1\'s'], ascending=False)
        self.assertEqual(self.ml.cm_table(0), table_0)
        self.assertEqual(self.ml.cm_table(1), table_1)
Пример #18
0
import copper
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime, date
# NOTE(review): presumably the directory copper resolves data files against - confirm
copper.project.path = '../..'

# Games table indexed by its 'id' column; the index values are used as game ids below
games = copper.read_csv('games.csv').set_index('id')
# Box score page of a game; {0} is filled with the game id
BASE_URL = 'http://espn.go.com/nba/boxscore?gameId={0}'

# Only the first game is requested here; its page is used to read the column titles
request = requests.get(BASE_URL.format(games.index[0]))

# First table with class 'mod-data'; the titles come from the second row of
# its first <thead>, skipping the first <th>
table = BeautifulSoup(request.text).find('table', class_='mod-data')
heads = table.find_all('thead')
headers = heads[0].find_all('tr')[1].find_all('th')[1:]
headers = [th.text for th in headers]
# Three fixed columns followed by the scraped titles
columns = ['id', 'team', 'player'] + headers

# Empty frame with the final column layout
players = pd.DataFrame(columns=columns)

def get_players(players, team_name):
    '''
    Copies the cell texts of a list of player rows into an object array.

    players: sequence of parsed rows; each one must support find_all('td').
        Note that this parameter shadows the module-level `players` frame.
    team_name: not used in the visible part of the function.

    NOTE(review): no return statement is visible; the snippet looks
    truncated, so the rest of the contract cannot be stated from here.
    '''
    # One row per player, one column for the name plus one per header; NaN everywhere to start
    array = np.zeros((len(players), len(headers)+1), dtype=object)
    array[:] = np.nan
    for i, player in enumerate(players):
        cols = player.find_all('td')
        # Column 0: text of the first cell up to the first comma
        array[i, 0] = cols[0].text.split(',')[0]
        for j in range(1, len(headers) + 1):
            # Rows whose second cell starts with 'DNP' keep NaN in every remaining column
            if not cols[1].text.startswith('DNP'):
                array[i, j] = cols[j].text
    
Пример #19
0
# get each NBA player stats from ESPN
# credit: http://danielfrg.com/blog/2013/04/01/nba-scraping-data/

import copper
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime, date

# NOTE(review): presumably the directory copper resolves data files against - confirm
copper.project.path = "../.."

# Games table indexed by its "id" column; the index values are used as game ids below
games = copper.read_csv("games.csv").set_index("id")
# Box score page of a game; {0} is filled with the game id
BASE_URL = "http://espn.go.com/nba/boxscore?gameId={0}"

# Only the first game is requested here; its page is used to read the column titles
request = requests.get(BASE_URL.format(games.index[0]))

# First table with class "mod-data"; the titles come from the second row of
# its first <thead>, skipping the first <th>
table = BeautifulSoup(request.text).find("table", class_="mod-data")
heads = table.find_all("thead")
headers = heads[0].find_all("tr")[1].find_all("th")[1:]
headers = [th.text for th in headers]
# Three fixed columns followed by the scraped titles
columns = ["id", "team", "player"] + headers

# Empty frame with the final column layout
players = pd.DataFrame(columns=columns)


def get_players(players, team_name):
    """
    Starts building an object array from a list of player rows.

    players: sequence of parsed rows; each one must support find_all("td").
        Note that this parameter shadows the module-level `players` frame.
    team_name: not used in the visible part of the function.

    NOTE(review): the snippet stops right after the cells of a row are
    fetched; nothing is stored or returned in the visible part.
    """
    # One row per player, one column more than there are headers; NaN everywhere to start
    array = np.zeros((len(players), len(headers) + 1), dtype=object)
    array[:] = np.nan
    for i, player in enumerate(players):
        # All the <td> cells of the current row
        cols = player.find_all("td")
Пример #20
0
    ## Is this useful?
    def important_features(self, clf_name):
        """
        Draws a bar chart of the feature importances of a stored classifier.

        clf_name: key of the classifier in self._clfs; the classifier must
            expose `feature_importances_`.

        Bars are ordered from the highest importance to the lowest and each
        tick is labelled with the position of the feature in the original
        importances array. Nothing is returned.
        """
        model = self._clfs[clf_name]
        scores = model.feature_importances_
        # Feature positions sorted by decreasing importance
        order = np.argsort(scores)[::-1]
        positions = range(len(scores))

        plt.title("Feature importances")
        plt.bar(positions, scores[order], color="r", align="center")
        plt.xticks(positions, order)


if __name__ == '__main__':
    # Ad-hoc run executed only when the module is launched as a script.
    # NOTE(review): the snippet is cut off after the classifier is created.
    # ''' DONORS
    copper.config.path = '../tests'

    # TARGET_B is the target; TARGET_D is rejected and ID is treated as a category
    ds = copper.read_csv('donors/data.csv')
    ds.role['TARGET_D'] = ds.REJECTED
    ds.role['TARGET_B'] = ds.TARGET
    ds.type['ID'] = ds.CATEGORY

    # Fill these two columns using 'mean'
    ds.fillna('DemAge', 'mean')
    ds.fillna('GiftAvgCard36', 'mean')

    ml = copper.MachineLearning()
    ml.dataset = ds
    ml.sample(trainSize=0.5)

    # Decision tree limited to depth 10; it is not registered (see the commented call)
    from sklearn import tree
    tree_clf = tree.DecisionTreeClassifier(compute_importances=True, max_depth=10)
    # ml.add_clf(tree_clf, 'DT')
Пример #21
0
    def test_1_role_ml(self):
        '''
        Depends on: test_1_fillna and test_1_cat_2_num

        Checks:
            1. Every column starts with the INPUT role
            2. Rejecting columns one by one shrinks `inputs` accordingly
            3. Giving a column the TARGET role removes it from `inputs` and
               makes it the `target`
        '''
        self.setUpData()
        dataset = copper.Dataset('dataset/1/data.csv')
        raw = copper.read_csv('dataset/1/data.csv')
        expected = copper.read_csv('dataset/1/ml.csv')

        # 1. Initial roles
        for column in ('Number.1', 'Number.2', 'Cat.1', 'Cat.2', 'Num.as.Cat'):
            self.assertEqual(dataset.role[column], dataset.INPUT)
        self.assertEqual(dataset.frame, raw)

        # Correct the data: numeric 'Num.as.Cat' and no missing values
        dataset.type['Num.as.Cat'] = dataset.NUMBER
        dataset.update()
        dataset.fillna(method='mean')

        # 2. Input values are correct
        self.assertEqual(dataset.inputs, expected)

        # 2.1 - 2.4 Reject one column at a time. The number is the position of
        # the first column of the solution that is still expected in `inputs`.
        rejections = (('Number.1', 1), ('Number.2', 2), ('Cat.1', 4), ('Cat.2', 6))
        for column, first_kept in rejections:
            dataset.role[column] = dataset.REJECTED
            self.assertEqual(dataset.role[column], dataset.REJECTED)
            self.assertEqual(dataset.inputs, expected[expected.columns[first_kept:]])
        # Only 'Num.as.Cat' is left at this point
        self.assertEqual(dataset.inputs, expected[['Num.as.Cat']])

        # 3. Target
        # Back to normal
        for column in ('Number.1', 'Number.2', 'Cat.1', 'Cat.2'):
            dataset.role[column] = dataset.INPUT
        self.assertEqual(dataset.inputs, expected)

        # Set the target: the last solution column leaves the inputs
        dataset.role['Num.as.Cat'] = dataset.TARGET
        self.assertEqual(dataset.inputs, expected[expected.columns[:-1]])
        expected_target = expected[expected.columns[-1]]
        expected_target.name = 'Target'
        self.assertEqual(dataset.target, expected_target)
Пример #22
0
import copper
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime, date
# NOTE(review): presumably the directory copper resolves data files against - confirm
copper.project.path = '../../'

# Value substituted for {1} in the schedule URL
year = 2013
# One row per team; the loop below reads its 'name', 'prefix_1' and 'prefix_2' columns
teams = copper.read_csv('teams.csv')
# Schedule page: {0} and {2} come from the team prefixes, {1} from year
BASE_URL = 'http://espn.go.com/nba/team/schedule/_/name/{0}/year/{1}/{2}'
# Box score page of a single game (not used in the visible part of the script)
BASE_GAME_URL = 'http://espn.go.com/nba/boxscore?gameId={0}'

# Accumulators for the scraped values (presumably one entry per game - confirm
# against the part of the script that is not visible here)
game_id = []
dates = []
home_team = []
home_team_score = []
visit_team = []
visit_team_score = []
# One schedule page is requested per team.
# NOTE(review): the snippet is cut off inside the `try` block below.
for index, row in teams.iterrows():
# for index, row in teams[:1].iterrows():
    _team = row['name']
    print(_team)
    r = requests.get(BASE_URL.format(row['prefix_1'], year, row['prefix_2']))
    # First <table> element of the page
    table = BeautifulSoup(r.text).table
    # The first <tr> is skipped.
    # NOTE(review): this loop rebinds `row`, shadowing the team row of the outer loop.
    for row in table.find_all('tr')[1:]:
    # for row in table.find_all('tr')[1:3]:
        columns = row.find_all('td')
        try: 
            # Game id: what follows '?id=' in the link of the third cell
            _id = columns[2].a['href'].split('?id=')[1]
            # Home game when the <li> text of the second cell is 'vs'
            _home = True if columns[1].li.text == 'vs' else False
Пример #23
0
import copper
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime, date
# NOTE(review): presumably the directory copper resolves data files against - confirm
copper.project.path = '../..'

# Games table indexed by its 'id' column; the index values are used as game ids below
games = copper.read_csv('games.csv').set_index('id')
# Box score page of a game; {0} is filled with the game id
BASE_URL = 'http://espn.go.com/nba/boxscore?gameId={0}'

# Only the first game is requested here; its page is used to read the column titles
request = requests.get(BASE_URL.format(games.index[0]))

# First table with class 'mod-data'; the titles come from the second row of
# its first <thead>, skipping the first <th>
table = BeautifulSoup(request.text).find('table', class_='mod-data')
heads = table.find_all('thead')
headers = heads[0].find_all('tr')[1].find_all('th')[1:]
headers = [th.text for th in headers]
# Three fixed columns followed by the scraped titles
columns = ['id', 'team', 'player'] + headers

# Empty frame with the final column layout
players = pd.DataFrame(columns=columns)


def get_players(players, team_name):
    array = np.zeros((len(players), len(headers) + 1), dtype=object)
    array[:] = np.nan
    for i, player in enumerate(players):
        cols = player.find_all('td')
        array[i, 0] = cols[0].text.split(',')[0]
        for j in range(1, len(headers) + 1):
            if not cols[1].text.startswith('DNP'):
                array[i, j] = cols[j].text