def clean_frame(df):
    """Filter a fundraising frame and add the derived columns.

    Steps, in order:

    1. Run every amount/id column through ``clean.map_remove`` with the
       ``clean.neg_num`` predicate (negative-number removal).
    2. Run ``amount_goal`` and then ``amount_raised`` through
       ``outlier.remove_outliers`` with the argument ``5`` (its meaning is
       defined by that helper).
    3. Add ``amount_needed`` (goal minus raised) and a boolean
       ``completed`` column.

    Args:
        df: frame holding ``amount_raised``, ``amount_goal``,
            ``category_id`` and ``platform_id`` columns.

    Returns:
        The filtered frame with ``amount_needed`` and ``completed`` added.
    """
    # remove negative nums
    # NOTE(review): the amount columns used to be filtered a second time
    # after the id columns; that repeat is dropped on the assumption that
    # map_remove is a plain row filter -- confirm.
    for column in ('amount_raised', 'amount_goal', 'category_id', 'platform_id'):
        df = clean.map_remove(df, column, clean.neg_num)

    # remove outliers
    for column in ('amount_goal', 'amount_raised'):
        df = outlier.remove_outliers(df, 5, column)

    # add amount needed column to df
    df['amount_needed'] = df['amount_goal'] - df['amount_raised']

    # add completed column
    # Item assignment is required to create a column: attribute assignment
    # (df.completed = ...) only sets a plain Python attribute, and the old
    # value was a one-element list wrapping the mask, not the mask itself.
    # NOTE(review): rows where raised exceeds goal have a negative
    # amount_needed and are therefore not marked completed -- confirm.
    df['completed'] = df['amount_needed'] == 0

    # tokenize keywords using NLTK library (currently disabled)
    #df.description = keyw.tokenizeElements(df, 'description')

    return df
def clean_frame(df):
    """Filter a fundraising frame and add the derived columns.

    Steps, in order:

    1. Run every amount/id column through ``clean.map_remove`` with the
       ``clean.neg_num`` predicate (negative-number removal).
    2. Run ``amount_goal`` and then ``amount_raised`` through
       ``outlier.remove_outliers`` with the argument ``5`` (its meaning is
       defined by that helper).
    3. Add ``amount_needed`` (goal minus raised) and a boolean
       ``completed`` column.

    Args:
        df: frame holding ``amount_raised``, ``amount_goal``,
            ``category_id`` and ``platform_id`` columns.

    Returns:
        The filtered frame with ``amount_needed`` and ``completed`` added.
    """
    # remove negative nums
    # NOTE(review): the amount columns used to be filtered a second time
    # after the id columns; that repeat is dropped on the assumption that
    # map_remove is a plain row filter -- confirm.
    for column in ('amount_raised', 'amount_goal', 'category_id', 'platform_id'):
        df = clean.map_remove(df, column, clean.neg_num)

    # remove outliers
    for column in ('amount_goal', 'amount_raised'):
        df = outlier.remove_outliers(df, 5, column)

    # add amount needed column to df
    df['amount_needed'] = df['amount_goal'] - df['amount_raised']

    # add completed column
    # Item assignment is required to create a column: attribute assignment
    # (df.completed = ...) only sets a plain Python attribute, and the old
    # value was a one-element list wrapping the mask, not the mask itself.
    # NOTE(review): rows where raised exceeds goal have a negative
    # amount_needed and are therefore not marked completed -- confirm.
    df['completed'] = df['amount_needed'] == 0

    # tokenize keywords using NLTK library (currently disabled)
    #df.description = keyw.tokenizeElements(df, 'description')

    return df
 def test_returns_dataframe_arg1(self):
     """map_remove on the ``df2`` fixture returns a DataFrame-named object."""
     result = m.map_remove(df2, 'goal', m.neg_num)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(type(result).__name__, 'DataFrame')
 def test_raise_IndexError_for_out_of_range(self):
     """map_remove raises IndexError when given 90 as the column argument."""
     # The call must run inside assertRaises; evaluating it first (as the
     # old code did) lets the exception escape before it can be asserted.
     with self.assertRaises(IndexError):
         m.map_remove(df, 90, m.neg_num)
 def test_raise_TypeError_for_non__arg2(self):
     """map_remove raises TypeError when the column argument is a list."""
     # The call must run inside assertRaises; evaluating it first (as the
     # old code did) lets the exception escape before it can be asserted.
     with self.assertRaises(TypeError):
         m.map_remove(df, [2, 2], m.neg_num)
 def test_raise_TypeError_for_non_dataframe_arg1(self):
     """map_remove raises TypeError when the first argument is a string."""
     # The call must run inside assertRaises; evaluating it first (as the
     # old code did) lets the exception escape before it can be asserted.
     with self.assertRaises(TypeError):
         m.map_remove('qwerty', 3, m.neg_num)
 def test_should_remove_rows_where_function_true_for_col(self):
     """Check the length of map_remove's result for several column arguments.

     Each pair is (column argument, expected ``len`` of the result) for the
     ``df`` fixture filtered with ``m.neg_num``.
     """
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     for column, expected_len in ((0, 3), (6, 4), (2, 3)):
         self.assertEqual(len(m.map_remove(df, column, m.neg_num)), expected_len)
示例#8
0
 def test_returns_dataframe_arg1(self):
     """map_remove on the ``df2`` fixture returns a DataFrame-named object."""
     result = m.map_remove(df2, 'goal', m.neg_num)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(type(result).__name__, 'DataFrame')
示例#9
0
 def test_raise_IndexError_for_out_of_range(self):
     """map_remove raises IndexError when given 90 as the column argument."""
     # The call must run inside assertRaises; evaluating it first (as the
     # old code did) lets the exception escape before it can be asserted.
     with self.assertRaises(IndexError):
         m.map_remove(df, 90, m.neg_num)
示例#10
0
 def test_raise_TypeError_for_non__arg2(self):
     """map_remove raises TypeError when the column argument is a list."""
     # The call must run inside assertRaises; evaluating it first (as the
     # old code did) lets the exception escape before it can be asserted.
     with self.assertRaises(TypeError):
         m.map_remove(df, [2, 2], m.neg_num)
示例#11
0
 def test_raise_TypeError_for_non_dataframe_arg1(self):
     """map_remove raises TypeError when the first argument is a string."""
     # The call must run inside assertRaises; evaluating it first (as the
     # old code did) lets the exception escape before it can be asserted.
     with self.assertRaises(TypeError):
         m.map_remove('qwerty', 3, m.neg_num)
示例#12
0
 def test_should_remove_rows_where_function_true_for_col(self):
     """Check the length of map_remove's result for several column arguments.

     Each pair is (column argument, expected ``len`` of the result) for the
     ``df`` fixture filtered with ``m.neg_num``.
     """
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     for column, expected_len in ((0, 3), (6, 4), (2, 3)):
         self.assertEqual(len(m.map_remove(df, column, m.neg_num)), expected_len)