示例#1
0
 def test_twitter(self):
     """Smoke-test the Twitter pull pipeline.

     Pulls from Twitter, keeps a fixed subset of columns, runs type
     detection and renders the result. The rendered string is discarded,
     so the test only checks that the pipeline runs without raising.
     """
     wanted_columns = [
         "author_name",
         "author_id",
         "author_screen_name",
         "created_at",
         "hashtags",
         "text",
         "in_reply_to_status_id_str",
     ]
     tweets = Babe().pull_twitter().filterColumns(keep_fields=wanted_columns)
     tweets.typedetect().to_string()
示例#2
0
 def test_buzzdata(self):
     """Smoke-test pulling an XLS dataset over the ``buzzdata`` protocol.

     Only the first two rows are kept (``head(2)``) and rendered; the
     rendered string is not asserted on.
     """
     pull_options = dict(
         protocol='buzzdata',
         dataroom='best-city-contest-worldwide-cost-of-living-index',
         uuid='aINAPyLGur4y37yAyCM7w3',
         username='******',
         format='xls',
     )
     preview = Babe().pull(**pull_options).head(2)
     preview.to_string()
示例#3
0
 def test_buzzdata(self):
     """Pull an XLS dataset via ``buzzdata``, keep two rows and render them.

     There is no assertion on the rendered output; this is a smoke test.
     """
     dataset = Babe().pull(protocol="buzzdata",
                           dataroom="best-city-contest-worldwide-cost-of-living-index",
                           uuid="aINAPyLGur4y37yAyCM7w3",
                           username="******",
                           format="xls")
     first_rows = dataset.head(2)
     first_rows.to_string()
 def test_twitter(self):
     """Pull from Twitter, keep selected columns, detect types and render.

     The rendered string is discarded; the test passes if nothing raises.
     """
     kept = [
         "author_name", "author_id", "author_screen_name", "created_at",
         "hashtags", "text", "in_reply_to_status_id_str",
     ]
     stream = Babe().pull_twitter()
     stream = stream.filterColumns(keep_fields=kept).typedetect()
     stream.to_string()
 def test_user_agent(self):
     """Check ``user_agent`` on the ``useragent`` field of CSV ``self.s``.

     OS, browser and browser version are requested as the ``os``,
     ``browser`` and ``browser_version`` outputs; the rendered result
     must equal ``self.s2``.
     """
     a = Babe().pull(string=self.s, format="csv")
     a = a.user_agent(field="useragent",
                      output_os="os",
                      output_browser="browser",
                      output_browser_version="browser_version")
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s2)
 def test_bulk(self):
     """Check ``bulkMapTo`` with ``bulk_size=2``, inserting field ``b``.

     The mapping function receives a list of rows and returns, for each of
     them, a one-element row holding the sum of column ``a`` over that
     list. The rendered result must equal ``self.s2``.
     """
     a = Babe().pull(stream=StringIO(self.s), format="csv")
     a = a.typedetect()
     # The parameter is named ``rows`` so the builtin ``list`` is not shadowed.
     a = a.bulkMapTo(lambda rows: [[sum([r.a for r in rows])]] * len(rows),
                     bulk_size=2,
                     insert_fields=["b"])
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s2)
示例#7
0
    def test_html(self):
        a = Babe().pull(string=self.s, format="csv")
        self.assertEqual(a.to_string(format="html"), """<h2></h2><table>
<tr><th>a</th><th>b</th></tr>
<tr><td>1</td><td>2</td></tr>
</table>
""")
示例#8
0
 def test_join_none(self):
     """Check ``join`` with ``on_error=Babe.ON_ERROR_NONE``.

     ``self.s1`` is joined on ``country`` against ``self.s2_bis`` keyed by
     ``country_code``; the rendered result must equal ``self.sjoined_bis``.
     """
     a = Babe().pull(string=self.s1, format='csv')
     a = a.join(join_stream=Babe().pull(string=self.s2_bis, format='csv'),
                key='country',
                join_key='country_code',
                on_error=Babe.ON_ERROR_NONE)
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.sjoined_bis)
示例#9
0
 def test_bulk(self):
     """Check ``bulkMapTo`` on a type-detected CSV stream.

     Rows are handed to the mapper with ``bulk_size=2``; the mapper returns
     one single-value row per input row, each holding the sum of ``a`` over
     the rows it received, inserted as field ``b``. Output must equal
     ``self.s2``.
     """
     a = Babe().pull(stream=StringIO(self.s), format="csv")
     a = a.typedetect()
     # Avoid ``list`` as a parameter name: it would shadow the builtin.
     a = a.bulkMapTo(lambda batch: [[sum([r.a for r in batch])]] * len(batch),
                     bulk_size=2,
                     insert_fields=["b"])
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s2)
示例#10
0
 def test_user_agent(self):
     """Run ``user_agent`` on the ``useragent`` field and compare with ``self.s2``.

     The call requests ``os``, ``browser`` and ``browser_version`` as
     output names.
     """
     a = Babe().pull(string=self.s, format="csv")
     a = a.user_agent(field="useragent",
                      output_os="os",
                      output_browser="browser",
                      output_browser_version="browser_version")
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s2)
示例#11
0
 def test_join_none(self):
     """Join ``self.s1`` with ``self.s2_bis`` using ``Babe.ON_ERROR_NONE``.

     The join uses ``country`` on the left and ``country_code`` on the
     joined stream; output must equal ``self.sjoined_bis``.
     """
     a = Babe().pull(string=self.s1, format='csv')
     a = a.join(join_stream=Babe().pull(string=self.s2_bis, format='csv'),
                key='country',
                join_key='country_code',
                on_error=Babe.ON_ERROR_NONE)
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.sjoined_bis)
示例#12
0
 def test_http(self):
     """Pull ``remote/files/test.csv`` over HTTP from localhost on ``self.port``.

     The rendered stream must match the expected four-column CSV text.
     """
     a = Babe().pull(protocol='http',
                     host='localhost',
                     name='Test',
                     filename='remote/files/test.csv',
                     port=self.port)
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(
         a.to_string(),
         'foo,bar,f,d\n1,2,3.2,2010/10/02\n3,4,1.2,2011/02/02\n')
示例#13
0
 def test_groupby(self):
     """Group rows by ``a`` with a reducer that sums ``b`` per key.

     Input keys 1, 3, 1 with values 2, 4, 4 must render as ``1,6`` and ``3,4``.
     """
     a = Babe().pull(string='a,b\n1,2\n3,4\n1,4\n',
                     format="csv").typedetect()
     a = a.group(key="a",
                 reducer=lambda key, rows:
                 (key, sum([row.b for row in rows])))
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), "a,b\n1,6\n3,4\n")
示例#14
0
    def test_tuple(self):
        a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        a = a.mapTo(lambda obj: obj._replace(foo=obj.foo + 1))
        s = """foo,bar,f,d
2,2,3.2,2010/10/02
4,4,1.2,2011/02/02
"""
        self.assertEquals(a.to_string(), s)
示例#15
0
    def test_insert(self):
        a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        a = a.mapTo(lambda row: row.foo + 1, insert_fields=['fooplus'])
        s = """foo,bar,f,d,fooplus
1,2,3.2,2010/10/02,2
3,4,1.2,2011/02/02,4
"""
        self.assertEquals(a.to_string(), s)
示例#16
0
    def test_insert(self):
        a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        a = a.mapTo(lambda row : row.foo+1, insert_fields=['fooplus'])
        s = """foo,bar,f,d,fooplus
1,2,3.2,2010/10/02,2
3,4,1.2,2011/02/02,4
"""
        self.assertEquals(a.to_string(), s)
示例#17
0
    def test_tuple(self):
        a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        a = a.mapTo(lambda obj: obj._replace(foo=obj.foo + 1))
        s = """foo,bar,f,d
2,2,3.2,2010/10/02
4,4,1.2,2011/02/02
"""
        self.assertEquals(a.to_string(),  s)
 def test_parse(self):
     """Check ``parse_time`` on field ``time`` from ``CET`` input to ``GMT`` output.

     The call requests ``time``, ``date`` and ``hour`` outputs; the
     rendered result must equal ``self.s2``.
     """
     a = Babe().pull(stream=StringIO(self.s), format='csv')
     a = a.parse_time(field="time",
                      output_time="time",
                      output_date="date",
                      output_hour="hour",
                      input_timezone="CET",
                      output_timezone="GMT")
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s2)
示例#19
0
 def test_s3(self):
     """Round-trip a small CSV through S3: push ``test3.csv``, pull it back, compare.

     Uses bucket ``florian-test`` for both the push and the pull.
     """
     s = "a,b\n1,2\n3,4\n"
     a = Babe().pull(string=s, format='csv', name='Test')
     a.push(filename='test3.csv', bucket='florian-test', protocol="s3")
     b = Babe().pull(filename='test3.csv',
                     name='Test',
                     bucket='florian-test',
                     protocol="s3")
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(b.to_string(), s)
示例#20
0
 def test_parse(self):
     """Run ``parse_time`` (input timezone ``CET``, output ``GMT``) and compare with ``self.s2``.

     The ``time`` field is parsed into ``time``, ``date`` and ``hour`` outputs.
     """
     a = Babe().pull(stream=StringIO(self.s), format='csv')
     a = a.parse_time(field="time",
                      output_time="time",
                      output_date="date",
                      output_hour="hour",
                      input_timezone="CET",
                      output_timezone="GMT")
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s2)
示例#21
0
 def test_pushsqlite(self):
     """Round-trip ``self.s`` through a SQLite table.

     Pushes to ``test_table`` in ``tests/files/test.sqlite`` with
     ``drop_table`` and ``create_table`` set, pulls the table back and
     expects the same CSV text.
     """
     a = Babe().pull(string=self.s, format='csv')
     a = a.typedetect()
     a.push_sql(table='test_table',
                database_kind='sqlite',
                database='tests/files/test.sqlite',
                drop_table=True,
                create_table=True)
     b = Babe().pull_sql(database_kind='sqlite',
                         database='tests/files/test.sqlite',
                         table='test_table')
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(b.to_string(), self.s)
    def test_pull_bigquery(self):
        dataset_id = 'ladata'
        day = '20151010'
        table_name = 'crazy_{}'.format(day)
        query = """
SELECT
    uid,
    count(1)
FROM
    [{}.{}]
WHERE
    name='pgr'
GROUP BY 1
ORDER BY 2 DESC;""".format(dataset_id, table_name)

        a = Babe().pull_bigquery(project_id='bigquery-testing-1098',
                                 query=query,
                                 timeout=1000,
                                 num_retries=2)

        print a.to_string()
示例#23
0
 def test_vectorwise(self):
     """Round-trip ``self.s`` through a ``vectorwise`` table.

     Pushes to ``test_table`` in database ``pybabe_test`` (dropping and
     creating the table), pulls it back and expects identical output.
     """
     a = Babe().pull(string=self.s, format='csv')
     a = a.typedetect()
     a.push_sql(table='test_table',
                database_kind='vectorwise',
                database='pybabe_test',
                drop_table=True,
                create_table=True)
     b = Babe().pull_sql(database_kind='vectorwise',
                         database='pybabe_test',
                         table='test_table')
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(b.to_string(), self.s)
示例#24
0
 def test_s3(self):
     """Push a two-row CSV to S3 as ``test3.csv`` and check it reads back unchanged.

     Both operations target bucket ``florian-test``.
     """
     s = "a,b\n1,2\n3,4\n"
     a = Babe().pull(string=s,
                     format='csv',
                     name='Test')
     a.push(filename='test3.csv',
            bucket='florian-test',
            protocol="s3")
     b = Babe().pull(filename='test3.csv',
                     name='Test',
                     bucket='florian-test',
                     protocol="s3")
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(b.to_string(), s)
示例#25
0
    def test_pushsqlite_partition(self):
        """Check ``push_sql`` with ``delete_partition=True`` on a SQLite table.

        ``self.s`` is pushed into a freshly created ``test_table``; then
        ``self.s2``, partitioned on ``id``, is pushed with
        ``delete_partition=True``. The table sorted by ``id`` must equal
        ``self.sr``.
        """
        a = Babe().pull(string=self.s, format='csv')
        a = a.typedetect()
        a.push_sql(table='test_table',
                   database_kind='sqlite',
                   database='test.sqlite',
                   drop_table=True,
                   create_table=True)

        a = Babe().pull(string=self.s2, format='csv')
        a = a.typedetect()
        a = a.partition(field='id')
        a.push_sql(table='test_table',
                   database_kind='sqlite',
                   database='test.sqlite',
                   delete_partition=True)

        b = Babe().pull_sql(database_kind='sqlite',
                            database='test.sqlite',
                            table='test_table')
        b = b.sort(field="id")
        # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
        self.assertEqual(b.to_string(), self.sr)
示例#26
0
 def test_vectorwise(self):
     """Push ``self.s`` to a ``vectorwise`` table and check it pulls back unchanged.

     Uses ``test_table`` in database ``pybabe_test``; the table is dropped
     and created on push.
     """
     a = Babe().pull(string=self.s, format='csv')
     a = a.typedetect()
     a.push_sql(table='test_table',
                database_kind='vectorwise',
                database='pybabe_test',
                drop_table=True,
                create_table=True)
     b = Babe().pull_sql(database_kind='vectorwise',
                         database='pybabe_test',
                         table='test_table')
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(b.to_string(), self.s)
示例#27
0
    def test_pushsqlite_partition(self):
        """Push two streams to SQLite, the second with ``delete_partition=True``.

        First ``self.s`` populates a freshly created ``test_table``; then
        ``self.s2`` is partitioned on ``id`` and pushed with
        ``delete_partition=True``. The table, sorted by ``id``, must equal
        ``self.sr``.
        """
        a = Babe().pull(string=self.s, format='csv')
        a = a.typedetect()
        a.push_sql(table='test_table',
                   database_kind='sqlite',
                   database='test.sqlite',
                   drop_table=True,
                   create_table=True)

        a = Babe().pull(string=self.s2, format='csv')
        a = a.typedetect()
        a = a.partition(field='id')
        a.push_sql(table='test_table',
                   database_kind='sqlite',
                   database='test.sqlite',
                   delete_partition=True)

        b = Babe().pull_sql(database_kind='sqlite',
                            database='test.sqlite',
                            table='test_table')
        b = b.sort(field="id")
        # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
        self.assertEqual(b.to_string(), self.sr)
示例#28
0
 def test_groupby(self):
     """Group by ``a`` and sum ``b`` within each group; compare the rendered CSV."""
     a = Babe().pull(string='a,b\n1,2\n3,4\n1,4\n',
                     format="csv").typedetect()
     a = a.group(key="a",
                 reducer=lambda key, rows: (key, sum([row.b for row in rows])))
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), "a,b\n1,6\n3,4\n")
示例#29
0
 def test_sqldump(self):
     """Pull ``self.s`` with ``format='sql'`` for table ``foobar`` and compare with ``self.s2``.

     The fields ``id``, ``number``, ``title`` and ``datetime`` are passed
     explicitly to the pull.
     """
     a = Babe().pull(string=self.s,
                     format='sql',
                     table='foobar',
                     fields=['id', 'number', 'title', 'datetime'])
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s2)
示例#30
0
 def test_html(self):
     """Render CSV ``self.s`` as HTML and print it.

     Nothing is asserted; the test only checks that rendering does not raise.
     """
     a = Babe().pull(string=self.s, format="csv")
     # Parenthesised form is valid on both Python 2 and Python 3.
     print(a.to_string(format="html"))
示例#31
0
 def test_replace(self):
     """Check ``replace_in_string("cou", "bar", field="b")``; output must equal ``self.sr``."""
     a = Babe().pull(stream=StringIO(self.s), format="csv")
     a = a.replace_in_string("cou", "bar", field="b")
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.sr)
示例#32
0
 def test_http(self):
     """Pull ``remote/test.csv`` over HTTP from localhost on ``self.port`` and compare."""
     a = Babe().pull(protocol="http",
                     host="localhost",
                     name="Test",
                     filename="remote/test.csv",
                     port=self.port)
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(
         a.to_string(),
         "foo,bar,f,d\n1,2,3.2,2010/10/02\n3,4,1.2,2011/02/02\n")
示例#33
0
 def test_filter2(self):
     """Check ``filterColumns(remove_fields=['a'])``: only column ``b`` remains."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filterColumns(remove_fields=['a'])
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), "b\n2\n4\n4\n")
示例#34
0
 def test_replace(self):
     """Check ``mapTo`` with ``fields=['a', 'b']``: rows become ``[foo + 1, bar * 2]``.

     NOTE: despite its name, this test calls ``mapTo`` and no replace
     operation.
     """
     a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
     a = a.mapTo(lambda row: [row.foo + 1, row.bar * 2], fields=['a', 'b'])
     s = """a,b\n2,4\n4,8\n"""
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), s)
示例#35
0
 def test_dedup2(self):
     """Check ``dedup()`` with no arguments on ``self.s2``; output must equal ``self.s3``."""
     a = Babe().pull(stream=StringIO(self.s2), format="csv")
     a = a.dedup()
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s3)
示例#36
0
 def test_windowMap(self):
     """Check ``windowMap`` called with ``3`` and a summing function.

     The function builds a row (via ``_make`` on the last row it receives)
     holding the sum of column ``a`` over the rows passed to it. For input
     1..7 the expected output is 1, 3, 6, 9, 12, 15, 18.
     """
     a = Babe().pull(stream=StringIO('a\n1\n2\n3\n4\n5\n6\n7\n'),
                     format="csv").typedetect()
     a = a.windowMap(
         3, lambda rows: rows[-1]._make([sum([row.a for row in rows])]))
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), 'a\n1\n3\n6\n9\n12\n15\n18\n')
示例#37
0
 def test_min(self):
     """Check ``minN(column='a', n=2)``: the two rows with ``a == 1`` are expected."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.minN(column='a', n=2)
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), 'a,b\n1,2\n1,4\n')
示例#38
0
 def test_filter_values(self):
     """Check ``filter_values(a=3, b=4)``: only the row ``3,4`` is expected."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filter_values(a=3, b=4)
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), "a,b\n3,4\n")
示例#39
0
 def test_http(self):
     """Fetch ``remote/test.csv`` via the ``http`` protocol on ``self.port`` and compare."""
     a = Babe().pull(protocol='http',
                     host='localhost',
                     name='Test',
                     filename='remote/test.csv',
                     port=self.port)
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(
         a.to_string(),
         'foo,bar,f,d\n1,2,3.2,2010/10/02\n3,4,1.2,2011/02/02\n')
示例#40
0
 def test_dedup4(self):
     """Check ``dedup(fields=['value'])`` on ``self.s``; output must equal ``self.s4``."""
     a = Babe().pull(stream=StringIO(self.s), format="csv")
     a = a.dedup(fields=['value'])
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s4)
示例#41
0
 def test_groupAll(self):
     """Check ``group_all`` with a reducer returning the maximum of ``b`` as field ``max``."""
     a = Babe().pull(string='a,b\n1,2\n3,4\n1,4\n',
                     format="csv").typedetect()
     a = a.group_all(reducer=lambda rows: (max([row.b for row in rows]), ),
                     fields=['max'])
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), "max\n4\n")
示例#42
0
 def test_pushpull(self):
     """Round-trip ``self.s2`` through MongoDB.

     Pushes to collection ``test_pushpull`` in db ``pybabe_test`` (with
     ``drop_collection=True``), pulls fields ``rown``, ``f`` and ``s``
     back and expects the same CSV text.
     """
     a = Babe().pull(string=self.s2, format="csv", primary_key="rown")
     a = a.typedetect()
     a.push_mongo(db="pybabe_test",
                  collection="test_pushpull",
                  drop_collection=True)
     b = Babe().pull_mongo(db="pybabe_test",
                           fields=["rown", "f", "s"],
                           collection="test_pushpull")
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(b.to_string(), self.s2)
示例#43
0
 def test_tuple(self):
     """Check ``flatMap``: each row yields one row per ``:``-separated part of ``b``."""
     a = Babe().pull(stream=StringIO("a,b\n1,2:3\n4,5:6\n"), format="csv")
     a = a.flatMap(
         lambda row: [row._replace(b=i) for i in row.b.split(':')])
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), "a,b\n1,2\n1,3\n4,5\n4,6\n")
示例#44
0
 def test_filter(self):
     """Check ``filter`` with the predicate ``x.a == 3``: only row ``3,4`` is expected."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filter(function=lambda x: x.a == 3)
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), 'a,b\n3,4\n')
示例#45
0
 def test_filter(self):
     """Keep only rows whose ``a`` equals 3 via ``filter`` and compare the output."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filter(function=lambda x: x.a == 3)
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), 'a,b\n3,4\n')
 def test_load(self):
     """Pull one hour of data via ``pull_kontagent`` in sample mode and print ten rows.

     Nothing is asserted; the test only checks that the pull and render
     do not raise.
     """
     start_time = "2012-04-23 11:00"
     end_time = "2012-04-23 12:00"
     a = Babe().pull_kontagent(start_time, end_time, sample_mode=True)
     a = a.head(n=10)
     # Parenthesised form is valid on both Python 2 and Python 3.
     print(a.to_string())
示例#47
0
 def test_zip(self):
     """Round-trip ``self.s`` through ``tests/files/test.zip``: push, pull back, compare."""
     a = Babe().pull(string=self.s, format="csv")
     a.push(filename='tests/files/test.zip')
     b = Babe().pull(filename='tests/files/test.zip')
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(b.to_string(), self.s)
示例#48
0
 def test_primarykey3(self):
     """Check that ``primary_key_detect`` leaves the rendered output of ``self.s3`` unchanged."""
     a = Babe().pull(stream=StringIO(self.s3), format='csv')
     a = a.primary_key_detect()
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s3)
示例#49
0
 def test_country_code(self):
     """Check ``geoip_country_code()`` on CSV ``self.s``; output must equal ``self.s2``."""
     a = Babe().pull(string=self.s, format='csv')
     a = a.geoip_country_code()
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s2)
示例#50
0
 def test_transpose(self):
     """Check ``transpose()`` on ``self.s`` pulled with ``primary_key='city'``.

     The rendered result must equal ``self.s2``.
     """
     a = Babe().pull(string=self.s, format='csv',
                     primary_key='city').transpose()
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s2)
示例#51
0
 def test_transpose(self):
     """Transpose a stream keyed on ``city`` and compare the output with ``self.s2``."""
     a = Babe().pull(string=self.s,
                     format='csv',
                     primary_key='city').transpose()
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.s2)
示例#52
0
 def test_airport(self):
     """Smoke-test ``primary_key_detect`` on ``data/airports.csv``.

     Only the first ten rows are rendered and the rendered string is
     discarded; the test passes if nothing raises.
     """
     airports = Babe().pull(filename='data/airports.csv')
     sample = airports.primary_key_detect().head(n=10)
     sample.to_string()
示例#53
0
 def test_replace(self):
     """Replace ``"cou"`` with ``"bar"`` in field ``b`` and compare the output with ``self.sr``."""
     a = Babe().pull(stream=StringIO(self.s), format="csv")
     a = a.replace_in_string("cou", "bar", field="b")
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.sr)
示例#54
0
 def test_filter2(self):
     """Remove column ``a`` with ``filterColumns`` and check that only ``b`` is rendered."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filterColumns(remove_fields=['a'])
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), "b\n2\n4\n4\n")
示例#55
0
 def test_groupAll(self):
     """Reduce all rows to a single ``max`` field holding the largest ``b`` via ``group_all``."""
     a = Babe().pull(string='a,b\n1,2\n3,4\n1,4\n',
                     format="csv").typedetect()
     a = a.group_all(reducer=lambda rows: (max([row.b for row in rows]),),
                     fields=['max'])
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), "max\n4\n")
示例#56
0
 def test_tuple(self):
     """Split ``b`` on ``:`` with ``flatMap`` and expect one output row per part."""
     a = Babe().pull(stream=StringIO("a,b\n1,2:3\n4,5:6\n"), format="csv")
     a = a.flatMap(lambda row: [row._replace(b=i) for i in row.b.split(':')])
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), "a,b\n1,2\n1,3\n4,5\n4,6\n")
示例#57
0
 def test_join(self):
     """Check ``join`` of ``self.s1`` with ``self.s2``.

     The join uses ``country`` on the left and ``country_code`` on the
     joined stream; output must equal ``self.sjoined``.
     """
     a = Babe().pull(string=self.s1, format='csv')
     a = a.join(join_stream=Babe().pull(string=self.s2, format='csv'),
                key='country',
                join_key='country_code')
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), self.sjoined)
示例#58
0
 def test_replace(self):
     """Map each row to ``[foo + 1, bar * 2]`` under fields ``a`` and ``b``.

     NOTE: despite its name, this test calls ``mapTo`` and no replace
     operation.
     """
     a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
     a = a.mapTo(lambda row: [row.foo + 1, row.bar * 2], fields=['a', 'b'])
     s = """a,b\n2,4\n4,8\n"""
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), s)
示例#59
0
 def test_rename(self):
     """Check ``rename(a="c")``: the header becomes ``c,b`` and the rows are unchanged."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.rename(a="c")
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), 'c,b\n1,2\n3,4\n1,4\n')
示例#60
0
 def test_filter_values(self):
     """Keep rows matching ``a=3`` and ``b=4`` via ``filter_values`` and compare the output."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filter_values(a=3, b=4)
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
     self.assertEqual(a.to_string(), "a,b\n3,4\n")