Пример #1
0
 def test_twitter(self):
     # Smoke test: calls pull_twitter(), keeps only the listed columns, runs
     # typedetect() and renders the result. The output is not asserted.
     kept_columns = [
         "author_name", "author_id", "author_screen_name", "created_at",
         "hashtags", "text", "in_reply_to_status_id_str",
     ]
     tweets = Babe().pull_twitter().filterColumns(keep_fields=kept_columns)
     tweets.typedetect().to_string()
Пример #2
0
 def test_buzzdata(self):
     # Smoke test: pulls an 'xls' dataset through the 'buzzdata' protocol,
     # keeps head(2) and renders it. The output is not asserted.
     preview = Babe().pull(
         protocol='buzzdata',
         dataroom='best-city-contest-worldwide-cost-of-living-index',
         uuid='aINAPyLGur4y37yAyCM7w3',
         username='******',
         format='xls',
     ).head(2)
     preview.to_string()
Пример #3
0
 def test_buzzdata(self):
     # Smoke test: pulls an "xls" dataset through the "buzzdata" protocol,
     # takes head(2) of it and renders that. The output is not asserted.
     dataset = Babe().pull(protocol="buzzdata",
                           dataroom="best-city-contest-worldwide-cost-of-living-index",
                           uuid="aINAPyLGur4y37yAyCM7w3",
                           username="******",
                           format="xls")
     first_rows = dataset.head(2)
     first_rows.to_string()
Пример #4
0
 def test_twitter(self):
     # Smoke test: calls pull_twitter(), restricts the stream to the columns
     # below, runs typedetect() and renders it. The output is not asserted.
     columns_to_keep = [
         "author_name",
         "author_id",
         "author_screen_name",
         "created_at",
         "hashtags",
         "text",
         "in_reply_to_status_id_str",
     ]
     stream = Babe().pull_twitter()
     stream = stream.filterColumns(keep_fields=columns_to_keep)
     stream.typedetect().to_string()
Пример #5
0
 def test_user_agent(self):
     # Runs user_agent() on the "useragent" field of the CSV fixture self.s,
     # naming "os", "browser" and "browser_version" as output fields, and
     # expects the rendered stream to equal self.s2.
     a = Babe().pull(string=self.s, format="csv")
     a = a.user_agent(field="useragent",
                      output_os="os",
                      output_browser="browser",
                      output_browser_version="browser_version")
     self.assertEqual(a.to_string(), self.s2)
Пример #6
0
 def test_bulk(self):
     # bulkMapTo() is called with bulk_size=2 and insert_fields=["b"]. The
     # callback maps a batch to len(batch) identical one-element rows, each
     # holding the sum of the batch's `a` values. Expected output is self.s2.
     a = Babe().pull(stream=StringIO(self.s), format="csv")
     a = a.typedetect()
     # The parameter is named `rows` so the builtin `list` is not shadowed.
     a = a.bulkMapTo(lambda rows: [[sum(r.a for r in rows)]] * len(rows),
                     bulk_size=2,
                     insert_fields=["b"])
     self.assertEqual(a.to_string(), self.s2)
Пример #7
0
    def test_html(self):
        a = Babe().pull(string=self.s, format="csv")
        self.assertEqual(a.to_string(format="html"), """<h2></h2><table>
<tr><th>a</th><th>b</th></tr>
<tr><td>1</td><td>2</td></tr>
</table>
""")
Пример #8
0
 def test_join_none(self):
     # Joins self.s1 with self.s2_bis, matching key 'country' against
     # join_key 'country_code', with on_error=Babe.ON_ERROR_NONE; the
     # rendered result must equal self.sjoined_bis.
     a = Babe().pull(string=self.s1, format='csv')
     a = a.join(join_stream=Babe().pull(string=self.s2_bis, format='csv'),
                key='country',
                join_key='country_code',
                on_error=Babe.ON_ERROR_NONE)
     self.assertEqual(a.to_string(), self.sjoined_bis)
Пример #9
0
 def test_bulk(self):
     # bulkMapTo() is called with bulk_size=2 and insert_fields=["b"]. The
     # callback turns a batch into one single-value row per input row, each
     # carrying the sum of `a` over the batch. Expected output is self.s2.
     a = Babe().pull(stream=StringIO(self.s), format="csv")
     a = a.typedetect()
     # `batch` instead of `list`: avoids shadowing the builtin.
     a = a.bulkMapTo(
         lambda batch: [[sum(r.a for r in batch)]] * len(batch),
         bulk_size=2,
         insert_fields=["b"])
     self.assertEqual(a.to_string(), self.s2)
Пример #10
0
 def test_user_agent(self):
     # user_agent() reads the "useragent" field and is told to write to the
     # "os", "browser" and "browser_version" fields; the rendered stream
     # must equal self.s2.
     a = Babe().pull(string=self.s, format="csv")
     a = a.user_agent(field="useragent",
                      output_os="os",
                      output_browser="browser",
                      output_browser_version="browser_version")
     self.assertEqual(a.to_string(), self.s2)
Пример #11
0
 def test_join_none(self):
     # Join of self.s1 against self.s2_bis ('country' vs 'country_code')
     # using on_error=Babe.ON_ERROR_NONE; expects self.sjoined_bis.
     a = Babe().pull(string=self.s1, format='csv')
     a = a.join(join_stream=Babe().pull(string=self.s2_bis, format='csv'),
                key='country',
                join_key='country_code',
                on_error=Babe.ON_ERROR_NONE)
     self.assertEqual(a.to_string(), self.sjoined_bis)
Пример #12
0
 def test_http(self):
     # Pulls 'remote/files/test.csv' over the 'http' protocol from localhost
     # on self.port and checks the rendered CSV.
     # NOTE(review): presumably a local HTTP server is started elsewhere in
     # the test case - confirm.
     a = Babe().pull(protocol='http',
                     host='localhost',
                     name='Test',
                     filename='remote/files/test.csv',
                     port=self.port)
     self.assertEqual(
         a.to_string(),
         'foo,bar,f,d\n1,2,3.2,2010/10/02\n3,4,1.2,2011/02/02\n')
Пример #13
0
 def test_groupby(self):
     # Groups on key "a" and reduces each group to (key, sum of b); with the
     # inline fixture the two a=1 rows are expected to collapse into b=6.
     a = Babe().pull(string='a,b\n1,2\n3,4\n1,4\n',
                     format="csv").typedetect()
     a = a.group(key="a",
                 reducer=lambda key, rows:
                 (key, sum(row.b for row in rows)))
     self.assertEqual(a.to_string(), "a,b\n1,6\n3,4\n")
Пример #14
0
    def test_tuple(self):
        a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        a = a.mapTo(lambda obj: obj._replace(foo=obj.foo + 1))
        s = """foo,bar,f,d
2,2,3.2,2010/10/02
4,4,1.2,2011/02/02
"""
        self.assertEquals(a.to_string(), s)
Пример #15
0
    def test_insert(self):
        a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        a = a.mapTo(lambda row: row.foo + 1, insert_fields=['fooplus'])
        s = """foo,bar,f,d,fooplus
1,2,3.2,2010/10/02,2
3,4,1.2,2011/02/02,4
"""
        self.assertEquals(a.to_string(), s)
Пример #16
0
    def test_insert(self):
        a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        a = a.mapTo(lambda row : row.foo+1, insert_fields=['fooplus'])
        s = """foo,bar,f,d,fooplus
1,2,3.2,2010/10/02,2
3,4,1.2,2011/02/02,4
"""
        self.assertEquals(a.to_string(), s)
Пример #17
0
    def test_tuple(self):
        a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        a = a.mapTo(lambda obj: obj._replace(foo=obj.foo + 1))
        s = """foo,bar,f,d
2,2,3.2,2010/10/02
4,4,1.2,2011/02/02
"""
        self.assertEquals(a.to_string(),  s)
Пример #18
0
 def test_parse(self):
     # Calls parse_time() on field "time" with input_timezone="CET" and
     # output_timezone="GMT", naming "time", "date" and "hour" as output
     # fields; the rendered result must equal self.s2.
     a = Babe().pull(stream=StringIO(self.s), format='csv')
     a = a.parse_time(field="time",
                      output_time="time",
                      output_date="date",
                      output_hour="hour",
                      input_timezone="CET",
                      output_timezone="GMT")
     self.assertEqual(a.to_string(), self.s2)
Пример #19
0
 def test_s3(self):
     # Round trip: push a small CSV as 'test3.csv' to bucket 'florian-test'
     # with protocol "s3", pull it back and expect the original text.
     # NOTE(review): presumably requires S3 credentials/network - confirm.
     s = "a,b\n1,2\n3,4\n"
     a = Babe().pull(string=s, format='csv', name='Test')
     a.push(filename='test3.csv', bucket='florian-test', protocol="s3")
     b = Babe().pull(filename='test3.csv',
                     name='Test',
                     bucket='florian-test',
                     protocol="s3")
     self.assertEqual(b.to_string(), s)
Пример #20
0
 def test_parse(self):
     # parse_time() reads field "time" (input_timezone="CET") and is told
     # to write "time", "date" and "hour" with output_timezone="GMT";
     # expects self.s2.
     a = Babe().pull(stream=StringIO(self.s), format='csv')
     a = a.parse_time(field="time",
                      output_time="time",
                      output_date="date",
                      output_hour="hour",
                      input_timezone="CET",
                      output_timezone="GMT")
     self.assertEqual(a.to_string(), self.s2)
Пример #21
0
 def test_pushsqlite(self):
     # Round trip through SQLite: push self.s to 'test_table' in
     # 'tests/files/test.sqlite' (drop_table and create_table set), then
     # pull the table back and expect the original CSV.
     a = Babe().pull(string=self.s, format='csv')
     a = a.typedetect()
     a.push_sql(table='test_table',
                database_kind='sqlite',
                database='tests/files/test.sqlite',
                drop_table=True,
                create_table=True)
     b = Babe().pull_sql(database_kind='sqlite',
                         database='tests/files/test.sqlite',
                         table='test_table')
     self.assertEqual(b.to_string(), self.s)
    def test_pull_bigquery(self):
        dataset_id = 'ladata'
        day = '20151010'
        table_name = 'crazy_{}'.format(day)
        query = """
SELECT
    uid,
    count(1)
FROM
    [{}.{}]
WHERE
    name='pgr'
GROUP BY 1
ORDER BY 2 DESC;""".format(dataset_id, table_name)

        a = Babe().pull_bigquery(project_id='bigquery-testing-1098',
                                 query=query,
                                 timeout=1000,
                                 num_retries=2)

        print a.to_string()
Пример #23
0
 def test_vectorwise(self):
     # Round trip through database_kind='vectorwise': push self.s to
     # 'test_table' in database 'pybabe_test' (drop_table and create_table
     # set), pull it back and expect the original CSV.
     a = Babe().pull(string=self.s, format='csv')
     a = a.typedetect()
     a.push_sql(table='test_table',
                database_kind='vectorwise',
                database='pybabe_test',
                drop_table=True,
                create_table=True)
     b = Babe().pull_sql(database_kind='vectorwise',
                         database='pybabe_test',
                         table='test_table')
     self.assertEqual(b.to_string(), self.s)
Пример #24
0
 def test_s3(self):
     # Round trip: the CSV is pushed as 'test3.csv' to bucket 'florian-test'
     # using protocol "s3" and pulled back; the text must be unchanged.
     # NOTE(review): presumably requires S3 credentials/network - confirm.
     s = "a,b\n1,2\n3,4\n"
     a = Babe().pull(string=s,
                     format='csv',
                     name='Test')
     a.push(filename='test3.csv',
            bucket='florian-test',
            protocol="s3")
     b = Babe().pull(filename='test3.csv',
                     name='Test',
                     bucket='florian-test',
                     protocol="s3")
     self.assertEqual(b.to_string(), s)
Пример #25
0
    def test_pushsqlite_partition(self):
        # 1) Push self.s into a freshly created 'test_table'.
        # 2) Push self.s2, partitioned on 'id', with delete_partition=True.
        # 3) Pull the table sorted by "id" and expect self.sr.
        a = Babe().pull(string=self.s, format='csv')
        a = a.typedetect()
        a.push_sql(table='test_table',
                   database_kind='sqlite',
                   database='test.sqlite',
                   drop_table=True,
                   create_table=True)

        a = Babe().pull(string=self.s2, format='csv')
        a = a.typedetect()
        a = a.partition(field='id')
        a.push_sql(table='test_table',
                   database_kind='sqlite',
                   database='test.sqlite',
                   delete_partition=True)

        b = Babe().pull_sql(database_kind='sqlite',
                            database='test.sqlite',
                            table='test_table')
        b = b.sort(field="id")
        self.assertEqual(b.to_string(), self.sr)
Пример #26
0
 def test_vectorwise(self):
     # Push self.s to 'test_table' of the 'pybabe_test' database with
     # database_kind='vectorwise' (table dropped and re-created), then pull
     # the table and expect the original CSV.
     a = Babe().pull(string=self.s,
                     format='csv')
     a = a.typedetect()
     a.push_sql(table='test_table',
                database_kind='vectorwise',
                database='pybabe_test',
                drop_table=True,
                create_table=True)
     b = Babe().pull_sql(database_kind='vectorwise',
                         database='pybabe_test',
                         table='test_table')
     self.assertEqual(b.to_string(), self.s)
Пример #27
0
    def test_pushsqlite_partition(self):
        # First load: self.s goes into a dropped-and-recreated 'test_table'.
        a = Babe().pull(string=self.s, format='csv')
        a = a.typedetect()
        a.push_sql(table='test_table',
                   database_kind='sqlite',
                   database='test.sqlite',
                   drop_table=True,
                   create_table=True)

        # Second load: self.s2 is partitioned on 'id' and pushed with
        # delete_partition=True into the same table.
        a = Babe().pull(string=self.s2, format='csv')
        a = a.typedetect()
        a = a.partition(field='id')
        a.push_sql(table='test_table',
                   database_kind='sqlite',
                   database='test.sqlite',
                   delete_partition=True)

        # The table content, sorted by "id", must equal self.sr.
        b = Babe().pull_sql(database_kind='sqlite',
                            database='test.sqlite',
                            table='test_table')
        b = b.sort(field="id")
        self.assertEqual(b.to_string(), self.sr)
Пример #28
0
 def test_groupby(self):
     # group() on key "a" with a reducer returning (key, sum of b); the two
     # a=1 rows of the inline fixture are expected to merge into b=6.
     a = Babe().pull(string='a,b\n1,2\n3,4\n1,4\n', format="csv").typedetect()
     a = a.group(key="a",
                 reducer=lambda key, rows: (key, sum(row.b for row in rows)))
     self.assertEqual(a.to_string(), "a,b\n1,6\n3,4\n")
Пример #29
0
 def test_sqldump(self):
     # Pulls self.s with format='sql' for table 'foobar' and the four named
     # fields; the rendered stream must equal self.s2.
     a = Babe().pull(string=self.s,
                     format='sql',
                     table='foobar',
                     fields=['id', 'number', 'title', 'datetime'])
     self.assertEqual(a.to_string(), self.s2)
Пример #30
0
 def test_html(self):
     # Renders the CSV fixture self.s with format="html" and prints it;
     # nothing is asserted.
     a = Babe().pull(string=self.s, format="csv")
     # Parenthesised single-argument print behaves the same as the Python 2
     # statement and is valid Python 3.
     print(a.to_string(format="html"))
Пример #31
0
 def test_replace(self):
     # Calls replace_in_string("cou", "bar", field="b") on the CSV fixture
     # self.s; the rendered result must equal self.sr.
     a = Babe().pull(stream=StringIO(self.s), format="csv")
     a = a.replace_in_string("cou", "bar", field="b")
     self.assertEqual(a.to_string(), self.sr)
Пример #32
0
 def test_http(self):
     # Pulls "remote/test.csv" over the "http" protocol from localhost on
     # self.port and checks the rendered CSV.
     # NOTE(review): presumably a local HTTP server is started elsewhere in
     # the test case - confirm.
     a = Babe().pull(protocol="http",
                     host="localhost",
                     name="Test",
                     filename="remote/test.csv",
                     port=self.port)
     self.assertEqual(
         a.to_string(),
         "foo,bar,f,d\n1,2,3.2,2010/10/02\n3,4,1.2,2011/02/02\n")
Пример #33
0
 def test_filter2(self):
     # filterColumns(remove_fields=['a']) is expected to leave only
     # column b of the inline fixture.
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filterColumns(remove_fields=['a'])
     self.assertEqual(a.to_string(), "b\n2\n4\n4\n")
Пример #34
0
 def test_replace(self):
     # mapTo() with fields=['a', 'b']: each row is replaced by
     # [foo + 1, bar * 2] under the new field names.
     a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
     a = a.mapTo(lambda row: [row.foo + 1, row.bar * 2], fields=['a', 'b'])
     s = """a,b\n2,4\n4,8\n"""
     self.assertEqual(a.to_string(), s)
Пример #35
0
 def test_dedup2(self):
     # dedup() with no arguments applied to self.s2 must render as self.s3.
     a = Babe().pull(stream=StringIO(self.s2), format="csv")
     a = a.dedup()
     self.assertEqual(a.to_string(), self.s3)
Пример #36
0
 def test_windowMap(self):
     # windowMap(3, f): f receives a window of rows and returns a row
     # holding the sum of their `a` values. The expected output shows sums
     # over at most the three most recent rows (1, 1+2, 1+2+3, 2+3+4, ...).
     a = Babe().pull(stream=StringIO('a\n1\n2\n3\n4\n5\n6\n7\n'),
                     format="csv").typedetect()
     a = a.windowMap(
         3, lambda rows: rows[-1]._make([sum(row.a for row in rows)]))
     self.assertEqual(a.to_string(), 'a\n1\n3\n6\n9\n12\n15\n18\n')
Пример #37
0
 def test_min(self):
     # minN(column='a', n=2) is expected to keep the two rows with the
     # smallest `a` from the inline fixture.
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.minN(column='a', n=2)
     self.assertEqual(a.to_string(), 'a,b\n1,2\n1,4\n')
Пример #38
0
 def test_filter_values(self):
     # filter_values(a=3, b=4): only the a=3,b=4 row of the inline fixture
     # is expected to remain.
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filter_values(a=3, b=4)
     self.assertEqual(a.to_string(), "a,b\n3,4\n")
Пример #39
0
 def test_http(self):
     # Pulls 'remote/test.csv' over the 'http' protocol from localhost on
     # self.port and checks the rendered CSV.
     # NOTE(review): presumably a local HTTP server is started elsewhere in
     # the test case - confirm.
     a = Babe().pull(protocol='http',
                     host='localhost',
                     name='Test',
                     filename='remote/test.csv',
                     port=self.port)
     self.assertEqual(
         a.to_string(),
         'foo,bar,f,d\n1,2,3.2,2010/10/02\n3,4,1.2,2011/02/02\n')
Пример #40
0
 def test_dedup4(self):
     # dedup(fields=['value']) applied to self.s must render as self.s4.
     a = Babe().pull(stream=StringIO(self.s), format="csv")
     a = a.dedup(fields=['value'])
     self.assertEqual(a.to_string(), self.s4)
Пример #41
0
 def test_groupAll(self):
     # group_all() reduces the rows to a one-element tuple (the maximum of
     # b), exposed under the field name 'max'.
     a = Babe().pull(string='a,b\n1,2\n3,4\n1,4\n',
                     format="csv").typedetect()
     a = a.group_all(reducer=lambda rows: (max(row.b for row in rows), ),
                     fields=['max'])
     self.assertEqual(a.to_string(), "max\n4\n")
Пример #42
0
 def test_pushpull(self):
     # Round trip through push_mongo()/pull_mongo(): self.s2 (primary_key
     # "rown") is pushed to collection "test_pushpull" of db "pybabe_test"
     # with drop_collection=True, then the fields rown/f/s are pulled back
     # and must render as the original CSV.
     a = Babe().pull(string=self.s2, format="csv", primary_key="rown")
     a = a.typedetect()
     a.push_mongo(db="pybabe_test",
                  collection="test_pushpull",
                  drop_collection=True)
     b = Babe().pull_mongo(db="pybabe_test",
                           fields=["rown", "f", "s"],
                           collection="test_pushpull")
     self.assertEqual(b.to_string(), self.s2)
Пример #43
0
 def test_tuple(self):
     # flatMap(): each input row is expanded into one row per
     # ':'-separated piece of its b field.
     a = Babe().pull(stream=StringIO("a,b\n1,2:3\n4,5:6\n"), format="csv")
     a = a.flatMap(
         lambda row: [row._replace(b=i) for i in row.b.split(':')])
     self.assertEqual(a.to_string(), "a,b\n1,2\n1,3\n4,5\n4,6\n")
Пример #44
0
 def test_filter(self):
     # filter() with the predicate a == 3: only the matching row of the
     # inline fixture is expected to remain.
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filter(function=lambda x: x.a == 3)
     self.assertEqual(a.to_string(), 'a,b\n3,4\n')
Пример #45
0
 def test_filter(self):
     # Keeps the rows for which the predicate (a == 3) holds; the inline
     # fixture has exactly one such row.
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filter(function=lambda x: x.a == 3)
     self.assertEqual(a.to_string(), 'a,b\n3,4\n')
Пример #46
0
 def test_load(self):
     # Calls pull_kontagent() for a one-hour window with sample_mode=True,
     # keeps head(n=10) and prints it; nothing is asserted.
     start_time = "2012-04-23 11:00"
     end_time = "2012-04-23 12:00"
     a = Babe().pull_kontagent(start_time, end_time, sample_mode=True)
     a = a.head(n=10)
     # Parenthesised single-argument print behaves the same as the Python 2
     # statement and is valid Python 3.
     print(a.to_string())
Пример #47
0
 def test_zip(self):
     # Round trip: push the CSV fixture to 'tests/files/test.zip', pull the
     # same file back and expect the original text.
     a = Babe().pull(string=self.s, format="csv")
     a.push(filename='tests/files/test.zip')
     b = Babe().pull(filename='tests/files/test.zip')
     self.assertEqual(b.to_string(), self.s)
Пример #48
0
 def test_primarykey3(self):
     # primary_key_detect() must leave the rendered content of self.s3
     # unchanged.
     a = Babe().pull(stream=StringIO(self.s3), format='csv')
     a = a.primary_key_detect()
     self.assertEqual(a.to_string(), self.s3)
Пример #49
0
 def test_country_code(self):
     # Applies geoip_country_code() with default arguments to self.s and
     # expects the rendered result to equal self.s2.
     a = Babe().pull(string=self.s, format='csv')
     a = a.geoip_country_code()
     self.assertEqual(a.to_string(), self.s2)
Пример #50
0
 def test_transpose(self):
     # Pulls self.s with primary_key='city', calls transpose() and expects
     # the rendered result to equal self.s2.
     a = Babe().pull(string=self.s, format='csv',
                     primary_key='city').transpose()
     self.assertEqual(a.to_string(), self.s2)
Пример #51
0
 def test_transpose(self):
     # transpose() on self.s pulled with primary_key='city'; the rendered
     # result must equal self.s2.
     a = Babe().pull(string=self.s,
                     format='csv',
                     primary_key='city').transpose()
     self.assertEqual(a.to_string(), self.s2)
Пример #52
0
 def test_airport(self):
     # Smoke test: loads 'data/airports.csv', runs primary_key_detect(),
     # keeps head(n=10) and renders it. The output is not asserted.
     airports = Babe().pull(filename='data/airports.csv')
     sample = airports.primary_key_detect().head(n=10)
     sample.to_string()
Пример #53
0
 def test_replace(self):
     # replace_in_string("cou", "bar", field="b") applied to self.s must
     # render as self.sr.
     a = Babe().pull(stream=StringIO(self.s), format="csv")
     a = a.replace_in_string("cou", "bar", field="b")
     self.assertEqual(a.to_string(), self.sr)
Пример #54
0
 def test_filter2(self):
     # Removing field 'a' via filterColumns(remove_fields=...) is expected
     # to leave only column b.
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filterColumns(remove_fields=['a'])
     self.assertEqual(a.to_string(), "b\n2\n4\n4\n")
Пример #55
0
 def test_groupAll(self):
     # The reducer returns a one-element tuple with the maximum of b over
     # the rows; fields=['max'] names the resulting column.
     a = Babe().pull(string='a,b\n1,2\n3,4\n1,4\n', format="csv").typedetect()
     a = a.group_all(reducer=lambda rows: (max(row.b for row in rows),),
                     fields=['max'])
     self.assertEqual(a.to_string(), "max\n4\n")
Пример #56
0
 def test_tuple(self):
     # flatMap(): the callback splits b on ':' and returns one copy of the
     # row per piece, so each input row yields two output rows here.
     a = Babe().pull(stream=StringIO("a,b\n1,2:3\n4,5:6\n"), format="csv")
     a = a.flatMap(lambda row: [row._replace(b=i) for i in row.b.split(':')])
     self.assertEqual(a.to_string(), "a,b\n1,2\n1,3\n4,5\n4,6\n")
Пример #57
0
 def test_join(self):
     # Joins self.s1 with self.s2, matching key 'country' against join_key
     # 'country_code'; the rendered result must equal self.sjoined.
     a = Babe().pull(string=self.s1, format='csv')
     a = a.join(join_stream=Babe().pull(string=self.s2, format='csv'),
                key='country',
                join_key='country_code')
     self.assertEqual(a.to_string(), self.sjoined)
Пример #58
0
 def test_replace(self):
     # The callback returns [foo + 1, bar * 2] for every row and
     # fields=['a', 'b'] names the resulting columns.
     a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
     a = a.mapTo(lambda row: [row.foo + 1, row.bar * 2], fields=['a', 'b'])
     s = """a,b\n2,4\n4,8\n"""
     self.assertEqual(a.to_string(), s)
Пример #59
0
 def test_rename(self):
     # rename(a="c") is expected to rename column a to c and leave the
     # data rows untouched.
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.rename(a="c")
     self.assertEqual(a.to_string(), 'c,b\n1,2\n3,4\n1,4\n')
Пример #60
0
 def test_filter_values(self):
     # filter_values(a=3, b=4) is expected to keep only the row whose
     # a is 3 and b is 4.
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filter_values(a=3, b=4)
     self.assertEqual(a.to_string(), "a,b\n3,4\n")