示例#1
0
 def test_gz(self):
     """Round-trip CSV rows through 'test.csv.gz' and compare with the input.

     The rows parsed from ``self.s`` are pushed to the file, pulled back
     from it, and serialised to CSV again; the text must equal ``self.s``.
     """
     a = Babe().pull(stream=StringIO(self.s), format='csv', name='Test')
     # NOTE(review): relative filename, and nothing in this test removes the
     # file afterwards -- confirm a fixture cleans it up.
     a.push(filename='test.csv.gz')
     b = Babe().pull(filename='test.csv.gz')
     buf = StringIO()
     b.push(stream=buf, format='csv')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), self.s)
示例#2
0
 def test_join_none(self):
     """Join ``self.s1`` with ``self.s2_bis`` using ``Babe.ON_ERROR_NONE``.

     The join uses key='country' and join_key='country_code'; the string
     form of the result must equal ``self.sjoined_bis``.
     """
     a = Babe().pull(string=self.s1, format='csv')
     a = a.join(join_stream=Babe().pull(string=self.s2_bis, format='csv'),
                key='country',
                join_key='country_code',
                on_error=Babe.ON_ERROR_NONE)
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(a.to_string(), self.sjoined_bis)
示例#3
0
 def test_zip(self):
     """Round-trip CSV rows through 'tests/test.zip' and compare with input.

     Rows parsed from ``self.s`` are pushed to the zip file, pulled back,
     and written to an in-memory stream whose text must equal ``self.s``.
     """
     babe = Babe()
     a = babe.pull(stream=StringIO(self.s), format="csv")
     a.push(filename='tests/test.zip')
     b = Babe().pull(filename='tests/test.zip')
     buf = StringIO()
     # No explicit format on this push; presumably it defaults to the
     # format of the pulled data -- TODO confirm.
     b.push(stream=buf)
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), self.s)
示例#4
0
 def test_join(self):
     """Join ``self.s1`` with ``self.s2`` and compare the CSV output.

     The join uses key='country' and join_key='country_code'; the CSV
     written to the buffer must equal ``self.sjoined``.
     """
     a = Babe().pull(stream=StringIO(self.s1), format='csv')
     a = a.join(join_stream=Babe().pull(stream=StringIO(self.s2),
                                        format='csv'),
                key='country',
                join_key='country_code')
     buf = StringIO()
     a.push(stream=buf, format='csv')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), self.sjoined)
示例#5
0
 def test_s3(self):
     """Push a small CSV to S3 and pull it back unchanged.

     Uses bucket 'florian-test' and key 'test3.csv' with protocol="s3";
     presumably needs valid S3 credentials and network access -- confirm.
     """
     s = "a,b\n1,2\n3,4\n"
     a = Babe().pull(string=s, format='csv', name='Test')
     a.push(filename='test3.csv', bucket='florian-test', protocol="s3")
     b = Babe().pull(filename='test3.csv',
                     name='Test',
                     bucket='florian-test',
                     protocol="s3")
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(b.to_string(), s)
示例#6
0
 def test_join_none(self):
     """Join ``self.s1`` with ``self.s2_bis`` using ``Babe.ON_ERROR_NONE``.

     The join uses key='country' and join_key='country_code'; the CSV
     written to the buffer must equal ``self.sjoined_bis``.
     """
     a = Babe().pull(stream=StringIO(self.s1), format='csv')
     a = a.join(join_stream=Babe().pull(stream=StringIO(self.s2_bis),
                                        format='csv'),
                key='country',
                join_key='country_code',
                on_error=Babe.ON_ERROR_NONE)
     buf = StringIO()
     a.push(stream=buf, format='csv')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), self.sjoined_bis)
示例#7
0
 def test_s3(self):
     """Push a small CSV stream to S3 and pull it back unchanged.

     Uses bucket 'florian-test' and key 'test3.csv' with protocol="s3";
     presumably needs valid S3 credentials and network access -- confirm.
     """
     s = "a,b\n1,2\n3,4\n"
     buf1 = StringIO(s)
     a = Babe().pull(stream=buf1, format='csv', name='Test')
     a.push(filename='test3.csv', bucket='florian-test', protocol="s3")
     b = Babe().pull(filename='test3.csv',
                     name='Test',
                     bucket='florian-test',
                     protocol="s3")
     buf = StringIO()
     b.push(stream=buf, format='csv')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), s)
示例#8
0
 def test_vectorwise(self):
     """Round-trip ``self.s`` through a 'vectorwise' SQL table.

     Rows are type-detected, pushed to table 'test_table' in database
     'pybabe_test' (dropping and recreating the table), then pulled back;
     the string form must equal ``self.s``.
     """
     a = Babe().pull(string=self.s, format='csv')
     a = a.typedetect()
     a.push_sql(table='test_table',
                database_kind='vectorwise',
                database='pybabe_test',
                drop_table=True,
                create_table=True)
     b = Babe().pull_sql(database_kind='vectorwise',
                         database='pybabe_test',
                         table='test_table')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(b.to_string(), self.s)
示例#9
0
 def test_pushpull(self):
     """Round-trip ``self.s2`` through a MongoDB collection.

     Rows are pulled with primary_key='rown', type-detected, and pushed to
     collection 'test_pushpull' in db 'pybabe_test' (dropping it first).
     Pulling fields 'rown', 'f', 's' back must reproduce ``self.s2`` as CSV.
     """
     a = Babe().pull(stream=StringIO(self.s2),
                     format='csv',
                     primary_key='rown')
     a = a.typedetect()
     a.push_mongo(db='pybabe_test',
                  collection='test_pushpull',
                  drop_collection=True)
     b = Babe().pull_mongo(db="pybabe_test",
                           fields=['rown', 'f', 's'],
                           collection='test_pushpull')
     buf = StringIO()
     b.push(stream=buf, format='csv')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), self.s2)
示例#10
0
 def test_vectorwise(self):
     """Round-trip ``self.s`` through a 'vectorwise' SQL table.

     Rows are type-detected, pushed to table 'test_table' in database
     'pybabe_test' (dropping and recreating the table), pulled back, and
     written as comma-delimited CSV; the text must equal ``self.s``.
     """
     a = Babe().pull(stream=StringIO(self.s), format='csv')
     a = a.typedetect()
     a.push_sql(table='test_table',
                database_kind='vectorwise',
                database='pybabe_test',
                drop_table=True,
                create_table=True)
     b = Babe().pull_sql(database_kind='vectorwise',
                         database='pybabe_test',
                         table='test_table')
     buf = StringIO()
     b.push(stream=buf, format='csv', delimiter=',')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), self.s)
示例#11
0
 def test_tuple(self):
     """Check that flatMap emits one output row per returned row.

     Each input row has a ':'-separated ``b`` value; the mapper returns one
     copy of the row per piece, with ``b`` replaced by that piece.
     """
     a = Babe().pull(stream=StringIO("a,b\n1,2:3\n4,5:6\n"), format="csv")
     a = a.flatMap(
         lambda row: [row._replace(b=i) for i in row.b.split(':')])
     buf = StringIO()
     a.push(stream=buf, format="csv")
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), "a,b\n1,2\n1,3\n4,5\n4,6\n")
示例#12
0
 def test_rename(self):
     """Check that rename(a="c") changes the header of column 'a' to 'c'.

     The data rows are expected to come out unchanged.
     """
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.rename(a="c")
     buf = StringIO()
     a.push(stream=buf, format='csv')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), 'c,b\n1,2\n3,4\n1,4\n')
示例#13
0
 def test_min(self):
     """Check minN(column='a', n=2) on a three-row input.

     The expected output keeps the two rows whose ``a`` is 1 and drops the
     row whose ``a`` is 3.
     """
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.minN(column='a', n=2)
     buf = StringIO()
     a.push(stream=buf, format='csv')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), 'a,b\n1,2\n1,4\n')
示例#14
0
 def test_filter_values(self):
     """Check filter_values(a=3, b=4) on a three-row input.

     Only the row with a == 3 and b == 4 is expected in the output.
     """
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filter_values(a=3, b=4)
     buf = StringIO()
     a.push(stream=buf, format="csv")
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), "a,b\n3,4\n")
示例#15
0
 def test_partition_s3(self):
     """Partition ``self.s`` on 'date' and push the result to S3.

     There is no assertion: the test passes when the push does not raise.
     """
     # Destination settings; '$date' in the template is presumably replaced
     # by each partition's value -- TODO confirm.
     destination = dict(protocol="s3",
                        bucket="florian-test",
                        format="csv",
                        filename_template='foobar/$date.csv.gz')
     rows = Babe().pull(stream=StringIO(self.s), format='csv')
     rows.partition(field='date').push(**destination)
示例#16
0
 def test_transpose(self):
     """Transpose ``self.s`` (primary_key='city') and compare the CSV output.

     The CSV written to the buffer must equal ``self.s2``.
     """
     a = Babe().pull(stream=StringIO(self.s),
                     format='csv',
                     primary_key='city').transpose()
     buf = StringIO()
     a.push(stream=buf, format='csv')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), self.s2)
示例#17
0
 def test_multi(self):
     """Chain two pulls of ``self.s`` and check the combined CSV output.

     The pushed text is expected to be ``self.s`` written twice in a row.
     """
     a = Babe()
     a = a.pull(stream=StringIO(self.s),
                format='csv').pull(stream=StringIO(self.s), format='csv')
     buf = StringIO()
     a.push(stream=buf, format='csv')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), self.s + self.s)
示例#18
0
 def test_user_agent(self):
     """Run user_agent() on the 'useragent' field and compare the output.

     The call names 'os', 'browser' and 'browser_version' as output fields;
     the string form of the result must equal ``self.s2``.
     """
     a = Babe().pull(string=self.s, format="csv")
     a = a.user_agent(field="useragent",
                      output_os="os",
                      output_browser="browser",
                      output_browser_version="browser_version")
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(a.to_string(), self.s2)
示例#19
0
 def test_filter2(self):
     """Check filterColumns(remove_fields=['a']) on a two-column input.

     Only column ``b`` is expected in the output.
     """
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filterColumns(remove_fields=['a'])
     buf = StringIO()
     a.push(stream=buf, format="csv")
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), "b\n2\n4\n4\n")
示例#20
0
 def test_groupby(self):
     """Group rows by ``a`` and reduce each group to the sum of its ``b``.

     The reducer returns a ``(key, total)`` pair; the expected output has
     one row per distinct ``a`` value.
     """
     a = Babe().pull(string='a,b\n1,2\n3,4\n1,4\n',
                     format="csv").typedetect()
     # A generator expression avoids building a throwaway list for sum().
     a = a.group(key="a",
                 reducer=lambda key, rows:
                 (key, sum(row.b for row in rows)))
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(a.to_string(), "a,b\n1,6\n3,4\n")
示例#21
0
文件: tests.py 项目: tcabrol/PyBabe
 def test_ftp(self):
     """Pull 'tests/test.csv' and push it over FTP to localhost.

     The port comes from ``self.port``. There is no assertion: the test
     passes when the push does not raise.
     """
     source = Babe().pull('tests/test.csv', name='Test')
     upload_target = dict(filename='test.csv',
                          protocol='ftp',
                          host='localhost',
                          port=self.port)
     source.push(**upload_target)
示例#22
0
 def test_bulk(self):
     """Check bulkMapTo with bulk_size=2 and an inserted column 'b'.

     For every batch the mapper returns one single-element row per input
     row, each holding the sum of ``a`` over the batch. The string form of
     the result must equal ``self.s2``.
     """
     a = Babe().pull(stream=StringIO(self.s), format="csv")
     a = a.typedetect()
     # Parameter named `batch`: the original name shadowed the builtin list.
     a = a.bulkMapTo(
         lambda batch: [[sum([r.a for r in batch])]] * len(batch),
         bulk_size=2,
         insert_fields=["b"])
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(a.to_string(), self.s2)
示例#23
0
 def test_null(self):
     """Pull ``self.s`` with null_value="NULL" and compare the CSV output.

     The CSV written to the buffer must equal ``self.s2``.
     """
     a = Babe().pull(stream=StringIO(self.s),
                     format='csv',
                     null_value="NULL")
     buf = StringIO()
     # The return value of push() was bound to an unused name; dropped.
     a.push(stream=buf, format="csv")
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), self.s2)
示例#24
0
 def test_filter(self):
     """Check filter() with a predicate keeping rows where ``a == 3``.

     Only the '3,4' row is expected in the output.
     """
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filter(function=lambda x: x.a == 3)
     buf = StringIO()
     a.push(stream=buf, format='csv')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), 'a,b\n3,4\n')
示例#25
0
 def test_replace(self):
     """Check mapTo() with ``fields``: output rows use the new columns only.

     Each row of 'tests/test.csv' is mapped to ``[foo + 1, bar * 2]`` under
     the column names 'a' and 'b'.
     """
     a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
     a = a.mapTo(lambda row: [row.foo + 1, row.bar * 2], fields=['a', 'b'])
     buf = StringIO()
     a.push(stream=buf, format='csv')
     s = """a,b\n2,4\n4,8\n"""
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), s)
示例#26
0
    def test_insert(self):
        a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        a = a.mapTo(lambda row: row.foo + 1, insert_fields=['fooplus'])
        s = """foo,bar,f,d,fooplus
1,2,3.2,2010/10/02,2
3,4,1.2,2011/02/02,4
"""
        self.assertEquals(a.to_string(), s)
示例#27
0
    def test_tuple(self):
        a = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        a = a.mapTo(lambda obj: obj._replace(foo=obj.foo + 1))
        s = """foo,bar,f,d
2,2,3.2,2010/10/02
4,4,1.2,2011/02/02
"""
        self.assertEquals(a.to_string(), s)
示例#28
0
 def test_sort(self):
     """Sort 10001 generated rows on ``v`` and check the first one.

     Row ``i`` is ``(i, -i)`` for i in 0..10000, so the smallest ``v`` is
     -10000 and belongs to k == 10000.
     """
     babe = Babe()
     # range() rather than xrange(): valid on both Python 2 and Python 3.
     s = '\n'.join(['k,v'] + ['%u,%u' % (i, -i) for i in range(0, 10001)])
     a = babe.pull(string=s, name='test', format='csv')
     a = a.typedetect()
     a = a.sort(field='v')
     a = a.head(n=1)
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(a.to_string(), 'k,v\n10000,-10000\n')
示例#29
0
 def test_mail(self):
     """Pull two CSV streams as 'Table 1' and 'Table 2' and mail them.

     mail() is called with in_body=True. There is no assertion: the test
     passes when the call does not raise.
     """
     report = (Babe()
               .pull(stream=StringIO(self.s1),
                     source="Table 1",
                     format='csv')
               .pull(stream=StringIO(self.s2),
                     source="Table 2",
                     format='csv'))
     report.mail(subject="Test",
                 recipients="*****@*****.**",
                 in_body=True)
示例#30
0
 def test_sqldump(self):
     """Pull ``self.s`` with format='sql' and compare the CSV output.

     The pull names table 'foobar' and fields 'id', 'number', 'title',
     'datetime'; the CSV written to the buffer must equal ``self.s2``.
     """
     a = Babe().pull(stream=StringIO(self.s),
                     format='sql',
                     table='foobar',
                     fields=['id', 'number', 'title', 'datetime'])
     buf = StringIO()
     a.push(stream=buf, format='csv')
     # assertEqual: the assertEquals alias is deprecated and gone in 3.12.
     self.assertEqual(buf.getvalue(), self.s2)