Exemplo n.º 1
0
 def test_partition(self):
     """Partition a CSV string on ``date`` and check the CSV written for each partition key."""
     partitioned = Babe().pull(string=self.s, format='csv').partition(field='date')
     streams = {}
     partitioned.push(stream_dict=streams, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(streams['2012-04-04'].getvalue(), 'date,name,value\n2012-04-04,John,1\n2012-04-04,Luke,2\n')
     self.assertEqual(streams['2012-04-05'].getvalue(), 'date,name,value\n2012-04-05,John,1\n')
Exemplo n.º 2
0
 def test_gz(self):
     """Write the CSV fixture to ``test.csv.gz``, read it back and expect the same CSV text."""
     Babe().pull(stream=StringIO(self.s), format='csv', name='Test').push(filename='test.csv.gz')
     reread = Babe().pull(filename='test.csv.gz')
     out = StringIO()
     reread.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s)
Exemplo n.º 3
0
 def test_replace(self):
     """Map every row of ``tests/test.csv`` to new fields ``a`` and ``b`` and check the CSV output."""
     source = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
     mapped = source.mapTo(lambda row: [row.foo + 1, row.bar * 2], fields=['a', 'b'])
     out = StringIO()
     mapped.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), 'a,b\n2,4\n4,8\n')
Exemplo n.º 4
0
 def test_twitter(self):
     """Pull from Twitter, keep a fixed subset of columns, detect types and push as CSV.

     There is no assertion; the test only checks that the pipeline runs.
     """
     kept = [
         "author_name",
         "author_id",
         "author_screen_name",
         "created_at",
         "hashtags",
         "text",
         "in_reply_to_status_id_str",
     ]
     tweets = Babe().pull_twitter().filterColumns(keep_fields=kept).typedetect()
     tweets.push(stream=StringIO(), format='csv')
Exemplo n.º 5
0
 def test_partition(self):
     """Partition a CSV string on ``date`` and check the CSV written for each partition key."""
     partitioned = Babe().pull(string=self.s, format="csv").partition(field="date")
     streams = {}
     partitioned.push(stream_dict=streams, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(streams["2012-04-04"].getvalue(), "date,name,value\n2012-04-04,John,1\n2012-04-04,Luke,2\n")
     self.assertEqual(streams["2012-04-05"].getvalue(), "date,name,value\n2012-04-05,John,1\n")
Exemplo n.º 6
0
 def test_partition_s3(self):
     """Partition the CSV fixture on ``date`` and push it to S3 using a filename template.

     There is no assertion; the test only checks that the push runs.
     """
     source = Babe().pull(stream=StringIO(self.s), format='csv')
     by_date = source.partition(field='date')
     by_date.push(protocol="s3", bucket="florian-test", format="csv", filename_template='foobar/$date.csv.gz')
Exemplo n.º 7
0
 def test_filter_values(self):
     """filter_values(a=3, b=4) should leave only the row ``3,4``."""
     rows = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
     matching = rows.filter_values(a=3, b=4)
     out = StringIO()
     matching.push(stream=out, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), "a,b\n3,4\n")
Exemplo n.º 8
0
 def test_filter2(self):
     """filterColumns(remove_fields=['a']) should drop column ``a`` from the output."""
     rows = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
     without_a = rows.filterColumns(remove_fields=['a'])
     out = StringIO()
     without_a.push(stream=out, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), "b\n2\n4\n4\n")
Exemplo n.º 9
0
 def test_multi(self):
     """Two chained pulls of the same CSV should produce the CSV text twice."""
     first = Babe().pull(stream=StringIO(self.s), format='csv')
     both = first.pull(stream=StringIO(self.s), format='csv')
     out = StringIO()
     both.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s + self.s)
Exemplo n.º 10
0
 def test_gz(self):
     """Write the CSV fixture to ``test.csv.gz``, read it back and expect the same CSV text."""
     Babe().pull(stream=StringIO(self.s), format='csv', name='Test').push(filename='test.csv.gz')
     reread = Babe().pull(filename='test.csv.gz')
     out = StringIO()
     reread.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s)
Exemplo n.º 11
0
 def test_bulk(self):
     """bulkMapTo with bulk_size=2 inserts field ``b`` holding the sum of ``a`` over each bulk."""
     typed = Babe().pull(stream=StringIO(self.s), format="csv").typedetect()
     # The lambda parameter is called ``rows`` so it no longer shadows the
     # builtin ``list``. One identical [sum] entry is returned per input row.
     summed = typed.bulkMapTo(
         lambda rows: [[sum(r.a for r in rows)]] * len(rows),
         bulk_size=2,
         insert_fields=["b"])
     out = StringIO()
     summed.push(stream=out, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s2)
Exemplo n.º 12
0
    def test_s3(self):
        """Push a small CSV with protocol 'gs', then load that file into BigQuery.

        Despite the method name, no S3 call is made here. There is no
        assertion; the test only checks that both pushes run.
        """
        filename = 'tests/test_bq.csv'
        bucket = 'bertrandtest'

        source = Babe().pull(string="a,b\n1,a\n3,b\n", format='csv', name='Test')
        source.push(
            filename=filename,
            format='csv',
            delimiter='\t',
            quotechar='|',
            encoding='utf8',
            bucket=bucket,
            protocol='gs',
        )

        # Both columns are declared REQUIRED in the table schema.
        schema = [
            {"name": "entier", "type": "INTEGER", "mode": "REQUIRED"},
            {"name": "string", "type": "STRING", "mode": "REQUIRED"},
        ]
        Babe().push_bigquery(
            filename=filename,
            bucket=bucket,
            project_id='bigquery-testing-1098',
            dataset_id='ladata',
            table_name='tests',
            schema=schema,
        )
Exemplo n.º 13
0
 def test_replace(self):
     """Map every row of ``tests/test.csv`` to new fields ``a`` and ``b`` and check the CSV output."""
     source = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
     mapped = source.mapTo(lambda row: [row.foo + 1, row.bar * 2], fields=['a', 'b'])
     out = StringIO()
     mapped.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), 'a,b\n2,4\n4,8\n')
Exemplo n.º 14
0
 def test_tuple(self):
     """flatMap should emit one row per ':'-separated value found in column ``b``."""
     source = Babe().pull(stream=StringIO("a,b\n1,2:3\n4,5:6\n"), format="csv")
     exploded = source.flatMap(
         lambda row: [row._replace(b=part) for part in row.b.split(':')])
     out = StringIO()
     exploded.push(stream=out, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), "a,b\n1,2\n1,3\n4,5\n4,6\n")
Exemplo n.º 15
0
 def test_min(self):
     """minN(column='a', n=2) should keep the two rows where ``a`` is 1."""
     rows = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
     smallest = rows.minN(column='a', n=2)
     out = StringIO()
     smallest.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), 'a,b\n1,2\n1,4\n')
Exemplo n.º 16
0
 def test_transpose(self):
     """Pull the CSV fixture with primary_key='city', transpose it and compare with ``self.s2``."""
     source = Babe().pull(stream=StringIO(self.s), format='csv', primary_key='city')
     transposed = source.transpose()
     out = StringIO()
     transposed.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s2)
Exemplo n.º 17
0
 def test_rename(self):
     """rename(a="c") should change only the header of column ``a``."""
     rows = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
     renamed = rows.rename(a="c")
     out = StringIO()
     renamed.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), 'c,b\n1,2\n3,4\n1,4\n')
Exemplo n.º 18
0
 def test_filter(self):
     """filter with the predicate ``x.a == 3`` should leave only the row ``3,4``."""
     rows = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
     matching = rows.filter(function=lambda x: x.a == 3)
     out = StringIO()
     matching.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), 'a,b\n3,4\n')
Exemplo n.º 19
0
 def test_multi2(self):
     """Pull the same CSV twice, merge the substreams and compare the output with ``self.s2``."""
     first = Babe().pull(stream=StringIO(self.s), format='csv')
     both = first.pull(string=self.s, format='csv')
     merged = both.merge_substreams()
     out = StringIO()
     merged.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s2)
Exemplo n.º 20
0
 def test_windowMap(self):
     """windowMap with a window of 3 should replace ``a`` by the sum over the current window."""
     source = Babe().pull(stream=StringIO('a\n1\n2\n3\n4\n5\n6\n7\n'), format="csv").typedetect()
     # Build the output row from the newest row's type so the schema is kept.
     windowed = source.windowMap(
         3, lambda rows: rows[-1]._make([sum(row.a for row in rows)]))
     out = StringIO()
     windowed.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), 'a\n1\n3\n6\n9\n12\n15\n18\n')
Exemplo n.º 21
0
 def test_load_partition(self):
     """Pull one hour of Kontagent sample data and expect a single stream key, '2012-04-23_11'."""
     start_time = '2012-04-23 11:00'
     end_time = '2012-04-23 12:00'
     events = Babe().pull_kontagent(start_time, end_time, sample_mode=True).head(n=10)
     streams = {}
     events.push(stream_dict=streams, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(list(streams.keys()), ['2012-04-23_11'])
Exemplo n.º 22
0
 def test_multi2(self):
     """Pull the same CSV twice, merge the substreams and compare the output with ``self.s2``."""
     first = Babe().pull(stream=StringIO(self.s), format='csv')
     both = first.pull(string=self.s, format='csv')
     merged = both.merge_substreams()
     out = StringIO()
     merged.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s2)
Exemplo n.º 23
0
 def test_zip(self):
     """Write the CSV fixture to ``tests/test.zip``, read it back and expect the same text."""
     Babe().pull(stream=StringIO(self.s), format="csv").push(filename='tests/test.zip')
     reread = Babe().pull(filename='tests/test.zip')
     out = StringIO()
     reread.push(stream=out)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s)
Exemplo n.º 24
0
 def test_load(self):
     """Pull one hour of Kontagent sample data and print the first 10 rows as CSV.

     There is no assertion; the CSV is printed for manual inspection.
     """
     start_time = '2012-04-23 11:00'
     end_time = '2012-04-23 12:00'
     events = Babe().pull_kontagent(start_time, end_time, sample_mode=True)
     out = StringIO()
     events = events.head(n=10)
     events.push(stream=out, format='csv')
     # The call form prints the same text on Python 2 and also parses on Python 3.
     print(out.getvalue())
Exemplo n.º 25
0
 def test_zip(self):
     """Write the CSV fixture to ``tests/test.zip``, read it back and expect the same text."""
     Babe().pull(stream=StringIO(self.s), format="csv").push(filename='tests/test.zip')
     reread = Babe().pull(filename='tests/test.zip')
     out = StringIO()
     reread.push(stream=out)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s)
Exemplo n.º 26
0
 def test_vectorwise(self):
     """Push the typed CSV fixture into a Vectorwise table, pull it back and compare as CSV."""
     typed = Babe().pull(stream=StringIO(self.s), format='csv').typedetect()
     typed.push_sql(
         table='test_table',
         database_kind='vectorwise',
         database='pybabe_test',
         drop_table=True,
         create_table=True)
     reread = Babe().pull_sql(database_kind='vectorwise', database='pybabe_test', table='test_table')
     out = StringIO()
     reread.push(stream=out, format='csv', delimiter=',')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s)
Exemplo n.º 27
0
 def test_buzzdata(self):
     """Pull an XLS file with protocol 'buzzdata', keep the first two rows and push as CSV.

     There is no assertion; the test only checks that the pipeline runs.
     """
     source = Babe().pull(
         protocol='buzzdata',
         dataroom='best-city-contest-worldwide-cost-of-living-index',
         uuid='aINAPyLGur4y37yAyCM7w3',
         username='******',
         format='xls',
     )
     first_rows = source.head(2)
     first_rows.push(stream=StringIO(), format='csv')
Exemplo n.º 28
0
 def test_load(self):
     """Pull one hour of Kontagent sample data and print the first 10 rows as CSV.

     There is no assertion; the CSV is printed for manual inspection.
     """
     start_time = '2012-04-23 11:00'
     end_time = '2012-04-23 12:00'
     events = Babe().pull_kontagent(start_time, end_time, sample_mode=True)
     out = StringIO()
     events = events.head(n=10)
     events.push(stream=out, format='csv')
     # The call form prints the same text on Python 2 and also parses on Python 3.
     print(out.getvalue())
Exemplo n.º 29
0
 def test_pushpull(self):
     """Push the typed fixture into a MongoDB collection, pull it back and compare as CSV."""
     typed = Babe().pull(stream=StringIO(self.s2), format='csv', primary_key='rown').typedetect()
     typed.push_mongo(db='pybabe_test', collection='test_pushpull', drop_collection=True)
     reread = Babe().pull_mongo(db="pybabe_test", fields=['rown', 'f', 's'], collection='test_pushpull')
     out = StringIO()
     reread.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s2)
Exemplo n.º 30
0
 def test_sqldump(self):
     """Pull ``self.s`` with format='sql' for table ``foobar`` and compare the CSV output with ``self.s2``."""
     dump = Babe().pull(
         stream=StringIO(self.s),
         format='sql',
         table='foobar',
         fields=['id', 'number', 'title', 'datetime'])
     out = StringIO()
     dump.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s2)
Exemplo n.º 31
0
 def test_groupAll(self):
     """groupAll should reduce every row to a single ``max`` row holding the largest ``b``."""
     rows = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
     # The reducer returns a 1-tuple to match the single output field.
     reduced = rows.groupAll(
         reducer=lambda group: (max(row.b for row in group), ),
         fields=['max'])
     out = StringIO()
     reduced.push(stream=out, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), "max\n4\n")
Exemplo n.º 32
0
 def test_load_partition(self):
     """Pull one hour of Kontagent sample data and expect a single stream key, "2012-04-23_11"."""
     start_time = "2012-04-23 11:00"
     end_time = "2012-04-23 12:00"
     events = Babe().pull_kontagent(start_time, end_time, sample_mode=True).head(n=10)
     streams = {}
     events.push(stream_dict=streams, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(list(streams.keys()), ["2012-04-23_11"])
Exemplo n.º 33
0
 def test_groupby(self):
     """groupBy on ``a`` with a summing reducer should yield one row per key with the sum of ``b``."""
     rows = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
     grouped = rows.groupBy(
         key="a",
         reducer=lambda key, group: (key, sum(row.b for row in group)))
     out = StringIO()
     grouped.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), "a,b\n1,6\n3,4\n")
Exemplo n.º 34
0
 def test_s3(self):
     """Push a small CSV to S3 as ``test3.csv``, pull it back and expect the same text."""
     s = "a,b\n1,2\n3,4\n"
     Babe().pull(string=s, format='csv', name='Test').push(
         filename='test3.csv', bucket='florian-test', protocol="s3")
     reread = Babe().pull(
         filename='test3.csv',
         name='Test',
         bucket='florian-test',
         protocol="s3")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(reread.to_string(), s)
 def test_s3(self):
     """Push a small tab-delimited CSV as ``test_gs.csv`` with protocol 'gs'.

     Despite the method name, the push uses protocol "gs". There is no
     assertion; the test only checks that the push runs.
     """
     source = Babe().pull(string="a,b\n1,2\n3,4\n", format='csv', name='Test')
     source.push(filename='test_gs.csv', bucket='bertrandtest', delimiter="\t", protocol="gs")
Exemplo n.º 36
0
 def test_s3_glob2(self):
     """Push a CSV to S3, pull it back through a ``?`` wildcard filename and compare the text."""
     s = "a,b\n1,2\n3,4\n"
     source = Babe().pull(stream=StringIO(s), format='csv', name='Test')
     source.push(filename='foofoobar/test_glob_4.csv', bucket='florian-test', protocol="s3")
     reread = Babe().pull(
         filename='foofoobar/test_glob_?.csv',
         name='Test',
         bucket='florian-test',
         protocol="s3")
     out = StringIO()
     reread.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), s)
Exemplo n.º 37
0
 def test_bulk(self):
     """bulkMapTo with bulk_size=2 inserts field ``b`` holding the sum of ``a`` over each bulk."""
     typed = Babe().pull(stream=StringIO(self.s), format="csv").typedetect()
     # The lambda parameter is called ``rows`` so it no longer shadows the
     # builtin ``list``. One identical [sum] entry is returned per input row.
     summed = typed.bulkMapTo(
         lambda rows: [[sum(r.a for r in rows)]] * len(rows),
         bulk_size=2,
         insert_fields=["b"])
     out = StringIO()
     summed.push(stream=out, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s2)
Exemplo n.º 38
0
 def test_user_agent(self):
     """Run user_agent on the ``useragent`` field with three output fields and compare with ``self.s2``."""
     source = Babe().pull(stream=StringIO(self.s), format="csv")
     out = StringIO()
     parsed = source.user_agent(
         field="useragent",
         output_os="os",
         output_browser="browser",
         output_browser_version="browser_version")
     parsed.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s2)
Exemplo n.º 39
0
 def test_twitter(self):
     """Pull from Twitter, keep a fixed subset of columns, detect types and push as CSV.

     There is no assertion; the test only checks that the pipeline runs.
     """
     kept = [
         "author_name",
         "author_id",
         "author_screen_name",
         "created_at",
         "hashtags",
         "text",
         "in_reply_to_status_id_str",
     ]
     tweets = Babe().pull_twitter().filterColumns(keep_fields=kept).typedetect()
     tweets.push(stream=StringIO(), format='csv')
Exemplo n.º 40
0
 def test_join(self):
     """Join ``self.s1`` (key ``country``) with ``self.s2`` (key ``country_code``) and compare with ``self.sjoined``."""
     left = Babe().pull(stream=StringIO(self.s1), format='csv')
     right = Babe().pull(stream=StringIO(self.s2), format='csv')
     joined = left.join(join_stream=right, key='country', join_key='country_code')
     out = StringIO()
     joined.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.sjoined)
Exemplo n.º 41
0
    def test_insert(self):
        """mapTo with insert_fields should append a ``fooplus`` column equal to ``foo + 1``."""
        source = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        extended = source.mapTo(lambda row: row.foo + 1, insert_fields=['fooplus'])
        out = StringIO()
        extended.push(stream=out, format='csv')
        expected = (
            'foo,bar,f,d,fooplus\n'
            '1,2,3.2,2010/10/02,2\n'
            '3,4,1.2,2011/02/02,4\n'
        )
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(out.getvalue(), expected)
Exemplo n.º 42
0
    def test_insert(self):
        """mapTo with insert_fields should append a ``fooplus`` column equal to ``foo + 1``."""
        source = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        extended = source.mapTo(lambda row: row.foo + 1, insert_fields=['fooplus'])
        out = StringIO()
        extended.push(stream=out, format='csv')
        expected = (
            'foo,bar,f,d,fooplus\n'
            '1,2,3.2,2010/10/02,2\n'
            '3,4,1.2,2011/02/02,4\n'
        )
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(out.getvalue(), expected)
Exemplo n.º 43
0
 def test_join_none(self):
     """Join ``self.s1`` with ``self.s2_bis`` using on_error=Babe.ON_ERROR_NONE and compare with ``self.sjoined_bis``."""
     left = Babe().pull(stream=StringIO(self.s1), format='csv')
     right = Babe().pull(stream=StringIO(self.s2_bis), format='csv')
     joined = left.join(
         join_stream=right,
         key='country',
         join_key='country_code',
         on_error=Babe.ON_ERROR_NONE)
     out = StringIO()
     joined.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.sjoined_bis)
Exemplo n.º 44
0
    def test_tuple(self):
        """mapTo returning ``obj._replace(foo=obj.foo + 1)`` should increment ``foo`` and keep the other columns."""
        source = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        bumped = source.mapTo(lambda obj: obj._replace(foo=obj.foo + 1))
        out = StringIO()
        bumped.push(stream=out, format='csv')
        expected = (
            'foo,bar,f,d\n'
            '2,2,3.2,2010/10/02\n'
            '4,4,1.2,2011/02/02\n'
        )
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(out.getvalue(), expected)
Exemplo n.º 45
0
    def test_tuple(self):
        """mapTo returning ``obj._replace(foo=obj.foo + 1)`` should increment ``foo`` and keep the other columns."""
        source = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
        bumped = source.mapTo(lambda obj: obj._replace(foo=obj.foo + 1))
        out = StringIO()
        bumped.push(stream=out, format='csv')
        expected = (
            'foo,bar,f,d\n'
            '2,2,3.2,2010/10/02\n'
            '4,4,1.2,2011/02/02\n'
        )
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(out.getvalue(), expected)
Exemplo n.º 46
0
 def test_partition(self):
     """Partition a CSV stream on ``date`` and check the CSV written for each partition key."""
     source = Babe().pull(stream=StringIO(self.s), format='csv')
     partitioned = source.partition(field='date')
     streams = {}
     partitioned.push(stream_dict=streams, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         streams['2012-04-04'].getvalue(),
         'date,name,value\n2012-04-04,John,1\n2012-04-04,Luke,2\n')
     self.assertEqual(
         streams['2012-04-05'].getvalue(),
         'date,name,value\n2012-04-05,John,1\n')
Exemplo n.º 47
0
 def test_buzzdata(self):
     """Pull an XLS file with protocol 'buzzdata', keep the first two rows and push as CSV.

     There is no assertion; the test only checks that the pipeline runs.
     """
     pull_args = dict(
         protocol='buzzdata',
         dataroom='best-city-contest-worldwide-cost-of-living-index',
         uuid='aINAPyLGur4y37yAyCM7w3',
         username='******',
         format='xls',
     )
     first_rows = Babe().pull(**pull_args).head(2)
     first_rows.push(stream=StringIO(), format='csv')
Exemplo n.º 48
0
 def test_parse(self):
     """Run parse_time on ``time`` (input CET, output GMT) and compare the CSV output with ``self.s2``."""
     source = Babe().pull(stream=StringIO(self.s), format='csv')
     out = StringIO()
     parsed = source.parse_time(
         field="time",
         output_time="time",
         output_date="date",
         output_hour="hour",
         input_timezone="CET",
         output_timezone="GMT")
     parsed.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s2)
Exemplo n.º 49
0
def wordcount():
    """Count the words of RFC 1149 fetched over HTTP and write a CSV to stdout.

    Each row's ``text`` is split into words, the words are grouped and
    counted, and ``maxN(column='count', n=10)`` is applied before the push.
    """
    a = Babe().pull(protocol='http',
                    host='www.ietf.org',
                    filename='rfc/rfc1149.txt')
    # Raw string: '\w' in a normal literal is an invalid escape sequence
    # (deprecated, and a warning on recent Pythons). The pattern is unchanged.
    a = a.flatMap(lambda row: [(w, 1) for w in re.findall(r'\w+', row.text)],
                  columns=['word', 'count'])
    a = a.groupBy(key='word',
                  reducer=lambda word, rows: (word, sum(row.count for row in rows)))
    a = a.maxN(column='count', n=10)
    a.push(stream=sys.stdout, format='csv')
Exemplo n.º 50
0
 def test_s3(self):
     """Push a small CSV to S3 as ``test3.csv``, pull it back and expect the same text."""
     s = "a,b\n1,2\n3,4\n"
     source = Babe().pull(string=s, format='csv', name='Test')
     source.push(filename='test3.csv', bucket='florian-test', protocol="s3")
     reread = Babe().pull(
         filename='test3.csv',
         name='Test',
         bucket='florian-test',
         protocol="s3")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(reread.to_string(), s)
Exemplo n.º 51
0
    def test_pushsqlite_partition(self):
        """Load a table into SQLite, push a partitioned second stream with delete_partition=True, and check the result.

        The table is read back sorted by ``id`` and compared with ``self.sr``.
        """
        # First load: (re)create the table from self.s.
        initial = Babe().pull(stream=StringIO(self.s), format='csv').typedetect()
        initial.push_sql(
            table='test_table',
            database_kind='sqlite',
            database='test.sqlite',
            drop_table=True,
            create_table=True)

        # Second load: self.s2 partitioned on ``id``, pushed with delete_partition=True.
        update = Babe().pull(stream=StringIO(self.s2), format='csv').typedetect()
        update = update.partition(field='id')
        update.push_sql(
            table='test_table',
            database_kind='sqlite',
            database='test.sqlite',
            delete_partition=True)

        reread = Babe().pull_sql(database_kind='sqlite', database='test.sqlite', table='test_table')
        reread = reread.sort(field="id")
        out = StringIO()
        reread.push(stream=out, format='csv', delimiter=',')
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(out.getvalue(), self.sr)
Exemplo n.º 52
0
    def test_csv_escape(self):
        """Push tab-separated input to a CSV file using a custom csv.Dialect.

        There is no assertion; the test only checks that the push runs.
        """
        s = (
            'a\tb\tc\n'
            '1\tab\t{\\"hello, buzz\\"}\n'
            '2\tcd\t\n'
        )

        class Dialect(csv.Dialect):
            # Escape with a backslash instead of doubling the quote character.
            delimiter = ','
            quotechar = '|'
            escapechar = '\\'
            doublequote = False
            quoting = csv.QUOTE_MINIMAL
            lineterminator = '\n'

        source = Babe().pull(string=s, format='csv', name='Test')
        source.push(filename='tests/files/test3.csv', dialect=Dialect)
Exemplo n.º 53
0
 def test_memo(self):
     """A memoized pull should still work after the source file is gone; a non-memoized one should raise IOError."""
     tmpfile = NamedTemporaryFile()
     tmpfile.write(self.s)
     tmpfile.flush()
     a = Babe().pull(filename=tmpfile.name, memoize=True, format="csv")
     buf = StringIO()
     a.push(stream=buf, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(buf.getvalue(), self.s)
     # After close() the file must no longer exist; the next assertion checks it.
     tmpfile.close()
     self.assertFalse(os.path.exists(tmpfile.name))
     # Same filename with memoize=True: expected to return the earlier content.
     b = Babe().pull(filename=tmpfile.name, memoize=True, format="csv")
     buf2 = StringIO()
     b.push(stream=buf2, format="csv")
     self.assertEqual(buf2.getvalue(), self.s)
     # Without memoization the missing file must surface as IOError on push.
     c = Babe().pull(filename=tmpfile.name, memoize=False, format="csv")
     buf3 = StringIO()
     self.assertRaises(IOError, c.push, stream=buf3, format="csv")
Exemplo n.º 54
0
 def test_html(self):
     """Push the CSV fixture with format="html" and print the result.

     There is no assertion; the output is printed for manual inspection.
     """
     source = Babe().pull(stream=StringIO(self.s), format="csv")
     out = StringIO()
     source.push(stream=out, format="html")
     # The call form prints the same text on Python 2 and also parses on Python 3.
     print(out.getvalue())
Exemplo n.º 55
0
 def test_partition_s3(self):
     """Partition the CSV fixture on ``date`` and push it to S3 using a filename template.

     There is no assertion; the test only checks that the push runs.
     """
     by_date = Babe().pull(string=self.s, format="csv").partition(field="date")
     by_date.push(
         protocol="s3",
         bucket="florian-test",
         format="csv",
         filename_template="foobar/$date.csv.gz",
     )
Exemplo n.º 56
0
 def test_null(self):
     """Pull the CSV fixture with null_value="NULL" and compare the CSV output with ``self.s2``."""
     source = Babe().pull(stream=StringIO(self.s), format='csv', null_value="NULL")
     out = StringIO()
     # The push result is not needed, so it is no longer assigned to a variable.
     source.push(stream=out, format="csv")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.s2)
Exemplo n.º 57
0
 def test_gz(self):
     """Write the CSV fixture to ``tests/files/test.csv.gz``, read it back and expect the same text."""
     source = Babe().pull(string=self.s, format="csv", name="Test")
     source.push(filename="tests/files/test.csv.gz")
     reread = Babe().pull(filename="tests/files/test.csv.gz")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(reread.to_string(), self.s)
    def test_gs_load_from_kontagent(self):
        """Export one hour of Kontagent events with protocol 'gs', then load the file into BigQuery.

        The events are decoded, reduced to the ``v1`` columns and filtered
        with ``uid_type_check`` before being pushed. There is no assertion;
        the test only checks that the pipeline runs.
        """
        bucket = 'bertrandtest'
        game = 'wordox'
        day = '20151021'
        hour = '14'
        table_name = '{}_{}'.format(game, day)
        filename = '{}.csv'.format(table_name + hour)

        # The export window is one hour, starting at the given day and hour.
        parsed = time.strptime(day + ' ' + hour, '%Y%m%d %H')
        # The first four struct_time fields are year, month, day and hour.
        start_time = datetime(*parsed[:4])
        end_time = start_time + timedelta(hours=1)

        events = Babe().pull_kontagent(
            start_time=start_time,
            sample_mode=False,
            end_time=end_time,
            KT_APPID='869fb4a24faa4c61b702ea137cbe16ad',
            discard_names=["PointSend"],
        )
        events = events.mapTo(decode_data, insert_fields=["decoded_data"])
        events = events.filterColumns(keep_fields=v1)
        events = events.filter(lambda row: uid_type_check(row) is True)
        events.push(
            filename=filename,
            format='csv',
            delimiter='\t',
            quotechar='|',
            encoding='utf8',
            bucket=bucket,
            protocol='gs',
        )

        # The first four columns are REQUIRED; the rest carry no explicit mode.
        required_columns = [
            ("date", "STRING"),
            ("hour", "INTEGER"),
            ("time", "TIMESTAMP"),
            ("name", "STRING"),
        ]
        optional_columns = [
            ("uid", "INTEGER"),
            ("st1", "STRING"),
            ("st2", "STRING"),
            ("st3", "STRING"),
            ("channel_type", "STRING"),
            ("value", "INTEGER"),
            ("level", "INTEGER"),
            ("recipients", "STRING"),
            ("tracking_data", "STRING"),
            ("data", "STRING"),
        ]
        schema = [
            {"name": column, "type": kind, "mode": "REQUIRED"}
            for column, kind in required_columns
        ] + [
            {"name": column, "type": kind}
            for column, kind in optional_columns
        ]

        events.push_bigquery(
            filename=filename,
            bucket=bucket,
            project_id='bigquery-testing-1098',
            dataset_id='ladata',
            table_name=table_name,
            schema=schema,
            job_id='{}_{}'.format(start_time, end_time),
            num_retries=5,
        )
Exemplo n.º 59
0
 def test_zip(self):
     """Write the CSV fixture to ``tests/files/test.zip``, read it back and expect the same text."""
     source = Babe().pull(string=self.s, format="csv")
     source.push(filename='tests/files/test.zip')
     reread = Babe().pull(filename='tests/files/test.zip')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(reread.to_string(), self.s)
Exemplo n.º 60
0
 def test_join_none(self):
     """Join ``self.s1`` with ``self.s2_bis`` using on_error=Babe.ON_ERROR_NONE and compare with ``self.sjoined_bis``."""
     left = Babe().pull(stream=StringIO(self.s1), format='csv')
     right = Babe().pull(stream=StringIO(self.s2_bis), format='csv')
     joined = left.join(
         join_stream=right,
         key='country',
         join_key='country_code',
         on_error=Babe.ON_ERROR_NONE)
     out = StringIO()
     joined.push(stream=out, format='csv')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out.getvalue(), self.sjoined_bis)