def test_filter2(self):
    """Check that ``filterColumns(remove_fields=...)`` drops the named column.

    A three-row CSV with columns ``a`` and ``b`` is pulled, type-detected,
    stripped of column ``a`` and pushed back out as CSV into an in-memory
    buffer; only column ``b`` is expected in the output.
    """
    source = StringIO('a,b\n1,2\n3,4\n1,4\n')
    a = Babe().pull(stream=source, format="csv").typedetect()
    a = a.filterColumns(remove_fields=['a'])
    buf = StringIO()
    a.push(stream=buf, format="csv")
    # assertEqual rather than the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(buf.getvalue(), "b\n2\n4\n4\n")
def test_twitter(self):
    """Smoke-test the ``pull_twitter`` source through to a CSV push.

    The stream is reduced to a fixed set of fields, type-detected and
    written as CSV into an in-memory buffer. Nothing is asserted on the
    output; the test passes as long as no step raises.
    """
    wanted_fields = [
        "author_name",
        "author_id",
        "author_screen_name",
        "created_at",
        "hashtags",
        "text",
        "in_reply_to_status_id_str",
    ]
    tweets = (
        Babe()
        .pull_twitter()
        .filterColumns(keep_fields=wanted_fields)
        .typedetect()
    )
    sink = StringIO()
    tweets.push(stream=sink, format='csv')
def test_twitter(self):
    """Smoke-test the ``pull_twitter`` source through to ``to_string``.

    The stream is reduced to a fixed set of fields, type-detected and
    rendered with ``to_string()``. The rendered value is discarded; the
    test passes as long as no step raises.
    """
    wanted_fields = [
        "author_name",
        "author_id",
        "author_screen_name",
        "created_at",
        "hashtags",
        "text",
        "in_reply_to_status_id_str",
    ]
    tweets = (
        Babe()
        .pull_twitter()
        .filterColumns(keep_fields=wanted_fields)
        .typedetect()
    )
    tweets.to_string()
def test_filter2(self):
    """Check that ``filterColumns(remove_fields=...)`` drops the named column.

    A three-row CSV with columns ``a`` and ``b`` is pulled, type-detected
    and stripped of column ``a``; ``to_string()`` is then expected to
    yield only column ``b``.
    """
    source = StringIO('a,b\n1,2\n3,4\n1,4\n')
    a = Babe().pull(stream=source, format="csv").typedetect()
    a = a.filterColumns(remove_fields=['a'])
    # assertEqual rather than the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(a.to_string(), "b\n2\n4\n4\n")
def test_gs_load_from_kontagent(self):
    """Pull a one-hour Kontagent window, push it with protocol 'gs', load BigQuery.

    The window starts at 2015-10-21 14:00 and ends one hour later. Rows are
    passed through ``decode_data``, restricted to the ``v1`` field list and
    filtered with ``uid_type_check`` before being written as a
    tab-delimited CSV and handed to ``push_bigquery``. No assertion is
    made; the test passes as long as no step raises.
    """
    bucket = 'bertrandtest'
    game = 'wordox'
    day = '20151021'
    hour = '14'

    # The table is named per day, the exported file per day + hour.
    table_name = '{}_{}'.format(game, day)
    filename = '{}.csv'.format(table_name + hour)

    # Parsing with only date and hour directives leaves minutes and
    # seconds at zero, so this is the start of the hour.
    start_time = datetime.strptime(day + ' ' + hour, '%Y%m%d %H')
    end_time = start_time + timedelta(hours=1)

    def _passes_uid_type_check(row):
        # Keep the strict identity test: only a literal True keeps the row.
        return uid_type_check(row) is True

    bigquery_schema = [
        {"name": "date", "type": "STRING", "mode": "REQUIRED"},
        {"name": "hour", "type": "INTEGER", "mode": "REQUIRED"},
        {"name": "time", "type": "TIMESTAMP", "mode": "REQUIRED"},
        {"name": "name", "type": "STRING", "mode": "REQUIRED"},
        {"name": "uid", "type": "INTEGER"},
        {"name": "st1", "type": "STRING"},
        {"name": "st2", "type": "STRING"},
        {"name": "st3", "type": "STRING"},
        {"name": "channel_type", "type": "STRING"},
        {"name": "value", "type": "INTEGER"},
        {"name": "level", "type": "INTEGER"},
        {"name": "recipients", "type": "STRING"},
        {"name": "tracking_data", "type": "STRING"},
        {"name": "data", "type": "STRING"},
    ]

    # NOTE(review): the Kontagent application ID is hard-coded here --
    # confirm it is acceptable to keep it in the test source.
    events = Babe().pull_kontagent(
        start_time=start_time,
        sample_mode=False,
        end_time=end_time,
        KT_APPID='869fb4a24faa4c61b702ea137cbe16ad',
        discard_names=["PointSend"],
    )
    events = events.mapTo(decode_data, insert_fields=["decoded_data"])
    events = events.filterColumns(keep_fields=v1)
    events = events.filter(_passes_uid_type_check)

    events.push(
        filename=filename,
        format='csv',
        delimiter='\t',
        quotechar='|',
        encoding='utf8',
        bucket=bucket,
        protocol='gs',
    )

    # NOTE(review): job_id is built from str() of the two datetimes, so it
    # contains spaces and colons -- confirm push_bigquery accepts that.
    events.push_bigquery(
        filename=filename,
        bucket=bucket,
        project_id='bigquery-testing-1098',
        dataset_id='ladata',
        table_name=table_name,
        schema=bigquery_schema,
        job_id='{}_{}'.format(start_time, end_time),
        num_retries=5,
    )