Example #1
File: bqdf.py  Project: askerry/bqpy
def query(self,
          querystr,
          fetch=cfg.FETCH_BY_DEFAULT,
          dest=None,
          fill=True,
          overwrite_method='fail'):
    '''execute any arbitrary query on the associated table'''
    self.fetched = fetch
    with bqutil.Mask_Printing():
        # run the query remotely and wrap the resulting table in a new BQDF
        output, source, exceeds_max = raw_query(
            self.con,
            querystr,
            self.last_modified,
            dest=dest,
            fetch=fetch,
            overwrite_method=overwrite_method)
        new_bqdf = BQDF(self.con,
                        '%s' % bqutil.stringify(source),
                        fill=fill)
        new_bqdf.local = output
        new_bqdf.fetched = fetch
    if exceeds_max:
        pass  # TODO: figure out why exceeds_max isn't behaving as expected
        # print "Number of rows in remote table exceeds bqdf object's
        # max_rows. Only max_rows have been fetched locally"
    return new_bqdf
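A minimal usage sketch, assuming `bqdf` is an existing BQDF instance bound to a table, that the dataset/column names below are placeholders, and that the new object's `.local` attribute holds the fetched rows:

# hypothetical usage: run an aggregate query and get back a new BQDF
result = bqdf.query(
    "SELECT category, COUNT(*) AS n FROM [mydataset.mytable] GROUP BY category",
    fetch=True)      # fetch query results into the new object's .local attribute
print(result.local)  # rows fetched locally from the query result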
Example #2
File: core.py  Project: askerry/bqpy
def stream_df_to_remote(con,
                        df,
                        overwrite_method='fail',
                        projectId=None,
                        datasetId=None,
                        tableId=None):
    '''write pandas dataframe as bigquery table (streaming insertAll)'''
    # build a BigQuery schema from the dataframe's dtypes
    schema = {"fields": bqutil.bqjson_from_df(df, dumpjson=False)}
    dataset_ref = {'datasetId': datasetId, 'projectId': projectId}
    table_ref = {
        'tableId': tableId,
        'datasetId': datasetId,
        'projectId': projectId
    }
    table = {
        "kind": "bigquery#table",
        'tableReference': table_ref,
        'schema': schema
    }
    try:
        # create the destination table; this raises if it already exists
        con.client._apiclient.tables().insert(body=table,
                                              **dataset_ref).execute()
    except Exception:
        pass  # table already exists; rows are streamed into the existing table
    # convert each dataframe row into the insertAll JSON row format
    datarows = []
    for _, row in df.iterrows():
        jsondata = {col: row[col] for col in df.columns}
        datarows.append({"json": jsondata})

    body = {'kind': 'bigquery#tableDataInsertAllRequest', 'rows': datarows}
    update = con.client._apiclient.tabledata().insertAll(
        body=body, **table_ref).execute()
    return con, bqutil.stringify(table_ref)
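A minimal usage sketch, assuming `con` is an already-authenticated bqpy connection object and that the project/dataset/table identifiers are placeholders:

import pandas as pd

# hypothetical usage: stream a small dataframe into a (possibly new) BigQuery table
df = pd.DataFrame({'name': ['a', 'b'], 'value': [1, 2]})
con, table_str = stream_df_to_remote(con, df,
                                     projectId='my-project',
                                     datasetId='my_dataset',
                                     tableId='my_table')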
Example #3
File: core.py  Project: askerry/bqpy
def batch_df_to_remote(con,
                       df,
                       overwrite_method='fail',
                       delete=True,
                       name=None,
                       bucket=None,
                       projectId=None,
                       datasetId=None,
                       tableId=None):
    '''write pandas dataframe as bigquery table (batch load via a storage bucket)'''
    schema = {"fields": bqutil.bqjson_from_df(df, dumpjson=False)}
    table_ref = {
        'tableId': tableId,
        'datasetId': datasetId,
        'projectId': projectId
    }
    # map the overwrite_method argument onto BigQuery's write dispositions
    if overwrite_method == 'append':
        write_disposition = 'WRITE_APPEND'
    elif overwrite_method == 'overwrite':
        write_disposition = 'WRITE_TRUNCATE'
    else:
        write_disposition = 'WRITE_EMPTY'
    # dump the dataframe to a local csv and stage it in the cloud storage bucket
    df.to_csv(tableId + '.csv', index=False)
    filename = os.path.join(os.getcwd(), tableId + '.csv')
    if name is None:
        name = datasetId + tableId
    bqutil.file_to_bucket(con, projectId, bucket, filename, name=name)
    # load the staged csv from the bucket into the BigQuery table
    jobref = bucket_to_bq(con,
                          table_ref,
                          projectId,
                          bucket,
                          name,
                          schema=schema,
                          write_disposition=write_disposition,
                          wait=True)
    if delete:
        delete_from_bucket(con, projectId, bucket, name)
    return con, bqutil.stringify(table_ref)
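A minimal usage sketch, assuming `con` is an already-authenticated bqpy connection object and that the staging bucket, project, dataset, and table names below are placeholders:

import pandas as pd

# hypothetical usage: batch-load a dataframe into BigQuery through a staging bucket
df = pd.DataFrame({'name': ['a', 'b'], 'value': [1, 2]})
con, table_str = batch_df_to_remote(con, df,
                                    overwrite_method='overwrite',
                                    bucket='my-staging-bucket',
                                    projectId='my-project',
                                    datasetId='my_dataset',
                                    tableId='my_table')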