Exemplo n.º 1
0
def push(format, metainfo, instream, outfile, encoding, delimiter=None, **kwargs):
    """Write the rows of ``instream`` to ``outfile`` as CSV, after a header row.

    The first row written is ``metainfo.fields``. Rows are then copied from
    ``instream`` until it is exhausted or a ``StreamFooter`` is met; the
    footer itself is not written.

    Args:
        format: Not read by this function.
        metainfo: Object whose ``fields`` attribute supplies the header row.
        instream: Iterable yielding rows, optionally ended by a
            ``StreamFooter``.
        outfile: Output target handed to ``UnicodeCSVWriter``.
        encoding: Output encoding; a falsy value falls back to ``"utf8"``.
        delimiter: Optional delimiter that overrides the dialect's own.
        **kwargs: Only ``dialect`` is read; it defaults to the module-level
            ``default_dialect``.
    """
    # Imported locally so the function does not rely on file-level imports.
    import copy
    import inspect

    if not encoding:
        encoding = "utf8"
    dialect = kwargs.get('dialect', default_dialect)
    if delimiter:
        # Never assign onto the dialect we were handed: it is either the
        # shared module-level default or an object owned by the caller, and
        # mutating it would leak this delimiter into every later call.
        if inspect.isclass(dialect):
            # A class statement (rather than type()) also works when the
            # base is a Python 2 classic class.
            class _DelimitedDialect(dialect):
                pass
            dialect = _DelimitedDialect
        else:
            dialect = copy.copy(dialect)
        dialect.delimiter = delimiter
    writer = UnicodeCSVWriter(outfile, dialect=dialect, encoding=encoding)
    writer.writerow(metainfo.fields)
    for k in instream:
        if isinstance(k, StreamFooter):
            # The footer marks the end of the data; nothing after it is written.
            break
        writer.writerow(k)
Exemplo n.º 2
0
def push_sql(stream, database_kind, table=None, host=None, create_table=False, drop_table=False, protocol=None, database=None,
    ssh_host=None, user=None, password=None, sql_command=None, delete_partition=False, **kwargs):
    """Load the rows of ``stream`` into a SQL table through a command-line client.

    The client command comes from ``PUSH_DB[database_kind]['command']``,
    extended with the optional user/password fragments and the database name.
    The stream is then consumed item by item:

    * ``StreamHeader``: the target table is ``table`` or, when that is falsy,
      the header's ``typename``. A client process is started and, depending
      on the flags, sent the drop-table, create-table and delete-partition
      statements. A loader is then started, either the ``import_query`` fed
      through a ``TempFifo`` or the ``load_command`` subprocess, and a CSV
      writer is attached to it.
    * ``StreamFooter``: the loader's input is closed and the process awaited.
    * anything else: written as one CSV row to the current loader.

    Args:
        stream: Iterable of ``StreamHeader``, row and ``StreamFooter`` items.
        database_kind: Key into ``PUSH_DB`` selecting the command templates.
        table: Target table name; defaults to the header's ``typename``.
        create_table: Send a create-table statement declaring every header
            field as ``varchar(255)``.
        drop_table: Send a drop-table statement first.
        database: Database name appended to the client command (required).
        user: Optional user, formatted into ``db_params['user']``.
        password: Optional password, formatted into ``db_params['password']``.
        delete_partition: Delete rows matching the header's partition before
            loading; ignored when ``drop_table`` is set.
        host, protocol, ssh_host, sql_command, **kwargs: Accepted but not
            read by this function.

    Raises:
        Exception: If ``database`` is missing, if ``delete_partition`` is
            requested but the header carries no partition, or if the
            database kind defines neither ``import_query`` nor
            ``load_command``.
    """
    db_params = PUSH_DB[database_kind]
    c = db_params['command']
    if user:
        c = c + [db_params['user'] % user]
    if password:
        # NOTE(review): the password becomes an element of the client's
        # argument list.
        c = c + [db_params['password'] % password]

    if not database:
        raise Exception("Missing parameter (database)")

    c = c + [database]

    for row in stream:
        if isinstance(row, StreamHeader):
            metainfo = row
            if not table:
                table_name = metainfo.typename
            else:
                table_name = table

            # First client process: table preparation statements only.
            p = Popen(c, stdin=PIPE, stdout=None, stderr=None)

            # Popen.returncode stays None until poll() or wait() is called,
            # so the checks below must poll(); reading the bare attribute
            # could never detect a client that has already failed.
            if drop_table:
                drop_table_query = db_params['drop_table'] % table_name
                p.stdin.write(drop_table_query)
                p.stdin.flush()
                if p.poll():
                    break

            if create_table:
                fields = ','.join([name + ' varchar(255)' for name in metainfo.fields])
                create_table_query = Template(db_params['create_table']).substitute(table=table_name, fields=fields)
                p.stdin.write(create_table_query)
                p.stdin.flush()
                if p.poll():
                    break

            if delete_partition and not drop_table:
                if not metainfo.partition:
                    raise Exception("No partition information available in header: unable to delete partition")
                # NOTE(review): partition values are interpolated into the
                # SQL text unescaped; confirm they come from a trusted source.
                conditions = ["%s = '%s'" % (k, str(v)) for (k, v) in metainfo.partition.iteritems()]
                condition = ' AND '.join(conditions)
                delete_partition_query = Template(db_params['delete_partition']).substitute(table=table_name, condition=condition)
                p.stdin.write(delete_partition_query)
                p.stdin.flush()
                if p.poll():
                    break

            # Closing stdin ends the client's input; wait for it to exit
            # before starting the loader.
            p.stdin.close()
            p.wait()

            if "import_query" in db_params:
                # A second client runs the import statement, which names the
                # fifo; the rows are then written into that fifo.
                p = Popen(c, stdin=PIPE, stdout=None, stderr=None)
                tmpfifo = TempFifo()
                import_query = db_params['import_query'] % (tmpfifo.filename, table_name)
                p.stdin.write(import_query)
                p.stdin.flush()
                writestream = tmpfifo.open_write()
            elif 'load_command' in db_params:
                load_command = [Template(s).substitute(table=table_name, database=database) for s in db_params['load_command']]
                # Parenthesised form prints the same text under the Python 2
                # print statement and also parses on Python 3.
                print(load_command)
                pp = Popen(load_command, stdin=PIPE, stdout=None, stderr=None)
                writestream = pp.stdin
            else:
                raise Exception("Missing load_command or import_query in db_kind spec")

            writer = UnicodeCSVWriter(writestream, dialect=sql_dialect(), encoding="utf-8")
        elif isinstance(row, StreamFooter):
            # End of this table's data: shut the loader down and wait for it.
            if "import_query" in db_params:
                tmpfifo.close()
                p.stdin.close()
                p.wait()
            elif 'load_command' in db_params:
                pp.stdin.close()
                pp.wait()
        else:
            # Data row. ``writer`` only exists once a header has been seen.
            writer.writerow(row)
Exemplo n.º 3
0
def push_sql(stream,
             database_kind,
             table=None,
             host=None,
             create_table=False,
             drop_table=False,
             protocol=None,
             database=None,
             ssh_host=None,
             user=None,
             password=None,
             sql_command=None,
             delete_partition=False,
             **kwargs):
    """Stream rows into a SQL table by piping them to a command-line client.

    ``PUSH_DB[database_kind]`` supplies the command and statement templates.
    The base command is its ``'command'`` list plus the formatted user and
    password fragments (when given) and the database name.

    Items of ``stream`` are handled by type:

    * ``StreamHeader``: picks the table name (``table``, else the header's
      ``typename``), runs the requested preparation statements in a client
      process, waits for it, then starts the loader (``import_query`` via a
      ``TempFifo``, or the ``load_command`` subprocess) and binds a
      ``UnicodeCSVWriter`` to the loader's input.
    * ``StreamFooter``: closes the loader's input and waits for it to exit.
    * any other item: written as a CSV row through the current writer.

    Args:
        stream: Iterable of headers, rows and footers.
        database_kind: Key into ``PUSH_DB``.
        table: Explicit table name; falls back to the header's ``typename``.
        create_table: When true, create the table with one ``varchar(255)``
            column per header field.
        drop_table: When true, drop the table first.
        database: Database name; required.
        user: Optional user for the client command.
        password: Optional password for the client command.
        delete_partition: When true (and ``drop_table`` is false), delete
            the rows matching the header's ``partition`` mapping.
        host, protocol, ssh_host, sql_command, **kwargs: Accepted for
            interface compatibility; not read here.

    Raises:
        Exception: On a missing ``database``, on ``delete_partition`` with
            no partition in the header, or when the database kind has
            neither ``import_query`` nor ``load_command``.
    """
    db_params = PUSH_DB[database_kind]
    c = db_params['command']
    if user:
        c = c + [db_params['user'] % user]
    if password:
        # NOTE(review): this places the password in the process arguments.
        c = c + [db_params['password'] % password]

    if not database:
        raise Exception("Missing parameter (database)")

    c = c + [database]

    for row in stream:
        if isinstance(row, StreamHeader):
            metainfo = row
            if not table:
                table_name = metainfo.typename
            else:
                table_name = table

            # Preparation client: receives only the DDL/DELETE statements.
            p = Popen(c, stdin=PIPE, stdout=None, stderr=None)

            # The abort checks below call poll() because Popen.returncode is
            # only populated by poll()/wait(); the plain attribute is always
            # None at this point and would make each check dead code.
            if drop_table:
                drop_table_query = db_params['drop_table'] % table_name
                p.stdin.write(drop_table_query)
                p.stdin.flush()
                if p.poll():
                    break

            if create_table:
                fields = ','.join(
                    [name + ' varchar(255)' for name in metainfo.fields])
                create_table_query = Template(
                    db_params['create_table']).substitute(table=table_name,
                                                          fields=fields)
                p.stdin.write(create_table_query)
                p.stdin.flush()
                if p.poll():
                    break

            if delete_partition and not drop_table:
                if not metainfo.partition:
                    raise Exception(
                        "No partition information available in header: unable to delete partition"
                    )
                # NOTE(review): values are quoted but not escaped; verify
                # that partition data cannot contain quote characters.
                conditions = [
                    "%s = '%s'" % (k, str(v))
                    for (k, v) in metainfo.partition.iteritems()
                ]
                condition = ' AND '.join(conditions)
                delete_partition_query = Template(
                    db_params['delete_partition']).substitute(
                        table=table_name, condition=condition)
                p.stdin.write(delete_partition_query)
                p.stdin.flush()
                if p.poll():
                    break

            # Let the preparation client finish before any data is loaded.
            p.stdin.close()
            p.wait()

            if "import_query" in db_params:
                # The import statement names the fifo; rows are then written
                # into that fifo.
                p = Popen(c, stdin=PIPE, stdout=None, stderr=None)
                tmpfifo = TempFifo()
                import_query = db_params['import_query'] % (tmpfifo.filename,
                                                            table_name)
                p.stdin.write(import_query)
                p.stdin.flush()
                writestream = tmpfifo.open_write()
            elif 'load_command' in db_params:
                load_command = [
                    Template(s).substitute(table=table_name, database=database)
                    for s in db_params['load_command']
                ]
                # Same output as the Python 2 print statement; the
                # parentheses also make the line parse on Python 3.
                print(load_command)
                pp = Popen(load_command, stdin=PIPE, stdout=None, stderr=None)
                writestream = pp.stdin
            else:
                raise Exception(
                    "Missing load_command or import_query in db_kind spec")

            writer = UnicodeCSVWriter(writestream,
                                      dialect=sql_dialect(),
                                      encoding="utf-8")
        elif isinstance(row, StreamFooter):
            # Footer: close the loader's input and wait for it to finish.
            if "import_query" in db_params:
                tmpfifo.close()
                p.stdin.close()
                p.wait()
            elif 'load_command' in db_params:
                pp.stdin.close()
                pp.wait()
        else:
            # Plain data row; requires that a header was processed earlier.
            writer.writerow(row)