def _bulk_upsert(self, documents, namespace):
        """Insert *documents* into *namespace* in chunks of ``self.chunk_size``.

        Documents are buffered and flushed to PostgreSQL via
        ``sql_bulk_insert`` every ``chunk_size`` documents, committing after
        each flush so long imports make durable progress and the buffer
        stays bounded.

        :param documents: iterable of documents to insert
        :param namespace: target namespace (assumed already mapped --
            callers check ``is_mapped`` before calling; TODO confirm)
        """
        with self.pgsql.cursor() as cursor:
            document_buffer = []
            insert_accumulator = 0

            for document in documents:
                document_buffer.append(document)
                insert_accumulator += 1

                # Flush a full chunk and commit so progress is durable.
                if len(document_buffer) == self.chunk_size:
                    sql_bulk_insert(cursor,
                                    self.mappings,
                                    namespace,
                                    document_buffer,
                                    quiet=self.quiet)

                    self.commit()
                    document_buffer = []

                    LOG.info('%s %s copied...', insert_accumulator, namespace)

            # Flush the final partial chunk, if any.  The previous version
            # unconditionally inserted/committed here, issuing a useless
            # empty bulk insert whenever the document count was an exact
            # multiple of chunk_size (or there were no documents at all).
            if document_buffer:
                sql_bulk_insert(cursor,
                                self.mappings,
                                namespace,
                                document_buffer,
                                quiet=self.quiet)
                self.commit()
    def upsert(self, doc, namespace, timestamp):
        if not is_mapped(self.mappings, namespace):
            return

        try:
            with self.pgsql.cursor() as cursor:
                self._upsert(namespace, doc, cursor, timestamp)
                self.commit()
        except Exception as e:
            LOG.error("Impossible to upsert %s to %s\n%s", doc, namespace,
                      traceback.format_exc())
    def upsert(self, doc, namespace, timestamp):
        if not is_mapped(self.mappings, namespace):
            return

        try:
            with self.pgsql.cursor() as cursor:
                self._upsert(namespace, doc, cursor, timestamp)
                self.commit()

        except psycopg2.Error:
            LOG.error(u"Impossible to upsert %s to %s", doc, namespace)

            if not self.quiet:
                LOG.error(u"Traceback:\n%s", traceback.format_exc())
    def bulk_upsert(self, documents, namespace, timestamp):
        LOG.info('Inspecting %s...', namespace)

        if is_mapped(self.mappings, namespace):
            LOG.info('Mapping found for %s !...', namespace)
            LOG.info('Deleting all rows before update %s !...', namespace)

            db, collection = db_and_collection(namespace)
            for linked_table in self.get_linked_tables(db, collection):
                sql_delete_rows(self.pgsql.cursor(), linked_table)

            sql_delete_rows(self.pgsql.cursor(), collection)
            self.commit()

            self._bulk_upsert(documents, namespace)
            LOG.info('%s done.', namespace)
def _init_schema(self):
        """(Re)create the PostgreSQL schema described by ``self.mappings``.

        For every mapped collection: drop any pre-existing table, create a
        fresh one with the mapped columns (plus a ``_creationdate`` column),
        create its indices, and finally add all collected foreign keys.
        One commit per database.  Errors are logged, never raised.
        """
        self.prepare_mappings()

        try:
            for database in self.mappings:
                # Foreign keys are collected across all collections of the
                # database and added in one pass at the end, after every
                # referenced table exists.
                foreign_keys = []

                with self.pgsql.cursor() as cursor:
                    for collection in self.mappings[database]:
                        self.insert_accumulator[collection] = 0

                        pk_found = False
                        pk_name = self.mappings[database][collection]['pk']
                        # Every table gets a _creationdate column plus a
                        # descending index on it; mapping-declared indices
                        # are appended after it.
                        columns = ['_creationdate TIMESTAMP']
                        indices = [u"INDEX idx_{0}__creation_date ON {0} (_creationdate DESC)".format(collection)] + \
                                  self.mappings[database][collection].get('indices', [])

                        for column in self.mappings[database][collection]:
                            column_mapping = self.mappings[database][
                                collection][column]

                            # Only entries with a 'dest' are real columns;
                            # other keys (e.g. 'pk') are mapping metadata.
                            if 'dest' in column_mapping:
                                name = column_mapping['dest']
                                column_type = column_mapping['type']
                                nullable = column_mapping.get('nullable', True)

                                constraints = ''
                                if name == pk_name:
                                    constraints = "CONSTRAINT {0}_PK PRIMARY KEY".format(
                                        collection.upper())
                                    pk_found = True

                                if not nullable:
                                    constraints = '{} NOT NULL'.format(
                                        constraints)

                                # Array-typed fields live in their own
                                # linked tables, not as columns here.
                                if column_type != ARRAY_TYPE and column_type != ARRAY_OF_SCALARS_TYPE:
                                    columns.append(name + ' ' + column_type +
                                                   ' ' + constraints)

                                if 'index' in column_mapping:
                                    indices.append(
                                        u"INDEX idx_{2}_{0} ON {1} ({0})".
                                        format(name, collection,
                                               collection.replace('.', '_')))

                            if 'fk' in column_mapping:
                                foreign_keys.append({
                                    'table':
                                    column_mapping['dest'],
                                    'ref':
                                    collection,
                                    'fk':
                                    column_mapping['fk'],
                                    'pk':
                                    pk_name
                                })

                        # No mapped column matched the declared pk: fall
                        # back to a SERIAL surrogate primary key.
                        if not pk_found:
                            columns.append(pk_name + ' SERIAL CONSTRAINT ' +
                                           collection.upper() +
                                           '_PK PRIMARY KEY')

                        # Destructive: existing table (and its data) is
                        # dropped before recreation.
                        if sql_table_exists(cursor, collection):
                            sql_drop_table(cursor, collection)

                        sql_create_table(cursor, collection, columns)

                        for index in indices:
                            cursor.execute("CREATE " + index)

                    sql_add_foreign_keys(cursor, foreign_keys)
                    self.commit()

        except psycopg2.Error:
            LOG.error(u"A fatal error occured during tables creation")

            if not self.quiet:
                LOG.error(u"Traceback:\n%s", traceback.format_exc())
    def bulk_upsert(self, documents, namespace, timestamp):
        LOG.info('Inspecting %s...', namespace)

        if is_mapped(self.mappings, namespace):
            try:
                LOG.info('Mapping found for %s !...', namespace)
                LOG.info('Deleting all rows before update %s !...', namespace)

                db, collection = db_and_collection(namespace)
                for linked_table in self.get_linked_tables(db, collection):
                    sql_delete_rows(self.pgsql.cursor(), linked_table)

                sql_delete_rows(self.pgsql.cursor(), collection)
                self.commit()

                self._bulk_upsert(documents, namespace)
                LOG.info('%s done.', namespace)

            except psycopg2.Error:
                LOG.error(
                    "Impossible to bulk insert documents in namespace %s: %s",
                    namespace, documents)

                if not self.quiet:
                    LOG.error("Traceback:\n%s", traceback.format_exc())
# Exemplo n.º 7
# 0
def sql_bulk_insert(cursor, mappings, namespace, documents, quiet=False):
    """Insert *documents* into PostgreSQL as one CTE-based statement per
    query tree.

    ``_sql_bulk_insert`` turns the documents into query trees (one per
    document, presumably -- confirm against its definition), and each tree
    is flattened into an ordered list of subqueries.  Every subquery
    contributes a ``VALUES`` CTE holding its literal column values; parent
    subqueries additionally get an ``INSERT ... RETURNING pk`` CTE so that
    child rows can reference the parent's primary key as a foreign key.
    The last subquery becomes the final top-level ``INSERT``.

    :param cursor: open psycopg2 cursor used to execute each statement
    :param mappings: the collection-to-table mapping configuration
    :param namespace: "db.collection" style namespace
    :param documents: documents to insert
    :param quiet: when True, suppress traceback logging on failure
    """
    queries = []
    _sql_bulk_insert(queries, mappings, namespace, documents)

    for querytree in queries:
        # Flatten the tree into execution order: parents before children,
        # final (root) insert last.
        query = flatten_query_tree([querytree])

        with_stmts = []
        final_stmt = ''

        for subquery in query:
            keyvals = dict(zip(subquery['keys'], subquery['values']))
            foreign_keys = {}
            values = {}

            # Split literal values from foreign-key references.
            # NOTE(review): ForeignKey values look like "table.column"
            # (only the column part is kept) -- relies on ForeignKey
            # supporting str.split, presumably a str subclass; confirm.
            for key in keyvals:
                val = keyvals[key]

                if isinstance(val, ForeignKey):
                    foreign_keys[key] = val.split('.')[1]

                else:
                    values[key] = val

            # Sorted key lists keep column order deterministic and aligned
            # between the CTE column list and the VALUES tuple.
            foreign_keys_sorted = sorted(foreign_keys.keys())
            values_sorted = sorted(values.keys())

            data_alias = '{0}_data_{1}'.format(subquery['collection'],
                                               subquery['idx'])
            rows_alias = '{0}_rows_{1}'.format(subquery['collection'],
                                               subquery['idx'])
            # Stored so child subqueries can reference this subquery's
            # rows CTE via subquery['parent'].
            subquery['alias'] = {'data': data_alias, 'rows': rows_alias}

            # VALUES CTE carrying this subquery's literal column values.
            with_stmts.append(
                '{alias} ({columns}) AS (VALUES ({values}))'.format(
                    alias=data_alias,
                    columns=', '.join(values_sorted),
                    values=', '.join([values[key] for key in values_sorted])))

            keys = ', '.join(values_sorted + foreign_keys_sorted)
            projection = [
                '{0}.{1} AS {1}'.format(data_alias, key)
                for key in values_sorted
            ]
            aliases = [data_alias]

            # Child rows pull their FK columns from the parent's
            # INSERT ... RETURNING CTE.
            if 'parent' in subquery:
                psubquery = query[subquery['parent']]
                parent_rows_alias = psubquery['alias']['rows']

                projection += [
                    '{0}.{1} AS {2}'.format(parent_rows_alias,
                                            foreign_keys[key], key)
                    for key in foreign_keys_sorted
                ]
                aliases.append(parent_rows_alias)

            projection = ', '.join(projection)
            aliases = ', '.join(aliases)

            if not subquery['last']:
                # Intermediate insert: expose the generated pk via
                # RETURNING so descendants can join on it.
                with_stmts.append(
                    '{alias} AS (INSERT INTO {table} ({columns}) SELECT {projection} FROM {aliases} RETURNING {pk})'
                    .format(alias=rows_alias,
                            table=subquery['collection'],
                            columns=keys,
                            projection=projection,
                            aliases=aliases,
                            pk=subquery['pk']))

            else:
                # Root insert: becomes the statement after the WITH clause.
                final_stmt = 'INSERT INTO {table} ({columns}) SELECT {projection} FROM {aliases}'.format(
                    table=subquery['collection'],
                    columns=keys,
                    projection=projection,
                    aliases=aliases)

        sql = 'WITH {0} {1}'.format(', '.join(with_stmts), final_stmt)

        try:
            cursor.execute(sql)

        except psycopg2.Error as e:
            # Log the offending document's pk value plus the generated SQL
            # for debugging; errors are swallowed so remaining trees still
            # get a chance to execute.
            LOG.error(
                u"Impossible to upsert document %s in namespace %s: %s\n%s",
                querytree['document']['mapped'][querytree['pk']],
                querytree['collection'], e, sql)

            if not quiet:
                LOG.error(u"Traceback:\n%s", traceback.format_exc())