Example #1
def drop_trigger_type_notification(db, schema, table, name, proc):
    cmd = "DROP TRIGGER IF EXISTS %s ON %s.%s CASCADE" % (name, schema, table)
    try:
        logger.info("[TABLE] Dropping trigger '%s'" % name)
        db.execute_cmd(cmd)
    except Exception as ex:
        logger.error("[TABLE] Dropping trigger '%s' failed: %s" % (name, ex))
Example #2
def add_pk(db, schema, table, attr):
    """
    Adds primary key to a PostgreSQL table.

    Parameters
    ----------
    db : obj
    schema : string
    table : string
    attr  : string

    Returns
    -------
    -

    Example
    -------
    add_pk(pg, 'public', 'employee', 'id')

    """
    cmd = 'ALTER TABLE %s.%s ADD PRIMARY KEY (%s)' % (schema, table.lower(),
                                                      attr)
    try:
        db.execute_cmd(cmd)
    except Exception as ex:
        logger.error(
            """
            Failed to add primary key to table %s.
            Details: %s
            """ % (table, ex), CURR_FILE)
Example #3
def add_multiple_columns(db, schema, table, attrs, types):
    """
    Add new column to a specific table.
    Parameters
    ----------
    name : str
    column_name : str
    column_type : str

    Example
    -------
    add_multiple_columns(db, ['nyanya', some_integer'], ['text', integer'])
    """
    statements_add = []
    attrs_types = zip(attrs, types)
    for i, j in attrs_types:
        statements_add.append(' '.join(['ADD COLUMN IF NOT EXISTS', i, j]))
    statements_merged = ', '.join(statements_add)

    cmd = "ALTER TABLE IF EXISTS %s.%s %s;" % (schema, table.lower(),
                                               statements_merged)

    for i, j in zip(attrs, types):
        logger.warn("Adding column %s (%s) to company %s." %
                    (i, j, table.lower()))
    try:
        db.execute_cmd(cmd)
    except Exception as ex:
        logger.error('[TABLE] %s when executing command %s.' % (ex, cmd))
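A minimal usage sketch for the helper above; the connection object `pg`, the table name and the column lists are placeholders, not part of the original snippet.

# Hypothetical call: `pg` stands in for a connection object exposing
# execute_cmd(), as assumed throughout these snippets.
attrs = ['nickname', 'visit_count']
types = ['text', 'integer']
# Both columns are added in one ALTER TABLE ... ADD COLUMN IF NOT EXISTS statement.
add_multiple_columns(pg, 'public', 'customer', attrs, types)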
Example #4
def exists(db, schema, table):
    """
    Check if a table exists in the PG database.

    Parameters
    ----------
    db : obj
    schema : string
    table : string

    Returns
    -------
    True: table exists in the database
    False: otherwise
    """
    cmd = """
    SELECT table_name FROM information_schema.tables
    WHERE table_schema='%s' AND table_name='%s';
    """ % (schema, table.lower())

    try:
        res = db.execute_cmd_with_fetch(cmd)
        return bool(res)
    except Exception as ex:
        logger.error('[TABLE] %s when executing command %s.' % (ex, cmd))
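A small sketch of how exists() can guard other table helpers; the drop() helper used here is the one shown in a later example, and all names are illustrative.

# Hypothetical guard built on exists(); drop() is defined in a later example.
if exists(pg, 'public', 'employee'):
    drop(pg, 'public', ['employee'])
else:
    logger.info("[TABLE] Table 'employee' does not exist, nothing to drop.")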
Example #5
def log_rows(db, schema, values):
    """
    Holds the operation, relation name, object id and
    timestamp for each entry of the oplog.

    Parameters
    ----------

    Returns
    -------
    -

    Example
    -------
    create_oplog_table(pg, 'purr')

    """
    table_name = "purr_oplog"
    # id is SERIAL type, we can skip it when inserting rows:
    attrs = table_desc[table_name]["attrs"][1:]
    try:
        row.insert_bulk(db, schema, table_name, attrs, values)
    except Exception as ex:
        logger.error(
            "[TRANSFER_INFO] Failed to insert logs into table %s: %s"
            % (table_name, ex))
Example #6
def create_log_error_table(db, schema='public'):
    """
    Creates the table that logs the error's location, message
    and timestamp when an error occurs.

    Parameters
    ----------
    db: connection obj
    schema: name of the schema in Postgres
    Returns
    -------
    -

    Example
    -------
    create_log_error_table(pg, 'purr')

    """
    table_name = "purr_error"
    attrs = table_desc[table_name]["attrs"]
    types = table_desc[table_name]["types"]

    values = [int(time.time())]
    try:
        table.create(db, schema, table_name, attrs, types)
        logger.info("[TRANSFER INFO] Created table %s." % (table_name))
    except Exception as ex:
        logger.error(
            "[TRANSFER_INFO] Failed to create table %s: %s" % (table_name, ex))
Example #7
def create_transfer_stats_table(db, schema='public'):
    """
    Creates the table that logs the count, relation name and
    timestamp for each collection transfer.

    Parameters
    ----------
    db: connection obj
    schema: name of the schema in Postgres
    Returns
    -------
    -

    Example
    -------
    create_transfer_stats_table(pg, 'purr')

    """
    table_name = "purr_transfer_stats"
    attrs = table_desc[table_name]["attrs"]
    types = table_desc[table_name]["types"]

    values = [int(time.time())]
    try:
        table.create(db, schema, table_name, attrs, types)
        logger.info("[TRANSFER INFO] Created table %s." % (table_name))
    except Exception as ex:
        logger.error(
            "[TRANSFER_INFO] Failed to create table %s: %s" % (table_name, ex))
Example #8
def drop_type_notification(db, name):
    """
    Drops the function which creates a notification.
    The notification created by this function is sent to channel 'purr'
    and signals a type change.

    Parameters
    ----------
    db  : obj
        : Postgres connection object
    name: string
        : name of the function
    Returns
    -------
    -
    Example
    -------
    drop_type_notification(db, function_name)
    """
    cmd = "DROP FUNCTION IF EXISTS %s();" % name

    try:
        db.execute_cmd(cmd)
        logger.info("Dropping procedure: %s" % name, CURR_FILE)
    except Exception as ex:
        logger.error("Dropping procedure failed: %s" % ex, CURR_FILE)
Example #9
def create_type_notification(db, name):
    """
    Creates a function which will notify channel 'purr' about type changes.

    Parameters
    ----------
    db  : obj
        : Postgres connection object
    name: string
        : name of the function
    Returns
    -------
    -
    Example
    -------
    create_type_notification(db, function_name)
    """
    cmd = """CREATE OR REPLACE FUNCTION %s()
    RETURNS TRIGGER AS $$
    BEGIN
        PERFORM pg_notify('purr', 'type_change');
        RETURN NULL;
    END;
    $$ LANGUAGE plpgsql;
    """ % name

    try:
        logger.info("Creating procedure: %s" % name, CURR_FILE)
        db.execute_cmd(cmd)

    except Exception as ex:
        logger.error("Insert failed: %s" % ex, CURR_FILE)
Example #10
def create_stat_table(db, schema='public'):
    """
    Creates a table that holds the timestamp of the
    latest successfully inserted item.
    Parameters
    ----------
    db : connection obj
    schema : name of the schema in Postgres

    Returns
    -------
    -

    Example
    -------
    create_stat_table(pg, 'purr')

    """
    table_name = "purr_info"
    attrs = table_desc[table_name]["attrs"]
    types = table_desc[table_name]["types"]
    values = [0, None, int(time.time())]
    try:
        table.create(db, schema, table_name, attrs, types)
        ts = get_latest_successful_ts(db, schema)
        if len(ts) == 0:
            row.insert(db, schema, table_name, attrs, values)
        logger.info("[TRANSFER INFO] Created table %s." % (table_name))
    except Exception as ex:
        logger.error(
            "[TRANSFER_INFO] Failed to create table %s: %s" % (table_name, ex))
Example #11
def log_error(db, values, schema='public'):
    """
    Inserts an error entry (location, message and timestamp)
    into the error log table.

    Parameters
    ----------
    db : connection obj
    values : list
    schema : name of the schema in Postgres

    Returns
    -------
    -

    Example
    -------
    log_error(pg, [], 'purr')
    """
    
    table_name = "purr_error"
    # id is SERIAL type, we can skip it when inserting rows:
    attrs = table_desc[table_name]["attrs"][1:]
    try:
        row.insert(db, schema, table_name, attrs, values)
    except Exception as ex:
        logger.error("""[TRANSFER_INFO] Failed to insert logs into table %s: %s"""
                     % (table_name, ex))
Example #12
    def insert_multiple(self, docs, r, coll):
        '''
        Transfers multiple documents with different fields
        (not whole collections).
        Used by [TAILER]
        Parameters
        ----------
        docs : list
            : list of documents
        r : Relation
            relation in PG
        coll : string
             : collection name
        Returns
        -------
        -

        Raises
        ------
        Example
        -------
        '''
        (
            att_new,
            att_orig,
            types,
            name_rel,
            type_x_props_pg
        ) = cp.config_fields(self.coll_def, coll)

        # TODO: check if this is necessary:
        if types == []:
            return

        # Adding extra properties to inserted/updated row is necessary
        # because this attribute is not part of the original document
        # and anything that is not defined in the collection.yml file
        # will be pushed in this value. This function will also create
        # a dictionary which will contain all the information about
        # the attribute before and after the conversion.

        self.attr_details = self.prepare_attr_details(
            att_new, att_orig, types, type_x_props_pg)
        # TODO remove this stuff with the extra props
        try:
            if self.include_extra_props is True:
                r.insert_bulk(
                    docs, self.attr_details, self.include_extra_props)
            else:
                r.insert_bulk_no_extra_props_tailed(
                    docs, self.attr_details, self.include_extra_props)
        except Exception as ex:
            logger.error("""
            %s Transferring to %s was unsuccessful.
            Exception: %s
            """ % (
                CURR_FILE,

                r.relation_name, ex))
            logger.error("%s\n" % docs)
Example #13
def get_latest_successful_ts(db, schema='public'):
    """
  Get the timestamp of the latest successful transfer.

  Parameters
  ----------

  Returns
  -------
  -

  Example
  -------
  get_latest_successful_ts(pg, 'purr')

  """
    table_name = 'purr_info'
    cmd = "SELECT latest_successful_ts FROM %s.%s;" % (schema, table_name)

    try:
        res = db.execute_cmd_with_fetch(cmd)
        return res
    except Exception as ex:
        logger.error(
            """[TRANSFER_INFO] Failed to get the timestamp
             of the latest successful transfer: %s"""
            % (ex)
        )
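The function returns whatever execute_cmd_with_fetch produces; below is a sketch of how a caller might unwrap a single-row, single-column result. The exact row shape is an assumption.

# Hypothetical caller; assumes the fetch returns a list of row tuples.
rows = get_latest_successful_ts(pg, 'purr')
if rows:
    latest_ts = rows[0][0]
    logger.info("[TRANSFER_INFO] Latest successful transfer at %s." % latest_ts)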
Example #14
def drop(db, schema, tables):
    """
    Drop one or more tables in the PG database.

    Parameters
    ----------
    schema : string
    tables : list

    Example
    -------
    drop(pg, 'public', ['my_table'])

    Todo
    ----
    - first check if all tables in the list exist
    """
    tables_cmd = []
    for t in tables:
        tables_cmd.append('%s.%s' % (schema, t.lower()))
    tables_cmd = ', '.join(tables_cmd)

    cmd = "DROP TABLE IF EXISTS %s" % (tables_cmd)
    try:
        db.execute_cmd(cmd)
        logger.info('[TABLE] Dropping table(s) %s.' % (tables_cmd))
    except Exception as ex:
        logger.error('[TABLE] %s when executing command %s.' % (ex, cmd))
Example #15
def create(db, schema, name, attrs, types, pks=["id"]):
    """
    Creates a table in Postgres.
    Parameters
    ----------
    name : str
    TODO
    ----
    """
    attrs_and_types = []

    for attr, type_ in zip(attrs, types):
        pair = '"%s" %s' % (attr, type_)
        attrs_and_types.append(pair)

    pks = [('"%s"' % pk) for pk in pks]
    primary_keys = "PRIMARY KEY (%s)" % ",".join(pks)
    attrs_and_types.append(primary_keys)
    attrs_and_types = ", ".join(attrs_and_types)

    name = name.lower()
    cmd = "CREATE TABLE IF NOT EXISTS %s.%s(%s);" % (schema, name,
                                                     attrs_and_types)
    try:
        db.execute_cmd(cmd)
    except Exception as ex:
        logger.error('[TABLE] %s when executing command %s.' % (ex, cmd))
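A hedged usage sketch for create(); the column names, types and the resulting SQL shown in the comment are illustrative only.

# Hypothetical call; with these arguments create() issues roughly:
#   CREATE TABLE IF NOT EXISTS public.employee("id" serial, "name" text, PRIMARY KEY ("id"));
create(pg, 'public', 'employee', ['id', 'name'], ['serial', 'text'], pks=['id'])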
Example #16
def create_oplog_table(db, schema='public'):
    """
    Logs the operation, relation name, object id and
    timestamp for each entry of the oplog.

    Parameters
    ----------
    db: connection obj
    schema: name of the schema in Postgres
    Returns
    -------
    -

    Example
    -------
    create_oplog_table(pg, 'purr')

    """
    table_name = "purr_oplog"
    attrs = table_desc[table_name]["attrs"]
    types = table_desc[table_name]["types"]
    pks = table_desc[table_name]["pks"]

    values = [int(time.time())]
    try:
        table.drop(db, schema, [table_name])
        table.create(db, schema, table_name, attrs, types, pks)
        logger.info("[TRANSFER INFO] Created table %s." % (table_name))
    except Exception as ex:
        logger.error(
            "[TRANSFER_INFO] Failed to create table %s: %s" % (table_name, ex))
Example #17
def vacuum(db, schema, table):
    cmd = "VACUUM FULL ANALYZE %s.%s;" % (schema, table)
    try:
        logger.info("[TABLE] Vacuuming table '%s.%s'" % (schema, table))
        db.execute_cmd(cmd)
    except Exception as ex:
        logger.error("[TABLE] Vacuuming table '%s.%s' failed: %s" %
                     (schema, table, ex))
Example #18
def get_doc_by_id(db, name, id):
    try:
        c = db[name]
        bz = c.find_one({"_id": ObjectId(id)})
        return bz
    except Exception as ex:
        logger.error("""
            [COLLECTION] Loading document from collection %s failed.
            Details: %s""" % (name, ex))
Example #19
def start(extractor, coll_config):
    collections = cp.config_collection_names(coll_config)
    if collections is None:
        logger.error("""
                [TRANSFER] No collections found.
                Check your collection names in the setup file.
                """)
        return
    extractor.transfer(collections)
Example #20
def create_trigger_type_notification(db, schema, table, name, proc):
    cmd = """
    CREATE TRIGGER %s AFTER INSERT OR UPDATE OR DELETE ON %s.%s
    FOR EACH ROW EXECUTE PROCEDURE %s()
    """ % (name, schema, table, proc)
    try:
        logger.info("[TABLE] Creating trigger '%s'" % name)
        db.execute_cmd(cmd)
    except Exception as ex:
        logger.error("[TABLE] Creating trigger '%s' failed: %s" % (name, ex))
Example #21
    def __init__(self, settings):
        db_name = settings['db_name']
        try:
            self.client = pymongo.MongoClient(settings['connection'])
        except Exception as ex:
            logger.error("Could not initialize MongoDB client: %s" % ex)
        try:
            self.conn = self.client[db_name]
        except Exception as ex:
            logger.error("Could not create connection to MongoDB: %s" % ex)
Example #22
def update_latest_successful_ts(db, schema, dt):
    cmd = "UPDATE %s.purr_info SET latest_successful_ts='%s';" % (
        schema, str(dt))
    try:
        db.execute_cmd(cmd)
    except Exception as ex:
        logger.error(
            """[TRANSFER_INFO] Failed to update the timestamp
            of the latest successful transfer: %s"""
            % (ex)
        )
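A sketch of updating the checkpoint after a successful batch, pairing this setter with the getter shown earlier; it assumes `time` is imported as in the other snippets.

# Hypothetical checkpoint update; the value type (epoch seconds here) is a guess
# based on the initial values used in create_stat_table.
update_latest_successful_ts(pg, 'purr', int(time.time()))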
Example #23
def get_table(db, schema='public'):
    cmd = """SELECT id, collection_name, relation_name,
    types FROM %s.purr_collection_map ORDER BY id""" % (schema)
    try:
        coll_map = db.execute_cmd_with_fetch(cmd)
        logger.info("Getting schema from DB.", CURR_FILE)
        return coll_map

    except Exception as ex:
        logger.error("[TRANSFER_INFO] Failed to get collection map table" %
                     (ex))
Example #24
def reset(db, schema='public'):
    """
    Reset existing schema or create a new one.
    """
    drop = 'DROP SCHEMA IF EXISTS %s CASCADE;' % schema
    create = 'CREATE SCHEMA %s;' % schema
    try:
        db.execute_cmd(drop)
        db.execute_cmd(create)
        logger.info("[SCHEMA] Schema %s is reset." % schema)
    except Exception as ex:
        logger.error("[SCHEMA] Schema reset failed. %s" % ex)
Example #25
def create(db, schema='public'):
    """
    Create schema if it does not exist.
    """
    cmd = 'CREATE SCHEMA IF NOT EXISTS %s;' % (schema)
    try:
        db.execute_cmd(cmd)
    except Exception as ex:
        logger.error("""
        [SCHEMA] Creating schema with name %s failed.
        Details: %s
        """ % (schema, ex))
Example #26
def create_file(coll_map):
    """
    Creates the collection map file.
    """
    name_file = "collections.yml"
    operation = "w"
    try:
        logger.info("%s Creating collection map file..." % CURR_FILE)
        with open(name_file, operation) as file_out:
            yaml.dump(coll_map, file_out, default_flow_style=False)
        logger.info("Collection map file created: %s" % name_file, CURR_FILE)
    except Exception as ex:
        logger.error("Failed to create collection map file. Details: %s" % ex,
                     CURR_FILE)
Example #27
def log_tailed_docs(pg, schema, docs_useful, ids_log, table_name, oper,
                    merged):
    log_entries = []
    ts = time.time()
    logger.info("IDs: %s" % ids_log)
    if len(ids_log) != len(docs_useful) and oper != DELETE:
        logger.error("n(ids)=%s; n(docs_useful)=%s" %
                     (len(ids_log), len(docs_useful)))
    for i in range(len(docs_useful)):
        id = ids_log[i]
        doc = "no entry"
        try:
            if docs_useful[i] is not None and oper != DELETE:
                doc = str(docs_useful[i])
            else:
                doc = "Doc is NULL"
        except Exception as ex:
            logger.error(
                "%s Converting log entry failed. Details: %s\n Document: " %
                (CURR_FILE, ex))
            logger.error(docs_useful[i])
        row = [oper, table_name, id, ts, merged, doc]
        log_row = tuple(row)
        log_entries.append(log_row)
    try:
        transfer_info.log_rows(pg, schema, log_entries)
    except Exception as ex:
        logger.error("%s Logging failed. Details: %s" % (CURR_FILE, ex))
Example #28
    def coll_in_map(self, name):
        '''
        Checks if a collection exists in collections.yml.

        name: string;
            name of collection as 'name_db.name_coll', e.g. 'cat_db.Breeds'
        '''
        coll = name.split(".")[1]
        try:
            return coll in self.coll_settings
        except Exception as ex:
            logger.error("%s Details %s" % (CURR_FILE, ex))
            return False
Example #29
def check(db, colls_requested):
    """
    Checks if requested collections exist in the database.
    Gets all collection names from MongoDB (colls_name) and creates
    a new list which contains only the existing collection names.

    Parameters
    ----------
    db : pymongo.database.Database
      Database connection and name
    colls_requested : list
      Contains the list of requested collection names.

    Returns
    -------
    colls_existing : list
      Contains only existing collection names.

    Raises
    ------
    ValueError
      when a requested collection does not exist in the database (MongoDB)

    Example
    -------
    check(db, ['Car'])
      []
    check(db, ['Region', 'Customer'])
      ['Region', 'Customer']
    """
    colls_name = db.collection_names(include_system_collections=False)
    colls_existing = []
    logger.info('[COLLECTION] Checking collection names...')
    try:
        for coll in colls_requested:
            try:
                colls_name.index(coll)
                colls_existing.append(coll)
            except ValueError:
                logger.warn("""
                    [COLLECTION] '%s' is not in the Mongo database.
                    Skipping data transfer""" % coll)
    except Exception as ex:
        logger.error("[COLLECTION] Checking collection names failed: %s" % ex)
    return colls_existing
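A sketch of how check() typically feeds a transfer run; the extractor object is an assumption borrowed from the start() example above.

# Hypothetical flow: transfer only the collections that exist in MongoDB.
requested = ['Region', 'Customer', 'Car']
existing = check(mongo_db, requested)  # e.g. ['Region', 'Customer'] if 'Car' is missing
if existing:
    extractor.transfer(existing)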
Example #30
def get_by_name_reduced(db, name, fields, size=20000):
    """
    Gets data from collection limited by batch size containing
    only specific fields.

    Parameters
    ----------
    db : pymongo.database.Database
      Database connection and name
    name : string
      Name of collection.
    fields : list
      Names of fields to include in the query.

    Returns
    -------
    docs : pymongo.cursor.Cursor

    Raises
    ------

    Example
    -------
    get_by_name_reduced(db, 'Car', ['_id', 'type', 'nfOfSeats'])

    TODO
    ----
    - let the user decide batch size
    """
    docs = []
    try:
        logger.info('[COLLECTION] Loading data from collection %s...' % name)
        c = db[name]
        # create the document given to a query that specifies which
        # fields MongoDB returns in the result set
        projection = {}
        for field in fields:
            projection[field] = 1
        bz = c.find({}, projection).sort('$natural', pymongo.DESCENDING)
        docs = bz.batch_size(size)
    except Exception as ex:
        logger.error("""[COLLECTION] Loading data from collection %s failed.
            Details: %s""" % (name, ex))
    return docs
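A sketch of consuming the returned cursor; the field names and batch size are illustrative.

# Hypothetical consumption of the cursor; documents stream in batches of `size`.
cursor = get_by_name_reduced(mongo_db, 'Car', ['_id', 'type'], size=5000)
for doc in cursor:
    print(doc.get('_id'), doc.get('type'))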