Пример #1
0
    def forge_query(self, model, page_size=PAGE_SIZE, count=False):
        """Build a Resource-Graph-style query string for a model definition.

        Args:
            model: dict with a 'base_name', a 'fields' mapping, and an
                optional 'class' entry naming the table/scope to query.
            page_size: row limit applied when *count* is False.
            count: when True, append a count aggregation instead of a limit.

        Returns:
            The assembled query string.
        """
        # Fall back to the 'Resources' scope when the model declares none.
        class_scope = model.get('class', 'Resources')

        base_name = model['base_name']
        # join() replaces manual concatenation + trailing-comma trimming;
        # iterating the dict yields its keys in insertion order, as before.
        fieldnames = ",".join(model['fields'])

        queryStr = "{} | where type =~ '{}' | project {}".format(
            class_scope, base_name, fieldnames)
        if count:
            queryStr += " | count"
        else:
            queryStr += " | limit {}".format(page_size)

        logger.debug("QUERY STR : {}".format(queryStr))

        return queryStr
Пример #2
0
    def execute_query(self, query_name, query_conf):
        """Translate *query_conf* into a mongo aggregation pipeline, run it
        against the matching collection, and wrap the results in a dataset
        dict with a descriptive header."""

        model_name, pipeline = build_mongo_query(query_conf)

        logger.info("Executing mongo Query on Collection {}: {}".format(
            model_name, pipeline))

        # Materialize the cursor so the row count is available for the header.
        records = list(self.db[model_name].aggregate(pipeline))

        logger.debug(records)

        header = {
            "schema": SCHEMA_NAME,
            "model": model_name,
            "query_name": query_name,
            "query_conf": query_conf,
            "count": len(records),
        }

        return {"header": header, "data": records}
Пример #3
0
    def get_count(self, model, search_domains=None):
        """Return the number of rows matching *model* filtered by the given
        search domains.

        Args:
            model: model definition used to build the FROM/WHERE clause.
            search_domains: optional list of filter domains; defaults to no
                filtering. (None sentinel instead of a mutable ``[]``
                default, which is shared across calls.)

        Returns:
            The integer count from the database.
        """
        search_domains = [] if search_domains is None else search_domains

        countStr = 'SELECT COUNT(*)' + self.build_domain_query(
            model, search_domains=search_domains)
        logger.debug('Applying count query string: {}'.format(countStr))
        total_count = self.client.execute(countStr).fetchone()[0]

        return total_count
Пример #4
0
    def read_query(self, model, search_domains=None, start_row=None):
        """Build and execute a read query for *model* and fetch all rows.

        Args:
            model: model definition to read from.
            search_domains: optional list of filter domains; defaults to no
                filtering. (None sentinel instead of a mutable ``[]``
                default, which is shared across calls.)
            start_row: optional offset passed to the query builder.

        Returns:
            All matching rows, as returned by ``fetchall()``.
        """
        search_domains = [] if search_domains is None else search_domains

        queryStr = self.build_read_query(model,
                                         search_domains=search_domains,
                                         offset=start_row)
        logger.debug('Applying the following queryStr: {}'.format(queryStr))
        results = self.client.execute(queryStr).fetchall()

        return results
Пример #5
0
    def plan_changes(self,
                     schema,
                     create_new=True,
                     delete_old=False,
                     alter_changed=True):
        """Establishes a comparison between the current connector's manifest and the current db schema state,
        and produces a 'change plan' JSON file.

        Args:
            schema: name of the schema to compare.
            create_new: include brand-new models in the create list.
            delete_old: include obsolete models in the delete list.
            alter_changed: re-create modified models (delete then create).

        Returns:
            The change plan dict (also dumped to JSON when DUMP_JSON is set).
        """

        new, old, matching, changed, changes = self.compare_schema(schema)

        # The two flags contribute independently to each list, so two simple
        # unions replace the original four-way if/elif truth tables.
        # NOTE(review): assumes compare_schema returns unordered collections,
        # so list ordering of the plan entries is not significant — confirm.
        to_delete = set()
        if delete_old:
            to_delete |= set(old)
        if alter_changed:
            to_delete |= set(changed)

        to_create = set()
        if create_new:
            to_create |= set(new)
        if alter_changed:
            to_create |= set(changed)

        # Keep only the per-model diffs that actually report a change.
        changes_detail = {
            model: detail
            for model, detail in changes.items() if detail['has_changed']
        }

        plan = {
            "schema": schema,
            "delete": list(to_delete),
            "create": list(to_create),
            "changes_detail": changes_detail
        }

        logger.debug("CHANGE PLAN FOR SCHEMA {}: {}".format(schema, plan))

        if DUMP_JSON:
            json_dump(plan, schema, 'DB_CHANGE_PLAN')

        return plan
Пример #6
0
def create_ORM_class(schema, model_name, model, unpack=None):
    """Constructs an sqlAlchemy ORM class definition on a declarative Base,
    that corresponds to a given model definition.

    Args:
        schema: target DB schema name, stored in ``__table_args__``.
        model_name: becomes both the class name and ``__tablename__``.
        model: model definition whose 'fields' mapping drives the columns.
        unpack: optional mapping forwarded to ``construct_field`` for each
            field. (None sentinel instead of a mutable ``{}`` default,
            which is shared across calls.)

    Returns:
        The dynamically created ORM class, subclassing ``AutoBase``.
    """
    unpack = {} if unpack is None else unpack

    logger.debug("START CREATION of ORM class {}. Schema: {}".format(
        model_name, schema))

    construct = {
        '__tablename__': model_name,
        '__table_args__': {
            'schema': schema
        }
    }

    # Each field contributes one or more class attributes (columns).
    for field_name, field in model['fields'].items():
        construct.update(construct_field(field_name, field, unpack))

    # type() builds the class dynamically from the attribute dict.
    ORMClass = type(model_name, (AutoBase, ), construct)

    return ORMClass
Пример #7
0
    def apply_transforms(self, transforms):
        """Execute the pipeline of transformation steps described by
        *transforms* and return the last dataframe produced.

        Each step names an operation (a method on self), an input dataframe,
        parameters, and an output name under which the result is stored for
        chaining. A failing step is logged and skipped; later steps still
        run.

        Returns:
            The dataframe produced by the last successful step, or None if
            no step succeeded.
        """
        source = transforms['Source']
        table_list = transforms['Tables']
        dataframes = self.load_tables(source, table_list)

        df = None

        for step in transforms['Steps']:

            step_name = step['Step']
            logger.debug("STEP: {}".format(step))

            try:
                logger.info("{}::{} - Executing Step".format(
                    source, step_name))
                operation = step['type']
                output_name = step['output']

                # Copy the params so the caller's config dict is not
                # polluted with dataframe objects (the original mutated
                # step['params'] in place).
                params = dict(step['params'])

                # replace the dataframe names by the actual dataframes in the params
                params['origin_df'] = dataframes[step['input']]

                if 'right_input' in step:
                    params['right_df'] = dataframes[step['right_input']]

                logger.debug("STEP PARAMS: {}".format(params))
                # retrieve the right function to apply and pass the parameters as dict
                function = getattr(self, operation)
                df = function(**params)

                logger.debug(df.head(10))

                # store the output in the buffer_dfs for further chaining
                dataframes[output_name] = df

                if step.get('save'):
                    logger.info("Saving dataframe {}::{}".format(
                        source, output_name))
                    self.save(df, source, output_name)

            except Exception as e:
                # Best-effort pipeline: log the failure and move on to the
                # next step rather than aborting the whole run.
                errmsg = "{}::{} error: {}".format(source, step_name, e)
                logger.error(errmsg)
                continue

        return df
Пример #8
0
    def update_from_json(self, dataset):
        """Upsert the records of a JSON dataset into the reflected DB model.

        The dataset header names the schema and model; each record in
        dataset['data'] is matched on its 'Id': missing rows are inserted,
        existing rows have all their other fields updated. The whole batch
        is committed atomically.

        Returns:
            'committed' on success, 'rolled back' if any record failed.
        """

        header = dataset['header']
        schema = header['schema']
        model_name = header['model']

        result = None

        logger.info("Loading DB schema: {}".format(schema))
        # for documentation on this : refer to https://docs.sqlalchemy.org/en/14/orm/extensions/automap.html
        AutoBase = automap_base()
        AutoBase.prepare(engine=self.engine, schema=schema, reflect=True)
        logger.debug("loading modelObject")
        modelObject = getattr(AutoBase.classes, model_name)

        logger.debug("Opening Session")
        session = self.SessionFactory()
        # This is very important, so the data is inserted in the right schema
        session.connection(
            execution_options={"schema_translate_map": {
                schema: schema
            }})

        logger.info("Saving JSON file to {}".format(self.dbname))
        logger.debug("JSON Header: {}".format(header))

        try:
            for dict_item in dataset['data']:

                # 'record_id' avoids shadowing the builtin id()
                record_id = dict_item['Id']
                objectInstance = session.query(modelObject).filter(
                    modelObject.Id == record_id).first()

                # if object not found in the db, create it
                if objectInstance is None:
                    logger.debug(
                        "Object {} with ID={} not found in DB. Creating.".
                        format(model_name, record_id))
                    objectInstance = modelObject(**dict_item)
                    session.add(objectInstance)

                # if already present, update all its fields
                else:
                    logger.debug(
                        "Object {} with ID={} found in DB. Updating.".format(
                            model_name, record_id))
                    # drop 'Id' so the setattr loop never rewrites the key
                    dict_item.pop('Id')
                    for key, value in dict_item.items():
                        setattr(objectInstance, key, value)

                logger.debug("inserted record {}".format(dict_item.values()))

            logger.info("Committing...")
            session.commit()
            result = 'committed'

        except Exception as e:
            logger.error("SQL connector update_from_json: {}".format(e))
            session.rollback()
            result = 'rolled back'

        finally:
            # Always release the session, committed or not.
            session.close()

        return result