Example #1
import logging

import azure.durable_functions as df

logger = logging.getLogger(__name__)


def orchestrator_function(context: df.DurableOrchestrationContext):

    orc_input = context.get_input()
    logger.info("Orchestration Input : {}".format(orc_input))
    result1 = yield context.call_activity('pandas_transform', orc_input)

    return result1
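Durable orchestrators are generator functions and are not called directly; with the standard azure-functions-durable setup, the module exposes them through an entry point created like this:

main = df.Orchestrator.create(orchestrator_function)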
Example #2
import logging
import traceback

import azure.functions
import azure.durable_functions as df

logger = logging.getLogger(__name__)


async def main(mytimer: azure.functions.TimerRequest, starter: str):
    
    try:
        orchestrator_name = "F_orchestrator"
        client = df.DurableOrchestrationClient(starter)

        req_params = {
            'trigger': 'scheduled',
            'source': 'prestashop',
            'last_days': '1',
            'model': None,
            'action': 'full'
        }

        req_body = {
            'status': 'TODO'
        }

        # NOTE: this nested input shape is currently unused; the orchestrators
        # read a flat params dict (see Example #11), so req_params is passed
        # directly below.
        orc_input = {
            'params': req_params,
            'body': req_body
        }

        instance_id = await client.start_new(orchestrator_name, None, req_params)

        logger.info(f"Started orchestration with ID = '{instance_id}'.")


    except Exception:

        # format_exc() returns the traceback as a string;
        # print_exc() prints to stderr and returns None, which would log "None".
        logger.error("F_prestashop_timer :: {}".format(traceback.format_exc()))
Example #3
    def execute_query(self, query_name, query_conf):

        model_name, queryPipeline = build_mongo_query(query_conf)
        collection = self.db[model_name]

        logger.info("Executing mongo Query on Collection {}: {}".format(
            model_name, queryPipeline))

        results = collection.aggregate(queryPipeline)
        results_list = list(results)

        logger.debug(results_list)

        result_dataset = {
            "header": {
                "schema": SCHEMA_NAME,
                "model": model_name,
                "query_name": query_name,
                "query_conf": query_conf,
                "count": len(results_list),
            },
            "data": results_list
        }

        return result_dataset
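The header/data envelope returned here is the same shape the connectors' insert_dataset methods consume (see Examples #7 and #16), so a query result can be piped straight into a load step. A minimal sketch, with conn and target_conn as hypothetical connector instances:

dataset = conn.execute_query(query_name, MONGO_QUERIES[query_name])
target_conn.insert_dataset(dataset)  # reads header['model'] and data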
Example #4
def fetch(params):

    # The nested shape is kept for reference only; fetch_data.main expects
    # the flat params dict ('body' is still a placeholder).
    orc_input = {'params': params, 'body': 'TODO'}

    result = fetch_data.main(params)
    logger.info(result)

    return result
Example #5
    def create_db(self, schemas=None):
        """Creates all Table Metadata and db tables corresponding to the given connectors' models definitions"""

        # A dict view as a default argument is evaluated once at definition
        # time; resolve the schema list at call time instead.
        if schemas is None:
            schemas = list(CONNECTOR_MAP.keys())

        for schema in schemas:
            self.create_models(schema)

        AutoBase.metadata.create_all(self.engine)
        tables_list = list(x.name for x in AutoBase.metadata.sorted_tables)
        logger.info(
            "Successfully created database. Models: {}".format(tables_list))

        result = {'schemas': schemas, 'created': tables_list}

        return result
Example #6
    def apply_transforms(self, transforms):

        source = transforms['Source']
        table_list = transforms['Tables']
        dataframes = self.load_tables(source, table_list)

        df = None

        for step in transforms['Steps']:

            step_name = step['Step']
            logger.debug("STEP: {}".format(step))

            try:
                logger.info("{}::{} - Executing Step".format(
                    source, step_name))
                operation = step['type']
                params = step['params']
                output_name = step['output']

                # replace the dataframe names by the actual dataframes in the params
                input_name = step['input']
                params['origin_df'] = dataframes[input_name]

                if 'right_input' in step:
                    right_name = step['right_input']
                    params['right_df'] = dataframes[right_name]

                logger.debug("STEP PARAMS: {}".format(params))
                # retrieve the right function to apply and pass the parameters as dict
                function = getattr(self, operation)
                df = function(**params)

                logger.debug(df.head(10))

                # store the output in the buffer_dfs for further chaining
                dataframes[output_name] = df

                if step.get('save'):
                    logger.info("Saving dataframe {}::{}".format(
                        source, output_name))
                    self.save(df, source, output_name)

            except Exception as e:
                errmsg = "{}::{} error: {}".format(source, step_name, e)
                logger.error(errmsg)
                continue

        return df
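For reference, a transforms manifest only needs the keys this loop reads: 'Source', 'Tables', 'Steps', and per step 'Step', 'type', 'params', 'input', 'output', plus optional 'right_input' and 'save'. The step type, table names and parameters below are purely illustrative:

EXAMPLE_TRANSFORM = {
    'Source': 'prestashop',
    'Tables': ['orders', 'customers'],
    'Steps': [
        {
            'Step': 'join_orders_customers',  # hypothetical step name
            'type': 'merge',                  # must be a method on this connector
            'input': 'orders',                # injected as params['origin_df']
            'right_input': 'customers',       # injected as params['right_df']
            'params': {'on': 'customer_id'},  # forwarded as **kwargs
            'output': 'orders_enriched',
            'save': True,                     # persisted via self.save(...)
        },
    ],
}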
Example #7
    def insert_dataset(self, dataset):

        header = dataset['header']
        schema_name = header['schema']
        model_name = header['model']
        line_count = header['count']
        data = dataset['data']

        logger.info(
            "Inserting dataset ({} rows) to Mongo Collection: {}.{}".format(
                line_count, schema_name, model_name))

        collection = self.db[model_name]
        result = collection.insert_many(data)

        return result
Example #8
    def delete_tables(self, schema, to_delete):

        AutoBase.prepare(engine=self.engine, schema=schema, reflect=True)
        tables_list = list(x.__table__ for x in AutoBase.classes
                           if x.__table__.name in to_delete)

        logger.info("DROPPING tables from schema {}: {}".format(
            schema, to_delete))

        AutoBase.metadata.drop_all(bind=self.engine, tables=tables_list)
        logger.info("Successfully dropped tables : {}".format(to_delete))

        result = {'schema': schema, 'deleted': to_delete}

        return result
Example #9
    def create_models(self, schema, models_list=None):
        """Creates Tabls Metadata and db tables corresponding to the given connectors' models definitions"""

        connector = import_module(CONNECTOR_MAP[schema])

        if models_list is None:
            models_list = connector.MODELS_LIST

        logger.info(
            "Creating MetadataClasses in schema {} from models: {}".format(
                schema, models_list))

        for model_name in models_list:
            # create_ORM_class registers the model's table on AutoBase.metadata
            # as a side effect; the returned ORM class is not needed here.
            create_ORM_class(schema, model_name, connector.MODELS[model_name],
                             connector.UNPACKING)

        AutoBase.metadata.create_all(bind=self.engine)
Example #10
import logging

import azure.functions as func
import azure.durable_functions as df

logger = logging.getLogger(__name__)


async def main(req: func.HttpRequest, starter: str) -> func.HttpResponse:
    
    try:
        client = df.DurableOrchestrationClient(starter)

        logger.info("request parameters: {}".format(req.params))

        expected_params = [
            'last_days',
            'source',
            'model',
            'action'
        ]

        # req_params = dict(req.params)
        params = {}
        # req_body = req.get_body()
        req_body = {
            'status': 'TODO'
        }

        for key in expected_params:
            params[key] = req.params.get(key)
        
        params['trigger'] = 'http'
        models_raw = params['model']
        params['model'] = (models_raw.split(',') if models_raw else None)

        # NOTE: this nested input shape is currently unused; the orchestrator
        # reads a flat params dict (see Example #11), so params is passed
        # directly below.
        orc_input = {
            'params': params,
            'body': req_body
        }

        instance_id = await client.start_new(req.route_params["functionName"], None, params)

        logger.info(f"Started orchestration with ID = '{instance_id}'.")

        return client.create_check_status_response(req, instance_id)

    except Exception as e:

        logger.error("F_starter :: {}".format(e))
        # An HTTP-triggered function should return a response on every path.
        return func.HttpResponse("F_starter :: {}".format(e), status_code=500)
Example #11
def orchestrator_function(context: df.DurableOrchestrationContext):

    orc_input = context.get_input()
    logger.info("Orchestration Input : {}".format(orc_input))
    action = orc_input['action']

    result = {}

    if (action == 'fetch') or (action == 'full'):
        fetch_result = yield context.call_activity('fetch_data', orc_input)
        result['fetch_data'] = fetch_result

    if (action == 'transform') or (action == 'full'):
        transform_result = yield context.call_activity('pandas_transform',
                                                       orc_input)
        result['pandas_transform'] = transform_result

    logger.info("Orchestration Results : {}".format(result))

    return result
Example #12
def extract(source, models, params):

    # # import the right connector
    # package_name = 'Connectors.{}'.format(CONNECTOR_MAP[source]['package'])
    # connector_name = CONNECTOR_MAP[source]['connector']
    # connector = import_module(connector_name, package_name)

    # # instantiate a connector client
    # client = getattr(connector,connector_name)

    client = get_client(source)

    full_results = []

    for model_name in models:
        logger.info("Extracting schema: {} - model: {}".format(
            source, model_name))
        jsonpath, dataset = client.get_data(model_name,
                                            last_days=params['last_days'])
        full_results.append({'jsonpath': jsonpath, 'dataset': dataset})

    return full_results
Example #13
    def execute_queries(self, query_names=None):

        # extract a subset of the MONGO_QUERIES dictionary if query names were explicitly provided
        if query_names:
            queries = {
                query_name: MONGO_QUERIES[query_name]
                for query_name in set(query_names)
            }
        else:
            queries = MONGO_QUERIES

        for query_name, query_conf in queries.items():
            logger.info("Preparing Mongo Query {}: {}".format(
                query_name, query_conf))
            result_dataset = self.execute_query(query_name, query_conf)

            if DUMP_JSON:
                json_dump(result_dataset, APP_NAME, query_name)

            if query_conf['dump_csv']:
                csv_dump(result_dataset['data'], APP_NAME, query_name)
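The internal shape of a MONGO_QUERIES entry is not shown in these examples; all this method requires is that build_mongo_query (see Example #3) can derive a collection name and an aggregation pipeline from it, and that it carries a dump_csv flag. A purely illustrative entry, with 'model' and 'pipeline' as assumed key names:

MONGO_QUERIES = {
    'orders_per_day': {      # hypothetical query name
        'model': 'orders',   # assumed: the collection to aggregate over
        'pipeline': [        # assumed: raw aggregation stages
            {'$group': {'_id': '$order_date', 'count': {'$sum': 1}}},
        ],
        'dump_csv': True,    # read by execute_queries above
    },
}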
Example #14
def main(params: dict) -> dict:

    returnStr = ""
    
    try:
        # params = orc_input['params']
        pdconn = PandasSQLConnector.load_default()
        schema = params['source']
        trigger = params['trigger']

        results = {}
  
        initStr = "Extend Data Table operation started. Trigger : {} - Schema: {}".format(trigger,schema)
        logger.info(initStr)

        for filename in os.listdir(TRANSFORMS_DIR):
            
            transform_def = load_conf(filename, subfolder='transforms')
            
            if transform_def['Source'] == schema:
                logger.info("Applying pandas transforms from manifest: {}".format(filename))
                df = pdconn.apply_transforms(transform_def)
                results[filename] = 'applied'
            
            else:
                logger.info("Skipping filtered schema : {}".format(transform_def))
                results[filename] = 'skipped'

        returnStr = "Extend Data Table ended. Results: {}".format(results)
        logger.info(returnStr)

        output_results = {
            'params': params,
            'results': results
        }

    except Exception as e:
        returnStr = '{}'.format(e)
        logger.error(e)

        output_results = {
            'params': params,
            'results': returnStr
        }

    return output_results
Example #15
    def apply_changes(self, plan):
        """Applies the changes specified in a given 'plan' JSON file. This approach is pretty much inspired by Terraform, but applied to SQLAlchemy db models :)"""

        returnmsg = ""
        result = {}

        schema = plan['schema']
        to_delete = plan['delete']
        to_create = plan['create']

        deletion = len(to_delete) > 0
        creation = len(to_create) > 0

        if deletion or creation:
            logger.info("DB CHANGE: Applying change plan: {}".format(plan))

            try:

                # for documentation on this : refer to https://docs.sqlalchemy.org/en/14/orm/extensions/automap.html
                AutoBase.prepare(engine=self.engine,
                                 schema=schema,
                                 reflect=True)

                # if tables need to be dropped, use SQLAlchemy to drop them
                if deletion:
                    # delete_tables = list(x.__table__ for x in AutoBase.classes if x.__table__.name in to_delete)
                    self.delete_tables(schema, to_delete)
                    AutoBase.metadata.clear()

                # if tables need to be (re)-created, create them from the connector's manifest definition
                if creation:
                    self.create_models(schema, to_create)
                    AutoBase.metadata.clear()

                returnmsg = "Successfully applied changes to the DB."
                logger.info(returnmsg)

                result['status'] = 'success'

            except Exception as e:
                returnmsg = "DB CHANGE: Error {}".format(e)
                logger.error(returnmsg)
                result['status'] = 'error'

        else:
            returnmsg = "DB CHANGE: Nothing to change in the current plan. No action will be applied on the db."
            logger.info(returnmsg)
            result['status'] = 'not applied'

        result['message'] = returnmsg
        result['plan'] = plan
        return result
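A plan is simply a dict carrying the three keys read above. A minimal hand-written example, with table names chosen for illustration:

plan = {
    'schema': 'prestashop',             # target db schema
    'delete': ['orders_legacy'],        # tables to drop
    'create': ['orders', 'customers'],  # models to (re)create from the manifest
}
result = sql_connector.apply_changes(plan)  # sql_connector: an instance of this class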
Example #16
def main(params: dict) -> dict:

    returnStr = ""

    try:
        # params = orc_input['params']
        source, last_days, models = format_params(params)
        trigger = params['trigger']
        results = {}

        azconn = AzureSQLConnector.load_default()

        initStr = "Fetch operation started. Trigger: {} Source: {} - Models: {} - LAST_DAYS={}".format(
            trigger, source, models, last_days)
        logger.info(initStr)

        client = get_client(source)

        for model_name in models:
            logger.info('Extracting data from Model: {}'.format(model_name))
            jsonpath, dataset = client.get_data(model_name,
                                                last_days=last_days)
            # push to Azure SQL
            result = azconn.insert_dataset(dataset)
            results[model_name] = result

        returnStr = "Fetch operation ended. Trigger: {} - Source: {} - LAST_DAYS={}\nRESULTS: {}".format(
            trigger, source, last_days, results)
        logger.info(returnStr)
        output_results = {'params': params, 'results': results}

    except Exception as e:
        # format_exc() returns the traceback as a string; print_exc() returns None.
        returnStr = 'F_fetch_data.fetch_data :: {}'.format(
            traceback.format_exc())
        logger.error(e)

        output_results = {'params': params, 'results': returnStr}

    return output_results
Example #17
def expand(params):

    result = extend_data.main(params)
    logger.info(result)

    return result
Example #18
    def compare_schema(self, schema):
        """Loads all table definitions from the db schema, and compares it with the connector's model definitions taken from the connector's YAML manifest.
        Returns several sets of strings:

        - new_models: Model names that were given in the connector's manifest but not found in the database
        - deleted_models: Model names that were found in the database, but absent from the connector's current manifest
        - intersect_models: Model names that were found both in the database and in the connector's manifest
        - model_changes: dict object reflecting whether each intersecting model was changed (i.e.,
            whether fields were added to or removed from the manifest, compared to the current database state):
            - new_fields: list of field names that were found in the model manifest but not in the corresponding database table
            - deleted_fields: list of field names present in the db table but absent from the model manifest
        """

        connector = import_module(CONNECTOR_MAP[schema])
        logger.info("Comparing DB schema {} with connector models: {}".format(
            schema, connector.__name__))
        # for documentation on this : refer to https://docs.sqlalchemy.org/en/14/orm/extensions/automap.html
        # AutoBase = automap_base()
        AutoBase.prepare(engine=self.engine, schema=schema, reflect=True)

        table_names = set(x.__table__.name for x in AutoBase.classes)
        model_names = set(connector.MODELS_LIST)

        new_models = model_names - table_names
        deleted_models = table_names - model_names
        intersect_models = table_names & model_names
        changed_models = set()

        logger.info("NEW models: {}".format(new_models))
        logger.info("DELETED models: {}".format(deleted_models))
        logger.info("Intersecting models: {}".format(intersect_models))

        model_changes = {}

        for model_name in intersect_models:

            # logger.debug("Comparing Model: {}".format(model_name))
            model = connector.MODELS[model_name]
            table_obj = getattr(AutoBase.classes, model_name)

            table_fields = set(x.name for x in table_obj.__table__.columns)

            model_fields = get_all_model_fields(connector, model_name)

            new_fields = model_fields - table_fields
            deleted_fields = table_fields - model_fields
            intersect_fields = table_fields & model_fields

            has_changed = len(new_fields) > 0 or len(deleted_fields) > 0
            if has_changed:
                changed_models.add(model_name)

            # logger.debug("HAS CHANGED: {}".format(has_changed))
            # logger.debug("NEW Fields: {}".format(new_fields))
            # logger.debug("DELETED Fields: {}".format(deleted_fields))
            # logger.debug("MATCHING Fields: {}".format(intersect_fields))

            model_changes[model_name] = {
                'has_changed': has_changed,
                'new_fields': list(new_fields),
                'deleted_fields': list(deleted_fields),
                'intersect_fields': list(intersect_fields)
            }

        logger.info("Models Comparison: {}".format(model_changes))

        return new_models, deleted_models, intersect_models, changed_models, model_changes
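One plausible way to turn this comparison into an apply_changes plan (Example #15) is to drop and recreate the changed models along with the new and deleted ones; this bridging code is a sketch, not taken from the source:

new_m, deleted_m, _, changed_m, _ = sql_connector.compare_schema('prestashop')
plan = {
    'schema': 'prestashop',
    'delete': sorted(deleted_m | changed_m),  # changed models are dropped...
    'create': sorted(new_m | changed_m),      # ...and recreated from the manifest
}
sql_connector.apply_changes(plan)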
Example #19
    def update_from_json(self, dataset):

        header = dataset['header']
        schema = header['schema']
        model_name = header['model']

        result = None

        logger.info("Loading DB schema: {}".format(schema))
        # for documentation on this : refer to https://docs.sqlalchemy.org/en/14/orm/extensions/automap.html
        AutoBase = automap_base()
        AutoBase.prepare(engine=self.engine, schema=schema, reflect=True)
        logger.debug("loading modelObject")
        modelObject = getattr(AutoBase.classes, model_name)

        logger.debug("Opening Session")
        session = self.SessionFactory()
        # This is very important, so the data is inserted in the right schema
        session.connection(
            execution_options={"schema_translate_map": {
                schema: schema
            }})

        logger.info("Saving JSON file to {}".format(self.dbname))
        logger.debug("JSON Header: {}".format(header))

        try:
            for dict_item in dataset['data']:

                id = dict_item['Id']
                objectInstance = session.query(modelObject).filter(
                    modelObject.Id == id).first()

                # if object not found in the db, create it
                if objectInstance is None:
                    logger.debug(
                        "Object {} with ID={} not found in DB. Creating.".
                        format(model_name, id))
                    objectInstance = modelObject(**dict_item)
                    session.add(objectInstance)

                # if already present, update all its fields
                else:
                    logger.debug(
                        "Object {} with ID={} found in DB. Updating.".format(
                            model_name, id))
                    id = dict_item.pop('Id')
                    for key, value in dict_item.items():
                        setattr(objectInstance, key, value)

                logger.debug("inserted record {}".format(dict_item.values()))

            logger.info("Committing...")
            session.commit()
            result = 'committed'

        except Exception as e:
            logger.error("SQL connector update_from_json: {}".format(e))
            session.rollback()
            result = 'rolled back'

        finally:
            session.close()

        return result
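Design note: per the SQLAlchemy docs, schema_translate_map substitutes schema tokens at execution time, and the common pattern for redirecting schema-less Table objects is {None: schema}; mapping a schema name to itself, as above, leaves names unchanged, so this session setup relies on the tables already being reflected with the right schema.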