Exemplo n.º 1
0
async def main(mytimer: azure.functions.TimerRequest, starter: str):
    """Timer-triggered entry point: starts the 'F_orchestrator' durable
    orchestration with a fixed set of scheduled-run parameters.

    Args:
        mytimer: Azure Functions timer binding payload (only used as trigger).
        starter: Serialized durable-functions client binding string.
    """
    try:
        orchestrator_name = "F_orchestrator"
        client = df.DurableOrchestrationClient(starter)

        # Parameters for a full, scheduled refresh of the prestashop source
        # covering the last day ('model': None means all models).
        req_params = {
            'trigger': 'scheduled',
            'source': 'prestashop',
            'last_days': '1',
            'model': None,
            'action': 'full'
        }

        req_body = {
            'status': 'TODO'
        }

        # NOTE(review): orc_input is built but start_new below only receives
        # req_params — confirm whether the orchestrator expects the wrapped
        # {'params', 'body'} payload instead.
        orc_input = {
            'params': req_params,
            'body': req_body
        }

        instance_id = await client.start_new(orchestrator_name, None, req_params)

        logger.info(f"Started orchestration with ID = '{instance_id}'.")

    except Exception as e:
        # BUG FIX: traceback.print_exc() prints to stderr and returns None, so
        # the previous message always logged "... :: None". format_exc()
        # returns the traceback as a string suitable for the log message.
        logger.error("F_prestashop_timer :: {}".format(traceback.format_exc()))
Exemplo n.º 2
0
    def apply_changes(self, plan):
        """Apply a Terraform-style change plan to the database.

        The plan dict carries a 'schema' name plus 'delete' and 'create'
        table-name lists; the listed tables are dropped and/or (re)created
        against the live database via SQLAlchemy.

        Args:
            plan: dict with 'schema', 'delete' and 'create' keys.

        Returns:
            dict with 'status' ('success' / 'error' / 'not applied'),
            a human-readable 'message', and the echoed 'plan'.
        """
        schema = plan['schema']
        to_delete = plan['delete']
        to_create = plan['create']

        result = {}

        if not to_delete and not to_create:
            # Empty plan: nothing to do against the database.
            msg = "DB CHANGE: Nothing to change in the current plan. No action will be applied on the db."
            logger.info(msg)
            result['status'] = 'not applied'
            result['message'] = msg
            result['plan'] = plan
            return result

        logger.info("DB CHANGE: Applying change plan: {}".format(plan))

        try:
            # Reflect the current schema so drops operate on live metadata.
            # For documentation on this, refer to
            # https://docs.sqlalchemy.org/en/14/orm/extensions/automap.html
            AutoBase.prepare(engine=self.engine,
                             schema=schema,
                             reflect=True)

            if to_delete:
                # Drop the tables marked for deletion, then reset the
                # reflected metadata so it doesn't keep stale table objects.
                self.delete_tables(schema, to_delete)
                AutoBase.metadata.clear()

            if to_create:
                # (Re)create tables from the connector's manifest definition.
                self.create_models(schema, to_create)
                AutoBase.metadata.clear()

            msg = "Successfully applied changes to the DB."
            logger.info(msg)
            result['status'] = 'success'

        except Exception as e:
            msg = "DB CHANGE: Error {}".format(e)
            logger.error(msg)
            result['status'] = 'error'

        result['message'] = msg
        result['plan'] = plan
        return result
Exemplo n.º 3
0
    def apply_transforms(self, transforms):
        """Run the chained pandas transform steps described by a manifest.

        Each step names an operation (a method on this connector), its
        parameters, an input dataframe and an output name; outputs are kept
        in a buffer so later steps can chain on them. A failing step is
        logged and skipped rather than aborting the whole run.

        Args:
            transforms: manifest dict with 'Source', 'Tables' and 'Steps'.

        Returns:
            The dataframe produced by the last successful step, or None if
            no step produced one.
        """
        source = transforms['Source']
        table_list = transforms['Tables']
        dataframes = self.load_tables(source, table_list)

        df = None

        for step in transforms['Steps']:
            step_name = step['Step']
            logger.debug("STEP: {}".format(step))

            try:
                logger.info("{}::{} - Executing Step".format(source, step_name))

                operation = step['type']
                params = step['params']
                output_name = step['output']

                # Substitute dataframe names in the params with the actual
                # dataframes from the buffer.
                params['origin_df'] = dataframes[step['input']]
                if 'right_input' in step.keys():
                    params['right_df'] = dataframes[step['right_input']]

                logger.debug("STEP PARAMS: {}".format(params))

                # Dispatch to the transform method named by 'type' and pass
                # the parameters as keyword arguments.
                function = getattr(self, operation)
                df = function(**params)

                logger.debug(df.head(10))

                # Buffer the output so subsequent steps can chain on it.
                dataframes[output_name] = df

                if step.get('save'):
                    logger.info("Saving dataframe {}::{}".format(source, output_name))
                    self.save(df, source, output_name)

            except Exception as e:
                # Best-effort: log the failing step and move on to the next.
                logger.error("{}::{} error: {}".format(source, step_name, e))
                continue

        return df
Exemplo n.º 4
0
def main(params: dict) -> dict:
    """Apply every pandas transform manifest whose source matches the params.

    Scans TRANSFORMS_DIR for manifest files; those whose 'Source' equals
    params['source'] are applied, the rest are skipped.

    Args:
        params: dict with at least 'source' and 'trigger' keys.

    Returns:
        dict with the original 'params' and a 'results' mapping of
        filename -> 'applied'/'skipped', or an error string on failure.
    """
    returnStr = ""

    try:
        pdconn = PandasSQLConnector.load_default()
        schema = params['source']
        trigger = params['trigger']

        results = {}

        initStr = "Extend Data Table operation started. Trigger : {} - Schema: {}".format(trigger, schema)
        logger.info(initStr)

        for filename in os.listdir(TRANSFORMS_DIR):
            transform_def = load_conf(filename, subfolder='transforms')

            if transform_def['Source'] != schema:
                # Manifest targets another schema: record it as skipped.
                logger.info("Skipping filtered schema : {}".format(transform_def))
                results[filename] = 'skipped'
                continue

            logger.info("Applying pandas transforms from manifest: {}".format(filename))
            df = pdconn.apply_transforms(transform_def)
            results[filename] = 'applied'

        returnStr = "Extend Data Table ended. Results: {}".format(results)
        logger.info(returnStr)

        output_results = {'params': params, 'results': results}

    except Exception as e:
        returnStr = '{}'.format(e)
        logger.error(e)
        output_results = {'params': params, 'results': returnStr}

    return output_results
Exemplo n.º 5
0
async def main(req: func.HttpRequest, starter: str) -> func.HttpResponse:
    """HTTP-triggered starter: launches the durable orchestration named in
    the route and returns the standard status-check response.

    Args:
        req: Incoming HTTP request; the route must carry 'functionName' and
             the query string may carry last_days/source/model/action.
        starter: Serialized durable-functions client binding string.

    Returns:
        The durable-functions status-check response, or a 500 response if
        starting the orchestration fails.
    """
    try:
        client = df.DurableOrchestrationClient(starter)

        logger.info("request parameters: {}".format(req.params))

        expected_params = [
            'last_days',
            'source',
            'model',
            'action'
        ]

        # Placeholder body; the orchestrator input currently only uses params.
        req_body = {
            'status': 'TODO'
        }

        # Missing query-string parameters default to None.
        params = {key: req.params.get(key) for key in expected_params}

        params['trigger'] = 'http'
        # 'model' arrives as a comma-separated string; split it into a list.
        models_raw = params['model']
        params['model'] = (models_raw.split(',') if models_raw else None)

        # NOTE(review): orc_input is built but start_new below only receives
        # params — confirm whether the orchestrator expects the wrapped
        # {'params', 'body'} payload instead.
        orc_input = {
            'params': params,
            'body': req_body
        }

        instance_id = await client.start_new(req.route_params["functionName"], None, params)

        logger.info(f"Started orchestration with ID = '{instance_id}'.")

        return client.create_check_status_response(req, instance_id)

    except Exception as e:
        logger.error("F_starter :: {}".format(e))
        # BUG FIX: previously fell off the end and returned None, which is an
        # invalid response for an HTTP-triggered Azure Function. Surface the
        # failure to the caller as a 500 instead.
        return func.HttpResponse("F_starter :: {}".format(e), status_code=500)
Exemplo n.º 6
0
def main(params: dict) -> dict:
    """Dispatch a DB maintenance action to the Azure SQL connector.

    Supported actions: 'build', 'destroy', 'drop', 'examine', 'apply'.

    Args:
        params: dict with 'source' (a schema name, or falsy to target every
                schema in CONNECTOR_MAP), 'action' and 'model' keys.

    Returns:
        The connector's result (a dict keyed by schema for examine/apply),
        or a warning/error message string.
    """
    result = {}

    try:
        azconn = AzureSQLConnector.load_default()
        schema = params['source']
        # A falsy schema means "act on every known schema".
        schema_list = ([schema] if schema else CONNECTOR_MAP.keys())
        action = params['action']
        models = params['model']

        if action == 'build':
            result = azconn.create_db(schema_list)

        elif action == 'destroy':
            result = azconn.delete_db(schema_list)

        elif action == 'drop':
            result = azconn.delete_tables(schema, models)

        elif action == 'examine':
            # Dry run: compute the change plan for each schema.
            for schema in schema_list:
                result[schema] = azconn.plan_changes(schema)

        elif action == 'apply':
            # Plan then apply the changes for each schema.
            for schema in schema_list:
                plan = azconn.plan_changes(schema)
                result[schema] = azconn.apply_changes(plan)

        else:
            returnMsg = "F_db_activity :: Invalid value provided for 'action' parameter: {}".format(
                action)
            logger.warning(returnMsg)
            result = returnMsg

    except Exception as e:
        # BUG FIX: traceback.print_exc() prints to stderr and returns None,
        # so the returned/logged message was always "... :: None".
        # format_exc() returns the traceback as a string.
        returnMsg = 'F_db_activity error :: {}'.format(traceback.format_exc())
        logger.error(returnMsg)
        result = returnMsg

    return result
Exemplo n.º 7
0
def main(params: dict) -> dict:
    """Fetch data for each requested model from the source API and insert it
    into Azure SQL.

    Args:
        params: dict carrying at least 'trigger' plus the source/last_days/
                model values decoded by format_params().

    Returns:
        dict with the original 'params' and per-model insert results, or an
        error traceback string under 'results' on failure.
    """
    returnStr = ""

    try:
        source, last_days, models = format_params(params)
        trigger = params['trigger']
        results = {}

        azconn = AzureSQLConnector.load_default()

        initStr = "Fetch operation started. Trigger: {} Source: {} - Models: {} - LAST_DAYS={}".format(
            trigger, source, models, last_days)
        logger.info(initStr)

        client = get_client(source)

        for model_name in models:
            logger.info('Extracting data from Model: {}'.format(model_name))
            # jsonpath is returned by the client but not needed here.
            jsonpath, dataset = client.get_data(model_name,
                                                last_days=last_days)
            # push to Azure SQL
            result = azconn.insert_dataset(dataset)
            results[model_name] = result

        returnStr = "Fetch operation ended. Trigger: {} - Source: {} - LAST_DAYS={}\nRESULTS: {}".format(
            trigger, source, last_days, results)
        logger.info(returnStr)
        output_results = {'params': params, 'results': results}

    except Exception as e:
        # BUG FIX: traceback.print_exc() prints to stderr and returns None,
        # so the stored result was always "... :: None". format_exc() returns
        # the traceback text for the caller to inspect.
        returnStr = 'F_fetch_data.fetch_data :: {}'.format(
            traceback.format_exc())
        logger.error(e)

        output_results = {'params': params, 'results': returnStr}

    return output_results
Exemplo n.º 8
0
    def update_from_json(self, dataset):
        """Upsert the records of a JSON dataset into the reflected DB model.

        The dataset dict carries a 'header' ({'schema': ..., 'model': ...})
        and a 'data' list of record dicts, each identified by an 'Id' key.
        Rows already present (matched on Id) are updated field by field;
        missing rows are inserted. The whole batch is committed as one
        transaction and rolled back entirely on any error.

        Args:
            dataset: dict with 'header' and 'data' keys as described above.

        Returns:
            str: 'committed' on success, 'rolled back' if any record failed.
        """

        header = dataset['header']
        schema = header['schema']
        model_name = header['model']

        result = None

        logger.info("Loading DB schema: {}".format(schema))
        # for documentation on this : refer to https://docs.sqlalchemy.org/en/14/orm/extensions/automap.html
        AutoBase = automap_base()
        AutoBase.prepare(engine=self.engine, schema=schema, reflect=True)
        logger.debug("loading modelObject")
        # Mapped class generated by automap for the target table.
        modelObject = getattr(AutoBase.classes, model_name)

        logger.debug("Opening Session")
        session = self.SessionFactory()
        # This is very important, so the data is inserted in the right schema
        # (the identity map here looks like a no-op, but it pins the schema
        # translation for this session's connection).
        session.connection(
            execution_options={"schema_translate_map": {
                schema: schema
            }})

        logger.info("Saving JSON file to {}".format(self.dbname))
        logger.debug("JSON Header: {}".format(header))

        try:
            for dict_item in dataset['data']:

                # Look up the record by its primary 'Id' to decide
                # insert vs. update. (Note: 'id' shadows the builtin.)
                id = dict_item['Id']
                objectInstance = session.query(modelObject).filter(
                    modelObject.Id == id).first()

                # if object not found in the db, create it
                if objectInstance is None:
                    logger.debug(
                        "Object {} with ID={} not found in DB. Creating.".
                        format(model_name, id))
                    objectInstance = modelObject(**dict_item)
                    session.add(objectInstance)

                # if already present, update all its fields
                else:
                    logger.debug(
                        "Object {} with ID={} found in DB. Updating.".format(
                            model_name, id))
                    # Remove 'Id' so the key itself is never overwritten below.
                    id = dict_item.pop('Id')
                    for key, value in dict_item.items():
                        setattr(objectInstance, key, value)

                logger.debug("inserted record {}".format(dict_item.values()))

            # Single commit for the whole batch: all-or-nothing semantics.
            logger.info("Committing...")
            session.commit()
            result = 'committed'

        except Exception as e:
            logger.error("SQL connector update_from_json: {}".format(e))
            session.rollback()
            result = 'rolled back'

        finally:
            # Always release the session back to the pool.
            session.close()

        return result