def forge_query(self, model, page_size=PAGE_SIZE, count=False):
    # Scope the query to the model's Resource Graph table, defaulting to 'Resources'
    class_scope = model.get('class', 'Resources')
    base_name = model['base_name']
    # build the comma-separated list of fields to project
    fieldnames = ",".join(model['fields'].keys())
    queryStr = "{} | where type =~ '{}' | project {}".format(
        class_scope, base_name, fieldnames)
    if count:
        queryStr += " | count"
    else:
        queryStr += " | limit {}".format(page_size)
    logger.debug("QUERY STR : {}".format(queryStr))
    return queryStr
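# Usage sketch for forge_query, assuming a hypothetical Azure Resource Graph
# model definition (the field and type names below are illustrative, not taken
# from a real manifest):
#
#   model = {
#       'class': 'Resources',
#       'base_name': 'microsoft.compute/virtualmachines',
#       'fields': {'id': {}, 'name': {}, 'location': {}},
#   }
#   connector.forge_query(model, page_size=100)
#   # -> "Resources | where type =~ 'microsoft.compute/virtualmachines'
#   #     | project id,name,location | limit 100"
#   connector.forge_query(model, count=True)
#   # -> "... | project id,name,location | count"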
def execute_query(self, query_name, query_conf):
    model_name, queryPipeline = build_mongo_query(query_conf)
    collection = self.db[model_name]
    logger.info("Executing mongo Query on Collection {}: {}".format(
        model_name, queryPipeline))
    results = collection.aggregate(queryPipeline)
    results_list = list(results)
    logger.debug(results_list)
    result_dataset = {
        "header": {
            "schema": SCHEMA_NAME,
            "model": model_name,
            "query_name": query_name,
            "query_conf": query_conf,
            "count": len(results_list),
        },
        "data": results_list
    }
    return result_dataset
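# Shape of the dataset returned by execute_query, assuming build_mongo_query
# yields a collection name plus an aggregation pipeline (values below are
# illustrative):
#
#   connector.execute_query('active_users', query_conf)
#   # -> {
#   #      "header": {
#   #        "schema": SCHEMA_NAME, "model": "users",
#   #        "query_name": "active_users", "query_conf": {...}, "count": 2,
#   #      },
#   #      "data": [{...}, {...}],
#   #    }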
def get_count(self, model, search_domains=[]):
    countStr = 'SELECT COUNT(*)' + self.build_domain_query(
        model, search_domains=search_domains)
    logger.debug('Applying count query string: {}'.format(countStr))
    total_count = self.client.execute(countStr).fetchone()[0]
    return total_count
def read_query(self, model, search_domains=[], start_row=None):
    queryStr = self.build_read_query(model,
                                     search_domains=search_domains,
                                     offset=start_row)
    logger.debug('Applying the following queryStr: {}'.format(queryStr))
    results = self.client.execute(queryStr).fetchall()
    return results
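# Paging sketch combining get_count and read_query, assuming build_read_query
# windows the result set by PAGE_SIZE rows starting at the given offset (the
# connector instance and the search_domains tuple format are illustrative):
#
#   domains = [('status', '=', 'active')]
#   total = connector.get_count(model, search_domains=domains)
#   rows = []
#   for start_row in range(0, total, PAGE_SIZE):
#       rows.extend(connector.read_query(model,
#                                        search_domains=domains,
#                                        start_row=start_row))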
def plan_changes(self, schema, create_new=True, delete_old=False,
                 alter_changed=True):
    """Establishes a comparison between the current connector's manifest
    and the current db schema state, and produces a 'change plan' JSON file"""
    new, old, matching, changed, changes = self.compare_schema(schema)

    # Determine all models that need to be dropped from the schema
    if delete_old and alter_changed:
        to_delete = {*old, *changed}
    elif delete_old:
        to_delete = old
    elif alter_changed:
        to_delete = changed
    else:
        to_delete = set()

    # Determine all models that need to be created, or re-created after deletion
    if create_new and alter_changed:
        to_create = {*new, *changed}
    elif create_new:
        to_create = new
    elif alter_changed:
        to_create = changed
    else:
        to_create = set()

    # If relevant, add the detailed changes that need to occur on modified models
    changes_detail = {
        key: value
        for key, value in changes.items() if value['has_changed']
    }

    plan = {
        "schema": schema,
        "delete": list(to_delete),
        "create": list(to_create),
        "changes_detail": changes_detail
    }
    logger.debug("CHANGE PLAN FOR SCHEMA {}: {}".format(schema, plan))
    if DUMP_JSON:
        json_dump(plan, schema, 'DB_CHANGE_PLAN')
    return plan
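# How the three flags combine, given that compare_schema() buckets models into
# new / old / changed (a sketch; 'matching' models are always left untouched):
#
#   create_new  delete_old  alter_changed      create          delete
#   True        False       True   (default)   new + changed   changed
#   True        True        True               new + changed   old + changed
#   True        False       False              new             -
#   False       True        False              -               old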
def create_ORM_class(schema, model_name, model, unpack={}):
    """Constructs an sqlAlchemy ORM class definition on a declarative Base,
    that corresponds to a given model definition"""
    logger.debug("START CREATION of ORM class {}. Schema: {}".format(
        model_name, schema))
    construct = {
        '__tablename__': model_name,
        '__table_args__': {
            'schema': schema
        }
    }
    for field_name, field in model['fields'].items():
        field_construct = construct_field(field_name, field, unpack)
        construct.update(field_construct)
    # logger.debug("SQLAlchemy Construct: {}".format(construct))
    ORMClass = type(model_name, (AutoBase, ), construct)
    return ORMClass
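# Usage sketch, assuming construct_field() returns {field_name: Column(...)}
# entries to merge into the class dict (the model below is illustrative):
#
#   model = {'fields': {'Id': {'type': 'string', 'key': True},
#                       'Name': {'type': 'string'}}}
#   Server = create_ORM_class('inventory', 'servers', model)
#   # Server is now a mapped class equivalent to:
#   #   class servers(AutoBase):
#   #       __tablename__ = 'servers'
#   #       __table_args__ = {'schema': 'inventory'}
#   #       Id = Column(String, primary_key=True)
#   #       Name = Column(String)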
def apply_transforms(self, transforms):
    source = transforms['Source']
    table_list = transforms['Tables']
    dataframes = self.load_tables(source, table_list)
    df = None
    for step in transforms['Steps']:
        step_name = step['Step']
        logger.debug("STEP: {}".format(step))
        try:
            logger.info("{}::{} - Executing Step".format(source, step_name))
            operation = step['type']
            params = step['params']
            output_name = step['output']
            # replace the dataframe names by the actual dataframes in the params
            input_name = step['input']
            params['origin_df'] = dataframes[input_name]
            if 'right_input' in step:
                right_name = step['right_input']
                params['right_df'] = dataframes[right_name]
            logger.debug("STEP PARAMS: {}".format(params))
            # retrieve the right function to apply and pass the parameters as dict
            function = getattr(self, operation)
            df = function(**params)
            logger.debug(df.head(10))
            # store the output in the buffer_dfs for further chaining
            dataframes[output_name] = df
            if step.get('save'):
                logger.info("Saving dataframe {}::{}".format(
                    source, output_name))
                self.save(df, source, output_name)
        except Exception as e:
            errmsg = "{}::{} error: {}".format(source, step_name, e)
            logger.error(errmsg)
            continue
    return df
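# Minimal transforms document accepted by apply_transforms (a sketch; the
# 'rename' operation and its params stand in for whatever step methods the
# concrete class actually exposes):
#
#   transforms = {
#       'Source': 'crm',
#       'Tables': ['accounts', 'contacts'],
#       'Steps': [
#           {'Step': 'normalize accounts',
#            'type': 'rename',                 # resolved via getattr(self, ...)
#            'input': 'accounts',              # looked up in the loaded dataframes
#            'params': {'columns': {'Acct': 'account_id'}},
#            'output': 'accounts_clean',       # stored for chaining in later steps
#            'save': True},                    # persisted via self.save(...)
#       ],
#   }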
def update_from_json(self, dataset):
    header = dataset['header']
    schema = header['schema']
    model_name = header['model']
    result = None
    logger.info("Loading DB schema: {}".format(schema))
    # for documentation on this, refer to https://docs.sqlalchemy.org/en/14/orm/extensions/automap.html
    AutoBase = automap_base()
    AutoBase.prepare(engine=self.engine, schema=schema, reflect=True)
    logger.debug("loading modelObject")
    modelObject = getattr(AutoBase.classes, model_name)
    logger.debug("Opening Session")
    session = self.SessionFactory()
    # This is very important, so the data is inserted in the right schema
    session.connection(
        execution_options={"schema_translate_map": {
            schema: schema
        }})
    logger.info("Saving JSON file to {}".format(self.dbname))
    logger.debug("JSON Header: {}".format(header))
    try:
        for dict_item in dataset['data']:
            id = dict_item['Id']
            objectInstance = session.query(modelObject).filter(
                modelObject.Id == id).first()
            if objectInstance is None:
                # if the object is not found in the db, create it
                logger.debug(
                    "Object {} with ID={} not found in DB. Creating.".format(
                        model_name, id))
                objectInstance = modelObject(**dict_item)
                session.add(objectInstance)
            else:
                # if it is already present, update all its fields
                logger.debug(
                    "Object {} with ID={} found in DB. Updating.".format(
                        model_name, id))
                dict_item.pop('Id')
                for key, value in dict_item.items():
                    setattr(objectInstance, key, value)
            logger.debug("inserted record {}".format(dict_item.values()))
        logger.info("Committing...")
        session.commit()
        result = 'committed'
    except Exception as e:
        logger.error("SQL connector update_from_json: {}".format(e))
        session.rollback()
        result = 'rolled back'
    finally:
        session.close()
    return result
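# Shape of the dataset expected by update_from_json; it matches the structure
# produced by execute_query above, and records are upserted on their 'Id'
# column (values below are illustrative):
#
#   dataset = {
#       'header': {'schema': 'inventory', 'model': 'servers'},
#       'data': [
#           {'Id': 1, 'Name': 'web-01'},   # created if Id 1 is absent
#           {'Id': 2, 'Name': 'db-01'},    # updated field-by-field if present
#       ],
#   }
#   connector.update_from_json(dataset)    # -> 'committed' or 'rolled back'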