def __init__( self, sesManager ): self.sesManager = sesManager hashed = hashThisList( [ "An ETL process run.", datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S' ) ] ) self.runManager = EtlProcessManager( source_hash= hashed, created_at= datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S' ), started_at= datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S' ), ) self.commitThis()
def processData(srcDepartment, sesTarget): """ Takes in a source Department object from biopsmodels (mysql.bio_ps.Departments) and determines if the object needs to be updated, inserted in the target database (mysql.bio_public.Departments), or that nothing needs doing. Selecting Booleans from the databases. Using conjunctions to make the exists() a boolean return from the query() method. Bit more syntax but a sqlalchemy object returned will not be truthy/falsey. (http://techspot.zzzeek.org/2008/09/09/selecting-booleans/) """ true, false = literal(True), literal(False) recordToList = [ srcDepartment.deptid, srcDepartment.effdt, srcDepartment.eff_status, srcDepartment.descr, srcDepartment.descrshort, srcDepartment.location, srcDepartment.budget_deptid ] srcHash = hashThisList(recordToList) def departmentExists(): """determine the department exists in the target database.""" (ret, ), = sesTarget.query( exists().where(Departments.deptid == srcDepartment.deptid)) return ret if departmentExists(): def departmentUpdateRequired(): """ Determine if the department that exists requires and update. @True: returned if source_hash is unchanged @False: returned if source_hash is different """ (ret, ), = sesTarget.query(exists().where( Departments.deptid == srcDepartment.deptid).where( Departments.source_hash == srcHash).where( Departments.deleted_at.is_(None))) return not ret if departmentUpdateRequired(): updateDepartment = sesTarget.query(Departments).filter( Departments.deptid == srcDepartment.deptid).one() updateDepartment.source_hash = srcHash updateDepartment.deptid = srcDepartment.deptid updateDepartment.effdt = srcDepartment.effdt updateDepartment.eff_status = srcDepartment.eff_status updateDepartment.descr = srcDepartment.descr updateDepartment.descrshort = srcDepartment.descrshort updateDepartment.location = srcDepartment.location updateDepartment.budget_deptid = srcDepartment.budget_deptid updateDepartment.updated_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S') updateDepartment.deleted_at = None return updateDepartment else: insertDepartment = Departments( source_hash=srcHash, deptid=srcDepartment.deptid, effdt=srcDepartment.effdt, eff_status=srcDepartment.eff_status, descr=srcDepartment.descr, descrshort=srcDepartment.descrshort, location=srcDepartment.location, budget_deptid=srcDepartment.budget_deptid, created_at=datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S')) return insertDepartment
def processData(srcJob, sesTarget): """ Takes in a source Job object from biopsmodels (mysql.bio_ps.JobCodes) and determines if the object needs to be updated, inserted in the target database (mysql.bio_public.JobCodes), or that nothing needs doing. Selecting Booleans from the databases. Using conjunctions to make the exists() a boolean return from the query() method. Bit more syntax but a sqlalchemy object returned will not be truthy/falsey. (http://techspot.zzzeek.org/2008/09/09/selecting-booleans/) """ true, false = literal(True), literal(False) jobList = [ srcJob.jobcode, srcJob.effdt, srcJob.setid, srcJob.src_sys_id, srcJob.eff_status, srcJob.descr, srcJob.descrshort, srcJob.setid_salary, srcJob.sal_admin_plan, srcJob.grade, srcJob.manager_level, srcJob.job_family, srcJob.flsa_status ] srcHash = hashThisList(jobList) def jobExists(): """ determine the job exists in the target database. @True: The job exists in the database @False: The job does not exist in the database """ (ret, ), = sesTarget.query( exists().where(JobCodes.jobcode == srcJob.jobcode)) return ret if jobExists(): def jobUpdateRequired(): """ Determine if the job that exists requires and update. @True: returned if source_hash is unchanged @False: returned if source_hash is different """ (ret, ), = sesTarget.query( exists().where(JobCodes.jobcode == srcJob.jobcode).where( JobCodes.source_hash == srcHash).where( JobCodes.deleted_at.is_(None))) return not ret if jobUpdateRequired(): # retrive the tables object to update. updateJob = sesTarget.query(JobCodes).filter( JobCodes.jobcode == srcJob.jobcode).one() # repeat the following pattern for all mapped attributes: updateJob.source_hash = srcHash updateJob.jobcode = srcJob.jobcode updateJob.effdt = srcJob.effdt updateJob.setid = srcJob.setid updateJob.src_sys_id = srcJob.src_sys_id updateJob.eff_status = srcJob.eff_status updateJob.descr = srcJob.descr updateJob.descrshort = srcJob.descrshort updateJob.setid_salary = srcJob.setid_salary updateJob.sal_admin_plan = srcJob.sal_admin_plan updateJob.grade = srcJob.grade updateJob.manager_level = srcJob.manager_level updateJob.job_family = srcJob.job_family updateJob.flsa_status = srcJob.flsa_status updateJob.updated_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S') updateJob.deleted_at = None return updateJob else: insertJob = JobCodes(source_hash=srcHash, jobcode=srcJob.jobcode, effdt=srcJob.effdt, setid=srcJob.setid, src_sys_id=srcJob.src_sys_id, eff_status=srcJob.eff_status, descr=srcJob.descr, descrshort=srcJob.descrshort, setid_salary=srcJob.setid_salary, sal_admin_plan=srcJob.sal_admin_plan, grade=srcJob.grade, manager_level=srcJob.manager_level, job_family=srcJob.job_family, flsa_status=srcJob.flsa_status, created_at=datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S')) return insertJob
def processData( srcPerson, sesTarget ): """ Takes in a source person object from the asudw as a AsuDwPsPerson object and determines if the object needs to be updated, inserted in the target database (mysql.bio_public.people), or that nothing needs doing, but each source record will have an action in the target database via the updated_flag. """ personList = [ srcPerson.emplid, srcPerson.asurite_id, srcPerson.asu_id, srcPerson.ferpa, srcPerson.last_name, srcPerson.first_name, srcPerson.middle_name, srcPerson.display_name, srcPerson.preferred_first_name, srcPerson.affiliations, srcPerson.email_address, srcPerson.eid, srcPerson.birthdate, srcPerson.last_update ] srcHash = hashThisList( personList ) def getTargetRecords(): """Returns a record set from the target database.""" """ Determine the person exists in the target database. @True: The person exists in the database @False: The person does not exist in the database """ ret = sesTarget.query( People ).filter( People.emplid == srcPerson.emplid ).all() return ret tgtRecords = getTargetRecords() if tgtRecords: """ If true then an update is required, else an insert is required @True: Because there might be many recornds returned from the db, a loop is required. Trying not to update the data if it is not required, but the source data will require an action. @Else Block (NO BREAK REACHED): If the condition is not reached in the for block the else block will insure that a record is updated. It might not update the record that was initially created previously, but all source data has to be represented in the target database. @False: insert the new data from the source database. """ for tgtRecord in tgtRecords: if tgtRecord.source_hash == srcHash: tgtRecord.updated_flag = True tgtRecord.deleted_at = None return tgtRecord break else: # NO BREAK REACHED tgtRecord = tgtRecords[0] tgtRecord.source_hash = srcHash tgtRecord.updated_flag = True tgtRecord.emplid = srcPerson.emplid tgtRecord.asurite_id = srcPerson.asurite_id tgtRecord.asu_id = srcPerson.asu_id tgtRecord.ferpa = srcPerson.ferpa tgtRecord.last_name = srcPerson.last_name tgtRecord.first_name = srcPerson.first_name tgtRecord.middle_name = srcPerson.middle_name tgtRecord.display_name = srcPerson.display_name tgtRecord.preferred_first_name = srcPerson.preferred_first_name tgtRecord.affiliations = srcPerson.affiliations tgtRecord.email_address = srcPerson.email_address tgtRecord.eid = srcPerson.eid tgtRecord.birthdate = srcPerson.birthdate tgtRecord.updated_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S' ) tgtRecord.deleted_at = None return tgtRecord else: # person wasn't in the target databases, add them now if srcPerson.emplid >= 2147483647L: raise ValueError insertPerson = People( source_hash = srcHash, updated_flag = True, emplid = srcPerson.emplid, asurite_id = srcPerson.asurite_id, asu_id = srcPerson.asu_id, ferpa = srcPerson.ferpa, last_name = srcPerson.last_name, first_name = srcPerson.first_name, middle_name = srcPerson.middle_name, display_name = srcPerson.display_name, preferred_first_name = srcPerson.preferred_first_name, affiliations = srcPerson.affiliations, email_address = srcPerson.email_address, eid = srcPerson.eid, birthdate = srcPerson.birthdate, created_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S' ) ) return insertPerson
def processData( srcSubAffiliation, sesTarget ): """ Takes in a source SubAffiliation object from biopsmodels (mysql.bio_ps.SubAffiliations) and determines if the object needs to be updated, inserted in the target database (mysql.bio_public.SubAffiliations), or that nothing needs doing. Selecting Booleans from the databases. Using conjunctions to make the exists() a boolean return from the query() method. Bit more syntax but a sqlalchemy object returned will not be truthy/falsey. (http://techspot.zzzeek.org/2008/09/09/selecting-booleans/) """ subAffiliationList = [ srcSubAffiliation.code, srcSubAffiliation.title, srcSubAffiliation.description, srcSubAffiliation.proximity_scope, srcSubAffiliation.service_access, srcSubAffiliation.distribution_lists ] srcHash = hashThisList( subAffiliationList ) def getTargetRecords(): """Returns a record set from the target database""" ret = sesTarget.query( SubAffiliations ).filter( SubAffiliations.code == srcSubAffiliation.code ).filter( SubAffiliations.updated_flag == False ).all() return ret tgtRecords = getTargetRecords() if tgtRecords: """ If true then an update is required, else an insert is required @True: Because there might be many recornds returned from the db, a loop is required. Trying not to update the data if it is not required, but the source data will require an action. @Else Block (NO BREAK REACHED): If the condition is not reached in the for block the else block will insure that a record is updated. It might not update the record that was initially created previously, but all source data has to be represented in the target database. @False: insert the new data from the source database. """ for tgtRecord in tgtRecords: if tgtRecord.source_hash == srcHash: tgtRecord.updated_flag = True tgtRecord.deleted_at = None return tgtRecord break else: # NO BREAK REACHED tgtRecord = tgtRecords[0] tgtRecord.updated_flag = True tgtRecord.source_hash = srcHash # list of the fields that will be updated... tgtRecord.code = srcSubAffiliation.code tgtRecord.title = srcSubAffiliation.title tgtRecord.description = srcSubAffiliation.description tgtRecord.proximity_scope = srcSubAffiliation.proximity_scope tgtRecord.service_access = srcSubAffiliation.service_access tgtRecord.distribution_lists = srcSubAffiliation.distribution_lists tgtRecord.updated_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S' ) tgtRecord.deleted_at = None return tgtRecord else: insertSubAffiliation = SubAffiliations( updated_flag = True, source_hash = srcHash, code = srcSubAffiliation.code, title = srcSubAffiliation.title, description = srcSubAffiliation.description, proximity_scope = srcSubAffiliation.proximity_scope, service_access = srcSubAffiliation.service_access, distribution_lists = srcSubAffiliation.distribution_lists, created_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S' ) ) return insertSubAffiliation
def processData(srcJobLog, sesTarget): """ Takes in a source JobLog object from biopsmodels (mysql.bio_ps.JobsLog) and determines if the object needs to be updated, inserted in the target database (mysql.bio_public.JobsLog), or that nothing needs doing. Selecting Booleans from the databases. Using conjunctions to make the exists() a boolean return from the query() method. Bit more syntax but a sqlalchemy object returned will not be truthy/falsey. (http://techspot.zzzeek.org/2008/09/09/selecting-booleans/) """ jobLogList = [ srcJobLog.emplid, srcJobLog.deptid, srcJobLog.jobcode, srcJobLog.supervisor_id, srcJobLog.reports_to, srcJobLog.main_appt_num_jpn, srcJobLog.effdt, srcJobLog.action, srcJobLog.action_reason, srcJobLog.action_dt, srcJobLog.job_entry_dt, srcJobLog.dept_entry_dt, srcJobLog.position_entry_dt, srcJobLog.hire_dt, srcJobLog.last_hire_dt, srcJobLog.termination_dt ] srcHash = hashThisList(jobLogList) def getTargetRecords(): """ determine the jobLog exists in the target database. @return recordset of jobLog objects """ ret = sesTarget.query(JobsLog).filter( JobsLog.emplid == srcJobLog.emplid).filter( JobsLog.deptid == srcJobLog.deptid).filter( JobsLog.jobcode == srcJobLog.jobcode).filter( JobsLog.effdt == srcJobLog.effdt).filter( JobsLog.action == srcJobLog.action).filter( JobsLog.action_reason == srcJobLog.action_reason).filter( JobsLog.updated_flag == False).all() return ret tgtRecords = getTargetRecords() if tgtRecords: """ If true then an update is required, else an insert is required @True: Because there might be many recornds returned from the db, a loop is required. Trying not to update the data if it is not required, but the source data will require an action. @Else Block (NO BREAK REACHED): If the condition is not reached in the for block the else block will insure that a record is updated. It might not update the record that was initially created previously, but all source data has to be represented in the target database. @False: insert the new data from the source database. """ for tgtRecord in tgtRecords: if tgtRecord.source_hash == srcHash: tgtRecord.updated_flag = True tgtRecord.deleted_at = None return tgtRecord break else: # NO BREAK REACHED tgtRecord = tgtRecords[0] # repeat the following pattern for all mapped attributes: tgtRecord.source_hash = srcHash tgtRecord.emplid = srcJobLog.emplid tgtRecord.deptid = srcJobLog.deptid tgtRecord.jobcode = srcJobLog.jobcode tgtRecord.supervisor_id = srcJobLog.supervisor_id tgtRecord.reports_to = srcJobLog.reports_to tgtRecord.main_appt_num_jpn = srcJobLog.main_appt_num_jpn tgtRecord.effdt = srcJobLog.effdt tgtRecord.action = srcJobLog.action tgtRecord.action_reason = srcJobLog.action_reason tgtRecord.action_dt = srcJobLog.action_dt tgtRecord.job_entry_dt = srcJobLog.job_entry_dt tgtRecord.dept_entry_dt = srcJobLog.dept_entry_dt tgtRecord.position_entry_dt = srcJobLog.position_entry_dt tgtRecord.hire_dt = srcJobLog.hire_dt tgtRecord.last_hire_dt = srcJobLog.last_hire_dt tgtRecord.termination_dt = srcJobLog.termination_dt tgtRecord.updated_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S') tgtRecord.deleted_at = None return tgtRecord else: try: srcGetPersonId = sesTarget.query( People.id).filter(People.emplid == srcJobLog.emplid).one() except NoResultFound as e: raise e else: srcGetDepartmentId = sesTarget.query(Departments.id).filter( Departments.deptid == srcJobLog.deptid).one() srcGetJobId = sesTarget.query(JobCodes.id).filter( JobCodes.jobcode == srcJobLog.jobcode).one() insertJobLog = JobsLog( source_hash=srcHash, person_id=srcGetPersonId.id, department_id=srcGetDepartmentId.id, job_id=srcGetJobId.id, emplid=srcJobLog.emplid, deptid=srcJobLog.deptid, jobcode=srcJobLog.jobcode, supervisor_id=srcJobLog.supervisor_id, reports_to=srcJobLog.reports_to, main_appt_num_jpn=srcJobLog.main_appt_num_jpn, effdt=srcJobLog.effdt, action=srcJobLog.action, action_reason=srcJobLog.action_reason, action_dt=srcJobLog.action_dt, job_entry_dt=srcJobLog.job_entry_dt, dept_entry_dt=srcJobLog.dept_entry_dt, position_entry_dt=srcJobLog.position_entry_dt, hire_dt=srcJobLog.hire_dt, last_hire_dt=srcJobLog.last_hire_dt, termination_dt=srcJobLog.termination_dt, created_at=datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S')) return insertJobLog
def processData(srcPersonSubAffiliation, sesTarget): """ Takes in a source PersonSubAffiliation object from biopsmodels (mysql.bio_ps.PersonSubAffiliations) and determines if the object needs to be updated, inserted in the target database (mysql.bio_public.PersonSubAffiliations), or that nothing needs doing. Selecting Booleans from the databases. Using conjunctions to make the exists() a boolean return from the query() method. Bit more syntax but a sqlalchemy object returned will not be truthy/falsey. (http://techspot.zzzeek.org/2008/09/09/selecting-booleans/) """ recordToList = [ srcPersonSubAffiliation.emplid, srcPersonSubAffiliation.deptid, srcPersonSubAffiliation.subaffiliation_code, srcPersonSubAffiliation.campus, srcPersonSubAffiliation.title, srcPersonSubAffiliation.short_description, srcPersonSubAffiliation.description, srcPersonSubAffiliation.directory_publish, srcPersonSubAffiliation.department, srcPersonSubAffiliation.last_update, srcPersonSubAffiliation.department_directory ] srcHash = hashThisList(recordToList) def getSubAffiliations(): return sesTarget.query( SubAffiliations.id, SubAffiliations.display_title).filter( SubAffiliations.code == srcPersonSubAffiliation.subaffiliation_code).first() def getTargetRecords(): """ determine the personSubAffiliation exists in the target database. @True: The personSubAffiliation exists in the database @False: The personSubAffiliation does not exist in the database """ ret = sesTarget.query(PersonSubAffiliations).filter( PersonSubAffiliations.emplid == srcPersonSubAffiliation.emplid ).filter(PersonSubAffiliations.deptid == srcPersonSubAffiliation.deptid).filter( PersonSubAffiliations.subaffiliation_code == srcPersonSubAffiliation.subaffiliation_code).filter( PersonSubAffiliations.updated_flag == False).all() return ret tgtRecords = getTargetRecords() srcGetSubAffiliationId = getSubAffiliations() if srcGetSubAffiliationId: srcSubaffiliationId = srcGetSubAffiliationId.id srcSubaffiliationDisplayTitle = srcGetSubAffiliationId.display_title else: srcSubaffiliationId = None srcSubaffiliationDisplayTitle = srcPersonSubAffiliation.description if tgtRecords: """ If true then an update is required, else an insert is required @True: Because there might be many recornds returned from the db, a loop is required. Trying not to update the data if it is not required, but the source data will require an action. @Else Block (NO BREAK REACHED): If the condition is not reached in the for block the else block will insure that a record is updated. It might not update the record that was initially created previously, but all source data has to be represented in the target database. @False: insert the new data from the source database. """ for tgtRecord in tgtRecords: if tgtRecord.source_hash == srcHash: tgtRecord.updated_flag = True tgtRecord.deleted_at = None tgtRecord.subaffiliation_id = srcSubaffiliationId tgtRecord.bio_override_description = srcSubaffiliationDisplayTitle return tgtRecord break else: # NO BREAK REACHED tgtRecord = tgtRecords[0] tgtRecord.source_hash = srcHash tgtRecord.updated_flag = True tgtRecord.emplid = srcPersonSubAffiliation.emplid tgtRecord.deptid = srcPersonSubAffiliation.deptid tgtRecord.subaffiliation_id = srcSubaffiliationId tgtRecord.bio_override_description = srcSubaffiliationDisplayTitle tgtRecord.subaffiliation_code = srcPersonSubAffiliation.subaffiliation_code tgtRecord.campus = srcPersonSubAffiliation.campus tgtRecord.title = srcPersonSubAffiliation.title tgtRecord.short_description = srcPersonSubAffiliation.short_description tgtRecord.description = srcPersonSubAffiliation.description tgtRecord.directory_publish = srcPersonSubAffiliation.directory_publish tgtRecord.department = srcPersonSubAffiliation.department tgtRecord.department_directory = srcPersonSubAffiliation.department_directory tgtRecord.updated_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S') tgtRecord.deleted_at = None return tgtRecord else: srcGetPersonId = sesTarget.query(People.id).filter( People.emplid == srcPersonSubAffiliation.emplid).one() srcGetDepartmentId = sesTarget.query(Departments.id).filter( Departments.deptid == srcPersonSubAffiliation.deptid).one() if srcGetPersonId and srcGetDepartmentId: insertPersonSubAffiliation = PersonSubAffiliations( person_id=srcGetPersonId.id, department_id=srcGetDepartmentId.id, updated_flag=True, subaffiliation_id=srcSubaffiliationId, source_hash=srcHash, emplid=srcPersonSubAffiliation.emplid, deptid=srcPersonSubAffiliation.deptid, subaffiliation_code=srcPersonSubAffiliation. subaffiliation_code, campus=srcPersonSubAffiliation.campus, title=srcPersonSubAffiliation.title, short_description=srcPersonSubAffiliation.short_description, bio_override_description=srcSubaffiliationDisplayTitle, description=srcPersonSubAffiliation.description, directory_publish=srcPersonSubAffiliation.directory_publish, department=srcPersonSubAffiliation.department, department_directory=srcPersonSubAffiliation. department_directory, created_at=datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S')) return insertPersonSubAffiliation
def processData(srcPersonPhone, sesTarget): """ Takes in a source AsuDwPsPhone object from the asudw database and determines if the object needs to be updated, inserted in the target database (mysql.bio_public.person_phones), or that nothing needs doing, but each source record will have an action in the target database via the updated_flag. """ def getTargetRecords(): """Returns a record set from the target database.""" ret = sesTarget.query(Phones).filter( Phones.emplid == srcPersonPhone.emplid ).filter(Phones.phone_type == srcPersonPhone.phone_type).filter( Phones.phone == cleanPhoneNumber(srcPersonPhone.phone)).filter( Phones.updated_flag == False).all() return ret recordToList = [ srcPersonPhone.emplid, srcPersonPhone.phone_type, srcPersonPhone.phone, srcPersonPhone.last_update ] srcHash = hashThisList(recordToList) tgtRecords = getTargetRecords() if tgtRecords: """ If true then an update is required, else an insert is required @True: Because there might be many recornds returned from the db, a loop is required. Trying not to update the data if it is not required, but the source data will require an action. @Else Block (NO BREAK REACHED): If the condition is not reached in the for block the else block will insure that a record is updated. It might not update the record that was initially created previously, but all source data has to be represented in the target database. @False: insert the new data from the source database. """ for tgtRecord in tgtRecords: if tgtRecord.source_hash == srcHash: tgtRecord.updated_flag = True tgtRecord.deleted_at = None return tgtRecord break else: # NO BREAK REACHED tgtRecord = tgtRecords[0] tgtRecord.source_hash = srcHash tgtRecord.updated_flag = True tgtRecord.phone = cleanPhoneNumber(srcPersonPhone.phone) tgtRecord.updated_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S') tgtRecord.deleted_at = None return tgtRecord else: srcGetPersonId = sesTarget.query( People.id).filter(People.emplid == srcPersonPhone.emplid).one() insertPhone = Phones(person_id=srcGetPersonId.id, updated_flag=True, source_hash=srcHash, emplid=srcPersonPhone.emplid, phone_type=srcPersonPhone.phone_type, phone=cleanPhoneNumber(srcPersonPhone.phone), created_at=datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S')) return insertPhone
def processData(srcPersonAddress, sesTarget): """ Determine what processing action is required for a person address record selected from the source database. Using the target database as the test, a record will be either returned as ingored record (no changes), insert new object, or update an existing object. Selecting Booleans from the databases. Using conjunctions to make the exists() a boolean return from the query() method. Bit more syntax but a sqlalchemy object returned will not be truthy/falsey. (http://techspot.zzzeek.org/2008/09/09/selecting-booleans/) """ recordToList = [ srcPersonAddress.emplid, srcPersonAddress.address_type, srcPersonAddress.address1, srcPersonAddress.address2, srcPersonAddress.address3, srcPersonAddress.address4, srcPersonAddress.city, srcPersonAddress.state, srcPersonAddress.postal, srcPersonAddress.country_code, srcPersonAddress.country_descr, srcPersonAddress.last_update ] srcHash = hashThisList(recordToList) def getTargetRecords(): """ Determine the person address exists in the target database. @True: The person address exists and requires update checks @False: The person doesn't exist and will prepare for insert """ ret = sesTarget.query(Addresses).filter( Addresses.emplid == srcPersonAddress.emplid).filter( Addresses.address_type == srcPersonAddress.address_type ).filter(Addresses.address1 == srcPersonAddress.address1).filter( Addresses.city == srcPersonAddress.city).filter( Addresses.postal == srcPersonAddress.postal).filter( Addresses.updated_flag == False).all() return ret tgtRecords = getTargetRecords() if tgtRecords: """ If true then an update is required, else an insert is required @True: Because there might be many recornds returned from the db, a loop is required. Trying not to update the data if it is not required, but the source data will require an action. @Else Block (NO BREAK REACHED): If the condition is not reached in the for block the else block will insure that a record is updated. It might not update the record that was initially created previously, but all source data has to be represented in the target database. @False: insert the new data from the source database. """ for tgtRecord in tgtRecords: if tgtRecord.source_hash == srcHash: tgtRecord.updated_flag = True tgtRecord.deleted_at = None return tgtRecord break else: # NO BREAK REACHED tgtRecord = tgtRecords[0] # the record in the target database that will be updated. tgtRecord.source_hash = srcHash tgtRecord.updated_flag = True tgtRecord.address1 = srcPersonAddress.address1 tgtRecord.address2 = srcPersonAddress.address2 tgtRecord.address3 = srcPersonAddress.address3 tgtRecord.address4 = srcPersonAddress.address4 tgtRecord.city = srcPersonAddress.city tgtRecord.state = srcPersonAddress.state tgtRecord.postal = srcPersonAddress.postal tgtRecord.country_code = srcPersonAddress.country_code tgtRecord.country_descr = srcPersonAddress.country_descr # update the bio timestamp tgtRecord.updated_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S') tgtRecord.deleted_at = None # add the object to the session to commit the updated return tgtRecord else: getPersonId = sesTarget.query( People.id).filter(People.emplid == srcPersonAddress.emplid).one() insertPersonAddress = Addresses( person_id=getPersonId.id, source_hash=srcHash, updated_flag=True, emplid=srcPersonAddress.emplid, address_type=srcPersonAddress.address_type, address1=srcPersonAddress.address1, address2=srcPersonAddress.address2, address3=srcPersonAddress.address3, address4=srcPersonAddress.address4, city=srcPersonAddress.city, state=srcPersonAddress.state, postal=srcPersonAddress.postal, country_code=srcPersonAddress.country_code, country_descr=srcPersonAddress.country_descr, created_at=datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S')) return insertPersonAddress
def processData( srcPersonWebProfile, sesTarget ): """ Takes in a source PersonWebProfile object from the asudw as a person record and determines if the object needs to be updated, inserted in the target database (mysql.bio_public.PersonWebProfile), or that nothing needs doing, but each source record will have an action in the target database via the updated_flag. """ personWebProfileList = [ srcPersonWebProfile.emplid, srcPersonWebProfile.bio, srcPersonWebProfile.research_interests, srcPersonWebProfile.cv, srcPersonWebProfile.website, srcPersonWebProfile.teaching_website, srcPersonWebProfile.grad_faculties, srcPersonWebProfile.professional_associations, srcPersonWebProfile.work_history, srcPersonWebProfile.education, srcPersonWebProfile.research_group, srcPersonWebProfile.research_website, srcPersonWebProfile.honors_awards, srcPersonWebProfile.editorships, srcPersonWebProfile.presentations ] if any( personWebProfileList[1:] ): srcHash = hashThisList( personWebProfileList ) def getTargetRecords(): """Returns a record set from the target database.""" ret = sesTarget.query( PersonWebProfile ).filter( PersonWebProfile.emplid == srcPersonWebProfile.emplid ).filter( PersonWebProfile.updated_flag == False ).all() return ret tgtRecords = getTargetRecords() if tgtRecords: """ If true then an update is required, else an insert is required @True: Because there might be many recornds returned from the db, a loop is required. Trying not to update the data if it is not required, but the source data will require an action. @Else Block (NO BREAK REACHED): If the condition is not reached in the for block the else block will insure that a record is updated. It might not update the record that was initially created previously, but all source data has to be represented in the target database. @False: insert the new data from the source database. """ for tgtRecord in tgtRecords: if tgtRecord.source_hash == srcHash: tgtRecord.updated_flag = True tgtRecord.deleted_at = None return tgtRecord break else: # NO BREAK REACHED tgtRecord = tgtRecords[0] # repeat the following pattern for all mapped attributes: tgtRecord.source_hash = srcHash tgtRecord.updated_flag = True tgtRecord.emplid = srcPersonWebProfile.emplid tgtRecord.bio = srcPersonWebProfile.bio tgtRecord.research_interests = srcPersonWebProfile.research_interests tgtRecord.cv = srcPersonWebProfile.cv tgtRecord.website = srcPersonWebProfile.website tgtRecord.teaching_website = srcPersonWebProfile.teaching_website tgtRecord.grad_faculties = srcPersonWebProfile.grad_faculties tgtRecord.professional_associations = srcPersonWebProfile.professional_associations tgtRecord.work_history = srcPersonWebProfile.work_history tgtRecord.education = srcPersonWebProfile.education tgtRecord.research_group = srcPersonWebProfile.research_group tgtRecord.research_website = srcPersonWebProfile.research_website tgtRecord.honors_awards = srcPersonWebProfile.honors_awards tgtRecord.editorships = srcPersonWebProfile.editorships tgtRecord.presentations = srcPersonWebProfile.presentations tgtRecord.updated_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S' ) tgtRecord.deleted_at = None return tgtRecord else: srcGetPersonId = sesTarget.query( People.id ).filter( People.emplid == srcPersonWebProfile.emplid ).one() insertPersonWebProfile = PersonWebProfile( person_id = srcGetPersonId.id, # person_id = person.id, source_hash = srcHash, updated_flag = True, emplid = srcPersonWebProfile.emplid, bio = srcPersonWebProfile.bio, research_interests = srcPersonWebProfile.research_interests, cv = srcPersonWebProfile.cv, website = srcPersonWebProfile.website, teaching_website = srcPersonWebProfile.teaching_website, grad_faculties = srcPersonWebProfile.grad_faculties, professional_associations = srcPersonWebProfile.professional_associations, work_history = srcPersonWebProfile.work_history, education = srcPersonWebProfile.education, research_group = srcPersonWebProfile.research_group, research_website = srcPersonWebProfile.research_website, honors_awards = srcPersonWebProfile.honors_awards, editorships = srcPersonWebProfile.editorships, presentations = srcPersonWebProfile.presentations, created_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S' ) ) return insertPersonWebProfile else: return None
def processData( srcJob, sesTarget ): """ Takes in a source Job object from biopsmodels (mysql.bio_ps.Jobs) and determines if the object needs to be updated, inserted in the target database (mysql.bio_public.Jobs), or that nothing needs doing. Selecting Booleans from the databases. Using conjunctions to make the exists() a boolean return from the query() method. Bit more syntax but a sqlalchemy object returned will not be truthy/falsey. (http://techspot.zzzeek.org/2008/09/09/selecting-booleans/) """ recordToList = [ srcJob.emplid, srcJob.empl_rcd, srcJob.title, srcJob.department, srcJob.mailcode, srcJob.empl_class, srcJob.job_indicator, srcJob.location, srcJob.hr_status, srcJob.deptid, srcJob.empl_status, srcJob.fte, srcJob.last_update, srcJob.department_directory ] srcHash = hashThisList( recordToList ) def getTargetRecords(): """ determine the job exists in the target database. @True: The job exists in the database @False: The job does not exist in the database """ ret = sesTarget.query( Jobs ).filter( Jobs.emplid == srcJob.emplid ).filter( Jobs.title == srcJob.title ).filter( Jobs.deptid == srcJob.deptid ).filter( Jobs.job_indicator == srcJob.job_indicator).filter( Jobs.updated_flag == False ).all() return ret tgtRecords = getTargetRecords() if tgtRecords: """ If true then an update is required, else an insert is required @True: Because there might be many recornds returned from the db, a loop is required. Trying not to update the data if it is not required, but the source data will require an action. @Else Block (NO BREAK REACHED): If the condition is not reached in the for block the else block will insure that a record is updated. It might not update the record that was initially created previously, but all source data has to be represented in the target database. @False: insert the new data from the source database. """ for tgtRecord in tgtRecords: if tgtRecord.source_hash == srcHash: tgtRecord.updated_flag = True tgtRecord.deleted_at = None return tgtRecord break else: # NO BREAK REACHED tgtRecord = tgtRecords[0] # repeat the following pattern for all mapped attributes: tgtRecord.source_hash = srcHash tgtRecord.updated_flag = True tgtRecord.emplid = srcJob.emplid tgtRecord.empl_rcd = srcJob.empl_rcd tgtRecord.title = srcJob.title tgtRecord.department = srcJob.department tgtRecord.mailcode = srcJob.mailcode tgtRecord.empl_class = srcJob.empl_class tgtRecord.job_indicator = srcJob.job_indicator tgtRecord.location = srcJob.location tgtRecord.hr_status = srcJob.hr_status tgtRecord.deptid = srcJob.deptid tgtRecord.empl_status = srcJob.empl_status tgtRecord.fte = srcJob.fte tgtRecord.department_directory = srcJob.department_directory tgtRecord.updated_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S' ) tgtRecord.deleted_at = None return tgtRecord else: # get the ids required to maintain relationships # THIS IS WHERE WE WOULD CATCH Mid Pull new people.... srcGetPersonId = sesTarget.query( People.id ).filter( People.emplid == srcJob.emplid ).one() srcGetDepartmentId = sesTarget.query( Departments.id ).filter( Departments.deptid == srcJob.deptid ).one() insertJob = Jobs( person_id = srcGetPersonId.id, department_id = srcGetDepartmentId.id, updated_flag = True, source_hash = srcHash, emplid = srcJob.emplid, empl_rcd = srcJob.empl_rcd, title = srcJob.title, department = srcJob.department, mailcode = srcJob.mailcode, empl_class = srcJob.empl_class, job_indicator = srcJob.job_indicator, location = srcJob.location, hr_status = srcJob.hr_status, deptid = srcJob.deptid, empl_status = srcJob.empl_status, fte = srcJob.fte, department_directory = srcJob.department_directory, created_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S' ) ) return insertJob
def processData(srcPersonExternalLink, sesTarget): """ Takes in a source PersonExternalLink object from the asudw as a person record and determines if the object needs to be updated, inserted in the target database (mysql.bio_public.PersonExternalLinks), or that nothing needs doing, but each source record will have an action in the target database via the updated_flag. """ recordToList = [ srcPersonExternalLink.emplid, srcPersonExternalLink.facebook, srcPersonExternalLink.twitter, srcPersonExternalLink.google_plus, srcPersonExternalLink.linkedin ] if any(recordToList[1:]): srcHash = hashThisList(recordToList) def getTargetRecords(): """ determine the personExternalLink exists in the target database. @True: The personExternalLink exists in the database @False: The personExternalLink does not exist in the database """ ret = sesTarget.query(PersonExternalLinks).filter( PersonExternalLinks.emplid == srcPersonExternalLink.emplid ).filter(PersonExternalLinks.updated_flag == False).all() return ret tgtRecords = getTargetRecords() if tgtRecords: """ If true then an update is required, else an insert is required @True: Because there might be many recornds returned from the db, a loop is required. Trying not to update the data if it is not required, but the source data will require an action. @Else Block (NO BREAK REACHED): If the condition is not reached in the for block the else block will insure that a record is updated. It might not update the record that was initially created previously, but all source data has to be represented in the target database. @False: insert the new data from the source database. """ for tgtRecord in tgtRecords: if tgtRecord.source_hash == srcHash: tgtRecord.updated_flag = True tgtRecord.deleted_at = None return tgtRecord break else: # NO BREAK REACHED tgtRecord = tgtRecords[0] # repeat the following pattern for all mapped attributes: tgtRecord.source_hash = srcHash tgtRecord.updated_flag = True tgtRecord.emplid = srcPersonExternalLink.emplid tgtRecord.facebook = srcPersonExternalLink.facebook tgtRecord.twitter = srcPersonExternalLink.twitter tgtRecord.google_plus = srcPersonExternalLink.google_plus tgtRecord.linkedin = srcPersonExternalLink.linkedin tgtRecord.updated_at = datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S') tgtRecord.deleted_at = None return tgtRecord else: srcGetPersonId = sesTarget.query(People.id).filter( People.emplid == srcPersonExternalLink.emplid).first() insertPersonExternalLink = PersonExternalLinks( source_hash=srcHash, updated_flag=True, person_id=srcGetPersonId.id, emplid=srcPersonExternalLink.emplid, facebook=srcPersonExternalLink.facebook, twitter=srcPersonExternalLink.twitter, google_plus=srcPersonExternalLink.google_plus, linkedin=srcPersonExternalLink.linkedin, created_at=datetime.datetime.utcnow().strftime( '%Y-%m-%d %H:%M:%S')) return insertPersonExternalLink else: return None