def processing_function(raw):
    """
    Top-level processing function: attach a human-readable schedule to
    every person in the parsed data.

    The function is meant to be passed to the importer (in this case GuiIO),
    which calls it after it has parsed the raw data.

    raw is used as a mapping of id -> person record; each record is read via
    the 'Name: Last' and 'Department' keys and gains a 'Schedule' key. The
    records are modified in place and the same mapping is returned.

    Every person for whom no schedule could be attached is reported on
    stdout and written to ERROR_FILE through kt.writecsv_summary.
    """
    kdata = raw

    # Registrar's Office schedule information, looked up by last name below
    sdata = kt.importrosched(RO_DATA)

    errors = []

    def give_up(umid, person, lname, prefix):
        # Blank the schedule column, then log one aligned failure line.
        padding = max(20 - len(lname), 1)
        kdata[umid]['Schedule'] = ''
        msg = (prefix + '"' + lname + '"' + ' ' * padding
               + 'in department: ' + person['Department'])
        errors.append([lname, msg])
        print(msg)

    for umid in kdata:
        person = kdata[umid]
        lname = person['Name: Last']

        # The last name is the only piece of information the registrar
        # gives us, so start from every schedule filed under it.
        try:
            candidates = sdata[lname]
        except KeyError:
            give_up(umid, person, lname, 'Failed to find: \t')
            continue

        # Narrow the candidates down to the most likely ones
        chosen = choose_schedule(person, candidates)
        if not chosen:
            give_up(umid, person, lname, 'Failed to choose:\t')
            continue

        # One line of text per schedule in the output column
        kdata[umid]['Schedule'] = '\n'.join(make_human_readable(chosen))

    print('Number of failures: ' + str(len(errors)))
    kt.writecsv_summary(errors, ERROR_FILE)

    return kdata
def processing_function(data):
    """
    Top-level processing function: mark people whose looked-up affiliation
    is 'Alumni' as retired.

    The function is meant to be passed to the importer (in this case GuiIO),
    which calls it after it has parsed the raw data.

    data is used as a mapping of id -> person record. Records already marked
    'Retired' are skipped. For everyone else a "<first> <last>" query is sent
    to mc.querydb and the best match is picked with mc.choose_person; if that
    match has affiliation 'Alumni', the record's 'Employment Status' is set
    to 'Retired'.

    Returns the same mapping that was passed in (records are modified in
    place). Failed lookups are printed and written to OUT_FILE through
    kt.writecsv_summary.
    """
    # NOTE(review): these are published at module level, presumably so they
    # can be inspected interactively after a run -- confirm before removing.
    global data_
    global newdata
    global person
    global query

    data_ = data
    newdata = data  # alias, not a copy: the caller's records are updated
    errors = []
    ids = data.keys()
    tracker = len(ids)  # number of records still to visit

    print("Getting affiliations ...")
    for umid in ids:
        # Progress report every tenth record
        if not tracker % 10:
            print("\tnumremaining: %d" % tracker)

        # Don't do anything if person is already listed as retired
        if newdata[umid]['Employment Status'] == 'Retired':
            tracker -= 1
            continue

        query = data[umid]['Name: First'] + " " + data[umid]['Name: Last']
        result = mc.querydb(query)

        if result:
            person, _score = mc.choose_person(result, data[umid])
            if person:
                # check affiliation
                try:
                    if person['affiliation'] == 'Alumni':
                        newdata[umid]['Employment Status'] = 'Retired'
                except KeyError:
                    errors.append((query, 'No Affiliation: ' + query))
                    print(errors[-1][1])
            else:
                errors.append((query, "Can't choose person: " + query))
                print(errors[-1][1])
        else:
            errors.append((query, "Query Failed: " + query))
            print(errors[-1][1])

        tracker -= 1
        # Short pause between lookups (presumably to go easy on the
        # service behind mc.querydb -- confirm)
        time.sleep(.01)

    kt.writecsv_summary(errors, OUT_FILE)

    return newdata
def processing_function(raw):
    """
    Top-level processing function: compute and report membership summary
    statistics.

    The function is meant to be passed to the importer (in this case GuiIO),
    which calls it after it has parsed the raw data.

    The raw data is filtered into groups (all actives, North Campus,
    engineering, stewarded / unstewarded departments, international
    students, new / old hires, PhD / Masters). Group sizes and dues-payer
    counts are turned into a table of labelled figures, which is shown with
    display_results and written to OUT_FILE through kt.writecsv_summary.

    A percentage whose reference group is empty is reported as NaN instead
    of aborting the whole report with ZeroDivisionError.

    Returns None.
    """

    def percent(part, whole):
        # Share of `part` in `whole` as a percentage; NaN for an empty group.
        if not whole:
            return float('nan')
        return (100.0 * part) / whole

    # Sort stewarded & unstewarded depts
    STEWARDED_ENGIN_DEPTS = set(stewards.keys()) & kt.ENGINEERING_DEPTS
    UNSTEWARDED_ENGIN_DEPTS = kt.ENGINEERING_DEPTS - STEWARDED_ENGIN_DEPTS
    STEWARDED_NC_DEPTS = set(stewards.keys()) & kt.NORTH_CAMPUS_DEPTS
    UNSTEWARDED_NC_DEPTS = kt.NORTH_CAMPUS_DEPTS - STEWARDED_NC_DEPTS

    ##############################
    # Filter data in all the ways
    ##############################
    actives = kt.filterdata(raw, kt.selectors.allactives)
    nc = kt.filterdata(actives, kt.selectors.northcampus)
    engin = kt.filterdata(actives, kt.selectors.engineers)

    # International Students
    nc_itnl = kt.filterdata(nc, kt.selectors.itnl)
    nc_permres = kt.filterdata(nc, kt.selectors.permres)

    # Stewarded / Unstewarded
    nc_stewarded = kt.filterdata(
        nc,
        lambda person: kt.selectors.bydept(person, STEWARDED_NC_DEPTS)
    )
    nc_unstewarded = kt.filterdata(
        nc,
        lambda person: kt.selectors.bydept(person, UNSTEWARDED_NC_DEPTS)
    )
    engin_stewarded = kt.filterdata(
        engin,
        lambda person: kt.selectors.bydept(person, STEWARDED_ENGIN_DEPTS)
    )
    engin_unstewarded = kt.filterdata(
        engin,
        lambda person: kt.selectors.bydept(person, UNSTEWARDED_ENGIN_DEPTS)
    )

    # Hire Date
    nc_newhires = kt.filterdata(
        nc,
        lambda person: kt.selectors.hiredafter(person, NEW_HIRE_DATE)
    )
    nc_oldhires = kt.filterdata(
        nc,
        lambda person: kt.selectors.hiredbefore(person, NEW_HIRE_DATE)
    )
    nc_nohiredate = kt.filterdata(nc, kt.selectors.nohiredate)

    # Degree Program
    engin_phd = kt.filterdata(
        engin,
        lambda person: kt.selectors.bydegree(person, ['PhD'])
    )
    engin_masters = kt.filterdata(
        engin,
        lambda person: kt.selectors.bydegree(person, kt.MASTERS)
    )

    ###############
    # Count things
    ###############
    # Unit sizes
    bargaining_unit_size = len(actives)
    north_campus_size = len(nc)
    engineering_size = len(engin)
    overall_members = kt.count_duespayers(actives)
    nc_members = kt.count_duespayers(nc)
    engin_members = kt.count_duespayers(engin)

    # Number of actives currently stewarded.
    # The engineering steward figures and the permanent-resident figures
    # below are computed but not part of the reported table.
    total_stewarded_nc = len(nc_stewarded)
    total_unstewarded_nc = len(nc_unstewarded)
    total_stewarded_engin = len(engin_stewarded)
    total_unstewarded_engin = len(engin_unstewarded)
    nc_stewarded_members = kt.count_duespayers(nc_stewarded)
    nc_unstewarded_members = kt.count_duespayers(nc_unstewarded)
    engin_stewarded_members = kt.count_duespayers(engin_stewarded)
    engin_unstewarded_members = kt.count_duespayers(engin_unstewarded)

    # International students
    total_intl = len(nc_itnl)
    total_permres = len(nc_permres)
    intl_members = kt.count_duespayers(nc_itnl)
    permres_members = kt.count_duespayers(nc_permres)

    # New Hires
    total_newhires = len(nc_newhires)
    total_oldhires = len(nc_oldhires)
    total_nohiredate = len(nc_nohiredate)
    newhire_members = kt.count_duespayers(nc_newhires)
    oldhire_members = kt.count_duespayers(nc_oldhires)

    # Degree Program
    total_phd = len(engin_phd)
    total_masters = len(engin_masters)
    phd_members = kt.count_duespayers(engin_phd)
    masters_members = kt.count_duespayers(engin_masters)

    ######################################
    # Derived Results
    ######################################
    # One (label, value) pair per output row; ('', '') is a blank spacer row.
    spacer = ('', '')
    rows = [
        ('Current Bargaining Unit Size', bargaining_unit_size),
        ('Relative Size of North Campus (%)',
         percent(north_campus_size, bargaining_unit_size)),
        ('Relative Number of Engineers on NC (%)',
         percent(engineering_size, north_campus_size)),
        ('Relative Number of NC GSIs with >1 Steward (%)',
         percent(total_stewarded_nc, north_campus_size)),
        ('Relative Number of NC International Students (%)',
         percent(total_intl, north_campus_size)),
        spacer,
        ('Overall GEO Membership (%)',
         percent(overall_members, bargaining_unit_size)),
        ('North Campus Membership (%)',
         percent(nc_members, north_campus_size)),
        ('Engineering Membership (%)',
         percent(engin_members, engineering_size)),
        ('Membership Among Stewarded NC Depts (%)',
         percent(nc_stewarded_members, total_stewarded_nc)),
        ('Membership Among Unstewarded NC Depts (%)',
         percent(nc_unstewarded_members, total_unstewarded_nc)),
        spacer,
        ('Relative # of International Students on NC (%)',
         percent(total_intl, north_campus_size)),
        ('Membership Among International Students (%)',
         percent(intl_members, total_intl)),
        spacer,
        ('Relative # of New Hires on NC (%)',
         percent(total_newhires, north_campus_size)),
        ('Membership Among New Hires (%)',
         percent(newhire_members, total_newhires)),
        ('Membership Among Old Hires (%)',
         percent(oldhire_members, total_oldhires)),
        ('Number of People w/o Known Hire Dates', total_nohiredate),
        spacer,
        ('Relative # of Masters Students in Engineering (%)',
         percent(total_masters, engineering_size)),
        ('Membership Among Engineering PhDs (%)',
         percent(phd_members, total_phd)),
        ('Membership Among Engineering Masters (%)',
         percent(masters_members, total_masters)),
        spacer,
    ]
    labels = [label for label, _ in rows]
    results = [value for _, value in rows]

    # Display summary results
    print('\n')
    display_results(labels, results)

    print('Unstewarded Departments:')
    for d in UNSTEWARDED_NC_DEPTS:
        print(d)
    print('\n')

    # Print summary results to csv
    kt.writecsv_summary(rows, OUT_FILE)

    return None
# Console report: one titled section per statistic, one aligned row per
# term, with a blank spacer between sections.
_report_sections = (
    ('Overall Membership:', 'Membership ', memberships),
    ('Unit Turnover:', '', nh_percent),
    ('New Hire Membership:', 'Membership ', nh_memberships),
)
for _position, (_title, _row_prefix, _fractions) in enumerate(_report_sections):
    if _position:
        print('\n')
    print(_title)
    for term in TERMS:
        _shown = str(_fractions[term] * 100) + ' %'
        print(str_2col(_row_prefix + term + ':', _shown, 30))

# Export results to csv file: a header row, then one row per term with the
# same three figures expressed as percentages.
results = [[
    'Term',
    'Overall Membership (%)',
    'Turnover (%)',
    'New Hire Membership (%)',
]]
results.extend(
    [
        term,
        str(memberships[term] * 100),
        str(nh_percent[term] * 100),
        str(nh_memberships[term] * 100),
    ]
    for term in TERMS
)
kt.writecsv_summary(results, OUTPATH + 'historical_membership.csv')
def processing_function(raw):
    """
    Top-level processing function: add Registrar's Office schedule columns
    to every active person.

    The function is meant to be passed to the importer (in this case GuiIO),
    which calls it after it has parsed the raw data.

    For each active person the schedules filed under their last name are
    looked up and narrowed with choose_schedule. When exactly one schedule
    remains, its fields are copied into 'Schedule - <field>' columns and the
    day flags are concatenated into 'Schedule - Days'. People with no match,
    no choice, or more than one remaining schedule are recorded as errors
    and receive empty placeholder columns instead.

    Returns the result of filtering the records with
    kt.selectors.column_is_empty on 'Schedule - Course Title'. The error
    list is written to ERROR_FILE through kt.writecsv_summary.
    """
    # NOTE(review): published at module level, presumably so the imported
    # schedule data can be inspected after a run -- confirm before removing.
    global sdata

    def describe(prefix, person):
        # One aligned log line: prefix, quoted last name, padding, department.
        lname = person['Name: Last']
        padding = max(20 - len(lname), 1)
        return (prefix + '"' + lname + '"' + ' ' * padding
                + 'in department: ' + person['Department'])

    # Currently every active person is processed (no department or
    # hire-date restriction).
    kdata = kt.filterdata(raw, kt.selectors.allactives)

    # import Registrar's Office Schedule information
    sdata = kt.importrosched(RO_DATA_FILE)

    errors = []

    # Placeholder columns for skipped people. Bound before the loop so the
    # back-fill below also works when nobody ends up with a schedule, and
    # keyed exactly like the columns written for matched people.
    empty_cols = {
        'Schedule - Days': u'',
        'Schedule - Course Title': u'',
    }

    # extract schedules for each person and add columns to knack data
    for umid in kdata:
        person = kdata[umid]
        lname = person['Name: Last']

        # Grab all schedules with this person's last name
        # (since that's the only info the registrar gives us)
        try:
            candidates = sdata[lname]
        except KeyError:
            kdata[umid]['Schedule'] = ''
            msg = describe('Failed to find: \t', person)
            errors.append([lname, msg])
            print(msg)
            continue

        # Choose most likely from all schedules with that last name
        schedules = choose_schedule(person, candidates)
        if not schedules:
            kdata[umid]['Schedule'] = ''
            msg = describe('Failed to choose:\t', person)
            errors.append([lname, msg])
            print(msg)
            continue

        # Not sure how to deal with multiple results right now...
        # (recorded as an error, but not echoed to stdout)
        if len(schedules) > 1:
            msg = describe('Multiple schedules for:\t', person)
            errors.append([lname, msg])
            continue

        # Add to output data
        s = schedules[0]
        kdata[umid]['Schedule - Days'] = ''.join(
            [s['M'], s['T'], s['W'], s['TH'], s['F'], s['S'], s['SU']])
        for col in s.keys():
            out_col = 'Schedule - ' + col
            kdata[umid][out_col] = s[col]
            # Remember every column seen so skipped people can mirror it
            empty_cols[out_col] = u''

    # Add empty entries for people that have been skipped
    for umid in kdata:
        if 'Schedule - Course Title' not in kdata[umid]:
            kdata[umid].update(empty_cols)

    # Don't output anyone we don't have schedule info for
    kdata_filtered = kt.filterdata(
        kdata,
        lambda person: kt.selectors.column_is_empty(
            person, 'Schedule - Course Title'))

    print('Number of failures: ' + str(len(errors)))
    kt.writecsv_summary(errors, ERROR_FILE)

    return kdata_filtered
def processing_function(data):
    """
    Top-level processing function: fill in uniqname, email, enrolled
    department and degree for every person.

    The function is meant to be passed to the importer (in this case GuiIO),
    which calls it after it has parsed the raw data.

    data is used as a mapping of id -> person record. Records that already
    have 'Employer Unique Name', 'Secondary Email', 'Secondary Department'
    and 'Education' are not looked up; their 'Enrolled Department' and
    'Degree' columns are copied from the existing values. Everyone else is
    queried by "<first> <last>" through mc.querydb / mc.choose_person, and
    the new columns are filled from the chosen match (or left empty when the
    lookup fails).

    Returns the same mapping that was passed in (records are modified in
    place). Failed lookups are printed and written to ERROR_FILE through
    kt.writecsv_summary.
    """
    newdata = data  # alias, not a copy: the caller's records are updated
    errors = []
    ids = data.keys()
    tracker = len(ids)  # number of records still to visit

    # get uniqnames
    print("Getting uniqnames ...")
    for umid in ids:
        # Progress report every tenth record
        if not tracker % 10:
            print("\tnumremaining: %d" % tracker)

        record = newdata[umid]

        # Don't do anything if person has uniqname already
        if record['Employer Unique Name'] and record['Secondary Email'] and \
                record['Secondary Department'] and record['Education']:
            # Don't forget to add the new columns
            record['Enrolled Department'] = record['Secondary Department']
            record['Degree'] = record['Education']
            tracker -= 1
            continue

        query = data[umid]['Name: First'] + " " + data[umid]['Name: Last']
        result = mc.querydb(query)

        if result:
            person, _score = mc.choose_person(result, data[umid])
            if person:
                # uniqname & email
                try:
                    uniq = person['uniqname']
                except KeyError:
                    errors.append((query, 'No Uniqname: ' + query))
                    print(errors[-1][1])
                else:
                    record['Employer Unique Name'] = uniq
                    record['Secondary Email'] = uniq + "@umich.edu"

                # enrolled department (filled in even without a uniqname)
                enrolled_dept, degree = mc.getenrolled(person)
                record['Enrolled Department'] = enrolled_dept
                record['Degree'] = degree
            else:
                # Don't forget to add the new columns
                record['Enrolled Department'] = ''
                record['Degree'] = ''
                errors.append((query, "Can't choose person: " + query))
                print(errors[-1][1])
        else:
            # Don't forget to add the new columns
            record['Enrolled Department'] = ''
            record['Degree'] = ''
            errors.append((query, "Query Failed: " + query))
            print(errors[-1][1])

        tracker -= 1
        # Short pause between lookups (presumably to go easy on the
        # service behind mc.querydb -- confirm)
        time.sleep(.01)

    kt.writecsv_summary(errors, ERROR_FILE)

    return newdata