def test_uni_parse(self):
    """Spot-check individual fields of the parsed test institution records."""
    parsed = parse_institution_records(get_test_universities())

    # (institution, field, expected value). The order is kept so that the
    # first mismatch reported is the same one a straight-line version hits.
    expectations = (
        ('Yale University', 'Region', 'Northeast'),
        ('Princeton University', 'NRC95', 6),
        ('Carnegie Mellon University', 'USN2010', 1),
        ('Harvard University', 'pi', 6.12),
        ('Harvard University', 'pi_inv', 1. / 6.12),
        ('Stanford University', 'pi_rescaled', 1.),
        ('MIT', 'u', 3),
    )
    for school, field, expected in expectations:
        self.assertEqual(parsed[school][field], expected)
args = argparse.ArgumentParser() args.add_argument('-f', '--fac-file', help='Faculty file', required=True) args.add_argument('-i', '--inst-file', help='Institutions file', required=True) args.add_argument('-p', '--prob-function', help='Candidate probability/matching function', required=True) args.add_argument('-n', '--num-iters', help='Number of iterations to est. error', default=100, type=int) args.add_argument('-w', '--weights', help='Model parameters (as comma-separated string)', type=allow_negatives) args.add_argument('-r', '--ranking', help='Which ranking to use', default='pi_rescaled') args.add_argument('-v', '--validation', help='Years to hold out', default='') args = args.parse_args() return args if __name__=="__main__": args = interface() inst = parse_institution_records(open(args.inst_file, 'rU')) candidate_pools, job_pools, job_ranks, year_range = load_assistant_prof_pools(open(args.fac_file), school_info=inst, ranking='pi_rescaled', year_start=1970, year_stop=2012, year_step=1) if args.validation: # if specified years are to be evaluated hold_out = [int(year) for year in args.validation.split(',')] testing_candidates, testing_jobs, testing_job_ranks = [], [], [] for i, year in enumerate(year_range): if year in hold_out: testing_candidates.append(candidate_pools[i]) testing_jobs.append(job_pools[i]) testing_job_ranks.append(job_ranks[i])
DBLP_FILE = "DBLP_%s_file_0.html" def interface(): args = argparse.ArgumentParser() args.add_argument("-i", "--inst-file", help="Institution profiles") args.add_argument("-f", "--faculty-file", help="Faculty profiles") args.add_argument("-g", "--gs-dir", help="Directory of GS profiles") args.add_argument("-d", "--dblp-dir", help="Directory of DBLP profiles") args = args.parse_args() return args if __name__ == "__main__": args = interface() inst = institution_parser.parse_institution_records(open(args.inst_file, "rU")) faculty = load_assistant_profs(open(args.faculty_file, "rU"), inst) # gs_prefix = os.path.join(args.gs_dir, 'GSP_') dblp_prefix = os.path.join(args.dblp_dir, "DBLP_") for f in faculty: # Check for each profile, download if missing """ if 'gs' in f: gs_file = os.path.join(args.gs_dir, GS_FILE % f['gs']) if not os.path.isfile(gs_file): print 'GS -> ', f['facultyName'] download_all_gs_pages(f['gs'], gs_prefix) """ if "dblp" in f:
from faculty_hiring.parse.institution_parser import parse_institution_records def interface(): args = argparse.ArgumentParser() args.add_argument('-f', '--fac-file', help='Faculty file', required=True) args.add_argument('-i', '--inst-file', help='Institutions file', required=True) args.add_argument('-s', '--orders-file', help='Input (pickle) file', required=True) args = args.parse_args() return args if __name__=="__main__": args = interface() inst = parse_institution_records(open(args.inst_file, 'rU')) candidate_pools, job_pools, job_ranks, year_range = load_assistant_prof_pools(open(args.fac_file), school_info=inst, ranking='pi_rescaled', year_start=1970, year_stop=2012, year_step=1) hiring_orders, hiring_probs = load_hiring_order_set(args.orders_file) if len(hiring_orders) != len(job_pools): raise ValueError('Incorrect number of pools!') for i, pool in enumerate(job_pools): pool_size = len(pool) if len(pool) != len(hiring_orders[i][0]):
name = line.split(':', 1)[-1].strip() if name == next_name: output.write('# dblp_n : %d\n' % num_papers[next_ind]) output.write('# dblp_n_2011 : %d\n' % num_papers_2011[next_ind]) next_ind += 1 if next_ind < max_ind: next_name = names[next_ind] else: done = True if not done: print 'WARNING: failed to link all z-scores!' output.close() if __name__ == "__main__": args = interface() inst = institution_parser.parse_institution_records(open(args.inst_file)) faculty = load_assistant_profs(open(args.faculty_file, 'rU'), inst) load.load_all_publications(faculty, args.dblp_dir, gs_dir=None) dists, tots = get_paper_counts_by_topic(faculty) means, stds = get_topic_means_stds(dists, tots) print means print stds set_zscores(faculty, means, stds) #add_zscores_to_file(faculty, args.faculty_file, args.output_file) add_counts_to_file(faculty, args.faculty_file, args.output_file)
# Directory of 'gs_profiles' data (presumably Google Scholar profiles -- confirm).
# NOTE(review): this is a hard-coded absolute path into one user's home
# directory; unlike the paths below it is not derived from DATA_DIR.
GS_DIR = '/Users/samfway/Documents/Work/ClausetLab/Projects/research_env/data/gs_profiles_042617/'

# Faculty and institution files, all resolved under DATA_DIR. The bs_* files
# feed the "Business" section below and the cs_* files the "Computer Science"
# section; the hs_* files are not used in the visible portion of this script.
BS_FACULTY_FILE = os.path.join(DATA_DIR, 'faculty_bs_CURRENT.txt')
CS_FACULTY_FILE = os.path.join(DATA_DIR, 'faculty_cs_CURRENT.txt')
HS_FACULTY_FILE = os.path.join(DATA_DIR, 'faculty_hs_CURRENT.txt')
BS_INST_FILE = os.path.join(DATA_DIR, 'inst_bs_CURRENT.txt')
CS_INST_FILE = os.path.join(DATA_DIR, 'inst_cs_CURRENT.txt')
HS_INST_FILE = os.path.join(DATA_DIR, 'inst_hs_CURRENT.txt')

# Colors
# Three 0-255 components scaled into the 0-1 range (presumably R, G, B).
ACCENT_COLOR_1 = np.array([176., 116., 232.]) / 255.

# Load the standard set of files
# NOTE(review): every open(...) below is passed straight into a parser and is
# never explicitly closed here. The 'rU' mode is deprecated in Python 3 and
# rejected from 3.11 on.
# Business
bs_inst = institution_parser.parse_institution_records(open(
    BS_INST_FILE, 'rU'))
# Materialise everything parse_faculty_records produces into a list.
all_bs_faculty = [
    person for person in faculty_parser.parse_faculty_records(
        open(BS_FACULTY_FILE, 'rU'), school_info=bs_inst, ranking='pi')
]
# Same faculty file, loaded through load_assistant_profs with a 1970/2012
# year_start/year_stop (presumably a hiring-year window -- confirm in `load`).
bs_faculty = load.load_assistant_profs(open(BS_FACULTY_FILE, 'rU'),
                                       school_info=bs_inst,
                                       ranking='pi',
                                       year_start=1970,
                                       year_stop=2012)
# bs_faculty_df = convert_faculty_list_to_df(bs_faculty)
# Computer Science
cs_inst = institution_parser.parse_institution_records(open(
    CS_INST_FILE, 'rU'))
all_cs_faculty = [