Пример #1
0
 def test_uni_parse(self):
     """Check selected fields of the parsed institution test fixture.

     String and integer fields are compared exactly. Floating-point fields
     are compared with ``assertAlmostEqual`` so the test does not hinge on
     bit-exact float arithmetic inside the parser.
     """
     fixture = get_test_universities()
     institutions = parse_institution_records(fixture)

     # Exact-valued (string / integer) fields.
     self.assertEqual(institutions['Yale University']['Region'],
                      'Northeast')
     self.assertEqual(institutions['Princeton University']['NRC95'], 6)
     self.assertEqual(institutions['Carnegie Mellon University']['USN2010'],
                      1)

     # Floating-point fields: tolerate rounding noise.
     harvard = institutions['Harvard University']
     self.assertAlmostEqual(harvard['pi'], 6.12)
     self.assertAlmostEqual(harvard['pi_inv'], 1. / 6.12)
     self.assertAlmostEqual(
         institutions['Stanford University']['pi_rescaled'], 1.)

     self.assertEqual(institutions['MIT']['u'], 3)
Пример #2
0
    args = argparse.ArgumentParser()
    args.add_argument('-f', '--fac-file', help='Faculty file', required=True)
    args.add_argument('-i', '--inst-file', help='Institutions file', required=True)
    args.add_argument('-p', '--prob-function', help='Candidate probability/matching function', required=True)
    args.add_argument('-n', '--num-iters', help='Number of iterations to est. error', default=100, type=int)
    args.add_argument('-w', '--weights', help='Model parameters (as comma-separated string)', type=allow_negatives)
    args.add_argument('-r', '--ranking', help='Which ranking to use', default='pi_rescaled')
    args.add_argument('-v', '--validation', help='Years to hold out', default='')
    args = args.parse_args()
    return args


if __name__=="__main__":
    # Script entry point: parse the command line, load the institution
    # records, then load the per-year candidate/job pools.
    args = interface()
    
    # NOTE(review): neither file object opened below is explicitly closed.
    inst = parse_institution_records(open(args.inst_file, 'rU'))
    # NOTE(review): `ranking` is hard-coded to 'pi_rescaled' in this call; the
    # -r/--ranking command-line option is not consulted here -- confirm that
    # this is intended.
    candidate_pools, job_pools, job_ranks, year_range = load_assistant_prof_pools(open(args.fac_file), 
                                                                                  school_info=inst, 
                                                                                  ranking='pi_rescaled',
                                                                                  year_start=1970, 
                                                                                  year_stop=2012, 
                                                                                  year_step=1)

    if args.validation:  # if specified years are to be evaluated
        # --validation is a comma-separated list of integer years.
        hold_out = [int(year) for year in args.validation.split(',')]
        testing_candidates, testing_jobs, testing_job_ranks = [], [], []
        # The pools are indexed in parallel with year_range: entry i of each
        # pool list is collected when year_range[i] is a held-out year.
        for i, year in enumerate(year_range):
            if year in hold_out:
                testing_candidates.append(candidate_pools[i])
                testing_jobs.append(job_pools[i])
                testing_job_ranks.append(job_ranks[i])
Пример #3
0
# Filename template with a single %s slot to be filled in by the user of the
# constant (presumably a faculty member's DBLP identifier, yielding the name
# of a saved DBLP HTML page -- TODO confirm against the code that formats it).
DBLP_FILE = "DBLP_%s_file_0.html"


def interface():
    """Parse this script's command-line options from ``sys.argv``.

    ``--inst-file``, ``--faculty-file`` and ``--dblp-dir`` are declared
    required because the script's entry point opens / joins them
    unconditionally. Omitting one now produces an argparse usage error
    instead of a later failure on a ``None`` value. ``--gs-dir`` remains
    optional.

    Returns:
        argparse.Namespace: with ``inst_file``, ``faculty_file``, ``gs_dir``
        and ``dblp_dir`` attributes.

    Raises:
        SystemExit: raised by argparse when a required option is missing or
        the command line is otherwise invalid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--inst-file", help="Institution profiles", required=True
    )
    parser.add_argument(
        "-f", "--faculty-file", help="Faculty profiles", required=True
    )
    parser.add_argument("-g", "--gs-dir", help="Directory of GS profiles")
    parser.add_argument(
        "-d", "--dblp-dir", help="Directory of DBLP profiles", required=True
    )
    return parser.parse_args()


if __name__ == "__main__":
    # Script entry point: load institution and faculty records, then walk the
    # faculty records looking at their per-person profile entries.
    args = interface()
    # NOTE(review): the two file objects opened below are never explicitly
    # closed.
    inst = institution_parser.parse_institution_records(open(args.inst_file, "rU"))
    faculty = load_assistant_profs(open(args.faculty_file, "rU"), inst)
    # gs_prefix = os.path.join(args.gs_dir, 'GSP_')
    # Path prefix of the form "<dblp_dir>/DBLP_".
    dblp_prefix = os.path.join(args.dblp_dir, "DBLP_")

    for f in faculty:
        # Check for each profile, download if missing
        # The bare string literal below holds the disabled Google Scholar
        # branch; as a string expression it is never executed as code.
        """
        if 'gs' in f:
            gs_file = os.path.join(args.gs_dir, GS_FILE % f['gs'])
            if not os.path.isfile(gs_file):
                print 'GS -> ', f['facultyName']
                download_all_gs_pages(f['gs'], gs_prefix)
        """

        # Only records that carry a "dblp" entry are handled further.
        if "dblp" in f:
Пример #4
0
from faculty_hiring.parse.institution_parser import parse_institution_records


def interface():
    """Build the command-line parser for this script and parse ``sys.argv``.

    All three options are mandatory:
        -f / --fac-file     faculty file
        -i / --inst-file    institutions file
        -s / --orders-file  input (pickle) file

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    parser = argparse.ArgumentParser()
    # (short flag, long flag, help text) for each required option, in the
    # order they should appear in the generated usage message.
    option_specs = (
        ('-f', '--fac-file', 'Faculty file'),
        ('-i', '--inst-file', 'Institutions file'),
        ('-s', '--orders-file', 'Input (pickle) file'),
    )
    for short_flag, long_flag, description in option_specs:
        parser.add_argument(short_flag, long_flag, help=description,
                            required=True)
    return parser.parse_args()


if __name__=="__main__":
    # Script entry point: load the institution records and the per-year
    # candidate/job pools, then load a set of hiring orders and check that it
    # lines up with the job pools.
    args = interface()
    
    # NOTE(review): neither file object opened below is explicitly closed.
    inst = parse_institution_records(open(args.inst_file, 'rU'))
    candidate_pools, job_pools, job_ranks, year_range = load_assistant_prof_pools(open(args.fac_file), 
                                                                                  school_info=inst, 
                                                                                  ranking='pi_rescaled',
                                                                                  year_start=1970, 
                                                                                  year_stop=2012, 
                                                                                  year_step=1)


    # NOTE(review): the --orders-file help text describes this input as a
    # pickle file; if load_hiring_order_set unpickles it, only trusted files
    # should be passed -- confirm.
    hiring_orders, hiring_probs = load_hiring_order_set(args.orders_file)
    # There must be exactly one entry in hiring_orders per job pool.
    if len(hiring_orders) != len(job_pools):
        raise ValueError('Incorrect number of pools!')

    for i, pool in enumerate(job_pools):
        pool_size = len(pool)
        # Compare this pool's size with the length of the first order stored
        # for the same index.
        if len(pool) != len(hiring_orders[i][0]):
Пример #5
0
            name = line.split(':', 1)[-1].strip()
            if name == next_name:
                output.write('# dblp_n      : %d\n' % num_papers[next_ind])
                output.write('# dblp_n_2011 : %d\n' %
                             num_papers_2011[next_ind])
                next_ind += 1
                if next_ind < max_ind:
                    next_name = names[next_ind]
                else:
                    done = True

    if not done:
        print 'WARNING: failed to link all z-scores!'

    output.close()


if __name__ == "__main__":
    # Script entry point: load institutions and faculty, attach publication
    # data, compute per-topic statistics and write the annotated faculty file.
    args = interface()

    # Open the inputs in `with` blocks so the handles are closed as soon as
    # parsing finishes (previously they were never closed). This assumes both
    # parsers fully consume their file argument before returning; `faculty`
    # is passed to several functions below, so it is presumably a
    # materialised collection -- confirm.
    with open(args.inst_file) as inst_fp:
        inst = institution_parser.parse_institution_records(inst_fp)
    with open(args.faculty_file, 'rU') as faculty_fp:
        faculty = load_assistant_profs(faculty_fp, inst)

    load.load_all_publications(faculty, args.dblp_dir, gs_dir=None)
    dists, tots = get_paper_counts_by_topic(faculty)
    means, stds = get_topic_means_stds(dists, tots)
    # Parenthesised single-argument form prints identically under Python 2
    # and also parses under Python 3.
    print(means)
    print(stds)
    set_zscores(faculty, means, stds)
    # Alternative output step, currently disabled:
    #add_zscores_to_file(faculty, args.faculty_file, args.output_file)
    add_counts_to_file(faculty, args.faculty_file, args.output_file)
Пример #6
0
# Directory of Google Scholar profile data (inferred from the name and path;
# TODO confirm).
# NOTE(review): this is an absolute path into one user's home directory, so
# the module only works unchanged on that machine.
GS_DIR = '/Users/samfway/Documents/Work/ClausetLab/Projects/research_env/data/gs_profiles_042617/'

# Faculty record files, one per field, located under DATA_DIR.
# BS = Business and CS = Computer Science (per the section comments further
# down); HS is presumably History -- TODO confirm.
BS_FACULTY_FILE = os.path.join(DATA_DIR, 'faculty_bs_CURRENT.txt')
CS_FACULTY_FILE = os.path.join(DATA_DIR, 'faculty_cs_CURRENT.txt')
HS_FACULTY_FILE = os.path.join(DATA_DIR, 'faculty_hs_CURRENT.txt')

# Institution record files for the same three fields.
BS_INST_FILE = os.path.join(DATA_DIR, 'inst_bs_CURRENT.txt')
CS_INST_FILE = os.path.join(DATA_DIR, 'inst_cs_CURRENT.txt')
HS_INST_FILE = os.path.join(DATA_DIR, 'inst_hs_CURRENT.txt')

# Colors
# Three 0-255 components scaled into [0, 1] floats (presumably RGB).
ACCENT_COLOR_1 = np.array([176., 116., 232.]) / 255.

# Load the standard set of files
# Business
# Each input is opened in a `with` block so its handle is closed as soon as
# the parser returns (previously all three handles were left open). This
# assumes the parsers finish reading before returning; the faculty generator
# is materialised with list() inside the block for that reason.
with open(BS_INST_FILE, 'rU') as _inst_fp:
    bs_inst = institution_parser.parse_institution_records(_inst_fp)

# Every faculty record in the business file.
with open(BS_FACULTY_FILE, 'rU') as _faculty_fp:
    all_bs_faculty = list(faculty_parser.parse_faculty_records(
        _faculty_fp, school_info=bs_inst, ranking='pi'))

# The records selected by load_assistant_profs for the 1970-2012 window,
# read from the same file.
with open(BS_FACULTY_FILE, 'rU') as _faculty_fp:
    bs_faculty = load.load_assistant_profs(_faculty_fp,
                                           school_info=bs_inst,
                                           ranking='pi',
                                           year_start=1970,
                                           year_stop=2012)
# bs_faculty_df = convert_faculty_list_to_df(bs_faculty)

# Computer Science
cs_inst = institution_parser.parse_institution_records(open(
    CS_INST_FILE, 'rU'))
all_cs_faculty = [