def create_dating_schedule(person_df, n_meeting=10):
    """
    Function to create speed dating schedule at CCN 2018 conference

    Parameters
    ==========
    person_df: pandas dataframe contains - PersonID, FullName, Abstract
        (only `PersonID` and `Abstract` are read here)
    n_meeting: int, number of meeting we would like to have; it is passed
        as both the lower and the upper bound of every constraint given
        to `create_lp_matrix`

    Output
    ======
    schedule: the value returned by `nest_answer`, built from the list of
        matches and the coloring of its line graph. The intermediate list
        of matches has the format [PersonID, [PersonID to meet]]
    """
    # Both sides of the matching are the same people, so the abstracts are
    # preprocessed once and reused (assumes `preprocess` is deterministic).
    abstracts = [preprocess(abstract) for abstract in person_df['Abstract']]
    A = affinity_computation(abstracts, list(abstracts),
                             n_components=10, min_df=1, max_df=0.8,
                             weighting='tfidf', projection='pca')

    # constraints, conflict of interest: a large negative affinity on the
    # diagonal, i.e. for the pairing of a person with themselves
    diagonal = np.arange(len(A))
    A[diagonal, diagonal] = -1000

    # for dating at CCN
    v, K, d = create_lp_matrix(
        A,
        min_reviewers_per_paper=n_meeting,
        max_reviewers_per_paper=n_meeting,
        min_papers_per_reviewer=n_meeting,
        max_papers_per_reviewer=n_meeting
    )
    x_sol = linprog(v, K, d)['x']
    b = create_assignment(x_sol, A)

    # Materialize the id column once instead of once per lookup.
    person_ids = list(person_df['PersonID'])
    output = [
        [person_ids[i], [person_ids[j] for j in np.nonzero(row)[0]]]
        for i, row in enumerate(b)
    ]

    # make optimal schedule
    schedule = nest_answer(
        output,
        format_answer(color_graph(build_line_graph(output))))

    return schedule
def create_dating_schedule(person_df, n_meeting=10):
    """
    Function to create speed dating schedule at CCN 2018 conference

    Parameters
    ==========
    person_df: pandas dataframe contains - PersonID, FullName, Abstract
        (only `PersonID` and `Abstract` are read here)
    n_meeting: int, number of meeting we would like to have; it is passed
        as both the lower and the upper bound of every constraint given
        to `create_lp_matrix`

    Output
    ======
    schedule: the value returned by `nest_answer`, built from the list of
        matches and the coloring of its line graph. The intermediate list
        of matches has the format [PersonID, [PersonID to meet]]
    """
    # Both sides of the matching are the same people, so the abstracts are
    # preprocessed once and reused (assumes `preprocess` is deterministic).
    abstracts = [preprocess(abstract) for abstract in person_df['Abstract']]
    A = affinity_computation(abstracts, list(abstracts),
                             n_components=10, min_df=1, max_df=0.8,
                             weighting='tfidf', projection='pca')

    # constraints, conflict of interest: a large negative affinity on the
    # diagonal, i.e. for the pairing of a person with themselves
    diagonal = np.arange(len(A))
    A[diagonal, diagonal] = -1000

    # for dating at CCN
    v, K, d = create_lp_matrix(
        A,
        min_reviewers_per_paper=n_meeting,
        max_reviewers_per_paper=n_meeting,
        min_papers_per_reviewer=n_meeting,
        max_papers_per_reviewer=n_meeting
    )
    x_sol = linprog(v, K, d)['x']
    b = create_assignment(x_sol, A)

    # Materialize the id column once instead of once per lookup.
    person_ids = list(person_df['PersonID'])
    output = [
        [person_ids[i], [person_ids[j] for j in np.nonzero(row)[0]]]
        for i, row in enumerate(b)
    ]

    # make optimal schedule
    schedule = nest_answer(
        output,
        format_answer(color_graph(build_line_graph(output))))

    return schedule
def assign_articles_to_reviewers(article_df, reviewer_df, people_df,
                                 n_trim=200,
                                 min_reviewers_per_paper=6,
                                 max_reviewers_per_paper=6,
                                 min_papers_per_reviewer=4,
                                 max_papers_per_reviewer=6):
    """
    Perform reviewer-assignment from dataframe of article, reviewer, and people

    The input dataframes are not modified; the positional id columns used
    internally are added to copies.

    Parameters
    ==========
    article_df: a dataframe that has columns `PaperID`, `Title`, `Abstract`,
        and `PersonIDList` where PersonIDList contains string of semicolon
        separated list of PersonID (empty entries, e.g. from a trailing
        semicolon, are ignored)
    reviewer_df: a dataframe that has columns `PersonID` and `Abstract`
    people_df: dataframe that has columns `PersonID`, `FullName`
    n_trim: int, for every paper the `n_trim` lowest affinities are set to 0
        before solving; 0 (or a negative value) disables trimming.
        NOTE: a value greater than or equal to the number of reviewers
        zeroes every affinity of a paper.
    min_reviewers_per_paper, max_reviewers_per_paper,
    min_papers_per_reviewer, max_papers_per_reviewer: int, bounds forwarded
        to `create_lp_matrix`; the defaults are the values used for CCN

    We assume `PersonID` is an integer

    Output
    ======
    article_assignment_df: an assigned reviewers dataframe, each row of article
        will have list of reviewers in `ReviewerIDList` column and their name
        in reviewer_names (both as semicolon separated strings)
    """
    # Positional row ids link dataframe rows to rows/columns of the affinity
    # matrix. `assign` returns new dataframes, so the caller's are untouched.
    article_df = article_df.assign(paper_id=list(range(len(article_df))))
    reviewer_df = reviewer_df.assign(person_id=list(range(len(reviewer_df))))

    papers = list(
        (article_df['Title'] + ' ' + article_df['Abstract']).map(preprocess))
    reviewers = list(reviewer_df['Abstract'].map(preprocess))

    # Calculate conflict of interest based on co-authors
    coauthor_pairs = [
        [int(row.PaperID), int(token)]
        for _, row in article_df.iterrows()
        for token in row.PersonIDList.split(';')
        if token.strip()  # skip empty entries such as a trailing ';'
    ]
    coauthors_df = pd.DataFrame(coauthor_pairs,
                                columns=['PaperID', 'PersonID'])
    # Inner merges keep only co-authors that are also reviewers and translate
    # both ids to matrix positions.
    coi_df = coauthors_df.merge(
        article_df[['PaperID', 'paper_id']], on='PaperID'
    ).merge(
        reviewer_df[['PersonID', 'person_id']], on='PersonID'
    )[['paper_id', 'person_id']]

    # calculate affinity matrix
    A = affinity_computation(papers, reviewers,
                             n_components=10, min_df=2, max_df=0.8,
                             weighting='tfidf', projection='pca')

    # trim the lowest affinities of every paper to reduce the problem size
    A_trim = np.array(A, copy=True)
    if n_trim > 0:
        for row in A_trim:  # each row is a view, so this edits A_trim
            row[np.argsort(row)[:n_trim]] = 0

    # assign conflict of interest to have high negative cost
    for i, j in zip(coi_df.paper_id.tolist(), coi_df.person_id.tolist()):
        A_trim[i, j] = -1000

    v, K, d = create_lp_matrix(
        A_trim,
        min_reviewers_per_paper=min_reviewers_per_paper,
        max_reviewers_per_paper=max_reviewers_per_paper,
        min_papers_per_reviewer=min_papers_per_reviewer,
        max_papers_per_reviewer=max_papers_per_reviewer)
    x_sol = linprog(v, K, d)['x']
    b = create_assignment(x_sol, A_trim)

    reviewer_ids = list(reviewer_df.PersonID)
    # map reviewer id to reviewer name
    reviewer_name_dict = dict(zip(people_df['PersonID'],
                                  people_df['FullName']))

    assignments = []
    for paper_idx, row in enumerate(b):
        # Non-zero entries of a row mark the reviewers chosen for the paper.
        assigned_ids = [reviewer_ids[j] for j in np.nonzero(row)[0]]
        assignments.append([
            paper_idx,
            ';'.join(str(rid) for rid in assigned_ids),
            ';'.join(reviewer_name_dict[rid] for rid in assigned_ids),
        ])
    assignments_df = pd.DataFrame(
        assignments,
        columns=['paper_id', 'ReviewerIDList', 'reviewer_names'])

    article_assignment_df = article_df.merge(
        assignments_df, on='paper_id').drop('paper_id', axis=1)
    return article_assignment_df
# trimming affinity matrix to reduce the problem size if n_trim != 0: A_trim = [] for r in range(len(A)): a = A[r, :] a[np.argsort(a)[0:n_trim]] = 0 A_trim.append(a) A_trim = np.vstack(A_trim) else: A_trim = A print('Solving a matching problem...') v, K, d = create_lp_matrix(A_trim, min_reviewers_per_paper=n_match, max_reviewers_per_paper=n_match, min_papers_per_reviewer=n_match, max_papers_per_reviewer=n_match) x_sol = linprog(v, K, d)['x'] b = create_assignment(x_sol, A_trim) if (b.sum() == 0): print( 'Seems like the problem does not converge, try reducing <n_trim> but not too low!' ) else: print('Successfully assigned all the match!') if (b.sum() != 0): output = [] user_ids_map = {ri: r['user_id'] for ri, r in df.iterrows()} for i in range(len(b)):
# COIs cois_ids = submission_df.AuthorIds.map( lambda x: create_coi_author_ids(x, reviewer_df)) cois = submission_df.AuthorsList.map( lambda x: create_coi_list(x, reviewer_df)) cois_df = pd.DataFrame(cois + cois_ids, columns=['AuthorsList']) for i, r in cois_df.iterrows(): if len(r['AuthorsList']) > 0: for idx in r['AuthorsList']: A[i, idx] = -1000 # assignment A_a, A_b = A[:, :len(reviewer_a_df)], A[:, len(reviewer_a_df):] v, K, d = create_lp_matrix(A_a, min_reviewers_per_paper=2, max_reviewers_per_paper=2, min_papers_per_reviewer=10, max_papers_per_reviewer=12) x_sol = linprog(v, K, d)['x'] b_a = create_assignment(x_sol, A_a) v, K, d = create_lp_matrix(A_b, min_reviewers_per_paper=2, max_reviewers_per_paper=2, min_papers_per_reviewer=10, max_papers_per_reviewer=12) x_sol = linprog(v, K, d)['x'] b_b = create_assignment(x_sol, A_b) reviewer_a_map = {i: r['UserID'] for i, r in reviewer_a_df.iterrows()} reviewer_b_map = {i: r['UserID'] for i, r in reviewer_b_df.iterrows()} paper_id_map = {i: r['PaperID'] for i, r in submission_df.iterrows()} assignments_a_df = create_assignment_dataframe(b_a, reviewer_a_map,