def create_dating_schedule(person_df, n_meeting=10):
    """
    Function to create speed dating schedule at CCN 2018 conference

    The abstracts are preprocessed, a person-by-person affinity matrix is
    computed from them, a linear program selects who meets whom, and the
    selected meetings are passed through `build_line_graph`, `color_graph`,
    `format_answer` and `nest_answer` to produce the schedule.

    Parameters
    ==========
    person_df: pandas dataframe contains - PersonID, FullName, Abstract
        (only `PersonID` and `Abstract` are read here)
    n_meeting: int, number of meeting we would like to have; it is passed
        as both the minimum and the maximum bound to `create_lp_matrix`

    Output
    ======
    schedule: list, the value returned by `nest_answer`; described upstream
        as a list of person id and person ids to meet in the following
        format: [PersonID, [PersonID to meet]]
    """
    # linear programming
    # Both sides of the matching are the same people, so the abstracts are
    # preprocessed once and each side receives its own list object.
    # NOTE(review): assumes `preprocess` is deterministic -- confirm.
    persons_1 = [preprocess(abstract) for abstract in person_df['Abstract']]
    persons_2 = list(persons_1)

    A = affinity_computation(persons_1,
                             persons_2,
                             n_components=10,
                             min_df=1,
                             max_df=0.8,
                             weighting='tfidf',
                             projection='pca')
    # constraints, conflict of interest: a large negative value on the
    # diagonal discourages matching a person with themselves
    diagonal = np.arange(len(A))
    A[diagonal, diagonal] = -1000

    # for dating at CCN: every person gets exactly n_meeting meetings
    v, K, d = create_lp_matrix(A,
                               min_reviewers_per_paper=n_meeting,
                               max_reviewers_per_paper=n_meeting,
                               min_papers_per_reviewer=n_meeting,
                               max_papers_per_reviewer=n_meeting)
    # NOTE(review): the solver result is used without checking whether a
    # feasible solution was found -- confirm how `linprog` reports failure.
    x_sol = linprog(v, K, d)['x']
    b = create_assignment(x_sol, A)

    # Build the id list once; it is needed for every row and every partner.
    person_ids = list(person_df['PersonID'])
    output = []
    for i, row in enumerate(b):
        # non-zero entries of row i are the positions of the people i meets
        partners = [person_ids[j] for j in np.nonzero(row)[0]]
        output.append([person_ids[i], partners])

    # make optimal schedule
    schedule = nest_answer(
        output, format_answer(color_graph(build_line_graph(output))))

    return schedule
def create_dating_schedule(person_df, n_meeting=10):
    """
    Function to create speed dating schedule at CCN 2018 conference

    Steps: preprocess every abstract, compute a person-by-person affinity
    matrix, solve a linear program that picks the meetings, then hand the
    meetings to `build_line_graph` / `color_graph` / `format_answer` /
    `nest_answer` to obtain the final schedule.

    Parameters
    ==========
    person_df: pandas dataframe contains - PersonID, FullName, Abstract
        (this function reads `PersonID` and `Abstract`)
    n_meeting: int, number of meeting we would like to have; used as the
        lower and the upper bound of every constraint given to
        `create_lp_matrix`

    Output
    ======
    schedule: list, whatever `nest_answer` returns; documented upstream as
        list of person id and person ids to meet in the following format:
        [PersonID, [PersonID to meet]]
    """
    # linear programming
    # The same abstracts feed both sides of the matching, so run
    # `preprocess` only once and give the second side a shallow copy.
    # NOTE(review): relies on `preprocess` being deterministic -- confirm.
    persons_1 = [preprocess(text) for text in person_df['Abstract']]
    persons_2 = list(persons_1)

    A = affinity_computation(persons_1, persons_2,
                             n_components=10, min_df=1, max_df=0.8,
                             weighting='tfidf', projection='pca')
    # constraints, conflict of interest: penalise the diagonal so that
    # nobody is scheduled to meet themselves
    idx = np.arange(len(A))
    A[idx, idx] = -1000

    # for dating at CCN: exactly n_meeting meetings per person
    v, K, d = create_lp_matrix(
        A,
        min_reviewers_per_paper=n_meeting, max_reviewers_per_paper=n_meeting,
        min_papers_per_reviewer=n_meeting, max_papers_per_reviewer=n_meeting
    )
    # NOTE(review): no check is made that the solver found a solution --
    # confirm how `linprog` signals failure.
    x_sol = linprog(v, K, d)['x']
    b = create_assignment(x_sol, A)

    # Materialise the ids a single time instead of once per lookup.
    person_ids = list(person_df['PersonID'])
    output = [
        [person_ids[i], [person_ids[j] for j in np.nonzero(row)[0]]]
        for i, row in enumerate(b)
    ]

    # make optimal schedule
    line_graph = build_line_graph(output)
    schedule = nest_answer(output, format_answer(color_graph(line_graph)))

    return schedule
def assign_articles_to_reviewers(article_df, reviewer_df, people_df,
                                 n_trim=200,
                                 min_reviewers_per_paper=6,
                                 max_reviewers_per_paper=6,
                                 min_papers_per_reviewer=4,
                                 max_papers_per_reviewer=6):
    """
    Perform reviewer-assignment from dataframe of article, reviewer, and people

    The input dataframes are not modified; the positional helper columns
    used internally are attached to copies.

    Parameters
    ==========
    article_df: a dataframe that has columns `PaperID`, `Title`, `Abstract`, and `PersonIDList`
        where PersonIDList contains string of semicolon separated list of PersonID
        (blank entries, e.g. from a trailing semicolon, are ignored)
    reviewer_df: a dataframe that has columns `PersonID` and `Abstract`
    people_df:  dataframe that has columns `PersonID`, `FullName`
    n_trim: int, number of smallest affinity values zeroed in each paper's
        row before solving; 0 disables trimming. If it is at least the
        number of reviewers, every affinity in the row becomes 0.
    min_reviewers_per_paper, max_reviewers_per_paper,
    min_papers_per_reviewer, max_papers_per_reviewer: int, bounds forwarded
        unchanged to `create_lp_matrix` (defaults are the CCN settings)

    We assume `PersonID` is an integer

    Output
    ======
    article_assignment_df: an assigned reviewers dataframe, each row of article will have
        list of reviewers in `ReviewerIDList` column and their name in reviewer_names,
        both as semicolon separated strings

    Raises
    ======
    KeyError: if an assigned reviewer's `PersonID` is missing from `people_df`
    """
    papers = list(
        (article_df['Title'] + ' ' + article_df['Abstract']).map(preprocess))
    reviewers = list(reviewer_df['Abstract'].map(preprocess))

    # Positional indices into the affinity matrix (rows = papers,
    # columns = reviewers), attached to copies so the caller's frames
    # keep their original columns.
    article_idx_df = article_df.assign(
        paper_id=list(range(len(article_df))))
    reviewer_idx_df = reviewer_df.assign(
        person_id=list(range(len(reviewer_df))))

    # Calculate conflict of interest based on co-authors
    coauthor_rows = [
        [int(paper_id), int(token)]
        for paper_id, id_list in zip(article_df['PaperID'],
                                     article_df['PersonIDList'])
        for token in id_list.split(';')
        if token.strip()  # skip blanks left by stray/trailing semicolons
    ]
    coauthors_df = pd.DataFrame(coauthor_rows,
                                columns=['PaperID', 'PersonID'])
    # Keep only authors who are also reviewers, expressed as matrix indices.
    coi_df = coauthors_df.merge(
        article_idx_df[['PaperID', 'paper_id']], on='PaperID'
    ).merge(
        reviewer_idx_df[['PersonID', 'person_id']], on='PersonID'
    )[['paper_id', 'person_id']]

    # calculate affinity matrix
    A = affinity_computation(papers,
                             reviewers,
                             n_components=10,
                             min_df=2,
                             max_df=0.8,
                             weighting='tfidf',
                             projection='pca')

    # Zero the n_trim smallest entries of every row.
    # NOTE(review): the original comment said "trim distance that are too
    # high", yet the code zeroes the *smallest* values -- confirm the
    # intended sign convention of the affinity matrix.
    A_trim = np.array(A)
    for row in A_trim:  # each row is a view, so the write lands in A_trim
        row[np.argsort(row)[:n_trim]] = 0

    # assign conflict of interest to have high negative cost
    for i, j in zip(coi_df['paper_id'].tolist(),
                    coi_df['person_id'].tolist()):
        A_trim[i, j] = -1000

    # for CCN case, the defaults are 6 reviewers per paper and 4-6 papers
    # per reviewer
    v, K, d = create_lp_matrix(
        A_trim,
        min_reviewers_per_paper=min_reviewers_per_paper,
        max_reviewers_per_paper=max_reviewers_per_paper,
        min_papers_per_reviewer=min_papers_per_reviewer,
        max_papers_per_reviewer=max_papers_per_reviewer)
    # NOTE(review): the solver result is used without a feasibility check --
    # confirm how `linprog` reports failure.
    x_sol = linprog(v, K, d)['x']
    b = create_assignment(x_sol, A_trim)

    reviewer_ids = list(reviewer_df['PersonID'])
    # map reviewer id to reviewer name
    reviewer_name_dict = dict(
        zip(people_df['PersonID'], people_df['FullName']))

    assignments = []
    for paper_idx, row in enumerate(b):
        # non-zero columns of the row are the reviewers chosen for the paper
        assigned_ids = [reviewer_ids[j] for j in np.nonzero(row)[0]]
        assignments.append([
            paper_idx,
            ';'.join(str(rid) for rid in assigned_ids),
            ';'.join(reviewer_name_dict[rid] for rid in assigned_ids),
        ])
    assignments_df = pd.DataFrame(
        assignments, columns=['paper_id', 'ReviewerIDList', 'reviewer_names'])

    # Join on the positional index, then drop it from the result.
    article_assignment_df = article_idx_df.merge(
        assignments_df, on='paper_id').drop('paper_id', axis=1)
    return article_assignment_df
Exemplo n.º 4
0
    # trimming affinity matrix to reduce the problem size
    if n_trim != 0:
        A_trim = []
        for r in range(len(A)):
            a = A[r, :]
            a[np.argsort(a)[0:n_trim]] = 0
            A_trim.append(a)
        A_trim = np.vstack(A_trim)
    else:
        A_trim = A

    print('Solving a matching problem...')
    v, K, d = create_lp_matrix(A_trim,
                               min_reviewers_per_paper=n_match,
                               max_reviewers_per_paper=n_match,
                               min_papers_per_reviewer=n_match,
                               max_papers_per_reviewer=n_match)
    x_sol = linprog(v, K, d)['x']
    b = create_assignment(x_sol, A_trim)
    if (b.sum() == 0):
        print(
            'Seems like the problem does not converge, try reducing <n_trim> but not too low!'
        )
    else:
        print('Successfully assigned all the match!')

    if (b.sum() != 0):
        output = []
        user_ids_map = {ri: r['user_id'] for ri, r in df.iterrows()}
        for i in range(len(b)):
Exemplo n.º 5
0
    # COIs
    cois_ids = submission_df.AuthorIds.map(
        lambda x: create_coi_author_ids(x, reviewer_df))
    cois = submission_df.AuthorsList.map(
        lambda x: create_coi_list(x, reviewer_df))
    cois_df = pd.DataFrame(cois + cois_ids, columns=['AuthorsList'])
    for i, r in cois_df.iterrows():
        if len(r['AuthorsList']) > 0:
            for idx in r['AuthorsList']:
                A[i, idx] = -1000

    # assignment
    A_a, A_b = A[:, :len(reviewer_a_df)], A[:, len(reviewer_a_df):]
    v, K, d = create_lp_matrix(A_a,
                               min_reviewers_per_paper=2, max_reviewers_per_paper=2,
                               min_papers_per_reviewer=10, max_papers_per_reviewer=12)
    x_sol = linprog(v, K, d)['x']
    b_a = create_assignment(x_sol, A_a)

    v, K, d = create_lp_matrix(A_b,
                               min_reviewers_per_paper=2, max_reviewers_per_paper=2,
                               min_papers_per_reviewer=10, max_papers_per_reviewer=12)
    x_sol = linprog(v, K, d)['x']
    b_b = create_assignment(x_sol, A_b)

    reviewer_a_map = {i: r['UserID'] for i, r in reviewer_a_df.iterrows()}
    reviewer_b_map = {i: r['UserID'] for i, r in reviewer_b_df.iterrows()}
    paper_id_map = {i: r['PaperID'] for i, r in submission_df.iterrows()}

    assignments_a_df = create_assignment_dataframe(b_a, reviewer_a_map,