示例#1
0
# This example uses the supreme court corpus to compute some simple information:
# - Which justices coordinate the most to others?
# - Which justices are coordinated to the most?

import convokit

# Load the Supreme Court corpus and build a coordination scorer on top of it.
corpus = convokit.Corpus(filename=convokit.download("supreme-corpus"))
coord = convokit.Coordination(corpus)

# Two user groups: only the justices, and every user in the corpus.
justices = corpus.users(lambda u: u.info["is-justice"])
everyone = corpus.users()

# Score each justice against everyone, then list them by per-user average,
# highest first.
print("Justices, ranked by how much they coordinate to others:")
justices_to_everyone = coord.score(justices, everyone)
outgoing_ranking = sorted(
    justices_to_everyone.averages_by_user().items(),
    key=lambda pair: pair[1],
    reverse=True,
)
for justice, score in outgoing_ranking:
    print(justice.name, round(score, 5))
print()

# Same ranking in the opposite direction: everyone scored against the
# justices, with the scoring focused on the targets.
print("Justices, ranked by how much others coordinate to them:")
everyone_to_justices = coord.score(everyone, justices, focus="targets")
incoming_ranking = sorted(
    everyone_to_justices.averages_by_user().items(),
    key=lambda pair: pair[1],
    reverse=True,
)
for justice, score in incoming_ranking:
    print(justice.name, round(score, 5))
print()
def _mean(numbers):
    """Return the arithmetic mean of a non-empty, sized collection of numbers."""
    return sum(numbers) / len(numbers)


def _mean_per_category(values_by_category):
    """Collapse a ``{category: [values]}`` mapping to ``{category: mean}``."""
    return {
        cat: _mean(values)
        for cat, values in values_by_category.items()
    }


def command_line_main():
    """Compute coordination scores for a corpus file and dump them as JSON.

    Command-line arguments:
        filename: corpus file to load with ``convokit.Corpus``.
        --groups: optional JSON file mapping a group name to a pair
            ``(a, b)``; each pair is passed to ``coord.score(a, b)``.
            (Presumably ``a`` are the speakers and ``b`` the targets --
            confirm against the Coordination API.)

    The result is written next to the input file, as
    ``<filename without extension>-out.json``, with these top-level keys:

        "pairwise": scores per speaking pair, keyed ``"'s' -> 't'"``.
        "user":     per-user averages, "out" (user is the speaker) and
                    "in" (user is the target), each with an overall average.
        "group":    per-group scores (only when ``--groups`` is given).
        "meta":     averages taken from ``coord.score_report``.
    """
    parser = argparse.ArgumentParser(description="Social features toolkit.")
    parser.add_argument("filename", help="file to process")
    parser.add_argument("--groups",
                        dest="groups",
                        help="file containing groups")
    args = parser.parse_args()

    corpus = convokit.Corpus(filename=args.filename)

    ### coordination
    coord = convokit.Coordination(corpus)

    # pairwise scores
    pairwise_scores = coord.pairwise_scores(
        corpus.speaking_pairs(user_names_only=True))
    # JSON object keys must be strings, so flatten each (speaker, target)
    # tuple into a readable "'speaker' -> 'target'" label.
    pairwise_scores_s = {
        "'" + s + "' -> '" + t + "'": v
        for (s, t), v in pairwise_scores.items()
    }
    # Only the second element of the five-element report is kept here.
    _, pairwise_average_scores, _, _, _ = coord.score_report(pairwise_scores_s)

    out = {}
    out["pairwise"] = pairwise_scores_s
    out["meta"] = {"pairwise-averages": pairwise_average_scores}

    # user scores: gather every pairwise value under both its speaker
    # (outgoing) and its target (incoming), grouped by category.
    coord_to = defaultdict(lambda: defaultdict(list))
    coord_from = defaultdict(lambda: defaultdict(list))
    for (speaker, target), m in pairwise_scores.items():
        for cat, value in m.items():
            coord_to[speaker][cat].append(value)
            coord_from[target][cat].append(value)

    # Keep the averages in separate plain dicts instead of overwriting the
    # defaultdicts while they are being iterated.
    coord_to_avg = {
        user: _mean_per_category(m) for user, m in coord_to.items()
    }
    coord_from_avg = {
        user: _mean_per_category(m) for user, m in coord_from.items()
    }

    user_scores = {}
    for user in coord_to_avg.keys() | coord_from_avg.keys():
        entry = {}
        if user in coord_to_avg:
            entry["out"] = coord_to_avg[user]
            entry["out-average"] = _mean(coord_to_avg[user].values())
        if user in coord_from_avg:
            entry["in"] = coord_from_avg[user]
            entry["in-average"] = _mean(coord_from_avg[user].values())
        user_scores[user] = entry
    out["user"] = user_scores

    # group scores
    if args.groups is not None:
        # Context manager so the groups file is closed deterministically.
        with open(args.groups, "r") as groups_file:
            groups = json.load(groups_file)
        scores = {}
        score_reports = {}
        for name, (a, b) in groups.items():
            scores[name] = coord.score(a, b)
            score_reports[name] = coord.score_report(scores[name])
        out["group"] = dict(scores)
        out["meta"]["group-averages"] = {
            name: {
                "average-by-marker": mkr,
                "aggregate-1": agg1,
                "aggregate-2": agg2,
                "aggregate-3": agg3
            }
            for name, (_, mkr, agg1, agg2, agg3) in score_reports.items()
        }

    # Close (and therefore flush) the output file explicitly rather than
    # relying on garbage collection of an anonymous file object.
    out_path = os.path.splitext(args.filename)[0] + "-out.json"
    with open(out_path, "w") as out_file:
        json.dump(out, out_file, indent=2, sort_keys=True)
        if conv.n_speakers() == 2:
            conv_2pers.append(conv)
    return conv_2pers


if __name__ == "__main__":
    with open('pickles.p', 'rb') as f:
        conversations = pickle.load(f)

    corpus_speakers = create_speakers(conversations)
    utterance_list = create_utterances(conversations)

    # Creating a convokit corpus class
    BCN_corpus = Corpus(utterances=utterance_list)
    # Create a convokit coordination class.
    coord = convokit.Coordination()
    # Fit the underlying model (calculate LIWC scores)
    coord.fit(BCN_corpus)
    # Transform the BCN corpus into a coordination class
    # (this can deal with the LIWC scores)
    coord.transform(BCN_corpus)

    # Example code to create alignment scores from the 40-49 age group to all others
    # Creating the speaker sets based on age groups
    _10_19 = list(
        BCN_corpus.iter_speakers(lambda speaker: speaker.meta['age'] > 9 and
                                 speaker.meta['age'] < 20))
    _20_29 = list(
        BCN_corpus.iter_speakers(lambda speaker: speaker.meta['age'] > 19 and
                                 speaker.meta['age'] < 30))
    _30_39 = list(