# Supreme court corpus example. It answers two simple questions:
#   - Which justices coordinate the most to others?
#   - Which justices are coordinated to the most?

import convokit

# Load the corpus and build the coordination object on top of it.
corpus = convokit.Corpus(filename=convokit.download("supreme-corpus"))
coord = convokit.Coordination(corpus)

# Two user groups: the justices (selected via the "is-justice" info flag)
# and every user in the corpus.
justices = corpus.users(lambda user: user.info["is-justice"])
everyone = corpus.users()

# Direction 1: each justice coordinating to everyone, highest average first.
print("Justices, ranked by how much they coordinate to others:")
justices_to_everyone = coord.score(justices, everyone)
outgoing_ranking = list(justices_to_everyone.averages_by_user().items())
outgoing_ranking.sort(key=lambda pair: pair[1], reverse=True)
for justice, score in outgoing_ranking:
    print(justice.name, round(score, 5))
print()

# Direction 2: everyone coordinating to each justice; focus="targets" makes
# the scores keyed by the justice being coordinated to.
print("Justices, ranked by how much others coordinate to them:")
everyone_to_justices = coord.score(everyone, justices, focus="targets")
incoming_ranking = list(everyone_to_justices.averages_by_user().items())
incoming_ranking.sort(key=lambda pair: pair[1], reverse=True)
for justice, score in incoming_ranking:
    print(justice.name, round(score, 5))
print()
def command_line_main():
    """Compute coordination scores for a corpus file and dump them as JSON.

    Command-line arguments:
        filename: path of the corpus file to load with ``convokit.Corpus``.
        --groups: optional path to a JSON file mapping a group name to a
            two-element ``[a, b]`` pair; each pair is passed to
            ``coord.score(a, b)``.

    The report is written next to the input as ``<filename stem>-out.json``
    and contains:
        "pairwise": per-pair scores keyed by ``"'speaker' -> 'target'"``.
        "user":     per-user "out"/"in" category averages plus their overall
                    "out-average"/"in-average".
        "group":    per-group scores (only when ``--groups`` is given).
        "meta":     averages taken from ``coord.score_report``.
    """
    parser = argparse.ArgumentParser(description="Social features toolkit.")
    parser.add_argument("filename", help="file to process")
    parser.add_argument("--groups", dest="groups",
                        help="file containing groups")
    args = parser.parse_args()

    corpus = convokit.Corpus(filename=args.filename)

    ### coordination
    coord = convokit.Coordination(corpus)

    # pairwise scores
    pairwise_scores = coord.pairwise_scores(
        corpus.speaking_pairs(user_names_only=True))
    # JSON object keys must be strings, so flatten the (speaker, target)
    # tuple keys into a readable label.
    pairwise_scores_s = {
        "'" + s + "' -> '" + t + "'": v
        for (s, t), v in pairwise_scores.items()
    }
    # score_report yields a 5-tuple; only its second element is kept here.
    _, pairwise_average_scores, _, _, _ = coord.score_report(pairwise_scores_s)

    out = {}
    out["pairwise"] = pairwise_scores_s
    out["meta"] = {"pairwise-averages": pairwise_average_scores}

    # user scores: collect every pairwise value per user and category, once
    # from the speaker's side (coord_to) and once from the target's side
    # (coord_from).
    coord_to = defaultdict(lambda: defaultdict(list))
    coord_from = defaultdict(lambda: defaultdict(list))
    for (speaker, target), m in pairwise_scores.items():
        for cat, value in m.items():
            coord_to[speaker][cat].append(value)
            coord_from[target][cat].append(value)

    # Average the collected values per category. A (user, category) entry
    # only exists once a value has been appended, so len(values) >= 1.
    coord_to_avg = {
        user: {cat: sum(values) / len(values) for cat, values in m.items()}
        for user, m in coord_to.items()
    }
    coord_from_avg = {
        user: {cat: sum(values) / len(values) for cat, values in m.items()}
        for user, m in coord_from.items()
    }

    user_scores = {}
    for user in coord_to_avg.keys() | coord_from_avg.keys():
        user_scores[user] = {}
        if user in coord_to_avg:
            out_scores = coord_to_avg[user]
            user_scores[user]["out"] = out_scores
            user_scores[user]["out-average"] = \
                sum(out_scores.values()) / len(out_scores)
        if user in coord_from_avg:
            in_scores = coord_from_avg[user]
            user_scores[user]["in"] = in_scores
            user_scores[user]["in-average"] = \
                sum(in_scores.values()) / len(in_scores)
    out["user"] = user_scores

    # group scores
    if args.groups is not None:
        # Context manager so the groups file is closed right after parsing.
        with open(args.groups, "r") as groups_file:
            groups = json.load(groups_file)
        scores = {}
        score_reports = {}
        for name, (a, b) in groups.items():
            scores[name] = coord.score(a, b)
            score_reports[name] = coord.score_report(scores[name])
        out["group"] = {name: scores[name] for name in scores}
        out["meta"]["group-averages"] = {
            name: {
                "average-by-marker": mkr,
                "aggregate-1": agg1,
                "aggregate-2": agg2,
                "aggregate-3": agg3
            }
            for name, (_, mkr, agg1, agg2, agg3) in score_reports.items()
        }

    # Context manager guarantees the report is flushed and closed before
    # the function returns, instead of whenever the handle is collected.
    out_path = os.path.splitext(args.filename)[0] + "-out.json"
    with open(out_path, "w") as out_file:
        json.dump(out, out_file, indent=2, sort_keys=True)
if conv.n_speakers() == 2: conv_2pers.append(conv) return conv_2pers if __name__ == "__main__": with open('pickles.p', 'rb') as f: conversations = pickle.load(f) corpus_speakers = create_speakers(conversations) utterance_list = create_utterances(conversations) # Creating a convokit corpus class BCN_corpus = Corpus(utterances=utterance_list) # Create a convokit coordination class. coord = convokit.Coordination() # Fit the underlying model (calculate LIWC scores) coord.fit(BCN_corpus) # Transform the BCN corpus into a coordination class # (this can deal with the LIWC scores) coord.transform(BCN_corpus) # Example code to create alignment scores from the 40-49 age group to all others # Creating the speaker sets based on age groups _10_19 = list( BCN_corpus.iter_speakers(lambda speaker: speaker.meta['age'] > 9 and speaker.meta['age'] < 20)) _20_29 = list( BCN_corpus.iter_speakers(lambda speaker: speaker.meta['age'] > 19 and speaker.meta['age'] < 30)) _30_39 = list(