def bar_chart_data(docket_number):
    """Return bar-chart data for the first petitioner argument of a case.

    Reads ``transcripts/<docket_number>.pdf``, splits the oral argument by
    advocate, and passes the statements of the first petitioner entry
    yielded by the mapping to ``scores.bar_chart_speaker``; the result is
    wrapped with ``jsonify``.  When there is no petitioner entry at all an
    empty dict is returned instead.
    """
    pdf_path = "transcripts/" + docket_number + ".pdf"
    the_transcript = transcript.get_transcript_from_PDF(pdf_path)
    petitioners, respondents = transcript.get_petitioners_and_respondents(
        the_transcript)
    full_argument = transcript.get_argument(the_transcript)
    by_advocate = transcript.get_arguments_by_advocate(
        petitioners, respondents, full_argument)

    # Only the first (advocate, argument) pair is ever used.
    first_entry = next(iter(by_advocate["petitioner"].iteritems()), None)
    if first_entry is None:
        # NOTE(review): this path returns a plain dict while the normal path
        # returns whatever jsonify() produces -- confirm callers accept both.
        return {}

    advocate, advocate_argument = first_entry
    statements = transcript.get_statements_in_argument(
        advocate_argument, advocate)
    return jsonify(scores.bar_chart_speaker(statements))
# Number of feature values predict_case() requires before it appends the
# leaning value and queries the network.
_EXPECTED_FEATURE_COUNT = 774


def _feature_inputs_for_side(arguments_for_side):
    """Return the concatenated feature vectors of every advocate on one side.

    ``arguments_for_side`` is the mapping of advocate -> argument taken from
    ``transcript.get_arguments_by_advocate(...)`` for either "petitioner" or
    "respondent".  Each argument is run through the scores pipeline
    (features -> flatten -> normalize -> feature vector) and the resulting
    vectors are appended in iteration order.
    """
    side_inputs = []
    for advocate, advocate_argument in arguments_for_side.iteritems():
        statements = transcript.get_statements_in_argument(
            advocate_argument, advocate)
        # The return value is not used for prediction; the call is kept in
        # case it has side effects -- TODO confirm and remove if it is pure.
        scores.get_number_of_words_per_speaker(statements)
        features = scores.get_features_from_statements(statements)
        flat_features = scores.flatten_features(features)
        normalized = scores.normalize_feature_list(flat_features)
        side_inputs.extend(scores.get_feature_vector(normalized))
    return side_inputs


def predict_case(docket_number, leaning=None):
    """Query the network for a prediction on one case.

    Parameters:
        docket_number: docket identifier; the transcript is read from
            ``transcripts/<docket_number>.pdf``.
        leaning: value appended as the final network input.  When falsy
            (the default ``None``) it is looked up as
            ``scdb.get_case_info(docket_number)["decisionDirection"]``.

    Returns:
        The first element of ``net.activate(inputs)``, or ``0`` when the
        transcript produced fewer than the required number of features.
    """
    log = logging.getLogger('PREDICT_CASE')
    if not leaning:
        leaning = scdb.get_case_info(docket_number)["decisionDirection"]

    the_transcript = transcript.get_transcript_from_PDF(
        "transcripts/" + docket_number + ".pdf")
    petitioners, respondents = transcript.get_petitioners_and_respondents(
        the_transcript)
    argument = transcript.get_argument(the_transcript)
    arguments_by_advocate = transcript.get_arguments_by_advocate(
        petitioners, respondents, argument)

    # Petitioner features first, then respondent features.
    inputs = _feature_inputs_for_side(arguments_by_advocate["petitioner"])
    inputs.extend(_feature_inputs_for_side(arguments_by_advocate["respondent"]))

    if len(inputs) < _EXPECTED_FEATURE_COUNT:
        # We were unable to match all arguments; the network cannot be
        # queried with a short vector, so give up on this case.
        print("Parsing error? Did not create enough features.")
        return 0
    if len(inputs) > _EXPECTED_FEATURE_COUNT:
        # More than one argument per advocate: keep only the leading
        # features (the first two arguments).
        inputs = inputs[:_EXPECTED_FEATURE_COUNT]
        print("Parsing error? Had to truncate features.")

    inputs.append(leaning)
    log.info(inputs[:25])
    log.info("Querying network. . .")
    return net.activate(inputs)[0]
# Finish the scores file header (``f`` is opened earlier, outside this excerpt).
f.write("Scores:\n")
f.close()

# Walk every transcript path in ``files`` and extract petitioner features.
for file in files:
    print("Now processing " + file)

    # The docket number is the text between the first '/' and the first '_'.
    slash = file.find('/')
    end_docket = file.find('_')
    if 'q' in file:
        # NOTE(review): this value is overwritten on the next statement, so
        # the branch has no effect; it may have been meant to assign
        # end_docket -- confirm intent.
        docket_number = file.find('q')
    docket_number = file[slash + 1:end_docket]

    print("Checking database for docket #%s" % docket_number)
    print("Winner identified: %s" % scdb.get_winning_party(docket_number))
    # Pause until the operator presses Enter.
    raw_input()

    the_transcript = transcript.get_transcript_from_PDF(file)
    petitioners, respondents = transcript.get_petitioners_and_respondents(
        the_transcript)
    argument = transcript.get_argument(the_transcript)
    arguments_by_advocate = transcript.get_arguments_by_advocate(
        petitioners, respondents, argument)

    for petitioner, argument in arguments_by_advocate["petitioner"].iteritems():
        statements = transcript.get_statements_in_argument(
            argument, petitioner)
        #scores.get_statistics_from_statements(statements)
        number_of_words_per_speaker = scores.get_number_of_words_per_speaker(
            statements)
        features = scores.get_features_from_statements(statements)
        flat_features = scores.flatten_features(features)
#import matplotlib.pyplot as plt
#from mpltools import style
#style.use('ggplot')
#plt.ion()
import sys

import transcript
import scores

if __name__ == "__main__":
    # Every command-line argument containing '.pdf' is treated as the path
    # of a transcript to analyse.
    for arg in sys.argv:
        if '.pdf' in arg:
            the_transcript = transcript.get_transcript_from_PDF(arg)
            petitioners, respondents = (
                transcript.get_petitioners_and_respondents(the_transcript))
            argument = transcript.get_argument(the_transcript)
            arguments_by_advocate = transcript.get_arguments_by_advocate(
                petitioners, respondents, argument)

            # Petitioner side: statistics call plus feature extraction; the
            # extracted features are not used further in this loop.
            for petitioner, argument in (
                    arguments_by_advocate["petitioner"].iteritems()):
                statements = transcript.get_statements_in_argument(
                    argument, petitioner)
                scores.get_statistics_from_statements(statements)
                number_of_words_per_speaker = (
                    scores.get_number_of_words_per_speaker(statements))
                features = scores.get_features_from_statements(statements)
                flat_features = scores.flatten_features(features)

            # Respondent side: same steps, then normalise the flattened
            # features and print the resulting feature vector.
            for respondent, argument in (
                    arguments_by_advocate["respondent"].iteritems()):
                statements = transcript.get_statements_in_argument(
                    argument, respondent)
                scores.get_statistics_from_statements(statements)
                number_of_words_per_speaker = (
                    scores.get_number_of_words_per_speaker(statements))
                features = scores.get_features_from_statements(statements)
                flat_features = scores.flatten_features(features)
                normalized = scores.normalize_feature_list(flat_features)
                print(scores.get_feature_vector(normalized))