示例#1
0
文件: web.py 项目: jbottel/OyezOracle
def bar_chart_data(docket_number):
    """Build bar-chart data for one petitioner advocate of a case.

    The transcript is read from ``transcripts/<docket_number>.pdf`` and split
    into per-advocate arguments.  The statements of the first petitioner
    entry yielded by the mapping are passed to ``scores.bar_chart_speaker``
    and the result is returned through ``jsonify``.

    Returns an empty dict when no petitioner argument was found.
    """
    pdf_path = "transcripts/" + docket_number + ".pdf"
    parsed = transcript.get_transcript_from_PDF(pdf_path)
    petitioners, respondents = transcript.get_petitioners_and_respondents(parsed)
    whole_argument = transcript.get_argument(parsed)

    by_advocate = transcript.get_arguments_by_advocate(
        petitioners, respondents, whole_argument)

    # Only one petitioner entry is ever charted: the loop body returns on
    # its first pass.
    for advocate, advocate_argument in by_advocate["petitioner"].iteritems():
        spoken = transcript.get_statements_in_argument(advocate_argument, advocate)
        return jsonify(scores.bar_chart_speaker(spoken))

    # No petitioner arguments at all.
    return {}
示例#2
0
def predict_case(docket_number, leaning=None):
    """Query the network for a case, using features from its transcript.

    The transcript is read from ``transcripts/<docket_number>.pdf``.  A
    feature vector is computed for every petitioner argument and then every
    respondent argument, and the vectors are concatenated into one input
    list.  ``leaning`` is appended as the last input; when it is falsy it is
    looked up as the ``"decisionDirection"`` field of
    ``scdb.get_case_info(docket_number)``.

    Returns 0 when fewer than 774 feature values were produced.  When more
    were produced, only the first 774 are kept.  Otherwise returns the first
    element of ``net.activate(inputs)``.
    """
    log = logging.getLogger('PREDICT_CASE')
    if not leaning:
        leaning = scdb.get_case_info(docket_number)["decisionDirection"]

    pdf_path = "transcripts/" + docket_number + ".pdf"
    parsed = transcript.get_transcript_from_PDF(pdf_path)
    petitioners, respondents = transcript.get_petitioners_and_respondents(parsed)
    whole_argument = transcript.get_argument(parsed)

    by_advocate = transcript.get_arguments_by_advocate(
        petitioners, respondents, whole_argument)

    # Number of feature values expected before ``leaning`` is appended.
    expected_length = 774
    inputs = []

    # Petitioner vectors come first, then respondent vectors.
    for side in ("petitioner", "respondent"):
        for advocate, advocate_argument in by_advocate[side].iteritems():
            spoken = transcript.get_statements_in_argument(advocate_argument, advocate)
            # The result is not used here; the call is kept as in the original.
            scores.get_number_of_words_per_speaker(spoken)
            flattened = scores.flatten_features(
                scores.get_features_from_statements(spoken))
            inputs.extend(
                scores.get_feature_vector(scores.normalize_feature_list(flattened)))

    feature_count = len(inputs)

    if feature_count < expected_length:
        # Not every argument could be matched, so there is nothing usable
        # to feed the network.
        print "Parsing error? Did not create enough features."
        return 0

    if feature_count > expected_length:
        # More than one argument per advocate: keep only the leading values.
        inputs = inputs[:expected_length]
        print "Parsing error? Had to truncate features."

    inputs.append(leaning)

    log.info(inputs[:25])
    log.info("Querying network. . .")
    return net.activate(inputs)[0]
示例#3
0
# Write the header line of the scores file and close it.
# (``f`` is opened outside this excerpt.)
f.write("Scores:\n")
f.close()

# Process every transcript path in ``files`` (defined outside this excerpt).
for file in files:
    print "Now processing " + file

    # The docket number is the text between the first '/' and the first '_'
    # of the path.  str.find returns -1 when the character is absent, so a
    # path without '/' is sliced from index 0 and a path without '_' loses
    # its last character.
    slash = file.find('/')
    end_docket = file.find('_')
    # NOTE(review): this assignment is dead code -- the value (an index, not
    # a docket string) is overwritten unconditionally on the next statement.
    # Confirm what handling of paths containing 'q' was intended.
    if file.find('q') != -1:
        docket_number = file.find('q')
    docket_number = file[slash+1:end_docket]
    print "Checking database for docket #%s" % docket_number
    print "Winner identified: %s" % scdb.get_winning_party(docket_number)
    # Pause until the user presses Enter before parsing the transcript.
    raw_input()

    the_transcript = transcript.get_transcript_from_PDF(file)
    petitioners, respondents = transcript.get_petitioners_and_respondents(the_transcript)
    argument = transcript.get_argument(the_transcript)

    arguments_by_advocate = transcript.get_arguments_by_advocate(petitioners, respondents, argument)

    # Compute flattened features for each petitioner argument.  Nothing in
    # this excerpt consumes the results.
    for petitioner, argument in arguments_by_advocate["petitioner"].iteritems():
        statements = transcript.get_statements_in_argument(argument, petitioner)
        #scores.get_statistics_from_statements(statements)
        number_of_words_per_speaker = scores.get_number_of_words_per_speaker(statements)
        features = scores.get_features_from_statements(statements)
        flat_features = scores.flatten_features(features)
        #import matplotlib.pyplot as plt
        #from mpltools import style
        #style.use('ggplot')
        #plt.ion()
示例#4
0
import sys
import transcript
import scores

if __name__ == "__main__":
    for arg in sys.argv:
        if '.pdf' in arg:
            the_transcript = transcript.get_transcript_from_PDF(arg)
            petitioners, respondents = transcript.get_petitioners_and_respondents(the_transcript)
            argument = transcript.get_argument(the_transcript)

            arguments_by_advocate = transcript.get_arguments_by_advocate(petitioners, respondents, argument)
            for petitioner, argument in arguments_by_advocate["petitioner"].iteritems():
                statements = transcript.get_statements_in_argument(argument, petitioner)
                scores.get_statistics_from_statements(statements)
                number_of_words_per_speaker = scores.get_number_of_words_per_speaker(statements)
                features = scores.get_features_from_statements(statements)
                flat_features = scores.flatten_features(features)

            for respondent, argument in arguments_by_advocate["respondent"].iteritems():
                statements = transcript.get_statements_in_argument(argument, respondent)
                scores.get_statistics_from_statements(statements)
                number_of_words_per_speaker = scores.get_number_of_words_per_speaker(statements)
                features = scores.get_features_from_statements(statements)
                flat_features = scores.flatten_features(features)
                normalized = scores.normalize_feature_list(flat_features)
                print scores.get_feature_vector(normalized)