def get_judge_and_court(case_path):
    """Tally the judge/court pairing found in one case file.

    Reads the file at ``case_path``, parses it with
    ``get_html_from_raw_text``, extracts the judge and the court from the
    cleaned HTML tree, and increments ``judge_stats[judge][court]``,
    creating the nested entries the first time a judge or court is seen.

    :param case_path: Path of the case file to read.
    :return: None. Despite the name, nothing is returned; the count is
        recorded in ``judge_stats``, which is defined outside this function.
    """
    # Context manager so the handle is closed deterministically instead of
    # being left open until the garbage collector gets to it.
    with open(case_path) as case_file:
        raw_text = case_file.read()

    # Only the cleaned tree is needed here. The other three results are
    # still unpacked so a change in the helper's return shape fails loudly.
    clean_html_tree, _complete_tree, _clean_str, _body_text = \
        get_html_from_raw_text(raw_text)

    judge = get_judge(clean_html_tree, case_path)
    court = get_court_object(clean_html_tree, case_path=case_path)

    # Equivalent to the nested "if judge in ... / if court in ..." ladder.
    court_counts = judge_stats.setdefault(judge, {})
    court_counts[court] = court_counts.get(court, 0) + 1
def get_judge_and_court(case_path):
    """Record which court a case's judge was found sitting in.

    The case file at ``case_path`` is read and parsed with
    ``get_html_from_raw_text``; the judge and court are then extracted from
    the cleaned HTML tree and ``judge_stats[judge][court]`` is incremented.
    Missing judge or court entries are created with a starting count of 1.

    :param case_path: Path of the case file to read.
    :return: None. The function only updates ``judge_stats``, a mapping
        defined outside this function.
    """
    # Close the file as soon as it has been read rather than leaking the
    # handle for the lifetime of the (potentially long) import run.
    with open(case_path) as case_file:
        raw_text = case_file.read()

    # Four values come back from the parser; only the cleaned tree is used.
    clean_html_tree, _complete_tree, _clean_str, _body_text = \
        get_html_from_raw_text(raw_text)

    judge = get_judge(clean_html_tree, case_path)
    court = get_court_object(clean_html_tree, case_path=case_path)

    # One lookup-or-create for the judge, then a default-0 increment for
    # the court: same result as the explicit membership tests.
    per_court = judge_stats.setdefault(judge, {})
    per_court[court] = per_court.get(court, 0) + 1
def import_law_box_case(case_path):
    """Parse a case file and flag it when no filing date can be found.

    The file at ``case_path`` is read and parsed, then its citations, court
    and filing date(s) are extracted with the sibling helpers. When no date
    is found, the ``DEBUG`` flags decide what happens:

    * ``'review_issues'``: open the file in Firefox, wait for the browser
      process to exit, then prompt the operator on stdin.
    * ``'log_bad_values'``: append ``case_path`` to
      ``missing_dates_post_focus.txt`` in the current working directory.

    :param case_path: Path of the case file to read.
    :return: None. The visible body does not return a value.
    """
    # Context manager so the handle is closed promptly instead of leaking
    # one open file per imported case.
    with open(case_path) as case_file:
        raw_text = case_file.read()

    clean_html_tree, complete_html_tree, clean_html_str, body_text = \
        get_html_from_raw_text(raw_text)
    citations = get_citations_from_tree(complete_html_tree, case_path)
    court = get_court_object(clean_html_tree, citations, case_path)
    dates = get_date_filed(clean_html_tree, citations, case_path=case_path,
                           court=court)

    # The original outer guard also tested both flags; the inner checks
    # below already do that, so testing ``dates`` alone is equivalent.
    if not dates:
        if 'review_issues' in DEBUG:
            # communicate() blocks until the browser process exits.
            subprocess.Popen(['firefox', 'file://%s' % case_path],
                             shell=False).communicate()
            raw_input("No date identified! Can we fix this and restart, or just press enter to log it? ")
        if 'log_bad_values' in DEBUG:
            # Write the failed case out to file.
            with open('missing_dates_post_focus.txt', 'a') as out:
                out.write('%s\n' % case_path)
def import_law_box_case(case_path):
    """Parse a case file and flag it when no court can be identified.

    The file at ``case_path`` is read and parsed, then its citations and
    court are extracted with the sibling helpers. When no court is found,
    the ``DEBUG`` flags decide what happens:

    * ``'review_court_issues'``: open the file in Firefox, wait for the
      browser process to exit, then prompt the operator on stdin.
    * ``'log_bad_courts'``: append ``case_path`` to
      ``missing_courts_post_focus.txt`` in the current working directory.

    :param case_path: Path of the case file to read.
    :return: None. The visible body does not return a value.
    """
    # Context manager so the handle is closed promptly instead of leaking
    # one open file per imported case.
    with open(case_path) as case_file:
        raw_text = case_file.read()

    clean_html_tree, complete_html_tree, clean_html_str, body_text = \
        get_html_from_raw_text(raw_text)
    citations = get_citations_from_tree(complete_html_tree, case_path)
    court = get_court_object(clean_html_tree, citations, case_path)

    # Previously the outer guard also required 'review_court_issues', which
    # made the inner review check redundant and meant 'log_bad_courts' had
    # no effect unless interactive review was enabled too. Each flag is now
    # honoured independently, matching how missing dates are handled.
    if not court:
        if 'review_court_issues' in DEBUG:
            # communicate() blocks until the browser process exits.
            subprocess.Popen(['firefox', 'file://%s' % case_path],
                             shell=False).communicate()
            raw_input(
                "No court identified! Can we fix this and restart, or just press enter to log it? "
            )
        if 'log_bad_courts' in DEBUG:
            # Write the failed case out to file.
            with open('missing_courts_post_focus.txt', 'a') as out:
                out.write('%s\n' % case_path)
def cleaner(simulate=False, verbose=False):
    """Re-file documents indexed under 'californiad' as 'caed'.

    Queries the opinion Solr index for items whose ``court_exact`` is
    'californiad', loads the matching ``Document`` for each hit and
    re-derives its court from the stored Lawbox HTML with
    ``get_court_object``. A document is updated only when the re-derived
    court differs from its current ``court_id`` and is exactly 'caed'.

    :param simulate: When true, no document is modified or saved. The final
        Solr commit is still issued.
    :param verbose: When true, progress is printed for every item.
    """
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='rw')
    results = conn.raw_query(fq=['court_exact:%s' % 'californiad'])

    for hit in results:
        doc_pk = hit['id']
        if verbose:
            print("Running tests on item %s" % doc_pk)

        doc = Document.objects.get(pk=doc_pk)

        # Build the HTML element, then work out which court it belongs to.
        lawbox_tree = html.fromstring(doc.html_lawbox)
        correct_court = get_court_object(lawbox_tree)

        if verbose:
            print(" - https://www.courtlistener.com%s" % doc.get_absolute_url())
            print(" - Old value was: %s" % doc.court_id)
            print(" - New value is: %s" % correct_court)

        # Guard: already correct, nothing to do.
        if doc.court_id == correct_court:
            if verbose:
                print(" - Proceeding to next item: Values are equal.")
            continue

        # Guard: refuse to move the document anywhere other than 'caed'.
        if correct_court != 'caed':
            if verbose:
                print(" - Proceeding to next item: New value is not what we expected.")
            continue

        if verbose:
            print(" - Updating with new value.")
        if simulate:
            continue

        doc.court_id = correct_court
        # Per-document commits are suppressed; see the commit below.
        doc.save(index=True, force_commit=False)

    # One commit for the whole batch once every item has been processed.
    conn.commit()
def cleaner(simulate=False, verbose=False):
    """Correct the court of documents currently filed under 'californiad'.

    Every item the opinion Solr index returns for the filter
    ``court_exact:californiad`` is loaded as a ``Document``; its court is
    recomputed from ``html_lawbox`` via ``get_court_object``. Only documents
    whose recomputed court is 'caed' (and differs from the stored
    ``court_id``) are changed.

    :param simulate: When true, skip the assignment and save for every
        document. The closing Solr commit still runs.
    :param verbose: When true, print a trace of each decision.
    """
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='rw')
    results = conn.raw_query(fq=['court_exact:%s' % 'californiad'])

    for hit in results:
        doc_pk = hit['id']
        if verbose:
            print("Running tests on item %s" % doc_pk)

        doc = Document.objects.get(pk=doc_pk)

        # Parse the stored HTML, then determine the court from it.
        lawbox_tree = html.fromstring(doc.html_lawbox)
        correct_court = get_court_object(lawbox_tree)

        if verbose:
            print(" - https://www.courtlistener.com%s" % doc.get_absolute_url())
            print(" - Old value was: %s" % doc.court_id)
            print(" - New value is: %s" % correct_court)

        # No change needed, simply move on.
        if doc.court_id == correct_court:
            if verbose:
                print(" - Proceeding to next item: Values are equal.")
            continue

        # Attempting to change to an unexpected value.
        if correct_court != 'caed':
            if verbose:
                print(" - Proceeding to next item: New value is not what we expected.")
            continue

        if verbose:
            print(" - Updating with new value.")
        if not simulate:
            doc.court_id = correct_court
            # Defer the index commit; it is done once after the loop.
            doc.save(index=True, force_commit=False)

    # Do one big commit at the end.
    conn.commit()