def generate_data(self):
    """Tally judge appearances on RECAP dockets and hand them to the exporter.

    For every court whose jurisdiction is in ``Court.FEDERAL_JURISDICTIONS``,
    walk its dockets that come from a RECAP source and that have at least
    one of ``assigned_to_str`` / ``referred_to_str`` filled in. Each
    non-empty judge string is passed through ``unidecode`` and
    ``normalize_judge_string`` to get a ``(name, title)`` pair, and the
    docket's filing year is counted under that pair.

    The structure passed to ``self.export_files`` is nested as
    court pk -> name -> title -> ``Counter`` keyed by
    ``docket.date_filed.year``, e.g. (illustrative values)::

        {
            'ca2': {
                'harold baller': {
                    'Mag judge': Counter({1999: 22, 2000: 14}),
                },
            },
        }

    The resulting CSV is presumably laid out by ``export_files`` as
    Court, Name, Title, Count, 2000, 2011... -- confirm against that method.

    NOTE(review): ``docket.date_filed`` is dereferenced without a check, so
    a docket with no filing date would raise AttributeError -- confirm that
    the field is always populated for these dockets.
    """
    federal_courts = Court.objects.filter(
        jurisdiction__in=Court.FEDERAL_JURISDICTIONS,
    )
    # Attribute names holding the raw judge strings; built once, up front.
    judge_fields = ['%s_to_str' % judge_type
                    for judge_type in ['assigned', 'referred']]

    out = {}
    for court in federal_courts:
        court_stats = {}
        out[court.pk] = court_stats

        # Drop dockets where neither judge field is filled in.
        no_judge_at_all = Q(assigned_to_str='') & Q(referred_to_str='')
        dockets = court.dockets.exclude(no_judge_at_all)
        dockets = dockets.filter(source__in=Docket.RECAP_SOURCES)
        dockets = dockets.only(
            'assigned_to_str', 'referred_to_str', 'date_filed')

        docket_total = dockets.count()
        logger.info("Processing %s dockets in %s" % (docket_total, court.pk))

        for docket in dockets:
            for field_name in judge_fields:
                raw_judge = getattr(docket, field_name)
                if not raw_judge:
                    continue
                name, title = normalize_judge_string(unidecode(raw_judge))
                if not name:
                    continue

                # Read the year before touching the tallies so a failure
                # here leaves them unmodified.
                year = docket.date_filed.year
                # Create the person and/or title entry on first sight,
                # then bump the count for this year.
                titles_for_person = court_stats.setdefault(name, {})
                yearly_counts = titles_for_person.setdefault(title, Counter())
                yearly_counts[year] += 1

    self.export_files(out)
def generate_data(self):
    """Tally judge appearances on RECAP dockets and hand them to the exporter.

    For every court whose jurisdiction is in ``Court.FEDERAL_JURISDICTIONS``,
    walk its dockets that come from a RECAP source and that have at least
    one of ``assigned_to_str`` / ``referred_to_str`` filled in. Each
    non-empty judge string is passed through ``unidecode`` and
    ``normalize_judge_string`` to get a ``(name, title)`` pair, and the
    docket's filing year is counted under that pair.

    The structure passed to ``self.export_files`` is nested as
    court pk -> name -> title -> ``Counter`` keyed by
    ``docket.date_filed.year``, e.g. (illustrative values)::

        {
            "ca2": {
                "harold baller": {
                    "Mag judge": Counter({1999: 22, 2000: 14}),
                },
            },
        }

    The resulting CSV is presumably laid out by ``export_files`` as
    Court, Name, Title, Count, 2000, 2011... -- confirm against that method.

    NOTE(review): ``docket.date_filed`` is dereferenced without a check, so
    a docket with no filing date would raise AttributeError -- confirm that
    the field is always populated for these dockets.
    """
    federal_courts = Court.objects.filter(
        jurisdiction__in=Court.FEDERAL_JURISDICTIONS,
    )
    # Attribute names holding the raw judge strings; built once, up front.
    judge_fields = ["%s_to_str" % judge_type
                    for judge_type in ["assigned", "referred"]]

    out = {}
    for court in federal_courts:
        court_stats = {}
        out[court.pk] = court_stats

        # Drop dockets where neither judge field is filled in.
        no_judge_at_all = Q(assigned_to_str="") & Q(referred_to_str="")
        dockets = court.dockets.exclude(no_judge_at_all)
        dockets = dockets.filter(source__in=Docket.RECAP_SOURCES)
        dockets = dockets.only(
            "assigned_to_str", "referred_to_str", "date_filed")

        docket_total = dockets.count()
        logger.info("Processing %s dockets in %s" % (docket_total, court.pk))

        for docket in dockets:
            for field_name in judge_fields:
                raw_judge = getattr(docket, field_name)
                if not raw_judge:
                    continue
                name, title = normalize_judge_string(unidecode(raw_judge))
                if not name:
                    continue

                # Read the year before touching the tallies so a failure
                # here leaves them unmodified.
                year = docket.date_filed.year
                # Create the person and/or title entry on first sight,
                # then bump the count for this year.
                titles_for_person = court_stats.setdefault(name, {})
                yearly_counts = titles_for_person.setdefault(title, Counter())
                yearly_counts[year] += 1

    self.export_files(out)
def test_title_name_splitter(self):
    """Check normalize_judge_string against known raw judge strings.

    Each case maps a raw string ("q") to the tuple ("a") that
    normalize_judge_string is expected to return for it.
    """
    cases = [
        {
            "q": "Magistrate Judge George T. Swartz",
            "a": ("George T. Swartz", "mag"),
        },
        {
            "q": "J. Frederick Motz",
            "a": ("Frederick Motz", "jud"),
        },
        {
            "q": "Honorable Susan W. Wright",
            "a": ("Susan W. Wright", "jud"),
        },
    ]
    for case in cases:
        expected = case["a"]
        actual = normalize_judge_string(case["q"])
        self.assertEqual(expected, actual)
def test_title_name_splitter(self):
    """Check normalize_judge_string against known raw judge strings.

    Each case maps a raw string ('q') to the tuple ('a') that
    normalize_judge_string is expected to return for it.
    """
    cases = [
        {
            'q': 'Magistrate Judge George T. Swartz',
            'a': ('George T. Swartz', 'mag'),
        },
        {
            'q': 'J. Frederick Motz',
            'a': ('Frederick Motz', 'jud'),
        },
        {
            'q': 'Honorable Susan W. Wright',
            'a': ('Susan W. Wright', 'jud'),
        },
    ]
    for case in cases:
        expected = case['a']
        actual = normalize_judge_string(case['q'])
        self.assertEqual(expected, actual)