def generate_data(self):
        """Tally RECAP dockets per court, judge, title and filing year.

        For every court whose jurisdiction is in
        ``Court.FEDERAL_JURISDICTIONS``, walk the RECAP-sourced dockets that
        have an assigned and/or a referred judge string. Each non-empty judge
        string is transliterated with ``unidecode`` and split into a
        ``(name, title)`` pair by ``normalize_judge_string``; the docket is
        then counted under the year it was filed.

        The result is passed to ``self.export_files`` and is nested like this
        (names and titles below are illustrative only):

        {
            'ca2': {
                'Harold Baller': {
                    'mag': Counter({1999: 22, 2000: 14}),
                },
            },
        }

        The outer key is the court's primary key and the innermost Counter
        maps filing year to the number of dockets. A docket that has both an
        assigned and a referred judge is counted once for each of them.

        The CSV layout this data is meant for (presumably written by
        ``self.export_files`` -- confirm there) is:
            Court, Name, Title, Count, 2000, 2011...
        """
        federal_courts = Court.objects.filter(
            jurisdiction__in=Court.FEDERAL_JURISDICTIONS,
        )
        out = {}
        for court in federal_courts:
            by_judge = {}
            out[court.pk] = by_judge
            dockets = (court.dockets
                       .exclude(Q(assigned_to_str='') & Q(referred_to_str=''))
                       .filter(source__in=Docket.RECAP_SOURCES)
                       .only('assigned_to_str', 'referred_to_str',
                             'date_filed'))
            # Pass the values as logger arguments so the message is only
            # formatted when the record is actually emitted.
            logger.info("Processing %s dockets in %s", dockets.count(),
                        court.pk)
            for docket in dockets:
                for judge_field in ('assigned_to_str', 'referred_to_str'):
                    judge = getattr(docket, judge_field)
                    if not judge:
                        continue

                    name, title = normalize_judge_string(unidecode(judge))
                    if not name:
                        continue

                    # NOTE(review): assumes date_filed is never null for
                    # these dockets; a null value raises AttributeError
                    # here -- confirm against the Docket model.
                    year = docket.date_filed.year
                    # setdefault creates the per-person dict and the
                    # per-title Counter on first sight; missing Counter keys
                    # start at 0, so += 1 covers new and existing years.
                    titles = by_judge.setdefault(name, {})
                    titles.setdefault(title, Counter())[year] += 1

        self.export_files(out)
    def generate_data(self):
        """Tally RECAP dockets per court, judge, title and filing year.

        For every court whose jurisdiction is in
        ``Court.FEDERAL_JURISDICTIONS``, walk the RECAP-sourced dockets that
        have an assigned and/or a referred judge string. Each non-empty judge
        string is transliterated with ``unidecode`` and split into a
        ``(name, title)`` pair by ``normalize_judge_string``; the docket is
        then counted under the year it was filed.

        The result is passed to ``self.export_files`` and is nested like this
        (names and titles below are illustrative only):

        {
            "ca2": {
                "Harold Baller": {
                    "mag": Counter({1999: 22, 2000: 14}),
                },
            },
        }

        The outer key is the court's primary key and the innermost Counter
        maps filing year to the number of dockets. A docket that has both an
        assigned and a referred judge is counted once for each of them.

        The CSV layout this data is meant for (presumably written by
        ``self.export_files`` -- confirm there) is:
            Court, Name, Title, Count, 2000, 2011...
        """
        federal_courts = Court.objects.filter(
            jurisdiction__in=Court.FEDERAL_JURISDICTIONS,
        )
        out = {}
        for court in federal_courts:
            by_judge = {}
            out[court.pk] = by_judge
            dockets = (
                court.dockets
                .exclude(Q(assigned_to_str="") & Q(referred_to_str=""))
                .filter(source__in=Docket.RECAP_SOURCES)
                .only("assigned_to_str", "referred_to_str", "date_filed")
            )
            # Pass the values as logger arguments so the message is only
            # formatted when the record is actually emitted.
            logger.info(
                "Processing %s dockets in %s", dockets.count(), court.pk
            )
            for docket in dockets:
                for judge_field in ("assigned_to_str", "referred_to_str"):
                    judge = getattr(docket, judge_field)
                    if not judge:
                        continue

                    name, title = normalize_judge_string(unidecode(judge))
                    if not name:
                        continue

                    # NOTE(review): assumes date_filed is never null for
                    # these dockets; a null value raises AttributeError
                    # here -- confirm against the Docket model.
                    year = docket.date_filed.year
                    # setdefault creates the per-person dict and the
                    # per-title Counter on first sight; missing Counter keys
                    # start at 0, so += 1 covers new and existing years.
                    titles = by_judge.setdefault(name, {})
                    titles.setdefault(title, Counter())[year] += 1

        self.export_files(out)
    def test_title_name_splitter(self):
        # Each case pairs a raw judge string with the (name, title) tuple
        # that normalize_judge_string is expected to return for it.
        cases = (
            (
                "Magistrate Judge George T. Swartz",
                ("George T. Swartz", "mag"),
            ),
            ("J. Frederick Motz", ("Frederick Motz", "jud")),
            ("Honorable Susan W. Wright", ("Susan W. Wright", "jud")),
        )

        for raw, expected in cases:
            self.assertEqual(expected, normalize_judge_string(raw))
    def test_title_name_splitter(self):
        # Each case pairs a raw judge string with the (name, title) tuple
        # that normalize_judge_string is expected to return for it.
        cases = (
            (
                'Magistrate Judge George T. Swartz',
                ('George T. Swartz', 'mag'),
            ),
            ('J. Frederick Motz', ('Frederick Motz', 'jud')),
            ('Honorable Susan W. Wright', ('Susan W. Wright', 'jud')),
        )

        for raw, expected in cases:
            self.assertEqual(expected, normalize_judge_string(raw))