Exemplo n.º 1
0
class StaticFilesTest(TestCase):
    """Check that ``serve_static_file`` serves mp3, txt and pdf files.

    ``setUp`` saves a docket, an audio file, an opinion cluster and two
    opinions whose local paths are the ``good_*_path`` values below. Each
    test then requests one of those paths and checks the status code,
    the Content-Type and that the Content-Disposition is inline.
    """

    # Paths handed to ``serve_static_file`` by the tests.
    good_mp3_path = 'mp3/2014/06/09/ander_v._leo.mp3'
    good_txt_path = 'txt/2015/12/28/opinion_text.txt'
    good_pdf_path = (
        'pdf/2013/06/12/'
        'in_re_motion_for_consent_to_disclosure_of_court_records.pdf'
    )

    def setUp(self):
        """Save the database rows that reference the three test files."""
        self.court = Court.objects.get(pk='test')
        self.docket = Docket(case_name=u'Docket',
                             court=self.court,
                             source=Docket.DEFAULT)
        self.docket.save()

        # index=False is passed to save() on every object below;
        # presumably it skips search indexing -- TODO confirm.
        self.audio = Audio(local_path_original_file=self.good_mp3_path,
                           local_path_mp3=self.good_mp3_path,
                           docket=self.docket,
                           blocked=False,
                           case_name_full='Ander v. Leo',
                           date_created=datetime.date(2014, 6, 9))
        self.audio.save(index=False)

        self.opinioncluster = OpinionCluster(
            case_name=u'Hotline Bling',
            docket=self.docket,
            date_filed=datetime.date(2015, 12, 14),
        )
        self.opinioncluster.save(index=False)

        self.txtopinion = Opinion(cluster=self.opinioncluster,
                                  type='Lead Opinion',
                                  local_path=self.good_txt_path)
        self.txtopinion.save(index=False)

        self.pdfopinion = Opinion(cluster=self.opinioncluster,
                                  type='Lead Opinion',
                                  local_path=self.good_pdf_path)
        self.pdfopinion.save(index=False)

    def test_serve_static_file_serves_mp3(self):
        """The mp3 path is served inline as audio/mpeg with a 200."""
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_mp3_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response['Content-Type'], 'audio/mpeg')
        self.assertIn('inline;', response['Content-Disposition'])

    def test_serve_static_file_serves_txt(self):
        """The txt path is served inline as text/plain with its content."""
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_txt_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response['Content-Type'], 'text/plain')
        self.assertIn('inline;', response['Content-Disposition'])
        self.assertIn('FOR THE DISTRICT OF COLUMBIA CIRCUIT', response.content)

    def test_serve_static_file_serves_pdf(self):
        """The pdf path is served inline as application/pdf with a 200."""
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_pdf_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response['Content-Type'], 'application/pdf')
        self.assertIn('inline;', response['Content-Disposition'])
Exemplo n.º 2
0
    def setUp(self):
        """Save the docket, audio file, cluster and opinions the tests use."""
        self.court = Court.objects.get(pk='test')

        # Parent docket shared by the audio file and the opinion cluster.
        docket = Docket(
            case_name=u'Docket',
            court=self.court,
            source=Docket.DEFAULT,
        )
        self.docket = docket
        docket.save()

        # Both audio path fields point at the same mp3.
        mp3_path = self.good_mp3_path
        audio = Audio(
            local_path_original_file=mp3_path,
            local_path_mp3=mp3_path,
            docket=docket,
            blocked=False,
            case_name_full='Ander v. Leo',
            date_created=datetime.date(2014, 6, 9),
        )
        self.audio = audio
        audio.save(index=False)

        cluster = OpinionCluster(
            case_name=u'Hotline Bling',
            docket=docket,
            date_filed=datetime.date(2015, 12, 14),
        )
        self.opinioncluster = cluster
        cluster.save(index=False)

        # One opinion per document type; they differ only in local_path.
        for attr_name, path in (
            ('txtopinion', self.good_txt_path),
            ('pdfopinion', self.good_pdf_path),
        ):
            opinion = Opinion(
                cluster=cluster,
                type='Lead Opinion',
                local_path=path,
            )
            setattr(self, attr_name, opinion)
            opinion.save(index=False)
    def make_objects(self, item, court, sha1_hash, content):
        """Build a Docket and an Audio object for one scraped item.

        Neither model instance is saved here; the binary is attached to
        the Audio object with ``save=False``.

        :param item: Mapping of scraped values. Reads "blocked_statuses",
            "case_names", "case_dates" and "download_urls", and optionally
            "case_name_shorts", "docket_numbers" and "judges".
        :param court: Court set on the docket and on any ErrorLog entry.
        :param sha1_hash: Stored on the Audio object as ``sha1``.
        :param content: File content wrapped in a ContentFile and attached
            to the Audio object.
        :return: ``(docket, audio_file, error)``. ``error`` is True when
            attaching the file raised; the failure is then logged and
            recorded in an ErrorLog instead of being re-raised.
        """
        blocked = item["blocked_statuses"]
        date_blocked = date.today() if blocked else None

        case_name_short = item.get(
            "case_name_shorts"
        ) or self.cnt.make_case_name_short(item["case_names"])

        docket = Docket(
            docket_number=item.get("docket_numbers", ""),
            case_name=item["case_names"],
            case_name_short=case_name_short,
            court=court,
            blocked=blocked,
            date_blocked=date_blocked,
            date_argued=item["case_dates"],
            source=Docket.SCRAPER,
        )

        audio_file = Audio(
            judges=item.get("judges", ""),
            source="C",
            case_name=item["case_names"],
            case_name_short=case_name_short,
            sha1=sha1_hash,
            download_url=item["download_urls"],
            blocked=blocked,
            date_blocked=date_blocked,
        )

        error = False
        try:
            cf = ContentFile(content)
            extension = get_extension(content)
            if extension not in [".mp3", ".wma"]:
                # Fall back on whatever follows the last dot of the URL.
                extension = (
                    "." + item["download_urls"].lower().rsplit(".", 1)[1]
                )
            # See bitbucket issue #215 for why this must be
            # lower-cased.
            file_name = trunc(item["case_names"].lower(), 75) + extension
            audio_file.file_with_date = docket.date_argued
            audio_file.local_path_original_file.save(file_name, cf, save=False)
        except Exception:
            # Deliberately broad so one bad item is reported rather than
            # aborting the caller, but narrower than a bare ``except:`` so
            # KeyboardInterrupt and SystemExit still propagate.
            msg = (
                "Unable to save binary to disk. Deleted audio file: %s.\n "
                "%s" % (item["case_names"], traceback.format_exc())
            )
            logger.critical(msg.encode("utf-8"))
            ErrorLog(log_level="CRITICAL", court=court, message=msg).save()
            error = True

        return docket, audio_file, error
Exemplo n.º 4
0
    def make_objects(self, item, court, sha1_hash, content):
        """Create the Docket and Audio objects for a single scraped item.

        The model instances are returned without being saved; the file is
        attached to the Audio object with ``save=False``.

        :param item: Mapping of scraped values. Required keys are
            'blocked_statuses', 'case_names', 'case_dates' and
            'download_urls'; 'case_name_shorts', 'docket_numbers' and
            'judges' are optional.
        :param court: Court used for the docket and for ErrorLog entries.
        :param sha1_hash: Value stored in the Audio object's ``sha1``.
        :param content: File content to attach to the Audio object.
        :return: Tuple ``(docket, audio_file, error)`` where ``error`` is
            True if attaching the file failed (the failure is logged and
            written to an ErrorLog, not raised).
        """
        blocked = item['blocked_statuses']
        date_blocked = date.today() if blocked else None

        case_name_short = (item.get('case_name_shorts') or
                           self.cnt.make_case_name_short(item['case_names']))

        docket = Docket(
            docket_number=item.get('docket_numbers', ''),
            case_name=item['case_names'],
            case_name_short=case_name_short,
            court=court,
            blocked=blocked,
            date_blocked=date_blocked,
            date_argued=item['case_dates'],
            source=Docket.SCRAPER,
        )

        audio_file = Audio(
            judges=item.get('judges', ''),
            source='C',
            case_name=item['case_names'],
            case_name_short=case_name_short,
            sha1=sha1_hash,
            download_url=item['download_urls'],
            blocked=blocked,
            date_blocked=date_blocked,
        )

        error = False
        try:
            cf = ContentFile(content)
            extension = get_extension(content)
            if extension not in ['.mp3', '.wma']:
                # Use the text after the last dot of the download URL.
                url = item['download_urls'].lower()
                extension = '.' + url.rsplit('.', 1)[1]
            # See bitbucket issue #215 for why this must be
            # lower-cased.
            file_name = trunc(item['case_names'].lower(), 75) + extension
            audio_file.file_with_date = docket.date_argued
            audio_file.local_path_original_file.save(file_name, cf, save=False)
        except Exception:
            # Broad on purpose (best-effort per item), but unlike a bare
            # ``except:`` it lets KeyboardInterrupt/SystemExit through.
            msg = 'Unable to save binary to disk. Deleted audio file: %s.\n ' \
                  '%s' % (item['case_names'], traceback.format_exc())
            logger.critical(msg.encode('utf-8'))
            ErrorLog(log_level='CRITICAL', court=court, message=msg).save()
            error = True

        return docket, audio_file, error
    def make_objects(self, item, court, sha1_hash, content):
        """Assemble an unsaved Docket/Audio pair from one scraped item.

        :param item: Mapping of scraped values; 'blocked_statuses',
            'case_names', 'case_dates' and 'download_urls' are read
            unconditionally, 'case_name_shorts', 'docket_numbers' and
            'judges' only if present.
        :param court: Court attached to the docket and to ErrorLog rows.
        :param sha1_hash: Stored as ``sha1`` on the Audio object.
        :param content: File content attached to the Audio object via a
            ContentFile (with ``save=False``, so the model is not saved).
        :return: ``(docket, audio_file, error)``; ``error`` is True when
            the file could not be attached. In that case the traceback is
            logged and saved in an ErrorLog rather than raised.
        """
        blocked = item['blocked_statuses']
        date_blocked = date.today() if blocked else None

        case_name_short = (item.get('case_name_shorts') or
                           self.cnt.make_case_name_short(item['case_names']))

        docket = Docket(
            docket_number=item.get('docket_numbers', ''),
            case_name=item['case_names'],
            case_name_short=case_name_short,
            court=court,
            blocked=blocked,
            date_blocked=date_blocked,
            date_argued=item['case_dates'],
            source=Docket.SCRAPER,
        )

        audio_file = Audio(
            judges=item.get('judges', ''),
            source='C',
            case_name=item['case_names'],
            case_name_short=case_name_short,
            sha1=sha1_hash,
            download_url=item['download_urls'],
            blocked=blocked,
            date_blocked=date_blocked,
        )

        error = False
        try:
            cf = ContentFile(content)
            extension = get_extension(content)
            if extension not in ['.mp3', '.wma']:
                # Extension detection did not give mp3/wma: take the
                # suffix of the download URL instead.
                extension = '.' + item['download_urls'].lower().rsplit('.', 1)[1]
            # See bitbucket issue #215 for why this must be
            # lower-cased.
            file_name = trunc(item['case_names'].lower(), 75) + extension
            audio_file.file_with_date = docket.date_argued
            audio_file.local_path_original_file.save(file_name, cf, save=False)
        except Exception:
            # Any ordinary failure is reported through ``error``; a bare
            # ``except:`` would also have trapped KeyboardInterrupt and
            # SystemExit, which should be allowed to stop the process.
            msg = 'Unable to save binary to disk. Deleted audio file: %s.\n ' \
                  '%s' % (item['case_names'], traceback.format_exc())
            logger.critical(msg.encode('utf-8'))
            ErrorLog(log_level='CRITICAL', court=court, message=msg).save()
            error = True

        return docket, audio_file, error
Exemplo n.º 6
0
    def setUp(self):
        """Create and save the fixtures behind the three static-file paths."""
        self.court = Court.objects.get(pk='test')

        docket_kwargs = {
            'case_name': u'Docket',
            'court': self.court,
            'source': Docket.DEFAULT,
        }
        self.docket = Docket(**docket_kwargs)
        self.docket.save()

        # The original and the mp3 path fields both use the mp3 path.
        audio_kwargs = {
            'local_path_original_file': self.good_mp3_path,
            'local_path_mp3': self.good_mp3_path,
            'docket': self.docket,
            'blocked': False,
            'case_name_full': 'Ander v. Leo',
            'date_created': datetime.date(2014, 6, 9),
        }
        self.audio = Audio(**audio_kwargs)
        self.audio.save(index=False)

        cluster_kwargs = {
            'case_name': u'Hotline Bling',
            'docket': self.docket,
            'date_filed': datetime.date(2015, 12, 14),
        }
        self.opinioncluster = OpinionCluster(**cluster_kwargs)
        self.opinioncluster.save(index=False)

        # The two opinions share everything except their local_path.
        shared_opinion_kwargs = {
            'cluster': self.opinioncluster,
            'type': 'Lead Opinion',
        }

        self.txtopinion = Opinion(local_path=self.good_txt_path,
                                  **shared_opinion_kwargs)
        self.txtopinion.save(index=False)

        self.pdfopinion = Opinion(local_path=self.good_pdf_path,
                                  **shared_opinion_kwargs)
        self.pdfopinion.save(index=False)
def make_objects(
    item: Dict[str, Any],
    court: Court,
    sha1_hash: str,
    content: str,
) -> Tuple[Docket, Audio]:
    """Build a Docket and an Audio object from one scraped item.

    The model instances are not saved here; the file content is attached
    to the Audio object with ``save=False``.

    :param item: Scraped values. "blocked_statuses", "case_names",
        "case_dates" and "download_urls" are required; "case_name_shorts",
        "docket_numbers", "judges", "source" and "cluster_source" are
        optional.
    :param court: Court assigned to the docket.
    :param sha1_hash: Stored on the Audio object as ``sha1``.
    :param content: File content wrapped in a ContentFile.
    :return: The ``(docket, audio_file)`` pair.
    """
    is_blocked = item["blocked_statuses"]
    blocked_on = date.today() if is_blocked else None

    full_name = item["case_names"]
    short_name = item.get("case_name_shorts")
    if not short_name:
        short_name = cnt.make_case_name_short(full_name)

    # Explicit sources in the item win over the scraper defaults.
    docket_source = item.get("source") or Docket.SCRAPER
    docket = Docket(
        docket_number=item.get("docket_numbers", ""),
        case_name=full_name,
        case_name_short=short_name,
        court=court,
        blocked=is_blocked,
        date_blocked=blocked_on,
        date_argued=item["case_dates"],
        source=docket_source,
    )

    audio_source = item.get("cluster_source") or "C"
    audio_file = Audio(
        judges=item.get("judges", ""),
        source=audio_source,
        case_name=full_name,
        case_name_short=short_name,
        sha1=sha1_hash,
        download_url=item["download_urls"],
        blocked=is_blocked,
        date_blocked=blocked_on,
    )

    binary = ContentFile(content)
    suffix = get_extension(content)
    if suffix not in (".mp3", ".wma"):
        # Fall back on the text after the last dot of the download URL.
        lowered_url = item["download_urls"].lower()
        suffix = "." + lowered_url.rsplit(".", 1)[1]
    target_name = trunc(full_name.lower(), 75) + suffix
    audio_file.file_with_date = docket.date_argued
    audio_file.local_path_original_file.save(target_name, binary, save=False)

    return docket, audio_file
Exemplo n.º 8
0
    def migrate_opinions_oral_args_and_dockets(self):
        """Copy old dockets, opinions and audio files to the new database.

        Every docket in the 'old' database is visited. Only its first
        document and first audio file are migrated. For each docket:

        * a new docket is saved to 'default' with the same primary key,
          named after the document's citation, or after the audio file
          when there is no document;
        * if a document exists, an opinion cluster and an opinion are
          saved, both using the document's primary key;
        * if an audio file exists, a new audio file is saved.

        Dockets with neither a document nor an audio file are skipped,
        because there is nothing to take the case names from.

        :return: None
        """
        self.stdout.write("Migrating dockets, audio files, and opinions to new "
                          "database...")
        q = DocketOld.objects.using('old').all()
        old_dockets = queryset_generator(q)
        num_dockets = q.count()

        progress = 0
        self._print_progress(progress, num_dockets)
        for old_docket in old_dockets:
            # First do the docket, then create the cluster and opinion objects.
            try:
                old_audio = old_docket.audio_files.all()[0]
            except IndexError:
                old_audio = None
            try:
                old_document = old_docket.documents.all()[0]
            except IndexError:
                old_document = None

            if old_document is None and old_audio is None:
                # Nothing to migrate and no source for the case names.
                progress += 1
                self._print_progress(progress, num_dockets)
                continue

            # Reset per docket so values from a previous iteration can
            # never leak into this one.
            old_citation = None
            if old_document is not None:
                old_citation = old_document.citation
                (old_doc_case_name,
                 old_doc_case_name_full,
                 old_doc_case_name_short) = self._get_case_names(
                    old_citation.case_name)
            if old_audio is not None:
                (old_audio_case_name,
                 old_audio_case_name_full,
                 old_audio_case_name_short) = self._get_case_names(
                    old_audio.case_name)

            # The docket is named after the document when there is one,
            # otherwise after the audio file.
            if old_document is not None:
                docket_case_name = old_doc_case_name
                docket_case_name_full = old_doc_case_name_full
                docket_case_name_short = old_doc_case_name_short
                docket_number = self._none_to_blank(
                    old_citation.docket_number)
            else:
                docket_case_name = old_audio_case_name
                docket_case_name_full = old_audio_case_name_full
                docket_case_name_short = old_audio_case_name_short
                # The docket number is only read from the citation.
                docket_number = u''

            court = CourtNew.objects.get(pk=old_docket.court_id)  # Courts are in place thanks to initial data.

            new_docket = DocketNew(
                pk=old_docket.pk,
                date_modified=old_docket.date_modified,
                date_created=old_docket.date_modified,
                court=court,
                case_name=docket_case_name,
                case_name_full=docket_case_name_full,
                case_name_short=docket_case_name_short,
                slug=self._none_to_blank(old_docket.slug),
                docket_number=docket_number,
                date_blocked=old_docket.date_blocked,
                blocked=old_docket.blocked,
            )
            if old_audio is not None:
                new_docket.date_argued = old_audio.date_argued
            new_docket.save(using='default')

            if old_document is not None:
                new_opinion_cluster = OpinionClusterNew(
                    pk=old_document.pk,
                    docket=new_docket,
                    judges=self._none_to_blank(old_document.judges),
                    date_modified=old_document.date_modified,
                    date_created=old_document.date_modified,
                    date_filed=old_document.date_filed,
                    slug=self._none_to_blank(old_citation.slug),
                    citation_id=old_document.citation_id,
                    case_name_short=old_doc_case_name_short,
                    case_name=old_doc_case_name,
                    case_name_full=old_doc_case_name_full,
                    federal_cite_one=self._none_to_blank(
                        old_citation.federal_cite_one),
                    federal_cite_two=self._none_to_blank(
                        old_citation.federal_cite_two),
                    federal_cite_three=self._none_to_blank(
                        old_citation.federal_cite_three),
                    state_cite_one=self._none_to_blank(
                        old_citation.state_cite_one),
                    state_cite_two=self._none_to_blank(
                        old_citation.state_cite_two),
                    state_cite_three=self._none_to_blank(
                        old_citation.state_cite_three),
                    state_cite_regional=self._none_to_blank(
                        old_citation.state_cite_regional),
                    specialty_cite_one=self._none_to_blank(
                        old_citation.specialty_cite_one),
                    scotus_early_cite=self._none_to_blank(
                        old_citation.scotus_early_cite),
                    lexis_cite=self._none_to_blank(old_citation.lexis_cite),
                    westlaw_cite=self._none_to_blank(old_citation.westlaw_cite),
                    neutral_cite=self._none_to_blank(old_citation.neutral_cite),
                    scdb_id=self._none_to_blank(
                        old_document.supreme_court_db_id),
                    source=old_document.source,
                    nature_of_suit=old_document.nature_of_suit,
                    citation_count=old_document.citation_count,
                    precedential_status=old_document.precedential_status,
                    date_blocked=old_document.date_blocked,
                    blocked=old_document.blocked,
                )
                new_opinion_cluster.save(
                    using='default',
                    index=False,
                )

                # The opinion reuses the document's primary key, as the
                # cluster above does.
                new_opinion = OpinionNew(
                    pk=old_document.pk,
                    cluster=new_opinion_cluster,
                    date_modified=old_document.date_modified,
                    date_created=old_document.time_retrieved,
                    type='010combined',
                    sha1=old_document.sha1,
                    download_url=old_document.download_url,
                    local_path=old_document.local_path,
                    plain_text=old_document.plain_text,
                    html=self._none_to_blank(old_document.html),
                    html_lawbox=self._none_to_blank(old_document.html_lawbox),
                    html_with_citations=old_document.html_with_citations,
                    extracted_by_ocr=old_document.extracted_by_ocr,
                )
                new_opinion.save(
                    using='default',
                    index=False,
                )

            if old_audio is not None:
                new_audio_file = AudioNew(
                    pk=old_audio.pk,
                    docket=new_docket,
                    source=old_audio.source,
                    case_name=old_audio_case_name,
                    case_name_short=old_audio_case_name_short,
                    case_name_full=old_audio_case_name_full,
                    judges=self._none_to_blank(old_audio.judges),
                    date_created=old_audio.time_retrieved,
                    date_modified=old_audio.date_modified,
                    sha1=old_audio.sha1,
                    download_url=old_audio.download_url,
                    local_path_mp3=old_audio.local_path_mp3,
                    local_path_original_file=old_audio.local_path_original_file,
                    duration=old_audio.duration,
                    processing_complete=old_audio.processing_complete,
                    date_blocked=old_audio.date_blocked,
                    blocked=old_audio.blocked,
                )
                new_audio_file.save(
                    using='default',
                    index=False,
                )

            progress += 1
            self._print_progress(progress, num_dockets)
        self.stdout.write(u'')  # Newline
Exemplo n.º 9
0
class StaticFilesTest(TestCase):
    """Exercise ``serve_static_file`` for an mp3, a txt and a pdf file.

    ``setUp`` saves a docket, an audio file, an opinion cluster and two
    opinions that reference the ``good_*_path`` values. Every test asks
    for one path and checks the status code, the Content-Type and that
    the Content-Disposition is inline.
    """

    # Paths handed to ``serve_static_file`` by the tests.
    good_mp3_path = "mp3/2014/06/09/ander_v._leo.mp3"
    good_txt_path = "txt/2015/12/28/opinion_text.txt"
    good_pdf_path = (
        "pdf/2013/06/12/"
        "in_re_motion_for_consent_to_disclosure_of_court_records.pdf"
    )

    def setUp(self):
        """Save the database rows that point at the three test files."""
        self.court = Court.objects.get(pk="test")
        self.docket = Docket(case_name=u"Docket",
                             court=self.court,
                             source=Docket.DEFAULT)
        self.docket.save()

        # index=False is passed to save() on every object below;
        # presumably it skips search indexing -- TODO confirm.
        self.audio = Audio(
            local_path_original_file=self.good_mp3_path,
            local_path_mp3=self.good_mp3_path,
            docket=self.docket,
            blocked=False,
            case_name_full="Ander v. Leo",
            date_created=datetime.date(2014, 6, 9),
        )
        self.audio.save(index=False)

        self.opinioncluster = OpinionCluster(
            case_name=u"Hotline Bling",
            docket=self.docket,
            date_filed=datetime.date(2015, 12, 14),
        )
        self.opinioncluster.save(index=False)

        self.txtopinion = Opinion(
            cluster=self.opinioncluster,
            type="Lead Opinion",
            local_path=self.good_txt_path,
        )
        self.txtopinion.save(index=False)

        self.pdfopinion = Opinion(
            cluster=self.opinioncluster,
            type="Lead Opinion",
            local_path=self.good_pdf_path,
        )
        self.pdfopinion.save(index=False)

    def test_serve_static_file_serves_mp3(self):
        """The mp3 path comes back inline as audio/mpeg with a 200."""
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_mp3_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response["Content-Type"], "audio/mpeg")
        self.assertIn("inline;", response["Content-Disposition"])

    def test_serve_static_file_serves_txt(self):
        """The txt path comes back inline as text/plain with its content."""
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_txt_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response["Content-Type"], "text/plain")
        self.assertIn("inline;", response["Content-Disposition"])
        self.assertIn("FOR THE DISTRICT OF COLUMBIA CIRCUIT", response.content)

    def test_serve_static_file_serves_pdf(self):
        """The pdf path comes back inline as application/pdf with a 200."""
        request = HttpRequest()
        response = serve_static_file(request, file_path=self.good_pdf_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response["Content-Type"], "application/pdf")
        self.assertIn("inline;", response["Content-Disposition"])
Exemplo n.º 10
0
def _download_and_save_item(item):
    """Download one queued item and save it as a Docket plus Audio pair.

    Returns without saving anything when the download raises, when
    ``get_binary_content`` returns a message, when an Audio with the same
    SHA1 already exists, or when the file cannot be attached.

    :param item: Mapping with the keys 'url', 'case_name', 'judges',
        'docket_number', 'court_code' and 'date_argued'.
    :return: None
    """
    thread_name = threading.current_thread().name
    logger.info("%s: Attempting to add item at: %s" %
                (thread_name, item['url']))
    try:
        msg, r = get_binary_content(
            item['url'],
            {},
        )
    except Exception:
        logger.info("%s: Unable to get item at: %s" %
                    (thread_name, item['url']))
        # Without this return, ``msg`` and ``r`` would be unbound (or left
        # over from an earlier item) in the code below.
        return

    if msg:
        logger.warning(msg)
        return

    sha1_hash = hashlib.sha1(r.content).hexdigest()
    if Audio.objects.filter(sha1=sha1_hash).exists():
        # Simpsons did it! Try the next one.
        logger.info("%s: Item already exists, moving to next item." %
                    thread_name)
        return

    # New item, onwards!
    logger.info('%s: Adding new document found at: %s' %
                (thread_name, item['url']))

    court = Court.objects.get(pk=item['court_code'])
    docket = Docket(
        case_name=item['case_name'],
        court=court,
        date_argued=item['date_argued'],
    )
    if item['docket_number']:
        # Set on the docket itself: the audio file has no docket attached
        # yet, so going through ``audio_file.docket`` cannot work here.
        docket.docket_number = item['docket_number']

    audio_file = Audio(
        source='H',
        sha1=sha1_hash,
        case_name=item['case_name'],
        download_url=item['url'],
        processing_complete=False,
    )
    if item['judges']:
        audio_file.judges = item['judges']

    # Make and associate the file object
    try:
        cf = ContentFile(r.content)
        extension = get_extension(r.content)
        if extension not in ['.mp3', '.wma']:
            extension = '.' + item['url'].rsplit('.', 1)[1]
        # See bitbucket issue #215 for why this must be
        # lower-cased.
        file_name = trunc(item['case_name'].lower(), 75) + extension
        audio_file.local_path_original_file.save(file_name, cf,
                                                 save=False)
    except Exception:
        msg = 'Unable to save binary. Deleted document: %s.\n%s' % \
              (item['case_name'], traceback.format_exc())
        logger.critical(msg)
        # Do not save a docket/audio pair whose file could not be stored.
        return

    docket.save()
    audio_file.docket = docket
    audio_file.save(index=False)

    # Queue processing with a random countdown of up to an hour.
    random_delay = random.randint(0, 3600)
    process_audio_file.apply_async(
        (audio_file.pk,),
        countdown=random_delay
    )

    logger.info("%s: Successfully added audio file %s: %s" %
                (thread_name,
                 audio_file.pk,
                 audio_file.case_name))


def download_and_save():
    """This function is run in many threads simultaneously. Each thread
    runs so long as there are items in the queue. Once an item is found, it's
    downloaded and saved.

    The number of items that can be concurrently saved is determined by the
    number of threads that are running this function.

    ``queue.task_done()`` is called exactly once for every item taken from
    the queue, whether the item was saved, skipped or failed, so that
    ``queue.join()`` can return.
    """
    while True:
        item = queue.get()
        try:
            _download_and_save_item(item)
        finally:
            queue.task_done()
Exemplo n.º 11
0
    def migrate_opinions_oral_args_and_dockets(self):
        """Migrate the core objects across, diffing as you go.

        Processes old dockets that were modified on or after ``self.start``
        or that have a document or audio file modified on or after it.
        Only the first document and first audio file of each docket are
        handled; dockets with neither are skipped.

        Dockets that already exist in 'default' are reused as they are.
        Documents that already exist as a cluster or opinion go through
        ``find_conflicts`` first and are only updated when it reports no
        conflict. Audio files are always written.

        :return: None
        """
        self.stdout.write("Migrating dockets, audio files, and opinions...")
        # Find dockets modified after date or with sub-items modified after
        # date.
        # NOTE(review): OR-ing filters across the documents/audio_files
        # relations may yield the same docket more than once -- confirm
        # whether .distinct() is wanted here.
        q = Q(date_modified__gte=self.start)
        q |= Q(documents__date_modified__gte=self.start)
        q |= Q(audio_files__date_modified__gte=self.start)
        old_dockets = DocketOld.objects.using('old').filter(q)

        for old_docket in old_dockets:
            try:
                old_audio = old_docket.audio_files.all()[0]
            except IndexError:
                old_audio = None
            try:
                old_document = old_docket.documents.all()[0]
            except IndexError:
                old_document = None
            if old_document is None and old_audio is None:
                continue

            if old_document is not None:
                old_citation = old_document.citation
                (old_docket.case_name,
                 old_docket.case_name_full,
                 old_docket.case_name_short) = self._get_case_names(
                    old_citation.case_name)
            else:
                # Reset explicitly so a citation left over from a previous
                # docket is never reused below.
                old_citation = None
                # Fall back on the docket if needed. Assumes the docket and
                # document case_names are always the same.
                (old_docket.case_name,
                 old_docket.case_name_full,
                 old_docket.case_name_short) = self._get_case_names(
                    old_docket.case_name)
            if old_audio is not None:
                (old_audio.case_name,
                 old_audio.case_name_full,
                 old_audio.case_name_short) = self._get_case_names(
                    old_audio.case_name)

            # Courts are in place thanks to initial data. Get the court.
            court = CourtNew.objects.get(pk=old_docket.court_id)

            # Do Dockets
            try:
                existing_docket = (DocketNew.objects.using('default').get(
                    pk=old_docket.pk))
            except DocketNew.DoesNotExist:
                existing_docket = None
            if existing_docket is not None:
                # Intersection. No need for complicated merge as all differences
                # have been resolved by hand.
                new_docket = existing_docket
            else:
                # New docket in old system. Create it in the new system.
                if old_citation is not None:
                    docket_number = self._none_to_blank(
                        old_citation.docket_number)
                else:
                    # Audio-only docket: there is no citation to read the
                    # docket number from.
                    docket_number = u''
                new_docket = DocketNew(
                    pk=old_docket.pk,
                    date_modified=old_docket.date_modified,
                    date_created=old_docket.date_modified,
                    court=court,
                    case_name=old_docket.case_name,
                    case_name_full=old_docket.case_name_full,
                    case_name_short=old_docket.case_name_short,
                    slug=self._none_to_blank(old_docket.slug),
                    docket_number=docket_number,
                    date_blocked=old_docket.date_blocked,
                    blocked=old_docket.blocked,
                )
                if old_audio is not None:
                    new_docket.date_argued = old_audio.date_argued
                new_docket.save(using='default')

            # Do Documents/Clusters
            if old_document is not None:
                try:
                    existing_oc = (
                        OpinionClusterNew.objects.using('default').get(
                            pk=old_document.pk))
                except OpinionClusterNew.DoesNotExist:
                    existing_oc = None
                try:
                    existing_o = (OpinionNew.objects.using('default').get(
                        pk=old_document.pk))
                except OpinionNew.DoesNotExist:
                    existing_o = None
                if existing_oc is not None or existing_o is not None:
                    # Run the conflict algo.
                    if self.find_conflicts(old_document, old_citation,
                                           old_docket, existing_oc,
                                           existing_o):
                        self.stdout.write("Found conflict. Resolve that.")
                    else:
                        # No conflicts. Update the existing item.
                        self.add_oc_and_o(old_document, old_citation,
                                          old_docket, new_docket)
                else:
                    # New item. Just add it.
                    self.add_oc_and_o(old_document, old_citation, old_docket,
                                      new_docket)

            # Finally we do Audio. No checks needed because we haven't changed
            # anything on the new server.
            if old_audio is not None:
                new_audio_file = AudioNew(
                    pk=old_audio.pk,
                    docket=new_docket,
                    source=old_audio.source,
                    case_name=old_audio.case_name,
                    case_name_short=old_audio.case_name_short,
                    case_name_full=old_audio.case_name_full,
                    judges=self._none_to_blank(old_audio.judges),
                    date_created=old_audio.time_retrieved,
                    date_modified=old_audio.date_modified,
                    sha1=old_audio.sha1,
                    download_url=old_audio.download_url,
                    local_path_mp3=old_audio.local_path_mp3,
                    local_path_original_file=(
                        old_audio.local_path_original_file),
                    duration=old_audio.duration,
                    processing_complete=old_audio.processing_complete,
                    date_blocked=old_audio.date_blocked,
                    blocked=old_audio.blocked,
                )
                new_audio_file.save(
                    using='default',
                    index=False,
                )
Exemplo n.º 12
0
class StaticFilesTest(TestCase):
    """Check that serve_static_file returns mp3, txt and pdf files inline.

    setUp expects a Court with pk 'test' to already exist in the test
    database. The three ``good_*_path`` values are the file paths handed to
    serve_static_file; presumably matching files must exist in the test
    storage for the 200 responses asserted below -- confirm against the test
    assets.
    """
    good_mp3_path = 'mp3/2014/06/09/ander_v._leo.mp3'
    good_txt_path = 'txt/2015/12/28/opinion_text.txt'
    good_pdf_path = 'pdf/2013/06/12/' + \
                    'in_re_motion_for_consent_to_disclosure_of_court_records.pdf'

    def setUp(self):
        """Create a docket with one audio file and one cluster of two opinions.

        Everything except the docket is saved with ``index=False``.
        """
        self.court = Court.objects.get(pk='test')
        self.docket = Docket(case_name=u'Docket', court=self.court, source=Docket.DEFAULT)
        self.docket.save()

        self.audio = Audio(
            local_path_original_file=self.good_mp3_path,
            local_path_mp3=self.good_mp3_path,
            docket=self.docket,
            blocked=False,
            case_name_full='Ander v. Leo',
            date_created=datetime.date(2014, 6, 9)
        )
        self.audio.save(index=False)

        self.opinioncluster = OpinionCluster(
            case_name=u'Hotline Bling',
            docket=self.docket,
            date_filed=datetime.date(2015, 12, 14),
        )
        self.opinioncluster.save(index=False)

        self.txtopinion = Opinion(
            cluster=self.opinioncluster,
            type='Lead Opinion',
            local_path=self.good_txt_path
        )
        self.txtopinion.save(index=False)

        self.pdfopinion = Opinion(
            cluster=self.opinioncluster,
            type='Lead Opinion',
            local_path=self.good_pdf_path
        )
        self.pdfopinion.save(index=False)

    def _assert_served_inline(self, file_path, content_type):
        """Serve ``file_path`` and check the parts common to every file type.

        Asserts a 200 status, the given Content-Type and an inline
        Content-Disposition.

        :param file_path: Path passed to serve_static_file as ``file_path``.
        :param content_type: Expected value of the Content-Type header.
        :return: The response, so callers can make further assertions.
        """
        response = serve_static_file(HttpRequest(), file_path=file_path)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response['Content-Type'], content_type)
        self.assertIn('inline;', response['Content-Disposition'])
        return response

    def test_serve_static_file_serves_mp3(self):
        """The mp3 is served inline as audio/mpeg."""
        self._assert_served_inline(self.good_mp3_path, 'audio/mpeg')

    def test_serve_static_file_serves_txt(self):
        """The text opinion is served inline as text/plain with its body."""
        response = self._assert_served_inline(self.good_txt_path,
                                              'text/plain')
        self.assertIn(
            'FOR THE DISTRICT OF COLUMBIA CIRCUIT',
            response.content
        )

    def test_serve_static_file_serves_pdf(self):
        """The pdf opinion is served inline as application/pdf."""
        self._assert_served_inline(self.good_pdf_path, 'application/pdf')
    def migrate_opinions_oral_args_and_dockets(self):
        """Migrate the core objects across, diffing as you go.

        Selects every docket in the 'old' database that was modified on or
        after ``self.start``, or that has a document or audio file modified
        on or after it. For each such docket, only its first document and
        first audio file are considered; dockets with neither are skipped.

        Dockets that already exist in the 'default' database (same pk) are
        reused untouched. Documents that already exist there as an opinion
        cluster or opinion are only written when ``find_conflicts`` reports
        no conflict; otherwise a message is written to stdout and the
        document is left alone. Audio files are always written.

        :return: None
        """
        self.stdout.write("Migrating dockets, audio files, and opinions...")
        # Find dockets modified after date or with sub-items modified after
        # date.
        q = Q(date_modified__gte=self.start)
        q |= Q(documents__date_modified__gte=self.start)
        q |= Q(audio_files__date_modified__gte=self.start)
        old_dockets = DocketOld.objects.using('old').filter(q)

        for old_docket in old_dockets:
            try:
                old_audio = old_docket.audio_files.all()[0]
            except IndexError:
                old_audio = None
            try:
                old_document = old_docket.documents.all()[0]
            except IndexError:
                old_document = None
            if old_document is None and old_audio is None:
                continue

            if old_document is not None:
                old_citation = old_document.citation
                case_name_source = old_citation.case_name
            else:
                # Fall back on the docket if needed. Assumes the docket and
                # document case_names are always the same.
                # Reset the citation explicitly so an audio-only docket never
                # picks up the citation left over from a previous iteration.
                old_citation = None
                case_name_source = old_docket.case_name
            (old_docket.case_name,
             old_docket.case_name_full,
             old_docket.case_name_short) = self._get_case_names(
                case_name_source)
            if old_audio is not None:
                (old_audio.case_name,
                 old_audio.case_name_full,
                 old_audio.case_name_short) = self._get_case_names(
                    old_audio.case_name)

            # Courts are in place thanks to initial data. Get the court.
            court = CourtNew.objects.get(pk=old_docket.court_id)

            # Do Dockets
            try:
                existing_docket = (DocketNew.objects
                                   .using('default')
                                   .get(pk=old_docket.pk))
            except DocketNew.DoesNotExist:
                existing_docket = None
            if existing_docket is not None:
                # Intersection. No need for complicated merge as all differences
                # have been resolved by hand.
                new_docket = existing_docket
            else:
                # New docket in old system. Create it in the new system.
                if old_citation is not None:
                    docket_number = self._none_to_blank(
                        old_citation.docket_number)
                else:
                    # Audio-only docket: there is no citation to read from.
                    # NOTE(review): if the old audio model carries its own
                    # docket number it could be used here -- confirm.
                    docket_number = ''
                new_docket = DocketNew(
                    pk=old_docket.pk,
                    date_modified=old_docket.date_modified,
                    date_created=old_docket.date_modified,
                    court=court,
                    case_name=old_docket.case_name,
                    case_name_full=old_docket.case_name_full,
                    case_name_short=old_docket.case_name_short,
                    slug=self._none_to_blank(old_docket.slug),
                    docket_number=docket_number,
                    date_blocked=old_docket.date_blocked,
                    blocked=old_docket.blocked,
                )
                if old_audio is not None:
                    new_docket.date_argued = old_audio.date_argued
                new_docket.save(using='default')

            # Do Documents/Clusters
            if old_document is not None:
                try:
                    existing_oc = (OpinionClusterNew.objects
                                   .using('default')
                                   .get(pk=old_document.pk))
                except OpinionClusterNew.DoesNotExist:
                    existing_oc = None
                try:
                    existing_o = (OpinionNew.objects
                                  .using('default')
                                  .get(pk=old_document.pk))
                except OpinionNew.DoesNotExist:
                    existing_o = None
                if existing_oc is not None or existing_o is not None:
                    # Run the conflict algo.
                    if self.find_conflicts(old_document, old_citation,
                                           old_docket, existing_oc,
                                           existing_o):
                        self.stdout.write("Found conflict. Resolve that.")
                    else:
                        # No conflicts. Update the existing item.
                        self.add_oc_and_o(old_document, old_citation,
                                          old_docket, new_docket)
                else:
                    # New item. Just add it.
                    self.add_oc_and_o(old_document, old_citation, old_docket,
                                      new_docket)

            # Finally we do Audio. No checks needed because we haven't changed
            # anything on the new server.
            if old_audio is not None:
                new_audio_file = AudioNew(
                    pk=old_audio.pk,
                    docket=new_docket,
                    source=old_audio.source,
                    case_name=old_audio.case_name,
                    case_name_short=old_audio.case_name_short,
                    case_name_full=old_audio.case_name_full,
                    judges=self._none_to_blank(old_audio.judges),
                    date_created=old_audio.time_retrieved,
                    date_modified=old_audio.date_modified,
                    sha1=old_audio.sha1,
                    download_url=old_audio.download_url,
                    local_path_mp3=old_audio.local_path_mp3,
                    local_path_original_file=old_audio.local_path_original_file,
                    duration=old_audio.duration,
                    processing_complete=old_audio.processing_complete,
                    date_blocked=old_audio.date_blocked,
                    blocked=old_audio.blocked,
                )
                new_audio_file.save(
                    using='default',
                    index=False,
                )
def download_and_save():
    """Worker loop: take items off the shared queue, download and save them.

    This function is run in many threads simultaneously and never returns.
    The number of items that can be concurrently saved is determined by the
    number of threads that are running this function.

    Each queue item is a dict read with the keys 'url', 'case_name',
    'judges', 'docket_number', 'court_code' and 'date_argued'. For every
    item the binary at item['url'] is downloaded; if an Audio with the same
    SHA1 already exists the item is skipped, otherwise a Docket and an Audio
    are created, the downloaded content is stored on the Audio and
    process_audio_file is scheduled for it after a random delay of up to an
    hour.

    Every exit path handled here marks the item as done exactly once via
    queue.task_done(), so that a queue.join() in the caller can complete.
    """
    while True:
        item = queue.get()
        thread_name = threading.current_thread().name
        logger.info("%s: Attempting to add item at: %s" %
                    (thread_name, item['url']))
        try:
            msg, r = get_binary_content(
                item['url'],
                {},
            )
        except Exception:
            logger.info("%s: Unable to get item at: %s" %
                        (thread_name, item['url']))
            queue.task_done()
            # Nothing was downloaded, so there is nothing to process below.
            continue

        if msg:
            logger.warning(msg)
            queue.task_done()
            continue

        sha1_hash = hashlib.sha1(r.content).hexdigest()
        if Audio.objects.filter(sha1=sha1_hash).exists():
            # Simpsons did it! Try the next one.
            logger.info("%s: Item already exists, moving to next item." %
                        thread_name)
            queue.task_done()
            continue

        # New item, onwards!
        logger.info('%s: Adding new document found at: %s' %
                    (thread_name, item['url']))

        court = Court.objects.get(pk=item['court_code'])
        docket = Docket(
            case_name=item['case_name'],
            court=court,
            date_argued=item['date_argued'],
        )
        if item['docket_number']:
            # The docket number lives on the docket; the audio file has no
            # docket attached yet at this point.
            docket.docket_number = item['docket_number']

        audio_file = Audio(
            source='H',
            sha1=sha1_hash,
            case_name=item['case_name'],
            download_url=item['url'],
            processing_complete=False,
        )
        if item['judges']:
            audio_file.judges = item['judges']

        # Make and associate the file object
        try:
            cf = ContentFile(r.content)
            extension = get_extension(r.content)
            if extension not in ['.mp3', '.wma']:
                # Fall back on whatever follows the last dot in the URL.
                extension = '.' + item['url'].rsplit('.', 1)[1]
            # See bitbucket issue #215 for why this must be
            # lower-cased.
            file_name = trunc(item['case_name'].lower(), 75) + extension
            audio_file.local_path_original_file.save(file_name,
                                                     cf,
                                                     save=False)
        except Exception:
            msg = 'Unable to save binary. Deleted document: %s.\n%s' % \
                  (item['case_name'], traceback.format_exc())
            logger.critical(msg)
            queue.task_done()
            # Do not persist a docket/audio pair whose file failed to save.
            continue

        docket.save()
        audio_file.docket = docket
        audio_file.save(index=False)

        random_delay = random.randint(0, 3600)
        process_audio_file.apply_async((audio_file.pk, ),
                                       countdown=random_delay)

        logger.info("%s: Successfully added audio file %s: %s" %
                    (thread_name, audio_file.pk, audio_file.case_name))
        queue.task_done()