Exemplo n.º 1
0
def test_overlapping_error():
    """Check that a lose list holding an important value raises an error.

    The merge is expected to fail with
    OverlappingLoseAndImportantException, and the exception text must
    mention that the lose values overlap with important ones.
    """
    scraper_results = [[Meta2()]]
    lose_values = ["key2-3"]
    expected_message = (
        "The given lose dict contains values that are marked as important")

    with pytest.raises(OverlappingLoseAndImportantException) as raised:
        generate_metadata_dict(scraper_results, lose_values)

    assert expected_message in six.text_type(raised.value)
Exemplo n.º 2
0
def test_generate_metadata_dict():
    """Check the stream dict merged from three metadata objects.

    Each metadata object is wrapped in its own single-item result list,
    and the merged outcome is compared against two expected streams.
    """
    # Instantiated in the same order as before: Meta1, Meta2, Meta3.
    scraper_results = [[meta_class()] for meta_class in (Meta1, Meta2, Meta3)]
    lose_values = ["value2-1"]

    merged = generate_metadata_dict(scraper_results, lose_values)

    first_stream = {
        "index": 0,
        "key1": "value1-1",
        "key2": "value2",
        "key3": "key2-3",
        "key4": "importantvalue",
        "mimetype": "mime",
        "version": 1.0,
        "stream_type": "binary",
    }
    second_stream = {
        "index": 1,
        "key1": "value1",
        "key2": "value2",
        "mimetype": "anothermime",
        "version": 2,
        "stream_type": "audio",
    }
    assert merged == {0: first_stream, 1: second_stream}
Exemplo n.º 3
0
def test_generate_metadata_dict(meta_class_fx, meta_classes, lose, valid_dict,
                                expected_conflicts):
    """Check both the merged metadata dict and the reported conflicts.

    Covers a successful merge as well as merges with conflicting
    metadata, both while filling important values and while merging
    the results. The merged dict is compared only when ``valid_dict``
    is truthy; the conflicts are always compared against
    ``expected_conflicts``.
    """
    # One single-item result list per metadata class, in the given order.
    scraper_results = [
        [meta_class_fx(meta_class)] for meta_class in meta_classes
    ]
    merged, found_conflicts = generate_metadata_dict(scraper_results, lose)

    if valid_dict:
        expected_streams = {
            0: {
                "index": 0,
                "key1": "value1-1",
                "key2": "value2",
                "key3": "key2-3",
                "key4": "importantvalue",
                "mimetype": "mime",
                "version": 1.0,
                "stream_type": "binary",
            },
            1: {
                "index": 1,
                "key1": "value1",
                "key2": "value2",
                "mimetype": "anothermime",
                "version": 2,
                "stream_type": "audio",
            },
        }
        assert merged == expected_streams

    assert found_conflicts == expected_conflicts
Exemplo n.º 4
0
 def scrape_file(self):
     """
     Merge the already collected scraper results; nothing is scraped here.

     The merged streams are stored in ``self.streams`` and every conflict
     returned by the merge is appended to ``self._errors``.
     """
     self.streams, merge_conflicts = generate_metadata_dict(
         self._scraper_results, LOSE)
     for conflict in merge_conflicts:
         self._errors.append(conflict)
     self._messages.append("Scraper results merged into streams")
Exemplo n.º 5
0
    def scrape(self, check_wellformed=True):
        """Detect the file type, run the matching scrapers and merge results.

        When no MIME type is available after detection, ``self.streams``
        is set to an empty dict and no scraper is run.

        :check_wellformed: True, full scraping; False, skip well-formed check.
        """
        self.detect_filetype()

        if not self.mimetype:
            # File not found or MIME type could not be determined
            self.streams = {}
            return

        # Pass the detected MIME type on to the scrapers through the params.
        self._params["mimetype_guess"] = self.mimetype

        matching_scrapers = iter_scrapers(
            mimetype=self.mimetype,
            version=self.version,
            check_wellformed=check_wellformed,
            params=self._params)
        for scraper_class in matching_scrapers:
            self._scrape_file(
                scraper_class(self.filename, check_wellformed, self._params))

        self.streams = generate_metadata_dict(self._scraper_results, LOSE)
        self._check_utf8(check_wellformed)
        self._check_mimetype_version()