示例#1
0
def test_from_swh_edge_cases_convert_invalid_utf8_bytes():
    """Check how ``from_swh`` reports bytes that are not valid UTF-8.

    The two bytes values end with ``\\xff``; the test expects them back
    as text with the offending byte escaped, and expects both keys to be
    listed (in any order) under ``decoding_failures``.
    """
    raw = {
        "a": "something",
        "b": "someone",
        "c": b"a name \xff",
        "d": b"an email \xff",
    }
    wanted = {
        "a": "something",
        "b": "someone",
        "c": "a name \\xff",
        "d": "an email \\xff",
        "decoding_failures": ["c", "d"],
    }

    converted = converters.from_swh(raw,
                                    hashess={"a", "b"},
                                    bytess={"c", "d"})

    for key in ("a", "b", "c", "d"):
        assert wanted[key] == converted[key]

    # The failure list is compared by size and membership only, so its
    # ordering is not part of what this test pins down.
    wanted_failures = wanted["decoding_failures"]
    reported_failures = converted["decoding_failures"]
    assert len(wanted_failures) == len(reported_failures)
    for key in wanted_failures:
        assert key in reported_failures
示例#2
0
def lookup_content_license(q):
    """Return license information from a specified content.

    Args:
        q: query string of the form <hash_algo:hash>

    Returns:
        The result of converting a dict holding the content sha1 under
        ``id`` and the list of license dicts (each stripped of its own
        ``id`` key) under ``facts``; None when the sha1 lookup or the
        license lookup comes back empty.

    """
    content_sha1 = _lookup_content_sha1(q)
    if not content_sha1:
        return None

    found = list(idx_storage.content_fossology_license_get([content_sha1]))
    if not found:
        return None

    facts = []
    for entry in found:
        fact = entry.to_dict()
        # The id is reported once at the top level instead of per fact.
        del fact["id"]
        facts.append(fact)

    return converters.from_swh({"id": content_sha1, "facts": facts},
                               hashess={"id"})
示例#3
0
 def content_get_license(cls, cnt_id):
     """Return converted license information for content ``cnt_id``.

     Only the first item produced by the indexer storage lookup is
     used; its entry for the content id becomes the ``facts`` value.
     """
     content_sha1 = hash_to_bytes(cnt_id)
     found = cls.idx_storage.content_fossology_license_get([content_sha1])
     first_item = next(found)
     payload = {'id': content_sha1, 'facts': first_item[content_sha1]}
     return converters.from_swh(payload, hashess={'id'})
示例#4
0
    def test_from_swh_edge_cases_convert_invalid_utf8_bytes(self):
        """Check how ``from_swh`` reports bytes that are not valid UTF-8.

        The two bytes values end with ``\\xff``; the test expects them
        back as text with the offending byte escaped, and expects both
        keys to be listed (in any order) under ``decoding_failures``.
        """
        raw = {
            'a': 'something',
            'b': 'someone',
            'c': b'a name \xff',
            'd': b'an email \xff',
        }
        wanted = {
            'a': 'something',
            'b': 'someone',
            'c': 'a name \\xff',
            'd': 'an email \\xff',
            'decoding_failures': ['c', 'd'],
        }

        converted = converters.from_swh(raw,
                                        hashess={'a', 'b'},
                                        bytess={'c', 'd'})

        for key in ('a', 'b', 'c', 'd'):
            self.assertEqual(wanted[key], converted[key])

        # Size plus membership: the order of the failure list is not
        # part of what this test pins down.
        wanted_failures = wanted['decoding_failures']
        reported_failures = converted['decoding_failures']
        self.assertEqual(len(wanted_failures), len(reported_failures))
        for key in wanted_failures:
            self.assertTrue(key in reported_failures)
示例#5
0
 def content_get(self, cnt_id: str) -> Dict[str, Any]:
     """Fetch content ``cnt_id`` from storage and convert it.

     The ``ctime`` key is dropped from the content dict before
     conversion. When storage yields a falsy entry, None is handed to
     ``converters.from_swh`` instead of a dict.
     """
     content_sha1 = hash_to_bytes(cnt_id)
     found = self.storage.content_get([content_sha1])[0]

     as_dict = None
     if found:
         as_dict = found.to_dict()
         as_dict.pop("ctime", None)

     hash_fields = {"sha1", "sha1_git", "sha256", "blake2s256"}
     return converters.from_swh(as_dict, hashess=hash_fields)
示例#6
0
def lookup_snapshot_alias(snapshot_id: str,
                          alias_name: str) -> Optional[Dict[str, Any]]:
    """Try to resolve a branch alias in a snapshot.

    Args:
        snapshot_id: hexadecimal representation of a snapshot id
        alias_name: name of the branch alias to resolve; it is encoded
            to bytes before being passed to the resolver

    Returns:
        Target branch information or None if the alias does not exist
        or target a dangling branch.
    """
    branch = snapshot_resolve_alias(storage,
                                    _to_sha1_bin(snapshot_id),
                                    alias_name.encode())
    if branch is None:
        return None
    return converters.from_swh(branch.to_dict(), hashess={"target"})
示例#7
0
def lookup_content_language(q):
    """Return language information from a specified content.

    Args:
        q: query string of the form <hash_algo:hash>

    Returns:
        The converted first language entry found for the content, or
        None when the sha1 lookup or the language lookup comes back
        empty.

    """
    content_sha1 = _lookup_content_sha1(q)
    if content_sha1:
        languages = list(idx_storage.content_language_get([content_sha1]))
        language = _first_element(languages)
        if language:
            return converters.from_swh(language, hashess={'id'})
    return None
示例#8
0
def lookup_content_license(q):
    """Return license information from a specified content.

    Args:
        q: query string of the form <hash_algo:hash>

    Returns:
        The result of converting a dict holding the content sha1 under
        ``id`` and the matching license entry under ``facts``; None
        when the sha1 lookup or the license lookup comes back empty.

    """
    content_sha1 = _lookup_content_sha1(q)
    if not content_sha1:
        return None

    licenses = idx_storage.content_fossology_license_get([content_sha1])
    first_item = _first_element(licenses)
    if not first_item:
        return None

    payload = {'id': content_sha1, 'facts': first_item[content_sha1]}
    return converters.from_swh(payload, hashess={'id'})
示例#9
0
def lookup_content_ctags(q):
    """Return ctags information from a specified content.

    This is a generator: when the sha1 lookup or the ctags lookup comes
    back empty, iteration simply ends without producing anything.

    Args:
        q: query string of the form <hash_algo:hash>

    Yields:
        converted ctags information, one item per ctag found for the
        content.

    """
    content_sha1 = _lookup_content_sha1(q)
    if not content_sha1:
        return

    # The lookup result is fully materialised before anything is yielded.
    found = list(idx_storage.content_ctags_get([content_sha1]))
    for entry in found:
        yield converters.from_swh(entry, hashess={"id"})
示例#10
0
def test_from_swh_edge_cases_do_no_conversion_if_none_or_not_bytes():
    """Check that ``from_swh`` leaves None and plain-text values alone.

    Every key is named in one of the conversion sets, yet no value is
    bytes, so the output is expected to equal the input.
    """
    raw = {
        "a": "something",
        "b": None,
        "c": "someone",
        "d": None,
        "e": None,
    }
    wanted = {
        "a": "something",
        "b": None,
        "c": "someone",
        "d": None,
        "e": None,
    }

    conversion_sets = {
        "hashess": {"a", "b"},
        "bytess": {"c", "d"},
        "dates": {"e"},
    }
    converted = converters.from_swh(raw, **conversion_sets)

    assert wanted == converted
示例#11
0
    def test_from_swh_edge_cases_do_no_conversion_if_none_or_not_bytes(self):
        """Check that ``from_swh`` leaves None and plain-text values alone.

        Every key is named in one of the conversion sets, yet no value
        is bytes, so the output is expected to equal the input.
        """
        raw = {
            'a': 'something',
            'b': None,
            'c': 'someone',
            'd': None,
            'e': None,
        }
        wanted = {
            'a': 'something',
            'b': None,
            'c': 'someone',
            'd': None,
            'e': None,
        }

        conversion_sets = {
            'hashess': {'a', 'b'},
            'bytess': {'c', 'd'},
            'dates': {'e'},
        }
        converted = converters.from_swh(raw, **conversion_sets)

        self.assertEqual(wanted, converted)
示例#12
0
def lookup_expression(expression, last_sha1, per_page):
    """Lookup expression in raw content.

    Args:
        expression (str): An expression to lookup through raw indexed
        content
        last_sha1 (str): Last sha1 seen
        per_page (int): Number of results per page; capped at MAX_LIMIT

    Yields:
        ctags whose content match the expression, converted and with
        their ``id`` key renamed to ``sha1``

    """
    page_size = min(per_page, MAX_LIMIT)
    matches = idx_storage.content_ctags_search(expression,
                                               last_sha1=last_sha1,
                                               limit=page_size)
    for match in matches:
        converted = converters.from_swh(match, hashess={"id"})
        # Expose the identifier under "sha1" rather than "id".
        converted["sha1"] = converted.pop("id")
        yield converted
示例#13
0
def test_from_swh():
    """Exercise every conversion option of ``from_swh`` in one call.

    What the expected output pins down:
      * bytes under the ``bytess`` keys (including nested ones) become
        text and the hash under ``d`` becomes its hexadecimal form;
      * the three date representations become ISO 8601 strings;
      * the blacklisted keys ``h``, ``m``, ``n``, ``o`` and the empty
        removable ``t`` are absent;
      * ``u`` and ``v`` become an empty dict and an empty list;
      * ``p``, ``q`` and ``w`` go through ``convert_fn``.
    """
    raw = {
        "a": "something",
        "b": "someone",
        "c": b"sharp-0.3.4.tgz",
        "d": hashutil.hash_to_bytes(
            "b04caf10e9535160d90e874b45aa426de762f19f"),
        "e": b"sharp.html/doc_002dS_005fISREG.html",
        "g": [b"utf-8-to-decode", b"another-one"],
        "h": "something filtered",
        "i": {"e": b"something"},
        "j": {
            "k": {
                "l": [b"bytes thing", b"another thingy", b""],
                "n": "don't care either",
            },
            "m": "don't care",
        },
        "o": "something",
        "p": b"foo",
        "q": {"extra-headers": [["a", b"intact"]]},
        "w": None,
        "r": {"p": "also intact", "q": "bar"},
        "s": {"timestamp": 42, "offset": -420, "negative_utc": None},
        "s1": {
            "timestamp": {"seconds": 42, "microseconds": 0},
            "offset": -420,
            "negative_utc": None,
        },
        "s2": datetime.datetime(2013, 7, 1, 20, 0, 0,
                                tzinfo=datetime.timezone.utc),
        "t": None,
        "u": None,
        "v": None,
        "x": None,
    }

    wanted = {
        "a": "something",
        "b": "someone",
        "c": "sharp-0.3.4.tgz",
        "d": "b04caf10e9535160d90e874b45aa426de762f19f",
        "e": "sharp.html/doc_002dS_005fISREG.html",
        "g": ["utf-8-to-decode", "another-one"],
        "i": {"e": "something"},
        "j": {"k": {"l": ["bytes thing", "another thingy", ""]}},
        "p": "foo",
        "q": {"extra-headers": [["a", "intact"]]},
        "w": {},
        "r": {"p": "also intact", "q": "bar"},
        "s": "1969-12-31T17:00:42-07:00",
        "s1": "1969-12-31T17:00:42-07:00",
        "s2": "2013-07-01T20:00:00+00:00",
        "u": {},
        "v": [],
        "x": None,
    }

    options = {
        "hashess": {"d", "o", "x"},
        "bytess": {"c", "e", "g", "l"},
        "dates": {"s", "s1", "s2"},
        "blacklist": {"h", "m", "n", "o"},
        "removables_if_empty": {"t"},
        "empty_dict": {"u"},
        "empty_list": {"v"},
        "convert": {"p", "q", "w"},
        "convert_fn": converters.convert_revision_metadata,
    }
    converted = converters.from_swh(raw, **options)

    assert wanted == converted
示例#14
0
 def content_get_ctags(cls, cnt_id):
     """Yield the converted ctags the indexer storage has for ``cnt_id``."""
     content_sha1 = hash_to_bytes(cnt_id)
     for entry in cls.idx_storage.content_ctags_get([content_sha1]):
         yield converters.from_swh(entry, hashess={'id'})
示例#15
0
def test_from_swh_none():
    """A None input is expected to come back as None."""
    converted = converters.from_swh(None)
    assert converted is None
示例#16
0
def test_from_swh_empty():
    """An empty dict is expected to come back as an empty dict."""
    converted = converters.from_swh({})
    assert {} == converted
示例#17
0
    def test_from_swh(self):
        """Exercise every conversion option of ``from_swh`` in one call.

        What the expected output pins down:
          * bytes under the ``bytess`` keys (including nested ones)
            become text and the hash under ``d`` becomes its
            hexadecimal form;
          * the three date representations become ISO 8601 strings;
          * the blacklisted keys ``h``, ``m``, ``n``, ``o`` and the
            empty removable ``t`` are absent;
          * ``u`` and ``v`` become an empty dict and an empty list;
          * ``p``, ``q`` and ``w`` go through ``convert_fn``.
        """
        raw = {
            'a': 'something',
            'b': 'someone',
            'c': b'sharp-0.3.4.tgz',
            'd': hashutil.hash_to_bytes(
                'b04caf10e9535160d90e874b45aa426de762f19f'),
            'e': b'sharp.html/doc_002dS_005fISREG.html',
            'g': [b'utf-8-to-decode', b'another-one'],
            'h': 'something filtered',
            'i': {'e': b'something'},
            'j': {
                'k': {
                    'l': [b'bytes thing', b'another thingy', b''],
                    'n': 'dont care either',
                },
                'm': 'dont care',
            },
            'o': 'something',
            'p': b'foo',
            'q': {'extra-headers': [['a', b'intact']]},
            'w': None,
            'r': {'p': 'also intact', 'q': 'bar'},
            's': {'timestamp': 42, 'offset': -420, 'negative_utc': None},
            's1': {
                'timestamp': {'seconds': 42, 'microseconds': 0},
                'offset': -420,
                'negative_utc': None,
            },
            's2': datetime.datetime(2013, 7, 1, 20, 0, 0,
                                    tzinfo=datetime.timezone.utc),
            't': None,
            'u': None,
            'v': None,
            'x': None,
        }

        wanted = {
            'a': 'something',
            'b': 'someone',
            'c': 'sharp-0.3.4.tgz',
            'd': 'b04caf10e9535160d90e874b45aa426de762f19f',
            'e': 'sharp.html/doc_002dS_005fISREG.html',
            'g': ['utf-8-to-decode', 'another-one'],
            'i': {'e': 'something'},
            'j': {'k': {'l': ['bytes thing', 'another thingy', '']}},
            'p': 'foo',
            'q': {'extra-headers': [['a', 'intact']]},
            'w': {},
            'r': {'p': 'also intact', 'q': 'bar'},
            's': '1969-12-31T17:00:42-07:00',
            's1': '1969-12-31T17:00:42-07:00',
            's2': '2013-07-01T20:00:00+00:00',
            'u': {},
            'v': [],
            'x': None,
        }

        options = {
            'hashess': {'d', 'o', 'x'},
            'bytess': {'c', 'e', 'g', 'l'},
            'dates': {'s', 's1', 's2'},
            'blacklist': {'h', 'm', 'n', 'o'},
            'removables_if_empty': {'t'},
            'empty_dict': {'u'},
            'empty_list': {'v'},
            'convert': {'p', 'q', 'w'},
            'convert_fn': converters.convert_revision_metadata,
        }
        converted = converters.from_swh(raw, **options)

        self.assertEqual(wanted, converted)
示例#18
0
 def test_from_swh_none(self):
     """A None input is expected to come back as None."""
     converted = converters.from_swh(None)
     self.assertIsNone(converted)
示例#19
0
 def test_from_swh_empty(self):
     """An empty dict is expected to come back as an empty dict."""
     converted = converters.from_swh({})
     self.assertEqual({}, converted)
示例#20
0
 def content_get_license(self, cnt_id):
     """Yield the converted license entries found for content ``cnt_id``.

     Each entry returned by the indexer storage is turned into a dict
     with ``to_dict()`` before conversion.
     """
     content_sha1 = hash_to_bytes(cnt_id)
     found = self.idx_storage.content_fossology_license_get([content_sha1])
     for entry in found:
         yield converters.from_swh(entry.to_dict(), hashess={"id"})
示例#21
0
 def content_get_language(cls, cnt_id):
     """Return the converted first language entry found for ``cnt_id``."""
     languages = cls.idx_storage.content_language_get(
         [hash_to_bytes(cnt_id)])
     first_language = next(languages)
     return converters.from_swh(first_language, hashess={'id'})
示例#22
0
 def content_get_ctags(self, cnt_id):
     """Yield the converted ctags the indexer storage has for ``cnt_id``."""
     content_sha1 = hash_to_bytes(cnt_id)
     for entry in self.idx_storage.content_ctags_get([content_sha1]):
         yield converters.from_swh(entry, hashess={"id"})
示例#23
0
 def content_get_metadata(cls, cnt_id):
     """Return the converted first metadata entry found for ``cnt_id``.

     The four checksum fields are passed to the converter as hashes.
     """
     content_sha1 = hash_to_bytes(cnt_id)
     rows = cls.storage.content_get_metadata([content_sha1])
     hash_fields = {'sha1', 'sha1_git', 'sha256', 'blake2s256'}
     return converters.from_swh(next(rows), hashess=hash_fields)