示例#1
0
    def test_diff_to_json(self):
        """Each sample .patch fixture must parse to its stored .json expectation."""
        cases = [
            ("tests/resources/diff1.patch", "tests/resources/diff1.json"),
            ("tests/resources/diff2.patch", "tests/resources/diff2.json"),
        ]
        for patch_path, json_path in cases:
            actual = diff_to_json(File(patch_path).read())
            expected = File(json_path).read_json(flexible=False, leaves=False)
            self.assertEqual(actual, expected)
示例#2
0
    def test_small_changeset_to_json(self):
        """A small real-world patch converts to the expected JSON structure."""
        raw = File("tests/resources/small.patch").read_bytes()
        actual = diff_to_json(raw.decode("utf8", "replace"))
        expected = File("tests/resources/small.json").read_json(
            flexible=False, leaves=False)
        self.assertEqual(actual, expected)
示例#3
0
    def test_big_changeset_to_json(self):
        """A large real-world patch converts to the expected JSON structure.

        The fixture was originally captured from
        https://hg.mozilla.org/mozilla-central/raw-rev/e5693cea1ec944ca077c7a46c5f127c828a90f1b
        """
        # sanity check: a lone carriage return survives lenient utf8 decoding
        self.assertEqual(b'\r'.decode('utf8', 'replace'), u'\r')

        raw = File("tests/resources/big.patch").read_bytes()
        actual = diff_to_json(raw.decode("utf8", "replace"))
        expected = File("tests/resources/big.json").read_json(
            flexible=False, leaves=False)
        self.assertEqual(actual, expected)
示例#4
0
        def inner(changeset_id):
            # Fetch the unified diff for ``changeset_id`` as parsed JSON.
            # Strategy: query the local ES index first; on any ES failure fall
            # back to fetching the raw diff over HTTP and parsing it.
            # Returns the parsed diff, None for oversized merge changesets, or
            # (implicitly) None when the HTTP fetch/parse fails.
            # NOTE(review): relies on closure/outer names (self, revision,
            # DEBUG, DIFF_URL, MIN_ETL_AGE, MAX_DIFF_SIZE, _count) — confirm
            # they are defined in the enclosing scope.
            if self.es.cluster.version.startswith("1.7."):
                # ES 1.x only understands the legacy "filtered"/"and" query form
                query = {
                    "query": {"filtered": {
                        "query": {"match_all": {}},
                        "filter": {"and": [
                            {"prefix": {"changeset.id": changeset_id}},
                            {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                        ]}
                    }},
                    "size": 1
                }
            else:
                # Modern ES uses a bool/must query for the same filter
                query = {
                    "query": {"bool": {"must": [
                        {"prefix": {"changeset.id": changeset_id}},
                        {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                    ]}},
                    "size": 1
                }

            try:
                # ALWAYS TRY ES FIRST
                with self.es_locker:  # serialize access to the shared ES client
                    response = self.es.search(query)
                    # IndexError here (no hits) is caught below and treated as a miss
                    json_diff = response.hits.hits[0]._source.changeset.diff
                if json_diff:
                    return json_diff
            except Exception as e:
                # best-effort cache lookup: any ES problem falls through to HTTP
                pass

            url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id})
            if DEBUG:
                Log.note("get unified diff from {{url}}", url=url)
            try:
                response = http.get(url)
                # lenient decode: malformed bytes become U+FFFD instead of raising
                diff = response.content.decode("utf8", "replace")
                json_diff = diff_to_json(diff)
                # total number of line-changes across all files in the diff
                num_changes = _count(c for f in json_diff for c in f.changes)
                if json_diff:
                    if num_changes < MAX_DIFF_SIZE:
                        return json_diff
                    elif revision.changeset.description.startswith("merge "):
                        return None  # IGNORE THE MERGE CHANGESETS
                    else:
                        # too big and not a merge: keep file list, drop the change detail
                        Log.warning("Revision at {{url}} has a diff with {{num}} changes, ignored", url=url, num=num_changes)
                        for file in json_diff:
                            file.changes = None
                        return json_diff
            except Exception as e:
                Log.warning("could not get unified diff", cause=e)
示例#5
0
        def inner(changeset_id):
            # Fetch the unified diff for ``changeset_id`` as parsed JSON.
            # Strategy: query the local ES index first; on any ES failure fall
            # back to fetching the raw diff over HTTP and parsing it.
            # Returns the parsed diff, None for merge changesets, or
            # (implicitly) None when the HTTP fetch/parse fails.
            # NOTE(review): relies on closure/outer names (self, revision,
            # DEBUG, DIFF_URL, MIN_ETL_AGE, MAX_DIFF_SIZE, _count) — confirm
            # they are defined in the enclosing scope.
            if self.es.cluster.version.startswith("1.7."):
                # ES 1.x only understands the legacy "filtered"/"and" query form
                query = {
                    "query": {"filtered": {
                        "query": {"match_all": {}},
                        "filter": {"and": [
                            {"prefix": {"changeset.id": changeset_id}},
                            {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                        ]}
                    }},
                    "size": 1
                }
            else:
                # Modern ES uses a bool/must query for the same filter
                query = {
                    "query": {"bool": {"must": [
                        {"prefix": {"changeset.id": changeset_id}},
                        {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                    ]}},
                    "size": 1
                }

            try:
                # ALWAYS TRY ES FIRST
                with self.es_locker:  # serialize access to the shared ES client
                    response = self.es.search(query)
                    # IndexError here (no hits) is caught below and treated as a miss
                    json_diff = response.hits.hits[0]._source.changeset.diff
                if json_diff:
                    return json_diff
            except Exception as e:
                # best-effort cache lookup: any ES problem falls through to HTTP
                pass

            url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id})
            DEBUG and Log.note("get unified diff from {{url}}", url=url)
            try:
                response = http.get(url)
                # strict decode: non-utf8 content raises and is caught below
                diff = response.content.decode("utf8")
                json_diff = diff_to_json(diff)
                # total number of line-changes across all files in the diff
                num_changes = _count(c for f in json_diff for c in f.changes)
                if json_diff:
                    # merge changesets are skipped regardless of size
                    if revision.changeset.description.startswith("merge "):
                        return None  # IGNORE THE MERGE CHANGESETS
                    elif num_changes < MAX_DIFF_SIZE:
                        return json_diff
                    else:
                        # too big: keep file list, drop the change detail
                        Log.warning("Revision at {{url}} has a diff with {{num}} changes, ignored", url=url, num=num_changes)
                        for file in json_diff:
                            file.changes = None
                        return json_diff
            except Exception as e:
                Log.warning("could not get unified diff from {{url}}", url=url, cause=e)
示例#6
0
        def inner(changeset_id):
            # Fetch the unified diff for ``changeset_id`` as parsed JSON.
            # Tries the ES cache first, then falls back to the hg "raw-rev"
            # endpoint. Returns the parsed diff, None for ignored merge
            # changesets, or (implicitly) None when the fetch/parse fails.
            # NOTE(review): relies on closure/outer names (self, revision,
            # DEBUG, IGNORE_MERGE_DIFFS, MAX_DIFF_SIZE, _count,
            # _get_changeset_from_es) — confirm they exist in enclosing scope.
            # ALWAYS TRY ES FIRST
            json_diff = _get_changeset_from_es(self.repo,
                                               changeset_id).changeset.diff
            if json_diff:
                return json_diff
            url = URL(revision.branch.url) / "raw-rev" / changeset_id
            DEBUG and Log.note("get unified diff from {{url}}", url=url)
            try:
                response = http.get(url)
                try:
                    # prefer utf8; any decode error falls back to latin1,
                    # which cannot fail (every byte is a valid latin1 char)
                    diff = response.content.decode("utf8")
                except Exception as e:
                    diff = response.content.decode("latin1")

                # File("tests/resources/big.patch").write_bytes(response.content)
                json_diff = diff_to_json(diff)
                # total number of line-changes across all files in the diff
                num_changes = _count(c for f in json_diff for c in f.changes)
                if json_diff:
                    # merge changesets are skipped (when enabled) regardless of size
                    if (IGNORE_MERGE_DIFFS
                            and revision.changeset.description.startswith(
                                "merge ")):
                        return None  # IGNORE THE MERGE CHANGESETS
                    elif num_changes < MAX_DIFF_SIZE:
                        return json_diff
                    else:
                        # too big: keep file list, drop the change detail
                        Log.warning(
                            "Revision at {{url}} has a diff with {{num}} changes, ignored",
                            url=url,
                            num=num_changes,
                        )
                        for file in json_diff:
                            file.changes = None
                        return json_diff
            except Exception as e:
                Log.warning("could not get unified diff from {{url}}",
                            url=url,
                            cause=e)
示例#7
0
 def test_big_changeset_to_json(self):
     """The big .patch fixture must parse to the stored .json expectation."""
     actual = diff_to_json(File("tests/resources/big.patch").read())
     expected = File("tests/resources/big.json").read_json(
         flexible=False, leaves=False)
     self.assertEqual(actual, expected)