Exemplo n.º 1
0
def test_wrap_2():
    Log.alert("Random types")
    switch = [
        lambda: Random.int(20),
        lambda: Random.string(20),
        lambda: {"i": Random.int(2000)},
        lambda: Data(i=Random.int(2000)),
        lambda: FlatList([{"i": Random.int(2000)}]),
        lambda: [{"i": Random.int(2000)}]
    ]

    inputs = [switch[min(len(switch) - 1, int(floor(-log(Random.float(), 2))))]() for i in range(NUM_INPUT)]

    for i in range(NUM_REPEAT):
        results = []
        gc.collect()
        with Timer("more string: to_data"):
            for v in inputs:
                results.append(to_data(v))

        results = []
        gc.collect()
        with Timer("more string: baseline"):
            for v in inputs:
                results.append(baseline(v))

        Log.note("Done {{i}} of {{num}}", i=i, num=NUM_REPEAT)
Exemplo n.º 2
0
    def _get_from_elasticsearch(self, revision, locale=None, get_diff=False):
        rev = revision.changeset.id
        if self.es.cluster.version.startswith("1.7."):
            query = {
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and": [
                        {"term": {"changeset.id12": rev[0:12]}},
                        {"term": {"branch.name": revision.branch.name}},
                        {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                        {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                    ]}
                }},
                "size": 2000
            }
        else:
            query = {
                "query": {"bool": {"must": [
                    {"term": {"changeset.id12": rev[0:12]}},
                    {"term": {"branch.name": revision.branch.name}},
                    {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                    {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                ]}},
                "size": 2000
            }

        for attempt in range(3):
            try:
                with self.es_locker:
                    docs = self.es.search(query).hits.hits
                break
            except Exception as e:
                e = Except.wrap(e)
                if "NodeNotConnectedException" in e:
                    # WE LOST A NODE, THIS MAY TAKE A WHILE
                    (Till(seconds=Random.int(5 * 60))).wait()
                    continue
                elif "EsRejectedExecutionException[rejected execution (queue capacity" in e:
                    (Till(seconds=Random.int(30))).wait()
                    continue
                else:
                    Log.warning("Bad ES call, fall back to TH", cause=e)
                    return None

        best = docs[0]._source
        if len(docs) > 1:
            for d in docs:
                if d._id.endswith(d._source.branch.locale):
                    best = d._source
            Log.warning("expecting no more than one document")

        if not GET_DIFF and not get_diff:
            return best
        elif best.changeset.diff:
            return best
        elif not best.changeset.files:
            return best  # NOT EXPECTING A DIFF, RETURN IT ANYWAY
        else:
            return None
Exemplo n.º 3
0
def fix(source_key, rownum, line, source, sample_only_filter, sample_size):
    """
    :param rownum:
    :param line:
    :param source:
    :param sample_only_filter:
    :param sample_size:
    :return:  (row, no_more_data) TUPLE WHERE row IS {"value":<data structure>} OR {"json":<text line>}
    """
    value = json2value(line)

    if rownum == 0:
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(source_key, value, source)
        value = _fix(value)
        if sample_only_filter and Random.int(
                int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter(
                    [value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            row = {"value": value}
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        _shorten(source_key, value, source)
        value = _fix(value)
    elif '"resource_usage":' in line:
        value = _fix(value)

    row = {"value": value}
    return row, False
Exemplo n.º 4
0
def fix(source_key, rownum, line, source, sample_only_filter, sample_size):
    """
    :param rownum:
    :param line:
    :param source:
    :param sample_only_filter:
    :param sample_size:
    :return:  (row, no_more_data) TUPLE WHERE row IS {"value":<data structure>} OR {"json":<text line>}
    """
    value = json2value(line)

    if rownum == 0:
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(source_key, value, source)
        value = _fix(value)
        if sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            row = {"value": value}
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        _shorten(source_key, value, source)
        value = _fix(value)
    elif '"resource_usage":' in line:
        value = _fix(value)

    row = {"value": value}
    return row, False
Exemplo n.º 5
0
    def output(*args):
        with cache_store.locker:
            if using_self:
                self = args[0]
                args = args[1:]
            else:
                self = cache_store

            now = Date.now()
            try:
                _cache = getattr(self, attr_name)
            except Exception, _:
                _cache = {}
                setattr(self, attr_name, _cache)

            if Random.int(100) == 0:
                # REMOVE OLD CACHE
                _cache = {
                    k: v
                    for k, v in _cache.items() if v[0] == None or v[0] > now
                }
                setattr(self, attr_name, _cache)

            timeout, key, value, exception = _cache.get(
                args, (Null, Null, Null, Null))
Exemplo n.º 6
0
    def test_compare_isinstance_to_text(self):
        num = 1 * 1000 * 1000
        options = {
            0: lambda: 6,
            1: lambda: "string"
            # 2: lambda: {},
            # 3: lambda: Data(),
            # 4: lambda: Null,
        }
        data = [options[Random.int(len(options))]() for _ in range(num)]

        with Timer("isinstance check") as i_time:
            i_result = [isinstance(d, text) for d in data]

        with Timer("set check") as s_time:
            s_result = [d.__class__ in (text, ) for d in data]

        with Timer("eq check") as e_time:
            e_result = [d.__class__ is text for d in data]

        with Timer("name check") as n_time:
            n_result = [is_instance(d, text) for d in data]

        with Timer("check w method") as m_time:
            m_result = [is_text(d) for d in data]

        self.assertEqual(s_result, i_result)
        self.assertEqual(m_result, i_result)
        self.assertEqual(e_result, i_result)
        self.assertEqual(n_result, i_result)

        self.assertGreater(i_time.duration, s_time.duration)
        self.assertGreater(m_time.duration, s_time.duration)
Exemplo n.º 7
0
    def output(*args, **kwargs):
        if kwargs:
            Log.error(
                "Sorry, caching only works with ordered parameter, not keyword arguments"
            )

        with cache_store.locker:
            if using_self:
                self = args[0]
                args = args[1:]
            else:
                self = cache_store

            now = Date.now()
            try:
                _cache = getattr(self, attr_name)
            except Exception:
                _cache = {}
                setattr(self, attr_name, _cache)

            if Random.int(100) == 0:
                # REMOVE OLD CACHE
                _cache = {
                    k: v
                    for k, v in _cache.items()
                    if v.timeout == None or v.timeout > now
                }
                setattr(self, attr_name, _cache)

            timeout, key, value, exception = _cache.get(
                args, (Null, Null, Null, Null))

        if now >= timeout:
            value = func(self, *args)
            with cache_store.locker:
                _cache[args] = CacheElement(now + cache_store.timeout, args,
                                            value, None)
            return value

        if value == None:
            if exception == None:
                try:
                    value = func(self, *args)
                    with cache_store.locker:
                        _cache[args] = CacheElement(now + cache_store.timeout,
                                                    args, value, None)
                    return value
                except Exception as e:
                    e = Except.wrap(e)
                    with cache_store.locker:
                        _cache[args] = CacheElement(now + cache_store.timeout,
                                                    args, None, e)
                    raise e
            else:
                raise exception
        else:
            return value
Exemplo n.º 8
0
    def __enter__(self):
        if self.sample and Random.int(100) == 0:
            _Log.warning("acquire  lock {{name|quote}}", name=self.name)

        self.debug and _Log.note("acquire  lock {{name|quote}}",
                                 name=self.name)
        self.lock.acquire()
        self.debug and _Log.note("acquired lock {{name|quote}}",
                                 name=self.name)
        return self
Exemplo n.º 9
0
def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}',
                        '{"code": "bb"}').replace('{"id": "tc"}',
                                                  '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = mo_json.json2value(suite_json)
                suite = convert.value2json(coalesce(suite.fullname,
                                                    suite.name))
                line = line.replace(suite_json, suite)

    if source.name.startswith("active-data-codecoverage"):
        d = convert.json2value(line)
        if d.source.file.total_covered > 0:
            return {"id": d._id, "json": line}, False
        else:
            return None, False

    if rownum == 0:
        value = mo_json.json2value(line)
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(
                int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter(
                    [value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        value = mo_json.json2value(line)
        _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find('"resource_usage":') != -1:
        value = mo_json.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False
Exemplo n.º 10
0
    def _get_from_elasticsearch(self, revision, locale=None, get_diff=False, get_moves=True):
        rev = revision.changeset.id
        if self.es.cluster.version.startswith("1.7."):
            query = {
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and": [
                        {"term": {"changeset.id12": rev[0:12]}},
                        {"term": {"branch.name": revision.branch.name}},
                        {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                        {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                    ]}
                }},
                "size": 20
            }
        else:
            query = {
                "query": {"bool": {"must": [
                    {"term": {"changeset.id12": rev[0:12]}},
                    {"term": {"branch.name": revision.branch.name}},
                    {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}},
                    {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}}
                ]}},
                "size": 20
            }

        for attempt in range(3):
            try:
                with self.es_locker:
                    docs = self.es.search(query).hits.hits
                if len(docs) == 0:
                    return None
                best = docs[0]._source
                if len(docs) > 1:
                    for d in docs:
                        if d._id.endswith(d._source.branch.locale):
                            best = d._source
                    Log.warning("expecting no more than one document")
                return best
            except Exception as e:
                e = Except.wrap(e)
                if "EsRejectedExecutionException[rejected execution (queue capacity" in e:
                    (Till(seconds=Random.int(30))).wait()
                    continue
                else:
                    Log.warning("Bad ES call, waiting for {{num}} seconds", num=WAIT_AFTER_NODE_FAILURE, cause=e)
                    Till(seconds=WAIT_AFTER_NODE_FAILURE).wait()
                    continue

        Log.warning("ES did not deliver, fall back to HG")
        return None
Exemplo n.º 11
0
    def output(*args):
        with cache_store.locker:
            if using_self:
                self = args[0]
                args = args[1:]
            else:
                self = cache_store

            now = Date.now()
            try:
                _cache = getattr(self, attr_name)
            except Exception:
                _cache = {}
                setattr(self, attr_name, _cache)

            if Random.int(100) == 0:
                # REMOVE OLD CACHE
                _cache = {
                    k: v
                    for k, v in _cache.items() if v[0] == None or v[0] > now
                }
                setattr(self, attr_name, _cache)

            timeout, key, value, exception = _cache.get(
                args, (Null, Null, Null, Null))

        if now >= timeout:
            value = func(self, *args)
            with cache_store.locker:
                _cache[args] = (now + cache_store.timeout, args, value, None)
            return value

        if value == None:
            if exception == None:
                try:
                    value = func(self, *args)
                    with cache_store.locker:
                        _cache[args] = (now + cache_store.timeout, args, value,
                                        None)
                    return value
                except Exception as e:
                    e = Except.wrap(e)
                    with cache_store.locker:
                        _cache[args] = (now + cache_store.timeout, args, None,
                                        e)
                    raise e
            else:
                raise exception
        else:
            return value
Exemplo n.º 12
0
def test_wrap_3():
    switch = [
        lambda: Random.string(20), lambda: {
            "i": Random.int(2000)
        }, lambda: Data(i=Random.int(2000)),
        lambda: FlatList([{
            "i": Random.int(2000)
        }]), lambda: [{
            "i": Random.int(2000)
        }]
    ]

    inputs = [
        switch[min(len(switch) - 1, int(floor(-log(Random.float(), 2))))]()
        for i in range(NUM_INPUT)
    ]

    for i in range(NUM_REPEAT):
        results = []
        gc.collect()
        with Profiler("more string: slow_wrap"):
            for v in inputs:
                results.append(slow_wrap(v))

        results = []
        gc.collect()
        with Profiler("more string: wrap"):
            for v in inputs:
                results.append(wrap(v))

        results = []
        gc.collect()
        with Profiler("more string: baseline"):
            for v in inputs:
                results.append(baseline(v))

        Log.note("Done {{i}} of {{num}}", {"i": i, "num": NUM_REPEAT})
Exemplo n.º 13
0
def fix(rownum, line, source, sample_only_filter, sample_size):
    value = json2value(line)

    if value._id.startswith(("tc.97", "96", "bb.27")):
        # AUG 24, 25 2017 - included full diff with repo; too big to index
        try:
            data = json2value(line)
            repo = data.repo
            repo.etl = None
            repo.branch.last_used = None
            repo.branch.description = None
            repo.branch.etl = None
            repo.branch.parent_name = None
            repo.children = None
            repo.parents = None
            if repo.changeset.diff or data.build.repo.changeset.diff:
                Log.error("no diff allowed")
            else:
                assertAlmostEqual(minimize_repo(repo), repo)
        except Exception as e:
            if CAN_NOT_DECODE_JSON in e:
                raise e
            data.repo = minimize_repo(repo)
            data.build.repo = minimize_repo(data.build.repo)
            line = value2json(data)
    else:
        pass

    if rownum == 0:
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(value, source)
        value = _fix(value)
        if sample_only_filter and Random.int(
                int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter(
                    [value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            row = {"value": value}
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        _shorten(value, source)
        value = _fix(value)
    elif line.find('"resource_usage":') != -1:
        value = _fix(value)

    row = {"value": value}
    return row, False
Exemplo n.º 14
0
    def output(*args, **kwargs):
        if kwargs:
            Log.error("Sorry, caching only works with ordered parameter, not keyword arguments")

        with cache_store.locker:
            if using_self:
                self = args[0]
                args = args[1:]
            else:
                self = cache_store

            now = Date.now()
            try:
                _cache = getattr(self, attr_name)
            except Exception:
                _cache = {}
                setattr(self, attr_name, _cache)

            if Random.int(100) == 0:
                # REMOVE OLD CACHE
                _cache = {k: v for k, v in _cache.items() if v.timeout == None or v.timeout > now}
                setattr(self, attr_name, _cache)

            timeout, key, value, exception = _cache.get(args, (Null, Null, Null, Null))

        if now >= timeout:
            value = func(self, *args)
            with cache_store.locker:
                _cache[args] = CacheElement(now + cache_store.timeout, args, value, None)
            return value

        if value == None:
            if exception == None:
                try:
                    value = func(self, *args)
                    with cache_store.locker:
                        _cache[args] = CacheElement(now + cache_store.timeout, args, value, None)
                    return value
                except Exception as e:
                    e = Except.wrap(e)
                    with cache_store.locker:
                        _cache[args] = CacheElement(now + cache_store.timeout, args, None, e)
                    raise e
            else:
                raise exception
        else:
            return value
Exemplo n.º 15
0
def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}').replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = mo_json.json2value(suite_json)
                suite = convert.value2json(coalesce(suite.fullname, suite.name))
                line = line.replace(suite_json, suite)

    if rownum == 0:
        value = mo_json.json2value(line)
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        value = mo_json.json2value(line)
        _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find('"resource_usage":') != -1:
        value = mo_json.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False
Exemplo n.º 16
0
    def output(*args):
        with cache_store.locker:
            if using_self:
                self = args[0]
                args = args[1:]
            else:
                self = cache_store

            now = Date.now()
            try:
                _cache = getattr(self, attr_name)
            except Exception, _:
                _cache = {}
                setattr(self, attr_name, _cache)

            if Random.int(100) == 0:
                # REMOVE OLD CACHE
                _cache = {k: v for k, v in _cache.items() if v[0]==None or v[0] > now}
                setattr(self, attr_name, _cache)

            timeout, key, value, exception = _cache.get(args, (Null, Null, Null, Null))
Exemplo n.º 17
0
    def test_compare_isinstance_to_class_checks(self):
        num = 1 * 1000 * 1000
        options = {
            0: lambda: {},
            1: lambda: Data(),
            2: lambda: Null,
            3: lambda: 6,
            4: lambda: "string",
        }
        data = [options[Random.int(len(options))]() for _ in range(num)]

        with Timer("isinstance check") as i_time:
            i_result = [isinstance(d, Mapping) for d in data]

        with Timer("set check") as s_time:
            s_result = [d.__class__ in MAPPING_TYPES for d in data]

        with Timer("eq check") as e_time:
            e_result = [
                d.__class__ is Data or d.__class__ is dict for d in data
            ]

        with Timer("name check") as n_time:
            n_result = [
                is_instance(d, Data) or is_instance(d, dict) for d in data
            ]

        with Timer("check w method") as m_time:
            m_result = [is_mapping(d) for d in data]

        self.assertEqual(s_result, i_result)
        self.assertEqual(m_result, i_result)
        self.assertEqual(e_result, i_result)
        self.assertEqual(n_result, i_result)

        self.assertGreater(i_time.duration, s_time.duration)
        self.assertGreater(m_time.duration, s_time.duration)
Exemplo n.º 18
0
    def _get_from_elasticsearch(
            self,
            revision,
            locale=None,
            get_diff=False,
            get_moves=True,
            after=None,  # RETURN RECORDS ETLed AFTER GIVEN TIME
    ):
        """
        MAKE CALL TO ES
        """
        rev = revision.changeset.id
        if self.repo.cluster.version.startswith("1.7."):
            query = {
                "query": {
                    "filtered": {
                        "query": {
                            "match_all": {}
                        },
                        "filter": {
                            "and": [
                                {
                                    "term": {
                                        "changeset.id12": rev[0:12]
                                    }
                                },
                                {
                                    "term": {
                                        "branch.name": revision.branch.name
                                    }
                                },
                                {
                                    "term": {
                                        "branch.locale":
                                        coalesce(
                                            locale,
                                            revision.branch.locale,
                                            DEFAULT_LOCALE,
                                        )
                                    }
                                },
                                {
                                    "range": {
                                        "etl.timestamp": {
                                            "gt": Date.max(after, MIN_ETL_AGE)
                                        }
                                    }
                                },
                            ]
                        },
                    }
                },
                "size": 20,
            }
        else:
            query = {
                "query": {
                    "bool": {
                        "must": [
                            {
                                "term": {
                                    "changeset.id12": rev[0:12]
                                }
                            },
                            {
                                "term": {
                                    "branch.name": revision.branch.name
                                }
                            },
                            {
                                "term": {
                                    "branch.locale":
                                    coalesce(locale, revision.branch.locale,
                                             DEFAULT_LOCALE)
                                }
                            },
                            {
                                "range": {
                                    "etl.timestamp": {
                                        "gt": Date.max(after, MIN_ETL_AGE)
                                    }
                                }
                            },
                        ]
                    }
                },
                "size": 20,
            }

        for attempt in range(3):
            try:
                if get_moves:
                    with self.moves_locker:
                        docs = self.moves.search(query).hits.hits
                else:
                    with self.repo_locker:
                        docs = self.repo.search(query).hits.hits
                if len(docs) == 0:
                    return None
                best = docs[0]._source
                if len(docs) > 1:
                    for d in docs:
                        if d._id.endswith(d._source.branch.locale):
                            best = d._source
                    Log.warning("expecting no more than one document")
                return best
            except Exception as e:
                e = Except.wrap(e)
                if ("EsRejectedExecutionException[rejected execution (queue capacity"
                        in e):
                    (Till(seconds=Random.int(30))).wait()
                    continue
                else:
                    Log.warning(
                        "Bad ES call, waiting for {{num}} seconds",
                        num=WAIT_AFTER_NODE_FAILURE,
                        cause=e,
                    )
                    Till(seconds=WAIT_AFTER_NODE_FAILURE).wait()
                    continue

        Log.warning("ES did not deliver, fall back to HG")
        return None