def test_wrap_3():
    switch = [
        lambda: Random.string(20),
        lambda: {"i": Random.int(2000)},
        lambda: Data(i=Random.int(2000)),
        lambda: FlatList([{"i": Random.int(2000)}]),
        lambda: [{"i": Random.int(2000)}]
    ]

    inputs = [switch[min(len(switch) - 1, int(floor(-log(Random.float(), 2))))]() for i in range(NUM_INPUT)]

    for i in range(NUM_REPEAT):
        results = []
        gc.collect()
        with Profiler("more string: slow_wrap"):
            for v in inputs:
                results.append(slow_wrap(v))

        results = []
        gc.collect()
        with Profiler("more string: wrap"):
            for v in inputs:
                results.append(wrap(v))

        results = []
        gc.collect()
        with Profiler("more string: baseline"):
            for v in inputs:
                results.append(baseline(v))

        Log.note("Done {{i}} of {{num}}", {"i": i, "num": NUM_REPEAT})
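# A minimal, standard-library sketch of how test_wrap_3() picks its input
# generators: flooring -log2(u) of a uniform u in (0, 1] yields index k with
# probability roughly 2**-(k+1), so earlier entries of `switch` dominate the
# benchmark inputs.  random.random() stands in for Random.float(), which is
# assumed to return a uniform float in [0, 1); the clamp mirrors
# min(len(switch) - 1, ...).
import random
from collections import Counter
from math import floor, log

def pick_index(num_choices=5):
    u = random.random() or 1e-12                      # avoid log(0)
    return min(num_choices - 1, int(floor(-log(u, 2))))

counts = Counter(pick_index() for _ in range(10000))
print(sorted(counts.items()))                          # index 0 dominates, roughly halving per step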
def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}').replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = convert.json2value(suite_json)
                suite = convert.value2json(suite.name)
                line = line.replace(suite_json, suite)

    if rownum == 0:
        value = convert.json2value(line)
        if len(line) > 100000:
            value.result.subtests = [s for s in value.result.subtests if s.ok is False]
            value.result.missing_subtests = True

        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > 100000:
        value = convert.json2value(line)
        value.result.subtests = [s for s in value.result.subtests if s.ok is False]
        value.result.missing_subtests = True
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find("\"resource_usage\":") != -1:
        value = convert.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False
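# A stand-alone sketch of the "FAST" branch in fix(): pull _id out of the raw
# JSON line with plain string slicing instead of decoding the whole document.
# `between` here is a hypothetical helper mimicking what strings.between is
# assumed to do (return the text between two markers, or None when the prefix
# is missing); it is not the library implementation.
def between(text, prefix, suffix):
    start = text.find(prefix)
    if start == -1:
        return None
    start += len(prefix)
    end = text.find(suffix, start)
    return text[start:end] if end != -1 else None

sample_line = '{"_id": "abc123", "result": {"ok": true}}'
print(between(sample_line, '"_id": "', '"'))   # -> abc123, no JSON decode needed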
def seen_nonce(sender_id, nonce, timestamp):
    global seen

    key = '{id}:{nonce}:{ts}'.format(id=sender_id, nonce=nonce, ts=timestamp)

    if Random.int(1000) == 0:
        old = (Date.now() - HOUR).unix
        seen = {k: v for k, v in seen.items() if v["timestamp"] >= old}

    if seen.get(key):
        return True
    else:
        seen[key] = {"timestamp": timestamp}
        return False
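# A minimal sketch of the seen_nonce() pattern using only the standard library:
# a dict is the replay cache, and roughly one call in a thousand pays the cost
# of purging entries older than an hour, so cleanup cost is amortized across
# calls.  time.time() and 3600 stand in for Date.now().unix and HOUR; the real
# Random/Date objects are not used here.
import random
import time

_seen = {}

def seen_nonce_sketch(sender_id, nonce, timestamp):
    global _seen
    key = '{0}:{1}:{2}'.format(sender_id, nonce, timestamp)

    if random.randint(0, 999) == 0:                    # amortized cleanup, ~0.1% of calls
        cutoff = time.time() - 3600
        _seen = {k: v for k, v in _seen.items() if v["timestamp"] >= cutoff}

    if key in _seen:
        return True                                    # replayed nonce
    _seen[key] = {"timestamp": timestamp}
    return False

ts = time.time()
print(seen_nonce_sketch("alice", "n1", ts))            # False: first time this nonce is seen
print(seen_nonce_sketch("alice", "n1", ts))            # True: same sender/nonce/timestamp is a replay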
def copy(self, keys, source, sample_only_filter=None, sample_size=None):
    num_keys = 0
    for key in keys:
        try:
            for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                if rownum == 0:
                    value = convert.json2value(line)
                    if len(line) > 1000000:
                        # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests)", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                        value.result.subtests = None
                        value.result.missing_subtests = True

                    _id, value = _fix(value)
                    row = {"id": _id, "value": value}
                    if sample_only_filter and Random.int(int(1.0 / coalesce(sample_size, 0.01))) != 0 and qb.filter([value], sample_only_filter):
                        # INDEX etl.id==0, BUT NO MORE
                        if value.etl.id != 0:
                            Log.error("Expecting etl.id==0")
                        num_keys += 1
                        self.queue.add(row)
                        break
                elif len(line) > 1000000:
                    value = convert.json2value(line)
                    # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests).", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                    value.result.subtests = None
                    value.result.missing_subtests = True
                    _id, value = _fix(value)
                    row = {"id": _id, "value": value}
                else:
                    # FAST
                    _id = strings.between(line, "_id\": \"", "\"")  # AVOID DECODING JSON
                    row = {"id": _id, "json": line}

                num_keys += 1
                self.queue.add(row)
        except Exception, e:
            Log.warning("Could not get queue for {{key}}", key=key, cause=e)
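# A quick sketch of the sampling arithmetic in copy(): with sample_size left at
# its 0.01 default, Random.int(int(1.0 / 0.01)) draws from 0..99, so the
# "!= 0" test passes about 99% of the time and the loop breaks after the first
# row for most keys that match sample_only_filter.  random.randrange stands in
# for Random.int, which is assumed to return a uniform integer in [0, n).
import random

def keeps_only_first_row(sample_size=0.01):
    n = int(1.0 / sample_size)                 # 100 buckets at the default rate
    return random.randrange(n) != 0            # True about (n - 1) / n of the time

trials = 10000
hits = sum(keeps_only_first_row() for _ in range(trials))
print(float(hits) / trials)                    # ~0.99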
def output(*args):
    with cache_store.locker:
        if using_self:
            self = args[0]
            args = args[1:]
        else:
            self = cache_store

        now = Date.now()
        try:
            _cache = getattr(self, attr_name)
        except Exception, _:
            _cache = {}
            setattr(self, attr_name, _cache)

        if Random.int(100) == 0:
            # REMOVE OLD CACHE: KEEP ENTRIES WITH NO TIMEOUT, OR A TIMEOUT STILL IN THE FUTURE
            _cache = {k: v for k, v in _cache.items() if v[0] == None or v[0] > now}
            setattr(self, attr_name, _cache)

        timeout, key, value, exception = _cache.get(args, (Null, Null, Null, Null))

        if now > timeout:
            # ENTRY EXPIRED (OR MISSING): RECOMPUTE AND CACHE
            value = func(self, *args)
            _cache[args] = (now + cache_store.timeout, args, value, None)
            return value

        if value == None:
            if exception == None:
                try:
                    value = func(self, *args)
                    _cache[args] = (now + cache_store.timeout, args, value, None)
                    return value
                except Exception, e:
                    e = Except.wrap(e)
                    _cache[args] = (now + cache_store.timeout, args, None, e)
                    raise e
            else:
                raise exception

        # CACHED VALUE IS STILL FRESH
        return value
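# A simplified, standard-library sketch of what the output() closure implements:
# memoization with a timeout, where roughly 1% of lookups also purge expired
# entries.  It mirrors the (timeout, value, exception) bookkeeping but drops the
# locking, the using_self handling, and the Null sentinels; the decorator name
# `cached` and the parameter `cache_timeout` are illustrative, not from the
# original code.
import random
import time
from functools import wraps

def cached(cache_timeout=60.0):
    def decorator(func):
        _cache = {}

        @wraps(func)
        def output(*args):
            now = time.time()
            if random.randint(0, 99) == 0:             # occasional purge of expired entries
                for k in [k for k, v in _cache.items() if v[0] <= now]:
                    del _cache[k]

            entry = _cache.get(args)
            if entry and entry[0] > now:               # fresh hit
                if entry[2] is not None:
                    raise entry[2]                     # replay a cached exception
                return entry[1]

            try:
                value = func(*args)
                _cache[args] = (now + cache_timeout, value, None)
                return value
            except Exception as e:
                _cache[args] = (now + cache_timeout, None, e)
                raise

        return output
    return decorator

@cached(cache_timeout=5.0)
def slow_square(x):
    time.sleep(0.1)
    return x * x

print(slow_square(4))   # computed
print(slow_square(4))   # served from the cache for the next five seconds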