Example #1
    def set_refresh_interval(self, seconds):
        if seconds <= 0:
            interval = -1
        else:
            interval = unicode(seconds) + "s"

        if self.cluster.version.startswith("0.90."):
            response = self.cluster.put(
                "/" + self.settings.index + "/_settings",
                data='{"index":{"refresh_interval":' + convert.value2json(interval) + '}}'
            )

            result = convert.json2value(utf82unicode(response.all_content))
            if not result.ok:
                Log.error("Can not set refresh interval ({{error}})", {
                    "error": utf82unicode(response.all_content)
                })
        elif any(map(self.cluster.version.startswith, ["1.4.", "1.5.", "1.6.", "1.7."])):
            response = self.cluster.put(
                "/" + self.settings.index + "/_settings",
                data=convert.unicode2utf8('{"index":{"refresh_interval":' + convert.value2json(interval) + '}}')
            )

            result = convert.json2value(utf82unicode(response.all_content))
            if not result.acknowledged:
                Log.error("Can not set refresh interval ({{error}})", {
                    "error": utf82unicode(response.all_content)
                })
        else:
            Log.error("Do not know how to handle ES version {{version}}", version=self.cluster.version)
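A minimal usage sketch of the method above, assuming an index wrapper named `es` created by this library; the interval values are illustrative:

# Hypothetical usage: disable refresh during a bulk load, then restore it.
es.set_refresh_interval(seconds=0)  # seconds <= 0 is sent as -1 (refresh disabled)
# ... bulk load documents ...
es.set_refresh_interval(seconds=1)  # re-enable with a "1s" interval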
Example #2
    def set_refresh_interval(self, seconds):
        if seconds <= 0:
            interval = -1
        else:
            interval = unicode(seconds) + "s"

        if self.cluster.version.startswith("0.90."):
            response = self.cluster.put(
                "/" + self.settings.index + "/_settings",
                data='{"index":{"refresh_interval":' + convert.value2json(interval) + '}}'
            )

            result = convert.json2value(utf82unicode(response.all_content))
            if not result.ok:
                Log.error("Can not set refresh interval ({{error}})", {
                    "error": utf82unicode(response.all_content)
                })
        elif any(map(self.cluster.version.startswith, ["1.4.", "1.5.", "1.6.", "1.7."])):
            response = self.cluster.put(
                "/" + self.settings.index + "/_settings",
                data=convert.unicode2utf8('{"index":{"refresh_interval":' + convert.value2json(interval) + '}}')
            )

            result = convert.json2value(utf82unicode(response.all_content))
            if not result.acknowledged:
                Log.error("Can not set refresh interval ({{error}})", {
                    "error": utf82unicode(response.all_content)
                })
        else:
            Log.error("Do not know how to handle ES version {{version}}", version=self.cluster.version)
Example #3
def process_unittest_in_s3(source_key,
                           source,
                           destination,
                           resources,
                           please_stop=None):
    lines = source.read_lines()

    etl_header = convert.json2value(lines[0])

    # FIX ETL IDS
    e = etl_header
    while e:
        if isinstance(e.id, basestring):
            e.id = int(e.id.split(":")[0])
        e = e.source

    bb_summary = transform_buildbot(convert.json2value(lines[1]),
                                    resources=resources)
    unittest_log = lines[2:]
    return process_unittest(source_key,
                            etl_header,
                            bb_summary,
                            unittest_log,
                            destination,
                            please_stop=please_stop)
Example #4
    def create_index(
        self,
        index,
        alias=None,
        create_timestamp=None,
        schema=None,
        limit_replicas=None,
        read_only=False,
        tjson=False,
        settings=None
    ):
        if not alias:
            alias = settings.alias = settings.index
            index = settings.index = proto_name(alias, create_timestamp)

        if settings.alias == index:
            Log.error("Expecting index name to conform to pattern")

        if settings.schema_file:
            Log.error('schema_file attribute not supported.  Use {"$ref":<filename>} instead')

        if schema == None:
            Log.error("Expecting a schema")
        elif isinstance(schema, basestring):
            schema = convert.json2value(schema, leaves=True)
        else:
            schema = convert.json2value(convert.value2json(schema), leaves=True)

        if limit_replicas:
            # DO NOT ASK FOR TOO MANY REPLICAS
            health = self.get("/_cluster/health")
            if schema.settings.index.number_of_replicas >= health.number_of_nodes:
                Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized",
                    {"from": schema.settings.index.number_of_replicas},
                    to=health.number_of_nodes - 1
                )
                schema.settings.index.number_of_replicas = health.number_of_nodes - 1

        self.post(
            "/" + index,
            data=schema,
            headers={"Content-Type": "application/json"}
        )

        # CONFIRM INDEX EXISTS
        while True:
            try:
                state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
                if index in state.metadata.indices:
                    break
                Log.note("Waiting for index {{index}} to appear", index=index)
            except Exception, e:
                Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
            Thread.sleep(seconds=1)
Example #5
    def create_index(
        self,
        index,
        alias=None,
        create_timestamp=None,
        schema=None,
        limit_replicas=None,
        read_only=False,
        tjson=False,
        settings=None
    ):
        if not alias:
            alias = settings.alias = settings.index
            index = settings.index = proto_name(alias, create_timestamp)

        if settings.alias == index:
            Log.error("Expecting index name to conform to pattern")

        if settings.schema_file:
            Log.error('schema_file attribute not supported.  Use {"$ref":<filename>} instead')

        if schema == None:
            Log.error("Expecting a schema")
        elif isinstance(schema, basestring):
            schema = convert.json2value(schema, leaves=True)
        else:
            schema = convert.json2value(convert.value2json(schema), leaves=True)

        if limit_replicas:
            # DO NOT ASK FOR TOO MANY REPLICAS
            health = self.get("/_cluster/health")
            if schema.settings.index.number_of_replicas >= health.number_of_nodes:
                Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized",
                    {"from": schema.settings.index.number_of_replicas},
                    to=health.number_of_nodes - 1
                )
                schema.settings.index.number_of_replicas = health.number_of_nodes - 1

        self.post(
            "/" + index,
            data=schema,
            headers={"Content-Type": "application/json"}
        )

        # CONFIRM INDEX EXISTS
        while True:
            try:
                state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
                if index in state.metadata.indices:
                    break
                Log.note("Waiting for index {{index}} to appear", index=index)
            except Exception, e:
                Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
            Thread.sleep(seconds=1)
Example #6
    def create_index(
        self,
        index,
        alias=None,
        schema=None,
        limit_replicas=None,
        read_only=False,
        tjson=False,
        settings=None
    ):
        if not settings.alias:
            settings.alias = settings.index
            settings.index = proto_name(settings.alias)

        if settings.alias == settings.index:
            Log.error("Expecting index name to conform to pattern")

        if settings.schema_file:
            Log.error('schema_file attribute not supported.  Use {"$ref":<filename>} instead')

        if schema == None:
            Log.error("Expecting a schema")
        elif isinstance(schema, basestring):
            schema = convert.json2value(schema, leaves=True)
        else:
            schema = convert.json2value(convert.value2json(schema), leaves=True)

        if limit_replicas:
            # DO NOT ASK FOR TOO MANY REPLICAS
            health = self.get("/_cluster/health")
            if schema.settings.index.number_of_replicas >= health.number_of_nodes:
                Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized",
                    {"from": schema.settings.index.number_of_replicas},
                    to=health.number_of_nodes - 1
                )
                schema.settings.index.number_of_replicas = health.number_of_nodes - 1

        self.post(
            "/" + settings.index,
            data=schema,
            headers={"Content-Type": "application/json"}
        )
        while True:
            time.sleep(1)
            try:
                self.head("/" + settings.index)
                break
            except Exception:
                Log.note("{{index}} does not exist yet", index=settings.index)

        es = Index(settings=settings)
        return es
Example #7
def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}',
                        '{"code": "bb"}').replace('{"id": "tc"}',
                                                  '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = convert.json2value(suite_json)
                suite = convert.value2json(suite.name)
                line = line.replace(suite_json, suite)

    if rownum == 0:
        value = convert.json2value(line)
        if len(line) > 100000:
            value.result.subtests = [
                s for s in value.result.subtests if s.ok is False
            ]
            value.result.missing_subtests = True

        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(
                int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter(
                    [value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > 100000:
        value = convert.json2value(line)
        value.result.subtests = [
            s for s in value.result.subtests if s.ok is False
        ]
        value.result.missing_subtests = True
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find("\"resource_usage\":") != -1:
        value = convert.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False
Example #8
    def test_request(self):
        # SIMPLEST POSSIBLE REQUEST (NOTHING IMPORTANT HAPPENING)
        data = {
            "meta": {
                "suite": "sessionrestore_no_auto_restore osx-10-10",
                "platform": "osx-10-10",
                "e10s": False,
                "och": "opt",
                "bucket": "startup",
                "statistic": "mean"
            },
            "header": ["rownum", "timestamp", "revision", "value"],
            "data": [
                [1, "2015-12-06 09:21:15", "18339318", 879],
                [2, "2015-12-06 16:50:36", "18340858", 976],
                [3, "2015-12-06 19:01:54", "18342319", 880],
                [4, "2015-12-06 21:08:56", "18343567", 1003],
                [5, "2015-12-06 23:33:27", "18345266", 1002],
                [6, "2015-12-07 02:16:22", "18347807", 977],
                [7, "2015-12-07 02:18:29", "18348057", 1035],
                [8, "2015-12-07 04:51:52", "18351263", 1032],
                [9, "2015-12-07 05:29:42", "18351078", 1035],
                [10, "2015-12-07 05:50:37", "18351749", 1010]
            ]
        }

        response = requests.post(settings.url, json=data)
        self.assertEqual(response.status_code, 200)
        data = convert.json2value(convert.utf82unicode(response.content))
        self.assertEqual(data, {})
Example #9
    def delete_index(self, index_name):
        if not isinstance(index_name, unicode):
            Log.error("expecting an index name")

        if self.debug:
            Log.note("Deleting index {{index}}", index=index_name)

        # REMOVE ALL ALIASES TOO
        aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
        if aliases:
            self.post(
                path="/_aliases",
                data={"actions": [{"remove": a} for a in aliases]}
            )

        url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
        try:
            response = http.delete(url)
            if response.status_code != 200:
                Log.error("Expecting a 200, got {{code}}", code=response.status_code)
            details = convert.json2value(utf82unicode(response.content))
            if self.debug:
                Log.note("delete response {{response}}", response=details)
            return response
        except Exception, e:
            Log.error("Problem with call to {{url}}", url=url, cause=e)
Example #10
def process(source_key, source, destination, resources, please_stop=None):
    lines = source.read_lines()

    etl_header = convert.json2value(lines[0])
    if etl_header.etl:
        start = 0
    elif etl_header.locale or etl_header._meta:
        start = 0
    else:
        start = 1

    keys = []
    records = []
    stats = Dict()
    for i, line in enumerate(lines[start:]):
        pulse_record = Null
        try:
            pulse_record = scrub_pulse_record(source_key, i, line, stats)
            if not pulse_record:
                continue

            with Profiler("transform_buildbot"):
                record = transform_buildbot(pulse_record.payload, resources=resources)
                record.etl = {
                    "id": i,
                    "source": pulse_record.etl,
                    "type": "join",
                    "revision": get_git_revision()
                }
            key = etl2key(record.etl)
            keys.append(key)
            records.append({"id": key, "value": record})
        except Exception, e:
            Log.warning("Problem with pulse payload {{pulse|json}}", pulse=pulse_record.payload, cause=e)
Example #11
def process_test_result(source_key, source, destination, please_stop=None):
    path = key2path(source_key)
    destination.delete({"and": [
        {"term": {"etl.source.id": path[1]}},
        {"term": {"etl.source.source.id": path[0]}}
    ]})

    lines = source.read_lines()

    keys = []
    data = []
    for l in lines:
        record = convert.json2value(l)
        if record._id==None:
            continue
        record.result.crash_result = None  #TODO: Remove me after May 2015
        keys.append(record._id)
        data.append({
            "id": record._id,
            "value": record
        })
        record._id = None
    if data:
        try:
            destination.extend(data)
        except Exception, e:
            if "Can not decide on index by build.date" in e:
                if source.bucket.name == "ekyle-test-result":
                    # KNOWN CORRUPTION
                    # TODO: REMOVE LATER (today = Mar2015)
                    delete_list = source.bucket.keys(prefix=key_prefix(source_key))
                    for d in delete_list:
                        source.bucket.delete_key(d)
            Log.error("Can not add to sink", e)
Example #12
    def __init__(self, host, index, type="query", max_size=10, batch_size=10, kwargs=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        es = Cluster(kwargs).get_or_create_index(
            schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            kwargs=kwargs
        )
        # ENSURE THE TYPE EXISTS FOR PROBING
        try:
            es.add({
                "id": "dummy",
                "value": {
                    "hash": "dummy",
                    "create_time": Date.now(),
                    "last_used": Date.now(),
                    "query": {}
                }
            })
        except Exception, e:
            Log.warning("Problem saving query", cause=e)
Example #13
    def __init__(self, settings):
        self.settings = wrap({"host":"fake", "index":"fake"})
        self.filename = settings.filename
        try:
            self.data = convert.json2value(File(self.filename).read())
        except IOError:
            self.data = Dict()
Example #14
def decrypt(data, _key):
    """
    ACCEPT JSON OF ENCRYPTED DATA  {"salt":s, "length":l, "data":d}
    """
    from pyLibrary.queries import jx

    # Key and iv have not been generated or provided, bail out
    if _key is None:
        Log.error("Expecting a key")

    _input = convert.json2value(data)

    # Initialize encryption using key and iv
    key_expander_256 = key_expander.KeyExpander(256)
    expanded_key = key_expander_256.expand(_key)
    aes_cipher_256 = aes_cipher.AESCipher(expanded_key)
    aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16)
    aes_cbc_256.set_iv(convert.base642bytearray(_input.salt))

    raw = convert.base642bytearray(_input.data)
    out_data = bytearray()
    for _, e in jx.groupby(raw, size=16):
        out_data.extend(aes_cbc_256.decrypt_block(e))

    return str(out_data[:_input.length:]).decode("utf8")
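For context, a sketch of the envelope this function expects, with field names taken from the docstring above; the key and ciphertext are stand-in values, not real data:

# Hypothetical usage: "salt" and "data" are base64, "length" is the plaintext byte count.
encrypted = '{"salt": "c2FsdHNhbHRzYWx0c2FsdA==", "length": 5, "data": "..."}'
plaintext = decrypt(encrypted, key)  # key: assumed 256-bit key material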
Example #15
    def test_request(self):
        # MAKE SOME DATA
        data = {
            "constant": "this is a test",
            "random-data": convert.bytes2base64(Random.bytes(100))
        }

        client = Client(settings.url, unwrap(settings.hawk))  # unwrap() DUE TO BUG https://github.com/kumar303/mohawk/issues/21
        link, id = client.send(data)
        Log.note("Success!  Located at {{link}} id={{id}}", link=link, id=id)

        # FILL THE REST OF THE FILE
        Log.note("Adding {{num}} more...", num=99 - id)
        for i in range(id + 1, storage.BATCH_SIZE):
            l, k = client.send(data)
            if l != link:
                Log.error("Expecting rest of data to have same link")

        # TEST LINK HAS DATA
        raw_content = requests.get(link).content
        content = convert.zip2bytes(raw_content)
        for line in convert.utf82unicode(content).split("\n"):
            data = convert.json2value(line)
            if data.etl.id == id:
                Log.note("Data {{id}} found", id=id)
                break
        else:
            Log.error("Expecting to find data at link")
Example #16
    def __init__(self, filename, host="fake", index="fake", settings=None):
        self.settings = settings
        self.filename = settings.filename
        try:
            self.data = convert.json2value(File(self.filename).read())
        except Exception:
            self.data = Dict()
Example #17
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    response = get(url, **kwargs)
    c = response.all_content
    return convert.json2value(convert.utf82unicode(c))
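A usage sketch, assuming get_json is importable from the same HTTP module as get(); the URL and field names are hypothetical. Note that json2value returns a dot-accessible structure, which is why callers elsewhere in this listing write result.ok or result.acknowledged directly:

# Hypothetical usage: GET a JSON endpoint and read nested fields with dots.
status = get_json("http://example.com/api/status")
if not status.ok:
    Log.error("service is not healthy")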
Example #18
def decrypt(data, _key):
    """
    ACCEPT JSON OF ENCRYPTED DATA  {"salt":s, "length":l, "data":d}
    """
    from pyLibrary.queries import jx

    # Key and iv have not been generated or provided, bail out
    if _key is None:
        Log.error("Expecting a key")

    _input = convert.json2value(data)

    # Initialize encryption using key and iv
    key_expander_256 = key_expander.KeyExpander(256)
    expanded_key = key_expander_256.expand(_key)
    aes_cipher_256 = aes_cipher.AESCipher(expanded_key)
    aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16)
    aes_cbc_256.set_iv(convert.base642bytearray(_input.salt))

    raw = convert.base642bytearray(_input.data)
    out_data = bytearray()
    for _, e in jx.groupby(raw, size=16):
        out_data.extend(aes_cbc_256.decrypt_block(e))

    return str(out_data[:_input.length:]).decode("utf8")
Example #19
def get_active_data(settings):
    query = {
        "limit": 100000,
        "from": "unittest",
        "where": {"and": [
            {"eq": {"result.ok": False}},
            {"gt": {"run.timestamp": RECENT.milli}}
        ]},
        "select": [
            "result.ok",
            "build.branch",
            "build.platform",
            "build.release",
            "build.revision",
            "build.type",
            "build.revision",
            "build.date",
            "run.timestamp",
            "run.suite",
            "run.chunk",
            "result.test",
            "run.stats.status.test_status"
        ],
        "format": "table"
    }
    result = http.post("http://activedata.allizom.org/query", data=convert.unicode2utf8(convert.value2json(query)))

    query_result = convert.json2value(convert.utf82unicode(result.all_content))

    tab = convert.table2tab(query_result.header, query_result.data)
    File(settings.output.activedata).write(tab)
Example #20
    def test_branch_count(self):
        if self.not_real_service():
            return

        test = wrap({"query": {
            "from": {
                "type": "elasticsearch",
                "settings": {
                    "host": ES_CLUSTER_LOCATION,
                    "index": "unittest",
                    "type": "test_result"
                }
            },
            "select": [
                {"aggregate": "count"},
            ],
            "edges": [
                "build.branch"
            ],
            "where": {"or": [
                {"missing": "build.id"}
                # {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
            ]},
            "format": "table"
        }})

        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #21
    def pop(self, wait=SECOND, till=None):
        m = self.queue.read(wait_time_seconds=Math.floor(wait.seconds))
        if not m:
            return None

        self.pending.append(m)
        return convert.json2value(m.get_body())
Example #22
def accumulate_logs(source_key, file_name, lines, please_stop):
    accumulator = LogSummary()
    for line in lines:
        if please_stop:
            Log.error("Shutdown detected.  Structured log iterator is stopped.")
        accumulator.stats.bytes += len(line) + 1  # INCLUDE THE \n THAT WOULD HAVE BEEN AT END OF EACH LINE
        line = strings.strip(line)

        if line == "":
            continue
        try:
            accumulator.stats.lines += 1
            log = convert.json2value(line)
            log.time = log.time / 1000
            accumulator.stats.start_time = Math.min(
                accumulator.stats.start_time, log.time)
            accumulator.stats.end_time = Math.max(accumulator.stats.end_time,
                                                  log.time)

            # FIX log.test TO BE A STRING
            if isinstance(log.test, list):
                log.test = " ".join(log.test)

            accumulator.__getattribute__(log.action)(log)
            if log.subtest:
                accumulator.last_subtest = log.time
        except Exception, e:
            accumulator.stats.bad_lines += 1
Example #23
def get_env(ref, url):
    # GET ENVIRONMENT VARIABLES
    ref = ref.host
    try:
        new_value = _convert.json2value(os.environ[ref])
    except Exception, e:
        new_value = os.environ[ref]
Example #24
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    response = get(url, **kwargs)
    c = response.all_content
    return convert.json2value(convert.utf82unicode(c))
Example #25
    def __init__(self, settings):
        self.settings = wrap({"host": "fake", "index": "fake"})
        self.filename = settings.filename
        try:
            self.data = convert.json2value(File(self.filename).read())
        except IOError:
            self.data = Dict()
Example #26
    def test_multiple_agg_on_same_field(self):
        if self.not_real_service():
            return

        test = wrap({
            "query": {
                "from": {
                    "type": "elasticsearch",
                    "settings": {
                        "host": ES_CLUSTER_LOCATION,
                        "index": "unittest",
                        "type": "test_result"
                    }
                },
                "select": [{
                    "name": "max_bytes",
                    "value": "run.stats.bytes",
                    "aggregate": "max"
                }, {
                    "name": "count",
                    "value": "run.stats.bytes",
                    "aggregate": "count"
                }]
            }
        })

        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #27
    def pop(self, wait=SECOND, till=None):
        m = self.queue.read(wait_time_seconds=Math.floor(wait.seconds))
        if not m:
            return None

        self.pending.append(m)
        return convert.json2value(m.get_body())
Example #28
def get_env(ref, url):
    # GET ENVIRONMENT VARIABLES
    ref = ref.host
    try:
        new_value = convert.json2value(os.environ[ref])
    except Exception, e:
        new_value = os.environ[ref]
Example #29
def solve():
    try:
        data = convert.json2value(convert.utf82unicode(flask.request.data))
        solved = noop.solve(data)
        response_data = convert.unicode2utf8(convert.value2json(solved))

        return Response(
            response_data,
            direct_passthrough=True,  # FOR STREAMING
            status=200,
            headers={
                "access-control-allow-origin": "*",
                "content-type": "application/json"
            }
        )
    except Exception, e:
        e = Except.wrap(e)
        Log.warning("Could not process", cause=e)
        e = e.as_dict()
        return Response(
            convert.unicode2utf8(convert.value2json(e)),
            status=400,
            headers={
                "access-control-allow-origin": "*",
                "content-type": "application/json"
            }
        )
Example #30
    def __init__(self, filename, host="fake", index="fake", settings=None):
        self.settings = settings
        self.filename = settings.filename
        try:
            self.data = convert.json2value(File(self.filename).read())
        except Exception:
            self.data = Dict()
Example #31
    def delete_index(self, index_name):
        if not isinstance(index_name, unicode):
            Log.error("expecting an index name")

        if self.debug:
            Log.note("Deleting index {{index}}", index=index_name)

        # REMOVE ALL ALIASES TOO
        aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
        if aliases:
            self.post(
                path="/_aliases",
                data={"actions": [{"remove": a} for a in aliases]}
            )

        url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
        try:
            response = http.delete(url)
            if response.status_code != 200:
                Log.error("Expecting a 200")
            details = convert.json2value(utf82unicode(response.content))
            if self.debug:
                Log.note("delete response {{response}}", response=details)
            return response
        except Exception, e:
            Log.error("Problem with call to {{url}}", url=url, cause=e)
Example #32
    def _get_queue(self, row):
        row = wrap(row)
        if row.json:
            row.value, row.json = convert.json2value(row.json), None
        timestamp = Date(self.rollover_field(wrap(row).value))
        if timestamp == None or timestamp < Date.today() - self.rollover_max:
            return Null

        rounded_timestamp = timestamp.floor(self.rollover_interval)
        queue = self.known_queues.get(rounded_timestamp.unix)
        if queue == None:
            candidates = jx.run({
                "from": self.cluster.get_aliases(),
                "where": {
                    "regex": {
                        "index":
                        self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"
                    }
                },
                "sort": "index"
            })
            best = None
            for c in candidates:
                c = wrap(c)
                c.date = unicode2Date(c.index[-15:],
                                      elasticsearch.INDEX_DATE_FORMAT)
                if timestamp > c.date:
                    best = c
            if not best or rounded_timestamp > best.date:
                if rounded_timestamp < wrap(candidates[-1]).date:
                    es = elasticsearch.Index(read_only=False,
                                             alias=best.alias,
                                             index=best.index,
                                             settings=self.settings)
                else:
                    try:
                        es = self.cluster.create_index(
                            create_timestamp=rounded_timestamp,
                            settings=self.settings)
                        es.add_alias(self.settings.index)
                    except Exception, e:
                        if "IndexAlreadyExistsException" not in e:
                            Log.error("Problem creating index", cause=e)
                        return self._get_queue(row)  # TRY AGAIN
            else:
                es = elasticsearch.Index(read_only=False,
                                         alias=best.alias,
                                         index=best.index,
                                         settings=self.settings)

            with suppress_exception:
                es.set_refresh_interval(seconds=60 * 10, timeout=5)

            self._delete_old_indexes(candidates)

            queue = self.known_queues[
                rounded_timestamp.unix] = es.threaded_queue(
                    max_size=self.settings.queue_size,
                    batch_size=self.settings.batch_size,
                    silent=True)
Example #33
def expand_json(rows):
    # CONVERT JSON TO VALUES
    for r in rows:
        for k, json in list(r.items()):
            if isinstance(json, basestring) and json[0:1] in ("[", "{"):
                with suppress_exception:
                    value = convert.json2value(json)
                    r[k] = value
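A usage sketch with illustrative row contents. Only string cells that begin with "[" or "{" are candidates, and suppress_exception leaves a cell untouched when decoding fails:

# Hypothetical usage: cells holding serialized JSON are decoded in place.
rows = [{"name": "a", "payload": '{"ok": true}'}]
expand_json(rows)
# rows[0]["payload"] is now a decoded value instead of a string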
Example #34
def _get_url(url, branch, **kwargs):
    with Explanation("get push from {{url}}", url=url):
        response = http.get(url, **kwargs)
        data = convert.json2value(response.content.decode("utf8"))
        if isinstance(data, basestring) and data.startswith("unknown revision"):
            Log.error("Unknown push {{revision}}", revision=strings.between(data, "'", "'"))
        branch.url = _trim(url)  # RECORD THIS SUCCESS IN THE BRANCH
        return data
Example #35
def expand_json(rows):
    # CONVERT JSON TO VALUES
    for r in rows:
        for k, json in list(r.items()):
            if isinstance(json, basestring) and json[0:1] in ("[", "{"):
                with suppress_exception:
                    value = convert.json2value(json)
                    r[k] = value
Example #36
    def create_index(self,
                     index,
                     alias=None,
                     schema=None,
                     limit_replicas=None,
                     settings=None):
        if not settings.alias:
            settings.alias = settings.index
            settings.index = proto_name(settings.alias)

        if settings.alias == settings.index:
            Log.error("Expecting index name to conform to pattern")

        if settings.schema_file:
            Log.error(
                'schema_file attribute not supported.  Use {"$ref":<filename>} instead'
            )

        if schema == None:
            Log.error("Expecting a schema")
        elif isinstance(schema, basestring):
            schema = convert.json2value(schema, paths=True)
        else:
            schema = convert.json2value(convert.value2json(schema), paths=True)

        if limit_replicas:
            # DO NOT ASK FOR TOO MANY REPLICAS
            health = self.get("/_cluster/health")
            if schema.settings.index.number_of_replicas >= health.number_of_nodes:
                Log.warning(
                    "Reduced number of replicas: {{from}} requested, {{to}} realized",
                    {"from": schema.settings.index.number_of_replicas},
                    to=health.number_of_nodes - 1)
                schema.settings.index.number_of_replicas = health.number_of_nodes - 1

        self._post("/" + settings.index,
                   data=convert.value2json(schema).encode("utf8"),
                   headers={"Content-Type": "application/json"})
        while True:
            time.sleep(1)
            try:
                self.head("/" + settings.index)
                break
            except Exception, _:
                Log.note("{{index}} does not exist yet", index=settings.index)
Example #37
    def delete(self, path, **kwargs):
        url = self.settings.host + ":" + unicode(self.settings.port) + path
        try:
            response = convert.json2value(utf82unicode(http.delete(url, **kwargs).content))
            if self.debug:
                Log.note("delete response {{response}}", response=response)
            return response
        except Exception, e:
            Log.error("Problem with call to {{url}}", url=url, cause=e)
Example #38
    def read_json(self, encoding="utf8"):
        from pyLibrary.jsons import ref

        content = self.read(encoding=encoding)
        value = convert.json2value(content, flexible=True, leaves=True)
        abspath = self.abspath
        if os.sep == "\\":
            abspath = "/" + abspath.replace(os.sep, "/")
        return ref.expand(value, "file://" + abspath)
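A usage sketch, assuming a File object from this library and a JSON file that may contain {"$ref": <url>} links; the path is hypothetical:

# Hypothetical usage: read JSON and expand $ref links relative to the file itself.
config = File("etc/config.json").read_json()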
Example #39
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    kwargs["data"] = convert.unicode2utf8(convert.value2json(kwargs["data"]))

    response = post(url, **kwargs)
    c = response.all_content
    return convert.json2value(convert.utf82unicode(c))
Example #40
    def read_json(self, encoding="utf8"):
        from pyLibrary.jsons import ref

        content = self.read(encoding=encoding)
        value = convert.json2value(content, flexible=True, leaves=True)
        abspath = self.abspath
        if os.sep == "\\":
            abspath = "/" + abspath.replace(os.sep, "/")
        return ref.expand(value, "file://" + abspath)
Example #41
    def test_timing(self):
        if self.not_real_service():
            return

        test = wrap({
            "query": {
                "from": {
                    "type": "elasticsearch",
                    "settings": {
                        "host": ES_CLUSTER_LOCATION,
                        "index": "unittest",
                        "type": "test_result"
                    }
                },
                "select": [{
                    "name": "count",
                    "value": "run.duration",
                    "aggregate": "count"
                }, {
                    "name": "total",
                    "value": "run.duration",
                    "aggregate": "sum"
                }],
                "edges": [{
                    "name": "chunk",
                    "value": ["run.suite", "run.chunk"]
                }, "result.ok"],
                "where": {
                    "and": [{
                        "lt": {
                            "timestamp": Date.floor(Date.now()).milli / 1000
                        }
                    }, {
                        "gte": {
                            "timestamp":
                            Date.floor(Date.now() - (Duration.DAY * 7),
                                       Duration.DAY).milli / 1000
                        }
                    }]
                },
                "format": "cube",
                "samples": {
                    "limit": 30
                }
            }
        })

        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #42
def get_http(ref, url):
    from pyLibrary.env import http

    params = url.query
    new_value = convert.json2value(http.get(ref),
                                   params=params,
                                   flexible=True,
                                   paths=True)
    return new_value
Example #43
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    kwargs["data"] = convert.unicode2utf8(convert.value2json(kwargs["data"]))

    response = post(url, **kwargs)
    c = response.all_content
    return convert.json2value(convert.utf82unicode(c))
Example #44
def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}').replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = convert.json2value(suite_json)
                suite = convert.value2json(suite.name)
                line = line.replace(suite_json, suite)

    if rownum == 0:
        value = convert.json2value(line)
        if len(line) > 100000:
            value.result.subtests = [s for s in value.result.subtests if s.ok is False]
            value.result.missing_subtests = True

        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > 100000:
        value = convert.json2value(line)
        value.result.subtests = [s for s in value.result.subtests if s.ok is False]
        value.result.missing_subtests = True
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find("\"resource_usage\":") != -1:
        value = convert.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False
Example #45
def full_etl(settings):
    schema = convert.json2value(convert.value2json(SCHEMA), leaves=True)
    Cluster(settings.destination).get_or_create_index(settings=settings.destination, schema=schema, limit_replicas=True)
    destq = FromES(settings.destination)
    if settings.incremental:
        min_bug_id = destq.query({
            "from": coalesce(settings.destination.alias, settings.destination.index),
            "select": {"name": "max_bug_id", "value": "bug_id", "aggregate": "max"}
        })

        min_bug_id = int(MAX(min_bug_id-1000, 0))
    else:
        min_bug_id = 0

    sourceq = FromES(settings.source)
    max_bug_id = sourceq.query({
        "from": coalesce(settings.source.alias, settings.source.index),
        "select": {"name": "max_bug_id", "value": "bug_id", "aggregate": "max"}
    }) + 1
    max_bug_id = int(coalesce(max_bug_id, 0))

    # FIRST, GET ALL MISSING BUGS
    for s, e in qb.reverse(list(qb.intervals(min_bug_id, max_bug_id, 10000))):
        with Timer("pull {{start}}..{{end}} from ES", {"start": s, "end": e}):
            children = sourceq.query({
                "from": settings.source.alias,
                "select": ["bug_id", "dependson", "blocked", "modified_ts", "expires_on"],
                "where": {"and": [
                    {"range": {"bug_id": {"gte": s, "lt": e}}},
                    {"or": [
                        {"exists": "dependson"},
                        {"exists": "blocked"}
                    ]}
                ]},
                "limit": 10000
            })

        with Timer("fixpoint work"):
            to_fix_point(settings, destq, children.data)

    # PROCESS RECENT CHANGES
    with Timer("pull recent dependencies from ES"):
        children = sourceq.query({
            "from": settings.source.alias,
            "select": ["bug_id", "dependson", "blocked"],
            "where": {"and": [
                {"range": {"modified_ts": {"gte": convert.datetime2milli(datetime.utcnow() - timedelta(days=7))}}},
                {"or": [
                    {"exists": "dependson"},
                    {"exists": "blocked"}
                ]}
            ]},
            "limit": 100000
        })

    to_fix_point(settings, destq, children.data)
Example #46
    def delete(self, path, **kwargs):
        url = self.settings.host + ":" + unicode(self.settings.port) + path
        try:
            response = convert.json2value(utf82unicode(http.delete(url, **kwargs).content))
            if self.debug:
                Log.note("delete response {{response}}", response=response)
            return response
        except Exception, e:
            Log.error("Problem with call to {{url}}", url=url, cause=e)
Example #47
    def __eq__(self, other):
        Log.warning("expensive")

        from pyLibrary.testing.fuzzytestcase import assertAlmostEqual

        try:
            assertAlmostEqual(convert.json2value(self.json), other)
            return True
        except Exception:
            return False
Example #48
    def copy(self, keys, source, sample_only_filter=None, sample_size=None):
        num_keys = 0
        for key in keys:
            try:
                for rownum, line in enumerate(
                        source.read_lines(strip_extension(key))):
                    if rownum == 0:
                        value = convert.json2value(line)
                        if len(line) > 1000000:
                            # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests)", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                            value.result.subtests = None
                            value.result.missing_subtests = True

                        _id, value = _fix(value)
                        row = {"id": _id, "value": value}
                        if sample_only_filter and Random.int(
                                int(1.0 / coalesce(
                                    sample_size, 0.01))) != 0 and qb.filter(
                                        [value], sample_only_filter):
                            # INDEX etl.id==0, BUT NO MORE
                            if value.etl.id != 0:
                                Log.error("Expecting etl.id==0")
                            num_keys += 1
                            self.queue.add(row)
                            break
                    elif len(line) > 1000000:
                        value = convert.json2value(line)
                        # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests).", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                        value.result.subtests = None
                        value.result.missing_subtests = True
                        _id, value = _fix(value)
                        row = {"id": _id, "value": value}
                    else:
                        #FAST
                        _id = strings.between(line, "_id\": \"",
                                              "\"")  # AVOID DECODING JSON
                        row = {"id": _id, "json": line}
                    num_keys += 1
                    self.queue.add(row)
            except Exception, e:
                Log.warning("Could not get queue for {{key}}",
                            key=key,
                            cause=e)
Example #49
    def pop_message(self, wait=SECOND, till=None):
        """
        RETURN THE MESSAGE, CALLER IS RESPONSIBLE FOR CALLING delete_message() WHEN DONE
        """
        m = self.queue.read(wait_time_seconds=Math.floor(wait.seconds))
        if not m:
            return None

        output = convert.json2value(m.get_body())
        return output
Example #50
    def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(settings).get_or_create_index(
            schema=convert.json2value(convert.value2json(SCHEMA), paths=True),
            limit_replicas=True,
            settings=settings
        )
        self.queue = self.es.threaded_queue(max_size=max_size, batch_size=batch_size)
Example #51
    def pop(self, wait=SECOND, till=None):
        if till is not None and not isinstance(till, Signal):
            Log.error("Expecting a signal")

        m = self.queue.read(wait_time_seconds=Math.floor(wait.seconds))
        if not m:
            return None

        self.pending.append(m)
        output = convert.json2value(m.get_body())
        return output
Example #52
    def pop_message(self, wait=SECOND, till=None):
        """
        RETURN TUPLE (message, payload) CALLER IS RESPONSIBLE FOR CALLING message.delete() WHEN DONE
        """
        message = self.queue.read(wait_time_seconds=Math.floor(wait.seconds))
        if not message:
            return None
        message.delete = lambda: self.queue.delete_message(message)

        payload = convert.json2value(message.get_body())
        return message, payload
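A usage sketch of the (message, payload) contract described in the docstring; the queue instance is assumed:

# Hypothetical usage: handle the payload, then acknowledge explicitly.
pair = queue.pop_message()
if pair:
    message, payload = pair
    # ... process payload ...
    message.delete()  # remove from the queue only after success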
Example #53
    def test_bad_long_json(self):
        test = pypy_json_encode({"values": [i for i in range(1000)]})
        test = test[:1000] + "|" + test[1000:]
        expected = u"Can not decode JSON at:\n\t..., 216, 217, 218, 219|, 220, 221, 222, 22...\n\t                       ^\n"
        # expected = u'Can not decode JSON at:\n\t...9,270,271,272,273,27|4,275,276,277,278,2...\n\t                       ^\n'
        try:
            output = convert.json2value(test)
            Log.error("Expecting error")
        except Exception, e:
            if e.message != expected:
                Log.error("Expecting good error message", cause=e)
Example #54
    def pop(self, wait=SECOND, till=None):
        if till is not None and not isinstance(till, Signal):
            Log.error("Expecting a signal")

        m = self.queue.read(wait_time_seconds=Math.floor(wait.seconds))
        if not m:
            return None

        self.pending.append(m)
        output = convert.json2value(m.get_body())
        return output
Example #55
    def pop_message(self, wait=SECOND, till=None):
        """
        RETURN TUPLE (message, payload) CALLER IS RESPONSIBLE FOR CALLING message.delete() WHEN DONE
        """
        message = self.queue.read(wait_time_seconds=Math.floor(wait.seconds))
        if not message:
            return None
        message.delete = lambda: self.queue.delete_message(message)

        payload = convert.json2value(message.get_body())
        return message, payload
Example #56
    def to_esfilter(self):
        if not isinstance(self.lhs, Variable) or not isinstance(self.rhs, Literal) or self.op in BinaryOp.algebra_ops:
            return {"script": {"script": self.to_ruby()}}

        if self.op in ["eq", "term"]:
            return {"term": {self.lhs.var: self.rhs.to_esfilter()}}
        elif self.op in ["ne", "neq"]:
            return {"not": {"term": {self.lhs.var: self.rhs.to_esfilter()}}}
        elif self.op in BinaryOp.ineq_ops:
            return {"range": {self.lhs.var: {self.op: convert.json2value(self.rhs.json)}}}
        else:
            Log.error("Logic error")
Example #57
def error(response):
    response = convert.utf82unicode(response.content)

    try:
        e = Except.new_instance(convert.json2value(response))
    except Exception:
        e = None

    if e:
        Log.error("Failed request", e)
    else:
        Log.error("Failed request\n {{response}}", {"response": response})
Example #58
def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}',
                        '{"code": "bb"}').replace('{"id": "tc"}',
                                                  '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = mo_json.json2value(suite_json)
                suite = convert.value2json(coalesce(suite.fullname,
                                                    suite.name))
                line = line.replace(suite_json, suite)

    if source.name.startswith("active-data-codecoverage"):
        d = convert.json2value(line)
        if d.source.file.total_covered > 0:
            return {"id": d._id, "json": line}, False
        else:
            return None, False

    if rownum == 0:
        value = mo_json.json2value(line)
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(
                int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter(
                    [value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        value = mo_json.json2value(line)
        _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find('"resource_usage":') != -1:
        value = mo_json.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False
Example #59
    def test_simple_query(self):
        if self.not_real_service():
            return

        query = convert.unicode2utf8(convert.value2json({"from": "unittest"}))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})