Example #1
def _wait_for_exit(please_stop):
    """
    /dev/null PIPED TO sys.stdin SPEWS INFINITE LINES, DO NOT POLL AS OFTEN
    """
    cr_count = 0  # COUNT NUMBER OF BLANK LINES

    while not please_stop:
        # if DEBUG:
        #     Log.note("inside wait-for-shutdown loop")
        if cr_count > 30:
            (Till(seconds=3) | please_stop).wait()
        try:
            line = sys.stdin.readline()
        except Exception as e:
            e = Except.wrap(e)
            if "Bad file descriptor" in e:
                _wait_for_interrupt(please_stop)
                break
            continue  # line WAS NEVER ASSIGNED; SKIP THE CHECKS BELOW AND TRY AGAIN

        # if DEBUG:
        #     Log.note("read line {{line|quote}}, count={{count}}", line=line, count=cr_count)
        if line == "":
            cr_count += 1
        else:
            cr_count = -1000000  # NOT /dev/null

        if line.strip() == "exit":
            Log.alert("'exit' Detected!  Stopping...")
            return
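
A note on the pattern above, which recurs throughout these examples: Except.wrap(e) converts any raised exception into a mo-logs Except, which supports matching by message fragment with the in operator, and Log.error(..., cause=e) chains the original exception as the cause (and, judging by the control flow in these snippets, also raises). A minimal sketch of the idiom, with a hypothetical read_config helper:

from mo_logs import Log
from mo_logs.exceptions import Except

def read_config(filename):
    # HYPOTHETICAL HELPER, FOR ILLUSTRATION ONLY
    try:
        with open(filename) as f:
            return f.read()
    except Exception as e:
        e = Except.wrap(e)  # NORMALIZE, SO MESSAGE FRAGMENTS CAN BE TESTED WITH `in`
        if "No such file" in e:
            return None  # EXPECTED CASE: MISSING FILE MEANS NO CONFIG
        Log.error("can not read {{file}}", file=filename, cause=e)  # LOG, CHAIN CAUSE, RAISE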
Example #2
    def query(self, _query):
        try:
            query = QueryOp.wrap(_query, container=self, namespace=self.namespace)

            for s in listwrap(query.select):
                if s.aggregate != None and not aggregates.get(s.aggregate):
                    Log.error(
                        "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                        name=s.name,
                        aggregate=s.aggregate
                    )

            frum = query["from"]
            if isinstance(frum, QueryOp):
                result = self.query(frum)
                q2 = query.copy()
                q2.frum = result
                return jx.run(q2)

            if is_deepop(self.es, query):
                return es_deepop(self.es, query)
            if is_aggsop(self.es, query):
                return es_aggsop(self.es, frum, query)
            if is_setop(self.es, query):
                return es_setop(self.es, query)
            Log.error("Can not handle")
        except Exception as e:
            e = Except.wrap(e)
            if "Data too large, data for" in e:
                http.post(self.es.cluster.url / "_cache/clear")
                Log.error("Problem (Tried to clear Elasticsearch cache)", e)
            Log.error("problem", e)
Example #3
    def _db_insert_column(self, column):
        try:
            self.db.execute(
                "INSERT INTO"
                + db_table_name
                + sql_iso(all_columns)
                + "VALUES"
                + sql_iso(
                    sql_list(
                        [
                            quote_value(column[c.name])
                            if c.name not in ("nested_path", "partitions")
                            else quote_value(value2json(column[c.name]))
                            for c in METADATA_COLUMNS
                        ]
                    )
                )
            )
        except Exception as e:
            e = Except.wrap(e)
            if "UNIQUE constraint failed" in e or " are not unique" in e:
                # THIS CAN HAPPEN BECAUSE todo HAS OLD COLUMN DATA
                self.todo.add((UPDATE, column), force=True)
            else:
                Log.error("do not know how to handle", cause=e)
Example #4
def get_branches(hg, branches, kwargs=None):
    # TRY ES
    cluster = elasticsearch.Cluster(branches)
    try:
        es = cluster.get_index(kwargs=branches, read_only=False)
        esq = jx_elasticsearch.new_instance(branches)
        found_branches = esq.query({"from": "branches", "format": "list", "limit": 10000}).data

        # IF IT IS TOO OLD, THEN PULL FROM HG
        oldest = Date(MAX(found_branches.etl.timestamp))
        if oldest == None or Date.now() - oldest > OLD_BRANCH:
            found_branches = _get_branches_from_hg(hg)
            es.extend({"id": b.name + " " + b.locale, "value": b} for b in found_branches)
            es.flush()

        try:
            return UniqueIndex(["name", "locale"], data=found_branches, fail_on_dup=False)
        except Exception as e:
            Log.error("Bad branch in ES index", cause=e)
    except Exception as e:
        e = Except.wrap(e)
        if "Can not find index " in e:
            set_default(branches, {"schema": branches_schema})
            es = cluster.get_or_create_index(branches)
            es.add_alias()
            return get_branches(kwargs)
        Log.error("problem getting branches", cause=e)
Example #5
    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            e = Except.wrap(e)
            raise e  # OH NO!
Example #6
    def _db_transaction(self):
        self.db.execute(str("BEGIN"))
        try:
            yield
            self.db.execute(str("COMMIT"))
        except Exception as e:
            e = Except.wrap(e)
            self.db.execute(str("ROLLBACK"))
            Log.error("Transaction failed", cause=e)
Example #7
    def delete(self):
        try:
            if os.path.isdir(self._filename):
                shutil.rmtree(self._filename)
            elif os.path.isfile(self._filename):
                os.remove(self._filename)
            return self
        except Exception as e:
            e = Except.wrap(e)
            if "The system cannot find the path specified" in e:
                return
            Log.error("Could not remove file", e)
Example #8
def delete_daemon(file, caller_stack, please_stop):
    # WINDOWS WILL HANG ONTO A FILE FOR A BIT AFTER WE CLOSED IT
    while not please_stop:
        try:
            file.delete()
            return
        except Exception as e:
            e = Except.wrap(e)
            e.trace = e.trace[0:2] + caller_stack

            Log.warning(u"problem deleting file {{file}}", file=file.abspath, cause=e)
            (Till(seconds=10) | please_stop).wait()
Example #9
def relay_post(path):
    try:
        return cache.request("post", path, flask.request.headers)
    except Exception as e:
        e = Except.wrap(e)
        Log.warning("could not handle request", cause=e)
        return Response(
            unicode2utf8(value2json(e, pretty=True)),
            status=400,
            headers={
                "Content-Type": "text/html"
            }
        )
Example #10
def ujson_encode(value, pretty=False):
    if pretty:
        return pretty_json(value)

    try:
        scrubbed = scrub(value)
        return ujson_dumps(scrubbed, ensure_ascii=False, sort_keys=True, escape_forward_slashes=False).decode('utf8')
    except Exception as e:
        from mo_logs.exceptions import Except
        from mo_logs import Log

        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
        raise e
Example #11
    def encode(self, value, pretty=False):
        if pretty:
            return pretty_json(value)

        try:
            scrubbed = scrub(value)
            return unicode(self.encoder.encode(scrubbed))
        except Exception as e:
            from mo_logs.exceptions import Except
            from mo_logs import Log

            e = Except.wrap(e)
            Log.warning("problem serializing {{type}}", type=_repr(value), cause=e)
            raise e
Example #12
    def raise_error(e, packed):
        err = text_type(e)
        e = Except.wrap(e)
        if err.startswith(func_name) and ("takes at least" in err or "required positional argument" in err):
            missing = [p for p in params if str(p) not in packed]
            given = [p for p in params if str(p) in packed]
            get_logger().error(
                "Problem calling {{func_name}}:  Expecting parameter {{missing}}, given {{given}}",
                func_name=func_name,
                missing=missing,
                given=given,
                stack_depth=2
            )
        get_logger().error("Error dispatching call", e)
Example #13
def value2json(obj, pretty=False, sort_keys=False):
    try:
        json = json_encoder(obj, pretty=pretty)
        if json == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON",  type= " (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
        return json
    except Exception as e:
        e = Except.wrap(e)
        try:
            json = pypy_json_encode(obj)
            return json
        except Exception:
            pass
        Log.error("Can not encode into JSON: {{value}}", value=repr(obj), cause=e)
Example #14
    def encode(self, value, pretty=False):
        if pretty:
            return pretty_json(value)

        try:
            with Timer("scrub", too_long=0.1):
                scrubbed = scrub(value)
            with Timer("encode", too_long=0.1):
                return text_type(self.encoder(scrubbed))
        except Exception as e:
            from mo_logs.exceptions import Except
            from mo_logs import Log

            e = Except.wrap(e)
            Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
            raise e
Example #15
    def _run(self):
        self.id = get_ident()
        with RegisterThread(self):
            try:
                if self.target is not None:
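                    # MOVE args/kwargs INTO LOCALS AND DROP THE INSTANCE REFERENCES SO THEY CAN BE GARBAGE COLLECTED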
                    a, k, self.args, self.kwargs = self.args, self.kwargs, None, None
                    self.end_of_thread.response = self.target(*a, **k)
                    self.parent.remove_child(self)  # IF THREAD ENDS OK, THEN FORGET ABOUT IT
            except Exception as e:
                e = Except.wrap(e)
                with self.synch_lock:
                    self.end_of_thread.exception = e
                with self.parent.child_lock:
                    emit_problem = self not in self.parent.children
                if emit_problem:
                    # THREAD FAILURES ARE A PROBLEM ONLY IF NO ONE WILL BE JOINING WITH IT
                    try:
                        Log.fatal("Problem in thread {{name|quote}}", name=self.name, cause=e)
                    except Exception:
                        sys.stderr.write(str("ERROR in thread: " + self.name + " " + text_type(e) + "\n"))
            finally:
                try:
                    with self.child_lock:
                        children = copy(self.children)
                    for c in children:
                        try:
                            DEBUG and sys.stdout.write(str("Stopping thread " + c.name + "\n"))
                            c.stop()
                        except Exception as e:
                            Log.warning("Problem stopping thread {{thread}}", thread=c.name, cause=e)

                    for c in children:
                        try:
                            DEBUG and sys.stdout.write(str("Joining on thread " + c.name + "\n"))
                            c.join()
                        except Exception as e:
                            Log.warning("Problem joining thread {{thread}}", thread=c.name, cause=e)
                        finally:
                            DEBUG and sys.stdout.write(str("Joined on thread " + c.name + "\n"))

                    del self.target, self.args, self.kwargs
                    DEBUG and Log.note("thread {{name|quote}} stopping", name=self.name)
                except Exception as e:
                    DEBUG and Log.warning("problem with thread {{name|quote}}", cause=e, name=self.name)
                finally:
                    self.stopped.go()
                    DEBUG and Log.note("thread {{name|quote}} is done", name=self.name)
Example #16
    def write_lines(self, key, lines):
        self._verify_key_format(key)
        storage = self.bucket.new_key(key + ".json.gz")

        buff = TemporaryFile()
        archive = gzip.GzipFile(fileobj=buff, mode='w')
        count = 0
        for l in lines:
            if hasattr(l, "__iter__"):
                for ll in l:
                    archive.write(ll.encode("utf8"))
                    archive.write(b"\n")
                    count += 1
            else:
                archive.write(l.encode("utf8"))
                archive.write(b"\n")
                count += 1

        archive.close()
        file_length = buff.tell()

        retry = 3
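        # UP TO 3 ATTEMPTS; ONLY FATAL CONDITIONS (OR THE LAST FAILURE) ESCALATE FROM warning TO error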
        while retry:
            try:
                with Timer(
                    "Sending {{count}} lines in {{file_length|comma}} bytes",
                    {"file_length": file_length, "count": count},
                    silent=not self.settings.debug,
                ):
                    buff.seek(0)
                    storage.set_contents_from_file(buff)
                break
            except Exception as e:
                e = Except.wrap(e)
                retry -= 1
                if retry == 0 or 'Access Denied' in e or "No space left on device" in e:
                    Log.error("could not push data to s3", cause=e)
                else:
                    Log.warning("could not push data to s3", cause=e)

        if self.settings.public:
            storage.set_acl('public-read')
        return
Example #17
def _get_file(ref, url):

    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        DEBUG and Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception as e:
        content = None
        Log.error("Could not read file {{filename}}", filename=path, cause=e)

    try:
        new_value = json2value(content,
                               params=ref.query,
                               flexible=True,
                               leaves=True)
    except Exception as e:
        e = Except.wrap(e)
        try:
            new_value = ini2value(content)
        except Exception:
            raise Log.error("Can not read {{file}}", file=path, cause=e)
    new_value = _replace_ref(new_value, ref)
    return new_value
Example #18
    def raise_error(e, packed):
        err = text_type(e)
        e = Except.wrap(e)
        if err.startswith(func_name) and ("takes at least" in err or "required positional argument" in err):
            missing = [p for p in params if str(p) not in packed]
            given = [p for p in params if str(p) in packed]
            if not missing:
                raise e
            else:
                get_logger().error(
                    "Problem calling {{func_name}}:  Expecting parameter {{missing}}, given {{given}}",
                    func_name=func_name,
                    missing=missing,
                    given=given,
                    stack_depth=2,
                    cause=e
                )
        raise e
Example #19
    def encode(self, value, pretty=False):
        if pretty:
            return pretty_json(value)

        try:
            with Timer("scrub", too_long=0.1):
                scrubbed = scrub(value)
            with Timer("encode", too_long=0.1):
                return text_type(self.encoder(scrubbed))
        except Exception as e:
            from mo_logs.exceptions import Except
            from mo_logs import Log

            e = Except.wrap(e)
            Log.warning("problem serializing {{type}}",
                        type=text_type(repr(value)),
                        cause=e)
            raise e
Example #20
def find_query(hash):
    """
    FIND QUERY BY HASH, RETURN Response OBJECT
    :param hash:
    :return: Response OBJECT
    """
    try:
        hash = hash.split("/")[0]
        query = query_finder.find(hash)

        if not query:
            return Response(b'{"type": "ERROR", "template": "not found"}', status=404)
        else:
            return Response(query.encode("utf8"), status=200)
    except Exception as e:
        e = Except.wrap(e)
        Log.warning("problem finding query with hash={{hash}}", hash=hash, cause=e)
        return Response(value2json(e).encode("utf8"), status=400)
Example #21
    def _db_insert_column(self, column):
        try:
            self.db.execute(
                "INSERT INTO" + db_table_name + sql_iso(all_columns) +
                "VALUES" + sql_iso(
                    sql_list([
                        quote_value(column[c.name])
                        if c.name not in ("nested_path", "partitions")
                        else quote_value(value2json(column[c.name]))
                        for c in METADATA_COLUMNS
                    ])))
        except Exception as e:
            e = Except.wrap(e)
            if "UNIQUE constraint failed" in e or " are not unique" in e:
                # THIS CAN HAPPEN BECAUSE todo HAS OLD COLUMN DATA
                self.todo.add((UPDATE, column), force=True)
            else:
                Log.error("do not know how to handle", cause=e)
Example #22
def get_file(ref, url):

    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        if DEBUG:
            Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception as e:
        content = None
        Log.error("Could not read file {{filename}}", filename=path, cause=e)

    try:
        new_value = json2value(content, params=ref.query, flexible=True, leaves=True)
    except Exception as e:
        e = Except.wrap(e)
        try:
            new_value = ini2value(content)
        except Exception:
            raise Log.error("Can not read {{file}}", file=path, cause=e)
    new_value = _replace_ref(new_value, ref)
    return new_value
Example #23
def value2json(obj, pretty=False, sort_keys=False):
    try:
        json = json_encoder(obj, pretty=pretty)
        if json == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON",
                     type=" (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " +
                      str(type(obj)))
        return json
    except Exception as e:
        e = Except.wrap(e)
        try:
            json = pypy_json_encode(obj)
            return json
        except Exception:
            pass
        Log.error("Can not encode into JSON: {{value}}",
                  value=repr(obj),
                  cause=e)
Example #24
def ujson_encode(value, pretty=False):
    if pretty:
        return pretty_json(value)

    try:
        scrubbed = scrub(value)
        return ujson_dumps(scrubbed,
                           ensure_ascii=False,
                           sort_keys=True,
                           escape_forward_slashes=False).decode('utf8')
    except Exception as e:
        from mo_logs.exceptions import Except
        from mo_logs import Log

        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}",
                    type=text_type(repr(value)),
                    cause=e)
        raise e
Example #25
    def query(self, _query):
        try:
            query = QueryOp.wrap(_query,
                                 container=self,
                                 namespace=self.namespace)

            self.stats.record(query)

            for s in listwrap(query.select):
                if s.aggregate != None and not aggregates.get(s.aggregate):
                    Log.error(
                        "ES can not aggregate {{name}} because {{aggregate|quote}} is"
                        " not a recognized aggregate",
                        name=s.name,
                        aggregate=s.aggregate,
                    )

            frum = query["from"]
            if is_op(frum, QueryOp):
                result = self.query(frum)
                q2 = query.copy()
                q2.frum = result
                return jx.run(q2)

            if is_bulk_agg(self.es, query):
                return es_bulkaggsop(self, frum, query)
            if is_bulk_set(self.es, query):
                return es_bulksetop(self, frum, query)

            query.limit = temper_limit(query.limit, query)

            if is_aggsop(self.es, query):
                return es_aggsop(self.es, frum, query)
            if is_setop(self.es, query):
                return es_setop(self.es, query)
            Log.error("Can not handle")
        except Exception as cause:
            cause = Except.wrap(cause)
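            # "Data too large" IS ELASTICSEARCH'S CIRCUIT-BREAKER MESSAGE; CLEARING THE CACHE MAY FREE MEMORY FOR A RETRY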
            if "Data too large, data for" in cause:
                http.post(self.es.cluster.url / "_cache/clear")
                Log.error("Problem (Tried to clear Elasticsearch cache)",
                          cause)
            Log.error("problem", cause=cause)
Example #26
    def assertRaises(self, problem, function, *args, **kwargs):
        try:
            function(*args, **kwargs)
        except Exception as e:
            f = Except.wrap(e)
            if is_text(problem):
                if problem in f:
                    return
                Log.error(
                    "expecting an exception returning {{problem|quote}} got something else instead",
                    problem=problem,
                    cause=f
                )
            elif not isinstance(f, problem) and not isinstance(e, problem):
                Log.error("expecting an exception of type {{type}} to be raised", type=problem)
            else:
                return

        Log.error("Expecting an exception to be raised")
Example #27
    def encode(self, value, pretty=False):
        if pretty:
            return pretty_json(value)

        try:
            with Timer("scrub", too_long=0.1):
                scrubbed = scrub(value)
            param = {"size": 0}
            with Timer("encode {{size}} characters", param=param, too_long=0.1):
                output = text_type(self.encoder(scrubbed))
                param["size"] = len(output)
                return output
        except Exception as e:
            from mo_logs.exceptions import Except
            from mo_logs import Log

            e = Except.wrap(e)
            Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
            raise e
Example #28
    def _execute(self, command):
        with self.lock:
            if self.current_task is not None:
                self.current_task.wait()
            self.current_task = Signal()
            self.current_response = None
            self.current_error = None
        self.process.stdin.add(value2json(command))
        self.current_task.wait()
        with self.lock:
            try:
                if self.current_error:
                    Log.error("problem with process call", cause=Except.new_instance(self.current_error))
                else:
                    return self.current_response
            finally:
                self.current_task = None
                self.current_response = None
                self.current_error = None
Example #29
    def _merge(*schemas):
        if len(schemas) == 1:
            return schemas[0]
        try:
            return OrderedDict(
                (k, _merge(*[ss for s in schemas for ss in [s.get(k)] if ss]))
                for k in jx.sort(set(k for s in schemas for k in s.keys()))
            )
        except Exception as e:
            e = Except.wrap(e)
            if "Expecting types to match" in e:
                raise e
            t = list(set(schemas))
            if len(t) == 1:
                return t[0]
            elif len(t) == 2 and STRING in t and NUMBER in t:
                return STRING
            else:
                Log.error("Expecting types to match {{types|json}}", types=t)
Example #30
    def close(self):
        if self.transaction_level > 0:
            if self.readonly:
                self.commit()  # AUTO-COMMIT
            else:
                Log.error("expecting commit() or rollback() before close")
        self.cursor = None  # NOT NEEDED
        try:
            self.db.close()
        except Exception as e:
            e = Except.wrap(e)
            if "Already closed" in e:
                return

            Log.warning("can not close()", e)
        finally:
            try:
                all_db.remove(self)
            except Exception as e:
                Log.error("not expected", cause=e)
Example #31
    def replacer(found):
        ops = found.group(1).split("|")

        path = ops[0]
        var = path.lstrip(".")
        depth = min(len(seq), max(1, len(path) - len(var)))
        try:
            val = seq[-depth]
            if var:
                if is_sequence(val) and float(var) == _round(float(var), 0):
                    val = val[int(var)]
                else:
                    val = val[var]
            for func_name in ops[1:]:
                parts = func_name.split("(")
                if len(parts) > 1:
                    val = eval(parts[0] + "(val, " + "(".join(parts[1::]))
                else:
                    val = FORMATTERS[func_name](val)
            val = toString(val)
            return val
        except Exception as e:
            from mo_logs import Except

            e = Except.wrap(e)
            try:
                if "is not JSON serializable" in e.message:
                    # WORK HARDER
                    val = toString(val)
                    return val
            except Exception as f:
                if not _Log:
                    _late_import()

                _Log.warning(
                    "Can not expand " + "|".join(ops) +
                    " in template: {{template_|json}}",
                    template_=template,
                    cause=e,
                )
            return "[template expansion error: (" + str(e.message) + ")]"
Example #32
    def get_or_create_table(
        self,
        table,
        schema=None,
        typed=True,
        read_only=False,
        sharded=False,
        partition=None,
        cluster=None,  # TUPLE OF FIELDS TO SORT DATA
        id=None,
        kwargs=None,
    ):
        if kwargs.lookup != None or kwargs.flake != None:
            Log.error("expecting schema, not lookup")
        try:
            return Table(kwargs=kwargs, container=self)
        except Exception as e:
            e = Except.wrap(e)
            if not read_only and "Not found: Table" in e:
                return self.create_table(kwargs)
            Log.error("could not get table {{table}}", table=table, cause=e)
Example #33
    def create_or_replace_table(
        self,
        table,
        schema=None,
        typed=True,
        read_only=False,
        partition=None,
        cluster=None,  # TUPLE OF FIELDS TO SORT DATA
        sharded=False,
        kwargs=None,
    ):
        if kwargs.lookup != None or kwargs.flake != None:
            Log.error("expecting schema, not lookup")

        try:
            self.delete_table(table)
        except Exception as e:
            e = Except.wrap(e)
            if "Not found: Table" not in e and "Unable to get TableReference" not in e:
                Log.error("could not get table {{table}}", table=table, cause=e)
        return self.create_table(kwargs=kwargs)
Example #34
    def __exit__(self, exc_type, exc_val, exc_tb):
        if not exc_val:
            Log.error("Expecting an error")
        f = Except.wrap(exc_val)

        if isinstance(self.problem, (list, tuple)):
            problems = self.problem
        else:
            problems = [self.problem]

        causes = []
        for problem in problems:
            if isinstance(problem, object.__class__) and issubclass(
                    problem, BaseException) and isinstance(exc_val, problem):
                return True
            try:
                self.this.assertIn(problem, f)
                return True
            except Exception as cause:
                causes.append(cause)
        Log.error("problem is not raised", cause=first(causes))
Example #35
    def replacer(found):
        ops = found.group(1).split("|")

        path = ops[0]
        var = path.lstrip(".")
        depth = min(len(seq), max(1, len(path) - len(var)))
        try:
            val = seq[-depth]
            if var:
                if is_sequence(val) and float(var) == _round(float(var), 0):
                    val = val[int(var)]
                else:
                    val = val[var]
            for func_name in ops[1:]:
                parts = func_name.split('(')
                if len(parts) > 1:
                    val = eval(parts[0] + "(val, " + ("(".join(parts[1::])))
                else:
                    val = FORMATTERS[func_name](val)
            val = toString(val)
            return val
        except Exception as e:
            from mo_logs import Except

            e = Except.wrap(e)
            try:
                if "is not JSON serializable" in e.message:
                    # WORK HARDER
                    val = toString(val)
                    return val
            except Exception as f:
                if not _Log:
                    _late_import()

                _Log.warning(
                    "Can not expand " + "|".join(ops) + " in template: {{template_|json}}",
                    template_=template,
                    cause=e
                )
            return "[template expansion error: (" + str(e.message) + ")]"
Example #36
    def join(self, till=None):
        """
        RETURN THE RESULT {"response":r, "exception":e} OF THE THREAD EXECUTION (INCLUDING EXCEPTION, IF EXISTS)
        """
        if self is Thread:
            Log.error("Thread.join() is not a valid call, use t.join()")

        with self.child_lock:
            children = copy(self.children)
        for c in children:
            c.join(till=till)

        DEBUG and Log.note("{{parent|quote}} waiting on thread {{child|quote}}", parent=Thread.current().name, child=self.name)
        (self.stopped | till).wait()
        if self.stopped:
            self.parent.remove_child(self)
            if not self.end_of_thread.exception:
                return self.end_of_thread.response
            else:
                Log.error("Thread {{name|quote}} did not end well", name=self.name, cause=self.end_of_thread.exception)
        else:
            raise Except(context=THREAD_TIMEOUT)
Example #37
    def _execute(self, command):
        with self.lock:
            self.current_task.wait()
            self.current_task = Signal()
            self.current_response = None
            self.current_error = None

            if self.process.service_stopped:
                Log.error("python is not running")
            self.process.stdin.add(value2json(command))
            (self.current_task | self.process.service_stopped).wait()

            try:
                if self.current_error:
                    Log.error("problem with process call",
                              cause=Except.new_instance(self.current_error))
                else:
                    return self.current_response
            finally:
                self.current_task = DONE
                self.current_response = None
                self.current_error = None
Example #38
    def write_lines(self, key, lines):
        self._verify_key_format(key)
        storage = self.bucket.new_key(key + ".json.gz")

        buff = TemporaryFile()
        archive = gzip.GzipFile(fileobj=buff, mode='w')
        count = 0
        for l in lines:
            if hasattr(l, "__iter__"):
                for ll in l:
                    archive.write(ll.encode("utf8"))
                    archive.write(b"\n")
                    count += 1
            else:
                archive.write(l.encode("utf8"))
                archive.write(b"\n")
                count += 1

        archive.close()
        file_length = buff.tell()

        retry = 3
        while retry:
            try:
                with Timer("Sending {{count}} lines in {{file_length|comma}} bytes", {"file_length": file_length, "count": count}, silent=not self.settings.debug):
                    buff.seek(0)
                    storage.set_contents_from_file(buff)
                break
            except Exception as e:
                e = Except.wrap(e)
                retry -= 1
                if retry == 0 or 'Access Denied' in e or "No space left on device" in e:
                    Log.error("could not push data to s3", cause=e)
                else:
                    Log.warning("could not push data to s3", cause=e)

        if self.settings.public:
            storage.set_acl('public-read')
        return
Example #39
def sliding_MWU(values):
    """
    RETURN
    :param values:
    :return:
    """
    # ADD MEDIAN TO EITHER SIDE OF values
    prefix = [
        np.median(values[:i + weight_radius]) for i in range(weight_radius)
    ]
    suffix = [
        np.median(values[-i - weight_radius:])
        for i in reversed(range(weight_radius))
    ]
    combined = np.array(prefix + list(values) + suffix)
    b = combined.itemsize
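    # STRIDE TRICK: VIEW combined AS len(values) OVERLAPPING WINDOWS, EACH 2*weight_radius WIDE, WITHOUT COPYING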
    window = as_strided(combined,
                        shape=(len(values), weight_radius * 2),
                        strides=(b, b))

    med = (len(median_weight) + 1) / 2
    try:
        m_score = np.array([
            stats.mannwhitneyu(
                w[:weight_radius],
                w[-weight_radius:],
                use_continuity=True,
                alternative="two-sided",
            ) for v in window for r in [rankdata(v)]
            for w in [(r - med) * median_weight]
        ])

        return m_score
    except Exception as cause:
        cause = Except.wrap(cause)
        if "All numbers are identical" in cause:
            return np.ones((window.shape[0], 2))
        raise cause
Example #40
def test_transactions(service):
    # This should pass
    old = service.get_tuids("/testing/geckodriver/CONTRIBUTING.md",
                            "6162f89a4838",
                            commit=False)
    new = service.get_tuids("/testing/geckodriver/CONTRIBUTING.md",
                            "06b1a22c5e62",
                            commit=False)

    assert len(old) == len(new)

    # listed_inserts = [None] * 100
    listed_inserts = [('test' + str(count), str(count))
                      for count, entry in enumerate(range(100))]
    listed_inserts.append(
        'hello world')  # This should cause a transaction failure

    try:
        with service.conn.transaction() as t:
            count = 0
            while count < len(listed_inserts):
                tmp_inserts = listed_inserts[count:count + 50]
                count += 50
                t.execute(
                    "INSERT OR REPLACE INTO latestFileMod (file, revision) VALUES "
                    + sql_list(
                        sql_iso(sql_list(map(quote_value, i)))
                        for i in tmp_inserts))
        assert False  # SHOULD NOT GET HERE
    except Exception as e:
        e = Except.wrap(e)
        assert "11 values for 2 columns" in e

    # Check that the transaction was undone
    latestTestMods = service.conn.get_one(
        "SELECT revision FROM latestFileMod WHERE file=?", ('test1', ))

    assert not latestTestMods
Example #41
    def setUpClass(self):
        while True:
            try:
                es = test_jx.global_settings.backend_es
                http.get_json(URL(es.host, port=es.port))
                break
            except Exception as e:
                e = Except.wrap(e)
                if "No connection could be made because the target machine actively refused it" in e or "Connection refused" in e:
                    Log.alert("Problem connecting")
                    Till(seconds=WAIT_AFTER_PROBLEM).wait()
                else:
                    Log.error("Server raised exception", e)

        # REMOVE OLD INDEXES
        cluster = elasticsearch.Cluster(test_jx.global_settings.backend_es)
        aliases = cluster.get_aliases()
        for a in aliases:
            try:
                if a.index.startswith("testing_"):
                    cluster.delete_index(a.index)
            except Exception as e:
                Log.warning("Problem removing {{index|quote}}", index=a.index, cause=e)
Example #42
def get_branches(hg, branches, kwargs=None):
    # TRY ES
    cluster = elasticsearch.Cluster(branches)
    try:
        es = cluster.get_index(kwargs=branches, read_only=False)
        esq = jx_elasticsearch.new_instance(branches)
        found_branches = esq.query({
            "from": branches.index,
            "format": "list",
            "limit": 10000
        }).data

        # IF IT IS TOO OLD, THEN PULL FROM HG
        oldest = Date(MAX(found_branches.etl.timestamp))
        if oldest == None or Date.now() - oldest > OLD_BRANCH:
            found_branches = _get_branches_from_hg(hg)
            es.extend([{
                "id": b.name + " " + b.locale,
                "value": b
            } for b in found_branches])
            es.flush()

        try:
            return UniqueIndex(["name", "locale"],
                               data=found_branches,
                               fail_on_dup=False)
        except Exception as e:
            Log.error("Bad branch in ES index", cause=e)
    except Exception as e:
        e = Except.wrap(e)
        if "Can not find index " in e:
            branches.schema = branches_schema
            es = cluster.get_or_create_index(branches)
            es.add_alias()
            return get_branches(kwargs)
        Log.error("problem getting branches", cause=e)
Example #43
def value2json(obj, pretty=False, sort_keys=False, keep_whitespace=True):
    """
    :param obj:  THE VALUE TO TURN INTO JSON
    :param pretty: True TO MAKE A MULTI-LINE PRETTY VERSION
    :param sort_keys: True TO SORT KEYS
    :param keep_whitespace: False TO strip() THE WHITESPACE IN THE VALUES
    :return:
    """
    if FIND_LOOPS:
        obj = scrub(obj, scrub_text=_keep_whitespace if keep_whitespace else trim_whitespace())
    try:
        json = json_encoder(obj, pretty=pretty)
        if json == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON", type=" (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
        return json
    except Exception as e:
        e = Except.wrap(e)
        try:
            json = pypy_json_encode(obj)
            return json
        except Exception:
            pass
        Log.error("Can not encode into JSON: {{value}}", value=text(repr(obj)), cause=e)
Example #44
def value2json(obj, pretty=False, sort_keys=False, keep_whitespace=True):
    """
    :param obj:  THE VALUE TO TURN INTO JSON
    :param pretty: True TO MAKE A MULTI-LINE PRETTY VERSION
    :param sort_keys: True TO SORT KEYS
    :param keep_whitespace: False TO strip() THE WHITESPACE IN THE VALUES
    :return:
    """
    if FIND_LOOPS:
        obj = scrub(obj, scrub_text=_keep_whitespace if keep_whitespace else _trim_whitespace())
    try:
        json = json_encoder(obj, pretty=pretty)
        if json == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON", type=" (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
        return json
    except Exception as e:
        e = Except.wrap(e)
        try:
            json = pypy_json_encode(obj)
            return json
        except Exception:
            pass
        Log.error("Can not encode into JSON: {{value}}", value=text_type(repr(obj)), cause=e)
Example #45
    def column_query(self, sql, param=None):
        """
        RETURN RESULTS IN [column][row_num] GRID
        """
        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()
                self.cursor.execute("SET TIME_ZONE='+00:00'")
                self.cursor.close()
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
            # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
            result = transpose(*grid)

            if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

            return result
        except Exception as e:
            e = Except.wrap(e)
            if "InterfaceError" in e:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}",
                      sql=sql,
                      cause=e,
                      stack_depth=1)
Example #46
def unicode2Date(value, format=None):
    """
    CONVERT UNICODE STRING TO UNIX TIMESTAMP VALUE
    """
    # http://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    if value == None:
        return None

    if format != None:
        try:
            if format.endswith("%S.%f") and "." not in value:
                value += ".000"
            return _unix2Date(datetime2unix(datetime.strptime(value, format)))
        except Exception as e:
            from mo_logs import Log

            Log.error("Can not format {{value}} with {{format}}",
                      value=value,
                      format=format,
                      cause=e)

    value = value.strip()
    if value.lower() == "now":
        return _unix2Date(datetime2unix(_utcnow()))
    elif value.lower() == "today":
        return _unix2Date(math.floor(datetime2unix(_utcnow()) / 86400) * 86400)
    elif value.lower() in ["eod", "tomorrow"]:
        return _unix2Date(
            math.floor(datetime2unix(_utcnow()) / 86400) * 86400 + 86400)

    if any(value.lower().find(n) >= 0
           for n in ["now", "today", "eod", "tomorrow"] +
           list(MILLI_VALUES.keys())):
        return parse_time_expression(value)

    try:  # 2.7 DOES NOT SUPPORT %z
        local_value = parse_date(value)  #eg 2014-07-16 10:57 +0200
        return _unix2Date(
            datetime2unix(
                (local_value -
                 coalesce(local_value.utcoffset(), 0)).replace(tzinfo=None)))
    except Exception as e:
        e = Except.wrap(e)  # FOR DEBUGGING
        pass

    formats = [
        "%Y-%m-%dT%H:%M:%S%z", "%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M:%S.%f"
    ]
    for f in formats:
        try:
            return _unix2Date(datetime2unix(datetime.strptime(value, f)))
        except Exception:
            pass

    deformats = [
        "%Y-%m", "%Y%m%d", "%d%m%Y", "%d%m%y", "%d%b%Y", "%d%b%y", "%d%B%Y",
        "%d%B%y", "%B%d%Y", "%b%d%Y", "%B%d%", "%b%d%y", "%Y%m%d%H%M%S%f",
        "%Y%m%d%H%M%S", "%Y%m%dT%H%M%S", "%d%m%Y%H%M%S", "%d%m%y%H%M%S",
        "%d%b%Y%H%M%S", "%d%b%y%H%M%S", "%d%B%Y%H%M%S", "%d%B%y%H%M%S"
    ]
    value = deformat(value)
    for f in deformats:
        try:
            return unicode2Date(value, format=f)
        except Exception:
            pass
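    # NOTE: THE else BELOW PAIRS WITH THE for LOOP ABOVE; IT RUNS WHEN THE LOOP FINISHES WITHOUT A MATCHING FORMAT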

    else:
        from mo_logs import Log
        Log.error("Can not interpret {{value}} as a datetime", value=value)
Example #47
def extractor(
    guid,
    num_partitions,
    esq,
    query,
    selects,
    query_path,
    schema,
    chunk_size,
    cardinality,
    abs_limit,
    formatter,
    please_stop,
):
    total = 0
    # WE MESS WITH THE QUERY LIMITS FOR CHUNKING
    query.limit = first(query.groupby).domain.limit = chunk_size * 2
    start_time = Date.now()

    try:
        write_status(
            guid,
            {
                "status": "starting",
                "chunks": num_partitions,
                "rows": min(abs_limit, cardinality),
                "start_time": start_time,
                "timestamp": Date.now(),
            },
        )

        with TempFile() as temp_file:
            with open(temp_file.abspath, "wb") as output:
                for i in range(0, num_partitions):
                    if please_stop:
                        Log.error("request to shutdown!")
                    is_last = i == num_partitions - 1
                    first(query.groupby).allowNulls = is_last
                    acc, decoders, es_query = aggop_to_es_queries(
                        selects, query_path, schema, query)
                    # REACH INTO THE QUERY TO SET THE partitions
                    terms = es_query.aggs._filter.aggs._match.terms
                    terms.include.partition = i
                    terms.include.num_partitions = num_partitions

                    result = esq.es.search(deepcopy(es_query), query.limit)
                    aggs = unwrap(result.aggregations)

                    formatter.add(aggs, acc, query, decoders, selects)
                    for b in formatter.bytes():
                        if b is DONE:
                            break
                        output.write(b)
                    else:
                        write_status(
                            guid,
                            {
                                "status": "working",
                                "chunk": i,
                                "chunks": num_partitions,
                                "row": total,
                                "rows": min(abs_limit, cardinality),
                                "start_time": start_time,
                                "timestamp": Date.now(),
                            },
                        )
                        continue
                    break
                for b in formatter.footer():
                    output.write(b)

            upload(guid + ".json", temp_file)
        write_status(
            guid,
            {
                "ok": True,
                "status": "done",
                "chunks": num_partitions,
                "rows": min(abs_limit, cardinality),
                "start_time": start_time,
                "end_time": Date.now(),
                "timestamp": Date.now(),
            },
        )
    except Exception as e:
        e = Except.wrap(e)
        write_status(
            guid,
            {
                "ok": False,
                "status": "error",
                "error": e,
                "start_time": start_time,
                "end_time": Date.now(),
                "timestamp": Date.now(),
            },
        )
        Log.warning("Could not extract", cause=e)
Example #48
        def life_cycle_watcher(please_stop):
            failed_attempts = Data()

            while not please_stop:
                spot_requests = self._get_managed_spot_requests()
                last_get = Date.now()
                instances = wrap({i.id: i for r in self.ec2_conn.get_all_instances() for i in r.instances})
                # INSTANCES THAT REQUIRE SETUP
                time_to_stop_trying = {}
                please_setup = [
                    (i, r) for i, r in [(instances[r.instance_id], r) for r in spot_requests]
                    if i.id and not i.tags.get("Name") and i._state.name == "running" and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
                ]
                for i, r in please_setup:
                    try:
                        p = self.settings.utility[i.instance_type]
                        if p == None:
                            try:
                                self.ec2_conn.terminate_instances(instance_ids=[i.id])
                                with self.net_new_locker:
                                    self.net_new_spot_requests.remove(r.id)
                            finally:
                                Log.error("Can not setup unknown {{instance_id}} of type {{type}}", instance_id=i.id, type=i.instance_type)
                        i.markup = p
                        try:
                            self.instance_manager.setup(i, coalesce(p, 0))
                        except Exception as e:
                            e = Except.wrap(e)
                            failed_attempts[r.id] += [e]
                            Log.error(ERROR_ON_CALL_TO_SETUP, e)
                        i.add_tag("Name", self.settings.ec2.instance.name + " (running)")
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                    except Exception as e:
                        if not time_to_stop_trying.get(i.id):
                            time_to_stop_trying[i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
                        if Date.now() > time_to_stop_trying[i.id]:
                            # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                            self.ec2_conn.terminate_instances(instance_ids=[i.id])
                            with self.net_new_locker:
                                self.net_new_spot_requests.remove(r.id)
                            Log.warning("Problem with setup of {{instance_id}}.  Time is up.  Instance TERMINATED!", instance_id=i.id, cause=e)
                        elif "Can not setup unknown " in e:
                            Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)
                        elif ERROR_ON_CALL_TO_SETUP in e:
                            if len(failed_attempts[r.id]) > 2:
                                Log.warning("Problem with setup() of {{instance_id}}", instance_id=i.id, cause=failed_attempts[r.id])
                        else:
                            Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)

                if Date.now() - last_get > 5 * SECOND:
                    # REFRESH STALE
                    spot_requests = self._get_managed_spot_requests()
                    last_get = Date.now()

                pending = wrap([r for r in spot_requests if r.status.code in PENDING_STATUS_CODES])
                give_up = wrap([r for r in spot_requests if r.status.code in PROBABLY_NOT_FOR_A_WHILE | TERMINATED_STATUS_CODES])
                ignore = wrap([r for r in spot_requests if r.status.code in MIGHT_HAPPEN])  # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT

                if self.done_spot_requests:
                    with self.net_new_locker:
                        expired = Date.now() - self.settings.run_interval + 2 * MINUTE
                        for ii in list(self.net_new_spot_requests):
                            if Date(ii.create_time) < expired:
                                ## SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                                self.net_new_spot_requests.remove(ii)
                        for g in give_up:
                            self.net_new_spot_requests.remove(g.id)
                        for g in ignore:
                            self.net_new_spot_requests.remove(g.id)
                        pending = UniqueIndex(("id",), data=pending)
                        pending = pending | self.net_new_spot_requests

                    if give_up:
                        self.ec2_conn.cancel_spot_instance_requests(request_ids=give_up.id)
                        Log.note("Cancelled spot requests {{spots}}, {{reasons}}", spots=give_up.id, reasons=give_up.status.code)

                if not pending and not time_to_stop_trying and self.done_spot_requests:
                    Log.note("No more pending spot requests")
                    please_stop.go()
                    break
                elif pending:
                    Log.note("waiting for spot requests: {{pending}}", pending=[p.id for p in pending])

                (Till(seconds=10) | please_stop).wait()

            Log.note("life cycle watcher has stopped")
Example #49
    def write_lines(self, key, lines):
        self._verify_key_format(key)
        storage = self.bucket.new_key(str(key + ".json.gz"))

        if VERIFY_UPLOAD:
            lines = list(lines)

        with mo_files.TempFile() as tempfile:
            with open(tempfile.abspath, "wb") as buff:
                DEBUG and Log.note("Temp file {{filename}}",
                                   filename=tempfile.abspath)
                archive = gzip.GzipFile(filename=str(key + ".json"),
                                        fileobj=buff,
                                        mode="w")
                count = 0
                for l in lines:
                    if is_many(l):
                        for ll in l:
                            archive.write(ll.encode("utf8"))
                            archive.write(b"\n")
                            count += 1
                    else:
                        archive.write(l.encode("utf8"))
                        archive.write(b"\n")
                        count += 1
                archive.close()

            retry = 3
            while retry:
                try:
                    with Timer(
                        "Sending {{count}} lines in {{file_length|comma}} bytes for {{key}}",
                        {"key": key, "file_length": tempfile.length, "count": count},
                        verbose=self.settings.debug,
                    ):
                        storage.set_contents_from_filename(
                            tempfile.abspath,
                            headers={"Content-Type": mimetype.GZIP})
                    break
                except Exception as e:
                    e = Except.wrap(e)
                    retry -= 1
                    if (retry == 0 or "Access Denied" in e
                            or "No space left on device" in e):
                        Log.error("could not push data to s3", cause=e)
                    else:
                        Log.warning("could not push data to s3, will retry",
                                    cause=e)

            if self.settings.public:
                storage.set_acl("public-read")

            if VERIFY_UPLOAD:
                try:
                    with open(tempfile.abspath, mode="rb") as source:
                        result = list(ibytes2ilines(
                            scompressed2ibytes(source)))
                        assertAlmostEqual(result,
                                          lines,
                                          msg="file is different")

                    # full_url = "https://"+self.name+".s3-us-west-2.amazonaws.com/"+storage.key.replace(":", "%3A")
                    # https://active-data-test-result.s3-us-west-2.amazonaws.com/tc.1524896%3A152488763.0.json.gz

                    # dest_bucket = s3.MultiBucket(bucket="self.name", kwargs=self.settings.aws)

                    result = list(self.read_lines(strip_extension(key)))
                    assertAlmostEqual(result,
                                      lines,
                                      result,
                                      msg="S3 is different")

                except Exception as e:
                    from activedata_etl.transforms import TRY_AGAIN_LATER

                    Log.error(TRY_AGAIN_LATER,
                              reason="did not pass verification",
                              cause=e)
        return
Example #50
def _scrub(value, is_done):
    type_ = value.__class__

    if type_ in (NoneType, NullType):
        return None
    elif type_ is unicode:
        value_ = value.strip()
        if value_:
            return value_
        else:
            return None
    elif type_ is float:
        if math.isnan(value) or math.isinf(value):
            return None
        return value
    elif type_ in (int, long, bool):
        return value
    elif type_ in (date, datetime):
        return float(datetime2unix(value))
    elif type_ is timedelta:
        return value.total_seconds()
    elif type_ is Date:
        return float(value.unix)
    elif type_ is Duration:
        return float(value.seconds)
    elif type_ is str:
        return utf82unicode(value)
    elif type_ is Decimal:
        return float(value)
    elif type_ is Data:
        return _scrub(_get(value, '_dict'), is_done)
    elif isinstance(value, Mapping):
        _id = id(value)
        if _id in is_done:
            Log.warning("possible loop in structure detected")
            return '"<LOOP IN STRUCTURE>"'
        is_done.add(_id)

        output = {}
        for k, v in value.iteritems():
            if isinstance(k, basestring):
                pass
            elif hasattr(k, "__unicode__"):
                k = unicode(k)
            else:
                Log.error("keys must be strings")
            v = _scrub(v, is_done)
            if v != None or isinstance(v, Mapping):
                output[k] = v

        is_done.discard(_id)
        return output
    elif type_ in (tuple, list, FlatList):
        output = []
        for v in value:
            v = _scrub(v, is_done)
            output.append(v)
        return output
    elif type_ is type:
        return value.__name__
    elif type_.__name__ == "bool_":  # DEAR ME!  NumPy has its own booleans (value==False could be used, but 0==False in Python.  DOH!)
        if value == False:
            return False
        else:
            return True
    elif not isinstance(value, Except) and isinstance(value, Exception):
        return _scrub(Except.wrap(value), is_done)
    elif hasattr(value, '__data__'):
        try:
            return _scrub(value.__data__(), is_done)
        except Exception as e:
            Log.error("problem with calling __data__()", e)
    elif hasattr(value, 'co_code') or hasattr(value, "f_locals"):
        return None
    elif hasattr(value, '__iter__'):
        output = []
        for v in value:
            v = _scrub(v, is_done)
            output.append(v)
        return output
    elif hasattr(value, '__call__'):
        return repr(value)
    else:
        return _scrub(DataObject(value), is_done)
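_scrub reduces arbitrary Python values to JSON-safe primitives, carrying a set of visited object ids in is_done so reference cycles degrade to a marker instead of infinite recursion. A simplified standalone sketch of that cycle guard (not the mo-json implementation):

def scrub(value, seen=None):
    # REDUCE value TO JSON-SAFE PRIMITIVES, GUARDING AGAINST CYCLES
    seen = set() if seen is None else seen
    if isinstance(value, dict):
        if id(value) in seen:
            return "<LOOP IN STRUCTURE>"
        seen.add(id(value))
        output = dict((str(k), scrub(v, seen)) for k, v in value.items())
        seen.discard(id(value))
        return output
    if isinstance(value, (list, tuple)):
        return [scrub(v, seen) for v in value]
    return value

d = {"a": 1}
d["self"] = d
assert scrub(d) == {"a": 1, "self": "<LOOP IN STRUCTURE>"}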
Example #51
def unicode2Date(value, format=None):
    """
    CONVERT UNICODE STRING TO UNIX TIMESTAMP VALUE
    """
    # http://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    if value == None:
        return None

    if format != None:
        try:
            if format.endswith("%S.%f") and "." not in value:
                value += ".000"
            return _unix2Date(datetime2unix(datetime.strptime(value, format)))
        except Exception as e:
            from mo_logs import Log

            Log.error("Can not format {{value}} with {{format}}", value=value, format=format, cause=e)

    value = value.strip()
    if value.lower() == "now":
        return _unix2Date(datetime2unix(_utcnow()))
    elif value.lower() == "today":
        return _unix2Date(math.floor(datetime2unix(_utcnow()) / 86400) * 86400)
    elif value.lower() in ["eod", "tomorrow"]:
        return _unix2Date(math.floor(datetime2unix(_utcnow()) / 86400) * 86400 + 86400)

    if any(value.lower().find(n) >= 0 for n in ["now", "today", "eod", "tomorrow"] + list(MILLI_VALUES.keys())):
        return parse_time_expression(value)

    try:  # 2.7 DOES NOT SUPPORT %z
        local_value = parse_date(value)  #eg 2014-07-16 10:57 +0200
        return _unix2Date(datetime2unix((local_value - local_value.utcoffset()).replace(tzinfo=None)))
    except Exception as e:
        e = Except.wrap(e)  # FOR DEBUGGING
        pass

    formats = [
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M:%S.%f"
    ]
    for f in formats:
        try:
            return _unix2Date(datetime2unix(datetime.strptime(value, f)))
        except Exception:
            pass

    deformats = [
        "%Y-%m",# eg 2014-07-16 10:57 +0200
        "%Y%m%d",
        "%d%m%Y",
        "%d%m%y",
        "%d%b%Y",
        "%d%b%y",
        "%d%B%Y",
        "%d%B%y",
        "%Y%m%d%H%M%S",
        "%Y%m%dT%H%M%S",
        "%d%m%Y%H%M%S",
        "%d%m%y%H%M%S",
        "%d%b%Y%H%M%S",
        "%d%b%y%H%M%S",
        "%d%B%Y%H%M%S",
        "%d%B%y%H%M%S"
    ]
    value = deformat(value)
    for f in deformats:
        try:
            return unicode2Date(value, format=f)
        except Exception:
            pass

    from mo_logs import Log
    Log.error("Can not interpret {{value}} as a datetime", value=value)
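unicode2Date works through progressively looser interpretations: an explicit format, keywords like "now", ISO forms, then the deformats list against a whitespace-stripped value. The core try-each-format-in-order idiom, as a standalone sketch:

from datetime import datetime

def parse_any(value, formats=("%Y-%m-%dT%H:%M:%S", "%Y%m%d", "%d%b%Y")):
    # TRY EACH FORMAT IN ORDER; RETURN THE FIRST THAT PARSES
    for f in formats:
        try:
            return datetime.strptime(value, f)
        except ValueError:
            pass
    raise ValueError("can not interpret %r as a datetime" % value)

print(parse_any("20140716"))   # 2014-07-16 00:00:00
print(parse_any("16Jul2014"))  # 2014-07-16 00:00:00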
Example #52
def json2value(json_string, params=Null, flexible=False, leaves=False):
    """
    :param json_string: THE JSON
    :param params: STANDARD JSON PARAMS
    :param flexible: REMOVE COMMENTS
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    if isinstance(json_string, str):
        Log.error("only unicode json accepted")

    try:
        if flexible:
            # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r
            # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py#L58
            json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE)
            json_string = "\n".join(remove_line_comment(l) for l in json_string.split("\n"))
            # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA
            json_string = re.sub(r",\s*\}", r"}", json_string)
            # ALLOW LISTS TO END WITH COMMA
            json_string = re.sub(r",\s*\]", r"]", json_string)

        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        try:
            value = wrap(json_decoder(unicode(json_string)))
        except Exception as e:
            Log.error("can not decode\n{{content}}", content=json_string, cause=e)

        if leaves:
            value = wrap_leaves(value)

        return value

    except Exception as e:
        e = Except.wrap(e)

        if not json_string.strip():
            Log.error("JSON string is only whitespace")

        c = e
        while "Expecting '" in c.cause and "' delimiter: line" in c.cause:
            c = c.cause

        if "Expecting '" in c and "' delimiter: line" in c:
            line_index = int(strings.between(c.message, " line ", " column ")) - 1
            column = int(strings.between(c.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                sample = "..." + line[column - 20:]
                pointer = "   " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"

            if len(sample) > 43:
                sample = sample[:43] + "..."

            Log.error("Can not decode JSON at:\n\t" + sample + "\n\t" + pointer + "\n")

        base_str = strings.limit(json_string, 1000).encode('utf8')
        hexx_str = bytes2hex(base_str, " ")
        try:
            char_str = " " + "  ".join((c.decode("latin1") if ord(c) >= 32 else ".") for c in base_str)
        except Exception as e:
            char_str = " "
        Log.error("Can not decode JSON:\n" + char_str + "\n" + hexx_str + "\n", e)
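On decode failure, json2value extracts the line and column from the parser message and prints a caret under the offending character. A standalone sketch of that pointer, using the standard json module's lineno/colno attributes (Python 3):

import json

def decode_with_pointer(json_string):
    # DECODE JSON; ON FAILURE, POINT AT THE OFFENDING COLUMN
    try:
        return json.loads(json_string)
    except ValueError as e:
        line = json_string.split("\n")[getattr(e, "lineno", 1) - 1]
        pointer = " " * (getattr(e, "colno", 1) - 1) + "^"
        raise ValueError("can not decode JSON at:\n\t%s\n\t%s" % (line, pointer))

try:
    decode_with_pointer('{"a": 1,\n "b" 2}')
except ValueError as e:
    print(e)  # CARET POINTS WHERE THE ':' WAS EXPECTED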
Example #54
    def _db_worker(self, please_stop):
        while not please_stop:
            try:
                with self._db_transaction():
                    result = self._query(
                        SQL_SELECT
                        + all_columns
                        + SQL_FROM
                        + db_table_name
                        + SQL_WHERE
                        + "last_updated > "
                        + quote_value(self.last_load)
                        + SQL_ORDERBY
                        + sql_list(map(quote_column, ["es_index", "name", "es_column"]))
                    )

                with self.locker:
                    for r in result.data:
                        c = row_to_column(result.header, r)
                        self._add(c)
                        if c.last_updated > self.last_load:
                            self.last_load = c.last_updated

                updates = self.todo.pop_all()
                DEBUG and updates and Log.note(
                    "{{num}} columns to push to db", num=len(updates)
                )
                for action, column in updates:
                    while not please_stop:
                        try:
                            with self._db_transaction():
                                DEBUG and Log.note(
                                    "{{action}} db for {{table}}.{{column}}",
                                    action=action,
                                    table=column.es_index,
                                    column=column.es_column,
                                )
                                if action is EXECUTE:
                                    self.db.execute(column)
                                elif action is UPDATE:
                                    self.db.execute(
                                        "UPDATE"
                                        + db_table_name
                                        + "SET"
                                        + sql_list(
                                            [
                                                "count=" + quote_value(column.count),
                                                "cardinality="
                                                + quote_value(column.cardinality),
                                                "multi=" + quote_value(column.multi),
                                                "partitions="
                                                + quote_value(
                                                    value2json(column.partitions)
                                                ),
                                                "last_updated="
                                                + quote_value(column.last_updated),
                                            ]
                                        )
                                        + SQL_WHERE
                                        + SQL_AND.join(
                                            [
                                                "es_index = "
                                                + quote_value(column.es_index),
                                                "es_column = "
                                                + quote_value(column.es_column),
                                                "last_updated < "
                                                + quote_value(column.last_updated),
                                            ]
                                        )
                                    )
                                elif action is DELETE:
                                    self.db.execute(
                                        "DELETE FROM"
                                        + db_table_name
                                        + SQL_WHERE
                                        + SQL_AND.join(
                                            [
                                                "es_index = "
                                                + quote_value(column.es_index),
                                                "es_column = "
                                                + quote_value(column.es_column),
                                            ]
                                        )
                                    )
                                else:
                                    self._db_insert_column(column)
                            break
                        except Exception as e:
                            e = Except.wrap(e)
                            if "database is locked" in e:
                                Log.note("metadata database is locked")
                                Till(seconds=1).wait()
                                break
                            else:
                                Log.warning("problem updataing database", cause=e)

            except Exception as e:
                Log.warning("problem updating database", cause=e)

            (Till(seconds=10) | please_stop).wait()
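_db_worker retries a write when SQLite reports the database is locked, pausing briefly before the next pass. The same guard, as a self-contained sketch against the standard sqlite3 module:

import sqlite3
import time

def execute_with_retry(conn, sql, params=(), attempts=5):
    # RETRY WHEN SQLITE REPORTS ANOTHER WRITER HOLDS THE LOCK
    for remaining in range(attempts, 0, -1):
        try:
            with conn:  # COMMIT ON SUCCESS, ROLL BACK ON ERROR
                return conn.execute(sql, params)
        except sqlite3.OperationalError as e:
            if "database is locked" not in str(e) or remaining == 1:
                raise
            time.sleep(1)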
Example #55
def tuid_endpoint(path):
    with RegisterThread():
        try:
            service.statsdaemon.update_requests(requests_total=1)

            if flask.request.headers.get("content-length", "") in ["", "0"]:
                # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    EXPECTING_QUERY,
                    status=400,
                    headers={
                        "Content-Type": "text/html"
                    }
                )
            elif int(flask.request.headers["content-length"]) > QUERY_SIZE_LIMIT:
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    unicode2utf8("request too large"),
                    status=400,
                    headers={
                        "Content-Type": "text/html"
                    }
                )
            request_body = flask.request.get_data().strip()
            query = json2value(utf82unicode(request_body))

            # ENSURE THE QUERY HAS THE CORRECT FORM
            if query['from'] != 'files':
                Log.error("Can only handle queries on the `files` table")

            ands = listwrap(query.where['and'])
            if len(ands) != 3:
                Log.error(
                    'expecting a simple where clause with the following structure\n{{example|json}}',
                    example={"and": [
                        {"eq": {"branch": "<BRANCH>"}},
                        {"eq": {"revision": "<REVISION>"}},
                        {"in": {"path": ["<path1>", "<path2>", "...", "<pathN>"]}}
                    ]}
                )

            rev = None
            paths = None
            branch_name = None
            for a in ands:
                rev = coalesce(rev, a.eq.revision)
                paths = unwraplist(coalesce(paths, a['in'].path, a.eq.path))
                branch_name = coalesce(branch_name, a.eq.branch)
            paths = listwrap(paths)

            if len(paths) == 0:
                response, completed = [], True
            elif service.conn.pending_transactions > TOO_BUSY:  # CHECK IF service IS VERY BUSY
                # TODO:  BE SURE TO UPDATE STATS TOO
                Log.note("Too many open transactions")
                response, completed = [], False
            elif service.get_thread_count() > TOO_MANY_THREADS:
                Log.note("Too many threads open")
                response, completed = [], False
            else:
                # RETURN TUIDS
                with Timer("tuid internal response time for {{num}} files", {"num": len(paths)}):
                    response, completed = service.get_tuids_from_files(
                        revision=rev, files=paths, going_forward=True, repo=branch_name
                    )

                if not completed:
                    Log.note(
                        "Request for {{num}} files is incomplete for revision {{rev}}.",
                        num=len(paths), rev=rev
                    )

            if query.meta.format == 'list':
                formatter = _stream_list
            else:
                formatter = _stream_table

            service.statsdaemon.update_requests(
                requests_complete=1 if completed else 0,
                requests_incomplete=1 if not completed else 0,
                requests_passed=1
            )

            return Response(
                formatter(response),
                status=200 if completed else 202,
                headers={
                    "Content-Type": "application/json"
                }
            )
        except Exception as e:
            e = Except.wrap(e)
            service.statsdaemon.update_requests(requests_incomplete=1, requests_failed=1)
            Log.warning("could not handle request", cause=e)
            return Response(
                unicode2utf8(value2json(e, pretty=True)),
                status=400,
                headers={
                    "Content-Type": "text/html"
                }
            )
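The endpoint accepts a jx-style query against the `files` table and answers 200 when the result is complete, 202 when the service returns a partial result. A client sketch (the URL and field values are illustrative; assumes the requests library):

import requests

query = {
    "from": "files",
    "where": {"and": [
        {"eq": {"branch": "mozilla-central"}},
        {"eq": {"revision": "<REVISION>"}},
        {"in": {"path": ["<path1>", "<path2>"]}},
    ]},
    "meta": {"format": "list"},
}
response = requests.post("http://localhost:5000/tuid", json=query)
print(response.status_code)  # 200 = COMPLETE, 202 = INCOMPLETE
print(response.json())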