Example #1
    def go(self):
        """
        ACTIVATE SIGNAL (DOES NOTHING IF SIGNAL IS ALREADY ACTIVATED)
        """
        DEBUG and self._name and Log.note("GO! {{name|quote}}", name=self.name)

        if self._go:
            return

        with self.lock:
            if self._go:
                return
            self._go = True

        DEBUG and self._name and Log.note("internal GO! {{name|quote}}", name=self.name)
        jobs, self.job_queue = self.job_queue, None
        threads, self.waiting_threads = self.waiting_threads, None

        if threads:
            DEBUG and self._name and Log.note("Release {{num}} threads", num=len(threads))
            for t in threads:
                t.release()

        if jobs:
            for j in jobs:
                try:
                    j()
                except Exception as e:
                    Log.warning("Trigger on Signal.go() failed!", cause=e)
Example #2
 def __exit__(self, a, b, c):
     if self.waiting:
         if DEBUG:
             _Log.note("signaling {{num}} waiters", num=len(self.waiting))
         waiter = self.waiting.pop()
         waiter.go()
     self.lock.release()
Example #3
    def _reader(self, name, pipe, recieve, please_stop):
        try:
            line = "dummy"
            while not please_stop and self.service.returncode is None and line:
                line = pipe.readline().rstrip()
                if line:
                    recieve.add(line)
                if self.debug:
                    Log.note("{{process}} ({{name}}): {{line}}", name=name, process=self.name, line=line)
            # GRAB A FEW MORE LINES
            max = 100
            while max:
                try:
                    line = pipe.readline().rstrip()
                    if line:
                        max = 100
                        recieve.add(line)
                        if self.debug:
                            Log.note("{{process}} ({{name}}): {{line}}", name=name, process=self.name, line=line)
                    else:
                        max -= 1
                except Exception:
                    break
        finally:
            pipe.close()

        recieve.add(THREAD_STOP)
Example #4
 def __enter__(self):
     Log.note("SingleInstance.lockfile = " + self.lockfile)
     if sys.platform == 'win32':
         try:
             # if the file already exists, try to remove it (in case a previous execution was interrupted)
             if os.path.exists(self.lockfile):
                 os.unlink(self.lockfile)
             self.fd = os.open(self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR)
         except Exception as e:
             Log.alarm("Another instance is already running, quitting.")
             sys.exit(-1)
     else: # non Windows
         import fcntl
         self.fp = open(self.lockfile, 'w')
         try:
             fcntl.lockf(self.fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
         except IOError:
             Log.note(
                 "\n"
                 "**********************************************************************\n"
                 "** Another instance is already running, quitting.\n"
                 "**********************************************************************\n"
             )
             sys.exit(-1)
     self.initialized = True
Example #5
    def _upsert_column(self, c):
        # ASSUMING THE self.meta.columns.locker IS HELD
        existing_columns = self.meta.columns.find(c.es_index, c.names["."])
        if not existing_columns:
            self.meta.columns.add(c)
            self.todo.add(c)

            if ENABLE_META_SCAN:
                if DEBUG:
                    Log.note("todo: {{table}}::{{column}}", table=c.es_index, column=c.es_column)
                # MARK meta.columns AS DIRTY TOO
                cols = self.meta.columns.find("meta.columns", None)
                for cc in cols:
                    cc.partitions = cc.cardinality = None
                    cc.last_updated = Date.now()
                self.todo.extend(cols)
        else:
            canonical = existing_columns[0]
            if canonical is not c:
                set_default(c.names, canonical.names)
                for key in Column.__slots__:
                    canonical[key] = c[key]
            if DEBUG:
                Log.note("todo: {{table}}::{{column}}", table=canonical.es_index, column=canonical.es_column)
            self.todo.add(canonical)
Example #6
        def disconnect():
            with suppress_exception:
                self.target_queue.close()
                Log.note("stop put into queue")

            self.pulse.disconnect()
            Log.note("pulse listener was given a disconnect()")
Example #7
    def column_query(self, sql, param=None):
        """
        RETURN RESULTS IN [column][row_num] GRID
        """
        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor: # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()
                self.cursor.execute("SET TIME_ZONE='+00:00'")
                self.cursor.close()
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, self.quote_param(param))
            sql = self.preamble + outdent(sql)
            if self.debug:
                Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
            # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
            result = zip(*grid)

            if not old_cursor:   # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

            return result
        except Exception as e:
            if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}",  sql= sql, cause=e,stack_depth=1)
Example #8
    def not_monitor(self, please_stop):
        Log.alert("metadata scan has been disabled")
        please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
        while not please_stop:
            column = self.todo.pop()
            if column == THREAD_STOP:
                break

            if column.jx_type in STRUCT or split_field(column.es_column)[-1] == EXISTS_TYPE:
                DEBUG and Log.note("{{column.es_column}} is a struct", column=column)
                column.last_updated = Date.now()
                continue
            elif column.last_updated > Date.now() - TOO_OLD and column.cardinality is not None:
                # DO NOT UPDATE FRESH COLUMN METADATA
                DEBUG and Log.note("{{column.es_column}} is still fresh ({{ago}} ago)", column=column, ago=(Date.now()-Date(column.last_updated)).seconds)
                continue

            with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": column}, silent=not DEBUG, too_long=0.05):
                if untype_path(column.name) in ["build.type", "run.type"]:
                    try:
                        self._update_cardinality(column)
                    except Exception as e:
                        Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
                else:
                    column.last_updated = Date.now()
Example #9
    def not_monitor(self, please_stop):
        Log.alert("metadata scan has been disabled")
        please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
        while not please_stop:
            c = self.todo.pop()
            if c == THREAD_STOP:
                break

            if not c.last_updated or c.last_updated >= Date.now()-TOO_OLD:
                continue

            with self.meta.columns.locker:
                self.meta.columns.update({
                    "set": {
                        "last_updated": Date.now()
                    },
                    "clear":[
                        "count",
                        "cardinality",
                        "partitions",
                    ],
                    "where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
                })
            if DEBUG:
                Log.note("Could not get {{col.es_index}}.{{col.es_column}} info", col=c)
Example #10
    def forall(self, sql, param=None, _execute=None):
        assert _execute
        num = 0

        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor: # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, self.quote_param(param))
            sql = self.preamble + outdent(sql)
            if self.debug:
                Log.note("Execute SQL:\n{{sql}}",  sql= indent(sql))
            self.cursor.execute(sql)

            columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description])
            for r in self.cursor:
                num += 1
                _execute(wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r]))))

            if not old_cursor:   # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

        except Exception as e:
            Log.error("Problem executing SQL:\n{{sql|indent}}",  sql= sql, cause=e, stack_depth=1)

        return num
Example #11
    def delete_record(self, filter):
        if self.settings.read_only:
            Log.error("Index opened in read only mode, no changes allowed")
        self.cluster.get_metadata()

        if self.cluster.cluster_state.version.number.startswith("0.90"):
            query = {"filtered": {
                "query": {"match_all": {}},
                "filter": filter
            }}
        elif self.cluster.cluster_state.version.number.startswith("1."):
            query = {"query": {"filtered": {
                "query": {"match_all": {}},
                "filter": filter
            }}}
        else:
            raise NotImplementedError

        if self.debug:
            Log.note("Delete bugs:\n{{query}}",  query= query)

        result = self.cluster.delete(
            self.path + "/_query",
            data=convert.value2json(query),
            timeout=600,
            params={"consistency": self.settings.consistency}
        )

        for name, status in result._indices.items():
            if status._shards.failed > 0:
                Log.error("Failure to delete from {{index}}", index=name)
Example #12
    def __init__(self, name, params, cwd=None, env=None, debug=False, shell=False, bufsize=-1):
        self.name = name
        self.service_stopped = Signal("stopped signal for " + strings.quote(name))
        self.stdin = Queue("stdin for process " + strings.quote(name), silent=True)
        self.stdout = Queue("stdout for process " + strings.quote(name), silent=True)
        self.stderr = Queue("stderr for process " + strings.quote(name), silent=True)

        try:
            self.debug = debug or DEBUG
            self.service = service = subprocess.Popen(
                params,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                bufsize=bufsize,
                cwd=cwd if isinstance(cwd, (basestring, NullType, NoneType)) else cwd.abspath,
                env=unwrap(set_default(env, os.environ)),
                shell=shell
            )

            self.please_stop = Signal()
            self.please_stop.on_go(self._kill)
            self.thread_locker = Lock()
            self.children = [
                Thread.run(self.name + " stdin", self._writer, service.stdin, self.stdin, please_stop=self.service_stopped, parent_thread=self),
                Thread.run(self.name + " stdout", self._reader, "stdout", service.stdout, self.stdout, please_stop=self.service_stopped, parent_thread=self),
                Thread.run(self.name + " stderr", self._reader, "stderr", service.stderr, self.stderr, please_stop=self.service_stopped, parent_thread=self),
                Thread.run(self.name + " waiter", self._monitor, parent_thread=self),
            ]
        except Exception as e:
            Log.error("Can not call", e)

        if self.debug:
            Log.note("{{process}} START: {{command}}", process=self.name, command=" ".join(map(strings.quote, params)))
Example #13
    def _execute_backlog(self):
        if not self.backlog: return

        (backlog, self.backlog) = (self.backlog, [])
        if self.db.__module__.startswith("pymysql"):
            # BUG IN PYMYSQL: CAN NOT HANDLE MULTIPLE STATEMENTS
            # https://github.com/PyMySQL/PyMySQL/issues/157
            for b in backlog:
                sql = self.preamble + b
                try:
                    if self.debug:
                        Log.note("Execute SQL:\n{{sql|indent}}",  sql= sql)
                    self.cursor.execute(b)
                except Exception as e:
                    Log.error("Can not execute sql:\n{{sql}}",  sql= sql, cause=e)

            self.cursor.close()
            self.cursor = self.db.cursor()
        else:
            for i, g in jx.groupby(backlog, size=MAX_BATCH_SIZE):
                sql = self.preamble + ";\n".join(g)
                try:
                    if self.debug:
                        Log.note("Execute block of SQL:\n{{sql|indent}}",  sql= sql)
                    self.cursor.execute(sql)
                    self.cursor.close()
                    self.cursor = self.db.cursor()
                except Exception as e:
                    Log.error("Problem executing SQL:\n{{sql|indent}}",  sql= sql, cause=e, stack_depth=1)
Example #14
def write_profiles(main_thread_profile):
    if cprofiler_stats is None:
        return

    from pyLibrary import convert
    from mo_files import File

    cprofiler_stats.add(pstats.Stats(main_thread_profile.cprofiler))
    stats = cprofiler_stats.pop_all()

    Log.note("aggregating {{num}} profile stats", num=len(stats))
    acc = stats[0]
    for s in stats[1:]:
        acc.add(s)

    stats = [
        {
            "num_calls": d[1],
            "self_time": d[2],
            "total_time": d[3],
            "self_time_per_call": d[2] / d[1],
            "total_time_per_call": d[3] / d[1],
            "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
            "line": f[1],
            "method": f[2].lstrip("<").rstrip(">")
        }
        for f, d in iteritems(acc.stats)
    ]
    stats_file = File(FILENAME, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
    stats_file.write(convert.list2tab(stats))
    Log.note("profile written to {{filename}}", filename=stats_file.abspath)
Example #15
def open_test_instance(name, filename=None, es=None, kwargs=None):
    if filename != None:
        Log.note(
            "Using {{filename}} as {{type}}",
            filename=filename,
            type=name
        )
        return FakeES(filename=filename)
    else:
        Log.note(
            "Using ES cluster at {{host}} as {{type}}",
            host=es.host,
            type=name
        )
        cluster = Cluster(es)
        try:
            old_index = cluster.get_index(es)
            cluster.delete_index(old_index.settings.index)
        except Exception as e:
            if "Can not find index" not in e:
                Log.error("unexpected", cause=e)

        output = cluster.create_index(limit_replicas=True, limit_replicas_warning=False, kwargs=es)
        output.delete_all_but_self()
        output.add_alias(es.index)
        return output
Example #16
    def delete_index(self, index_name):
        if not isinstance(index_name, unicode):
            Log.error("expecting an index name")

        if self.debug:
            Log.note("Deleting index {{index}}", index=index_name)

        # REMOVE ALL ALIASES TOO
        aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
        if aliases:
            self.post(
                path="/_aliases",
                data={"actions": [{"remove": a} for a in aliases]}
            )

        url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
        try:
            response = http.delete(url)
            if response.status_code != 200:
                Log.error("Expecting a 200, got {{code}}", code=response.status_code)
            details = mo_json.json2value(utf82unicode(response.content))
            if self.debug:
                Log.note("delete response {{response}}", response=details)
            return response
        except Exception as e:
            Log.error("Problem with call to {{url}}", url=url, cause=e)
Example #17
    def _got_result(self, data, message):
        global count

        data = wrap(data)
        with count_locker:
            Log.note("{{count}} from {{exchange}}", count=count, exchange=self.pulse.exchange)
            data._meta.count = count
            data._meta.exchange = self.pulse.exchange
            count += 1

        if self.settings.debug:
            Log.note("{{data}}", data=data)
        if self.target_queue != None:
            try:
                self.target_queue.add(data)
                message.ack()
            except Exception as e:
                e = Except.wrap(e)
                if not self.target_queue.closed:  # EXPECTED TO HAPPEN, THIS THREAD MAY HAVE BEEN AWAY FOR A WHILE
                    raise e
        else:
            try:
                self.pulse_target(data)
                message.ack()
            except Exception as e:
                Log.warning("Problem processing pulse (see `data` in structured log)", data=data, cause=e)
Example #18
    def __init__(self, conn=None, tuid_service=None, start_workers=True, new_table=False, kwargs=None):
        try:
            self.config = kwargs
            self.conn = conn if conn else sql.Sql(self.config.database.name)
            self.hg_cache = HgMozillaOrg(kwargs=self.config.hg_cache, use_cache=True) if self.config.hg_cache else Null

            self.tuid_service = tuid_service if tuid_service else tuid.service.TUIDService(
                kwargs=self.config.tuid, conn=self.conn, clogger=self
            )
            self.rev_locker = Lock()
            self.working_locker = Lock()

            if new_table:
                with self.conn.transaction() as t:
                    t.execute("DROP TABLE IF EXISTS csetLog")

            self.init_db()
            self.next_revnum = coalesce(self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")[0], 1)
            self.csets_todo_backwards = Queue(name="Clogger.csets_todo_backwards")
            self.deletions_todo = Queue(name="Clogger.deletions_todo")
            self.maintenance_signal = Signal(name="Clogger.maintenance_signal")

            if 'tuid' in self.config:
                self.config = self.config.tuid

            self.disable_backfilling = False
            self.disable_tipfilling = False
            self.disable_deletion = False
            self.disable_maintenance = False

            self.backfill_thread = None
            self.tipfill_thread = None
            self.deletion_thread = None
            self.maintenance_thread = None

            # Make sure we are filled before allowing queries
            numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
            if numrevs < MINIMUM_PERMANENT_CSETS:
                Log.note("Filling in csets to hold {{minim}} csets.", minim=MINIMUM_PERMANENT_CSETS)
                oldest_rev = 'tip'
                with self.conn.transaction() as t:
                    tmp = t.query("SELECT min(revnum), revision FROM csetLog").data[0][1]
                    if tmp:
                        oldest_rev = tmp
                self._fill_in_range(
                    MINIMUM_PERMANENT_CSETS - numrevs,
                    oldest_rev,
                    timestamp=False
                )

            Log.note(
                "Table is filled with atleast {{minim}} entries.",
                minim=MINIMUM_PERMANENT_CSETS
            )

            if start_workers:
                self.start_workers()
        except Exception as e:
            Log.warning("Cannot setup clogger: {{cause}}", cause=str(e))
Example #19
    def copy(self, keys, source, sample_only_filter=None, sample_size=None, done_copy=None):
        """
        :param keys: THE KEYS TO LOAD FROM source
        :param source: THE SOURCE (USUALLY S3 BUCKET)
        :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING
        :param sample_size: FOR RANDOM SAMPLE OF THE source DATA
        :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION
        :return: LIST OF SUB-keys PUSHED INTO ES
        """
        num_keys = 0
        queue = None
        pending = []  # FOR WHEN WE DO NOT HAVE QUEUE YET
        for key in keys:
            timer = Timer("Process {{key}}", param={"key": key})
            try:
                with timer:
                    for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                        if not line:
                            continue

                        if rownum > 0 and rownum % 1000 == 0:
                            Log.note("Ingested {{num}} records from {{key}} in bucket {{bucket}}", num=rownum, key=key, bucket=source.name)

                        row, please_stop = fix(rownum, line, source, sample_only_filter, sample_size)
                        num_keys += 1

                        if queue == None:
                            queue = self._get_queue(row)
                            if queue == None:
                                pending.append(row)
                                if len(pending) > 1000:
                                    self._get_queue(row)
                                    Log.error("first 1000 (key={{key}}) records have no indication what index to put data", key=tuple(keys)[0])
                                continue
                            elif queue is DATA_TOO_OLD:
                                break
                            if pending:
                                queue.extend(pending)
                                pending = []

                        queue.add(row)

                        if please_stop:
                            break
            except Exception as e:
                done_copy = None
                Log.warning("Could not process {{key}} after {{duration|round(places=2)}}seconds", key=key, duration=timer.duration.seconds, cause=e)

        if done_copy:
            if queue == None:
                done_copy()
            else:
                queue.add(done_copy)

        if pending:
            Log.error("Did not find an index to place the data for key={{key}}", key=tuple(keys)[0])

        Log.note("{{num}} keys from {{key|json}} added", num=num_keys, key=keys)
        return num_keys
Example #20
 def __init__(self, name=None):
     if DEBUG:
         Log.note("New signal {{name|quote}}", name=name)
     self._name = name
     self.lock = _allocate_lock()
     self._go = False
     self.job_queue = None
     self.waiting_threads = None
Example #21
    def add(self, doc, queue=None):
        if queue == None:
            queue = self._get_queue(doc)
            if queue == None:
                Log.note("Document not added: Too old")
                return

        queue.add(doc)
Example #22
 def flush(self):
     try:
         self.cluster.post("/" + self.settings.index + "/_flush", data={"wait_if_ongoing": True, "forced": False})
     except Exception as e:
         if "FlushNotAllowedEngineException" in e:
             Log.note("Flush is ignored")
         else:
             Log.error("Problem flushing", cause=e)
Example #23
 def print_proc_memory_used(self, loc):
     self.prev_mem = self.curr_mem
     self.curr_mem = self.get_process_memory()
     Log.note(
         "Used memory since last call: {{mem}}",
         loc=loc,
         mem=self.curr_mem - self.prev_mem
     )
Example #24
    def create_index(
        self,
        index,
        alias=None,
        create_timestamp=None,
        schema=None,
        limit_replicas=None,
        read_only=False,
        tjson=False,
        kwargs=None
    ):
        if not alias:
            alias = kwargs.alias = kwargs.index
            index = kwargs.index = proto_name(alias, create_timestamp)

        if kwargs.alias == index:
            Log.error("Expecting index name to conform to pattern")

        if kwargs.schema_file:
            Log.error('schema_file attribute not supported.  Use {"$ref":<filename>} instead')

        if schema == None:
            Log.error("Expecting a schema")
        elif isinstance(schema, basestring):
            schema = mo_json.json2value(schema, leaves=True)
        else:
            schema = mo_json.json2value(convert.value2json(schema), leaves=True)

        if limit_replicas:
            # DO NOT ASK FOR TOO MANY REPLICAS
            health = self.get("/_cluster/health")
            if schema.settings.index.number_of_replicas >= health.number_of_nodes:
                Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized",
                    {"from": schema.settings.index.number_of_replicas},
                    to= health.number_of_nodes - 1
                )
                schema.settings.index.number_of_replicas = health.number_of_nodes - 1

        self.post(
            "/" + index,
            data=schema,
            headers={"Content-Type": "application/json"}
        )

        # CONFIRM INDEX EXISTS
        while True:
            try:
                state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
                if index in state.metadata.indices:
                    break
                Log.note("Waiting for index {{index}} to appear", index=index)
            except Exception as e:
                Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
            Till(seconds=1).wait()
        Log.alert("Made new index {{index|quote}}", index=index)

        es = Index(kwargs=kwargs)
        return es
Example #25
    def __exit__(self, type, value, traceback):
        self.end = time()
        self.interval = self.end - self.start

        if self.debug:
            param = wrap(self.param)
            param.duration = timedelta(seconds=self.interval)
            if not self.silent:
                Log.note("Timer end  : " + self.template + " (took {{duration}})", self.param, stack_depth=1)
Example #26
def _replace_ref(node, url):
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if isinstance(node, Mapping):
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            new_value = mo_dots.get_attr(new_value, ref.fragment)

        DEBUG and Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            output = new_value
        elif isinstance(output, text_type):
            Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
        else:
            output = unwrap(set_default(output, new_value))

        DEBUG and Log.note("Return {{output}}", output=output)

        return output
    elif isinstance(node, list):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
Example #27
 def _stderr(self, please_stop):
     while not please_stop:
         try:
             line = self.process.stderr.pop(till=please_stop)
             if line == THREAD_STOP:
                 please_stop.go()
                 break
             Log.note("Error line from {{name}}({{pid}}): {{line}}", line=line, name=self.process.name, pid=self.process.pid)
         except Exception as e:
             Log.error("could not process line", cause=e)
Example #28
    def _daemon(self, please_stop):
        while not please_stop:
            timestamp = Date.now()
            with self.lock:
                decay = METRIC_DECAY_RATE ** (timestamp - self.last_request).seconds
                request_rate = self.request_rate = decay * self.request_rate
                self.last_request = timestamp

            Log.note("{{name}} request rate: {{rate|round(places=2)}} requests per second", name=self.name, rate=request_rate)
            (please_stop | Till(seconds=METRIC_REPORT_PERIOD.seconds)).wait()
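For intuition, a toy computation of the exponential decay applied above (the constant is a placeholder; the real METRIC_DECAY_RATE is defined at module level):

METRIC_DECAY_RATE = 0.9  # placeholder: per-second retention factor
request_rate = 10.0      # requests/second at the last sample
seconds_idle = 5
request_rate = (METRIC_DECAY_RATE ** seconds_idle) * request_rate
# 0.9 ** 5 * 10 = 5.9049 requests/second after five idle seconds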
Example #29
 def __exit__(self, type, value, traceback):
     self.end = time()
     self.interval = self.end - self.start
     self.agg += self.interval
     self.param.duration = timedelta(seconds=self.interval)
     if not self.silent:
         if self.too_long == 0:
             Log.note("Timer end  : " + self.template + " (took {{duration}})", default_params=self.param, stack_depth=1)
         elif self.interval >= self.too_long:
             Log.note("Time too long: " + self.template + " ({{duration}})", default_params=self.param, stack_depth=1)
Example #30
    def stop(self):
        try:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            Log.note("joined on thread")
        except Exception as e:
            Log.note("problem in threaded logger" + str(e))

        with suppress_exception:
            self.queue.close()
Example #31
def add_version(flask_app):
    """
    ADD ROUTING TO HANDLE REQUEST FOR /__version__
    :param flask_app: THE (Flask) APP
    :return:
    """
    try:
        rev = coalesce(git.get_revision(), "")
        branch = "https://github.com/mozilla/ActiveData/tree/" + coalesce(git.get_branch())

        version_info = value2json(
            {
                "source": "https://github.com/mozilla/ActiveData/tree/" + rev,
                "branch": branch,
                "commit": rev,
            },
            pretty=True,
        ).encode('utf8') + text("\n")

        Log.note("Using github version\n{{version}}", version=version_info)

        @register_thread
        @cors_wrapper
        def version():
            return Response(
                version_info, status=200, headers={"Content-Type": mimetype.JSON}
            )

        flask_app.add_url_rule(
            str("/__version__"),
            None,
            version,
            defaults={},
            methods=[str("GET"), str("POST")],
        )
    except Exception as e:
        Log.error("Problem setting up listeners for dockerflow", cause=e)
Example #32
        def make_database(num, please_stop):
            db = sqlite3.connect(
                database=":memory:" if num == 1 else "this is a test file" +
                text(num) + ".sqlite",
                check_same_thread=False,
                isolation_level=None,
            )
            db.execute("DROP TABLE IF EXISTS digits" + text(num))
            db.execute("DROP TABLE IF EXISTS thousand" + text(num))
            db.execute("DROP TABLE IF EXISTS million" + text(num))
            db.execute("DROP TABLE IF EXISTS billion" + text(num))
            db.execute("CREATE TABLE digits" + text(num) + " (value TEXT)")

            Log.note("ten")
            db.execute(
                "INSERT INTO digits" + text(num) +
                " VALUES ('1'), ('2'), ('3'), ('4'), ('5'), ('6'), ('7'), ('8'), ('9')"
            )
            db.commit()
            Log.note("thousand")
            db.execute(
                "CREATE TABLE thousand" + text(num) +
                " AS SELECT d1.value || d2.value || d3.value AS value FROM digits"
                + text(num) + " d1, digits" + text(num) + " d2, digits" +
                text(num) + " d3")
            Log.note("million")
            db.execute(
                "CREATE TABLE million" + text(num) +
                " AS SELECT d1.value || d2.value AS value FROM thousand" +
                text(num) + " d1, thousand" + text(num) + " d2")
            Log.note("billion")
            db.execute(
                "CREATE TABLE billion" + text(num) +
                " AS SELECT d1.value || d2.value || d3.value AS value FROM thousand"
                + text(num) + " d1, thousand" + text(num) + " d2, thousand" +
                text(num) + " d3")
            Log.note("done")
Example #33
def safe_size(source):
    """
    READ THE source UP TO SOME LIMIT, THEN COPY TO A FILE IF TOO BIG
    RETURN A str() OR A FileString()
    """

    if source is None:
        return None

    total_bytes = 0
    bytes = []
    b = source.read(MIN_READ_SIZE)
    while b:
        total_bytes += len(b)
        bytes.append(b)
        if total_bytes > MAX_STRING_SIZE:
            try:
                data = FileString(TemporaryFile())
                for bb in bytes:
                    data.write(bb)
                del bytes
                del bb
                b = source.read(MIN_READ_SIZE)
                while b:
                    total_bytes += len(b)
                    data.write(b)
                    b = source.read(MIN_READ_SIZE)
                data.seek(0)
                Log.note("Using file of size {{length}} instead of str()",
                         length=total_bytes)

                return data
            except Exception as e:
                Log.error("Could not write file > {{num}} bytes",
                          num=total_bytes,
                          cause=e)
        b = source.read(MIN_READ_SIZE)

    return b"".join(bytes)  # SMALL ENOUGH TO KEEP IN MEMORY
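A sketch of the spill-to-disk behavior (the input is a stand-in; MIN_READ_SIZE and MAX_STRING_SIZE are module constants):

from io import BytesIO

small = safe_size(BytesIO(b"x" * 1024))
# a kilobyte fits in memory and is returned directly; an input that grows
# past MAX_STRING_SIZE is streamed into a TemporaryFile and comes back as
# a FileString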
Example #34
def open_test_instance(name, filename=None, es=None, kwargs=None):
    if filename != None:
        Log.note("Using {{filename}} as {{type}}",
                 filename=filename,
                 type=name)
        return FakeES(filename=filename)
    else:
        Log.note("Using ES cluster at {{host}} as {{type}}",
                 host=es.host,
                 type=name)
        cluster = Cluster(es)
        try:
            old_index = cluster.get_index(es)
            cluster.delete_index(old_index.settings.index)
        except Exception as e:
            if "Can not find index" not in e:
                Log.error("unexpected", cause=e)

        output = cluster.create_index(limit_replicas=True,
                                      limit_replicas_warning=False,
                                      kwargs=es)
        output.delete_all_but_self()
        output.add_alias(es.index)
        return output
Example #35
    def commit(self):
        with self.lock:
            if self.closed:
                Log.error("Queue is closed, commit not allowed")

            try:
                self._add_pending({"add": {"status.start": self.start}})
                for i in range(self.db.status.start, self.start):
                    self._add_pending({"remove": str(i)})

                if self.db.status.end - self.start < 10 or Random.range(0, 1000) == 0:  # FORCE RE-WRITE TO LIMIT FILE SIZE
                    # SIMPLY RE-WRITE FILE
                    if DEBUG:
                        Log.note("Re-write {{num_keys}} keys to persistent queue", num_keys=self.db.status.end - self.start)
                        for k in self.db.keys():
                            if k == "status" or int(k) >= self.db.status.start:
                                continue
                            Log.error("Not expecting {{key}}", key=k)
                    self._commit()
                    self.file.write(mo_json.value2json({"add": self.db}) + "\n")
                else:
                    self._commit()
            except Exception as e:
                raise e
Example #36
    def test_multiple_agg_on_same_field(self):
        if self.not_real_service():
            return

        test = wrap({"query": {
            "from": {
                "type": "elasticsearch",
                "settings": {
                    "host": ES_CLUSTER_LOCATION,
                    "index": "unittest",
                    "type": "test_result"
                }
            },
            "select": [
                {
                    "name": "max_bytes",
                    "value": "run.stats.bytes",
                    "aggregate": "max"
                },
                {
                    "name": "count",
                    "value": "run.stats.bytes",
                    "aggregate": "count"
                }
            ]
        }})

        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #37
def open_test_instance(name, settings):
    if settings.filename:
        Log.note("Using {{filename}} as {{type}}",
                 filename=settings.filename,
                 type=name)
        return FakeES(settings)
    else:
        Log.note("Using ES cluster at {{host}} as {{type}}",
                 host=settings.host,
                 type=name)
        cluster = Cluster(settings)
        try:
            old_index = cluster.get_index(kwargs=settings)
            old_index.delete()
        except Exception as e:
            if "Can not find index" not in e:
                Log.error("unexpected", cause=e)

        es = cluster.create_index(limit_replicas=True,
                                  limit_replicas_warning=False,
                                  kwargs=settings)
        es.delete_all_but_self()
        es.add_alias(settings.index)
        return es
Example #38
    def add(self, value):
        with self.lock:
            if self.closed:
                Log.error("Queue is closed")

            if value is THREAD_STOP:
                DEBUG and Log.note("Stop is seen in persistent queue")
                self.please_stop.go()
                return

            self._add_pending({"add": {str(self.db.status.end): value}})
            self.db.status.end += 1
            self._add_pending({"add": {"status.end": self.db.status.end}})
            self._commit()
        return self
Example #39
def value2json(obj, pretty=False, sort_keys=False, keep_whitespace=True):
    """
    :param obj:  THE VALUE TO TURN INTO JSON
    :param pretty: True TO MAKE A MULTI-LINE PRETTY VERSION
    :param sort_keys: True TO SORT KEYS
    :param keep_whitespace: False TO strip() THE WHITESPACE IN THE VALUES
    :return:
    """
    if FIND_LOOPS:
        obj = scrub(obj, scrub_text=_keep_whitespace if keep_whitespace else trim_whitespace())
    try:
        json = json_encoder(obj, pretty=pretty)
        if json == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON", type=" (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
        return json
    except Exception as e:
        e = Except.wrap(e)
        try:
            json = pypy_json_encode(obj)
            return json
        except Exception:
            pass
        Log.error("Can not encode into JSON: {{value}}", value=text(repr(obj)), cause=e)
Example #40
def download_perfherder(desc, repo, id, dummy, framework):
    sig_result = http.get_json(
        "https://treeherder.mozilla.org/api/project/" + repo +
        "/performance/signatures/?format=json&framework=" + str(framework) +
        "&id=" + str(id))

    signature = first(sig_result.keys())
    data_result = http.get_json("https://treeherder.mozilla.org/api/project/" +
                                repo + "/performance/data/?signatures=" +
                                signature)

    Log.note(
        "{{result|json}}",
        result={
            "name":
            desc,
            "data":
            jx.run({
                "from": ListContainer("data", data_result[signature]),
                "sort": "push_timestamp",
                "select": "value"
            }).data
        },
    )
Example #41
def test_wrap_1():
    switch = [
        lambda: Data(i=Random.int(2000)), lambda: {
            "i": Random.int(2000)
        }, lambda: FlatList([{
            "i": Random.int(2000)
        }]), lambda: [{
            "i": Random.int(2000)
        }]
    ]

    inputs = [
        switch[min(len(switch) - 1, int(floor(-log(Random.float(), 2))))]()
        for i in range(NUM_INPUT)
    ]

    for i in range(NUM_REPEAT):
        results = []
        gc.collect()
        with Profiler("more struct: slow_wrap"):
            for v in inputs:
                results.append(slow_wrap(v))

        results = []
        gc.collect()
        with Profiler("more struct: wrap"):
            for v in inputs:
                results.append(wrap(v))

        results = []
        gc.collect()
        with Profiler("more struct: baseline"):
            for v in inputs:
                results.append(baseline(v))

        Log.note("Done {{i}} of {{num}}", {"i": i, "num": NUM_REPEAT})
Example #42
    def send_queries(self, subtest, places=6):
        subtest = wrap(subtest)

        try:
            # EXECUTE QUERY
            num_expectations = 0
            for k, v in subtest.items():
                if k.startswith("expecting_"):  # WHAT FORMAT ARE WE REQUESTING
                    format = k[len("expecting_"):]
                elif k == "expecting":  # NO FORMAT REQUESTED (TO TEST DEFAULT FORMATS)
                    format = None
                else:
                    continue

                num_expectations += 1
                expected = v

                subtest.query.format = format
                subtest.query.meta.testing = True  # MARK ALL QUERIES FOR TESTING SO FULL METADATA IS AVAILABLE BEFORE QUERY EXECUTION
                query = convert.unicode2utf8(convert.value2json(subtest.query))
                # EXECUTE QUERY
                response = self.try_till_response(self.service_url, data=query)

                if response.status_code != 200:
                    error(response)
                result = convert.json2value(convert.utf82unicode(response.all_content))

                # HOW TO COMPARE THE OUT-OF-ORDER DATA?
                compare_to_expected(subtest.query, result, expected, places)
                Log.note("Test result compares well")
            if num_expectations == 0:
                Log.error("Expecting test {{name|quote}} to have property named 'expecting_*' for testing the various format clauses", {
                    "name": subtest.name
                })
        except Exception as e:
            Log.error("Failed test {{name|quote}}", {"name": subtest.name}, e)
Example #43
    def test_time_subtraction(self):
        """
        IF THIS FAILS, MAYBE THE JSON ROUNDING ENGINE IS ENABLED
        """
        today = Date("2018-10-20")
        Log.note("Notice {{date}} is {{unix}} and rounding will affect that number", date=today, unix=today.unix)
        data = [
            {"a": today, "t": Date("2018-10-20").unix, "v": 2},
            {"a": today, "t": Date("2018-10-19").unix, "v": 2},
            {"a": today, "t": Date("2018-10-18").unix, "v": 3},
            {"a": today, "t": Date("2018-10-17").unix, "v": 5},
            {"a": today, "t": Date("2018-10-16").unix, "v": 7},
            {"a": today, "t": Date("2018-10-15").unix, "v": 11},
            {"a": today, "t": None, "v": 27},
            {"a": today, "t": Date("2018-10-19").unix, "v": 13},
            {"a": today, "t": Date("2018-10-18").unix, "v": 17},
            {"a": today, "t": Date("2018-10-16").unix, "v": 19},
            {"a": today, "t": Date("2018-10-15").unix, "v": 23}
        ]

        test = {
            "data": data,
            "query": {
                "from": TEST_TABLE,
                "select": ["a", "t", "v", {"name": "diff", "value": {"sub": ["t", "a"]}}],
                "limit": 100
            },
            "expecting_list": {
                "meta": {"format": "list"},
                "data": [
                    {"a": Date(r.a).unix, "t": Date(r.t).unix, "v":r.v, "diff": (Date(r.t) - Date(r.a)).seconds}
                    for r in to_data(data)
                ]
            }
        }
        self.utils.execute_tests(test)
Example #44
    def test_failures_by_directory(self):
        if self.not_real_service():
            return

        test = wrap({
            "query": {
                "from": {
                    "type": "elasticsearch",
                    "settings": {
                        "host": ES_CLUSTER_LOCATION,
                        "index": "unittest",
                        "type": "test_result"
                    }
                },
                "select": [{
                    "aggregate": "count"
                }],
                "edges": ["result.test", "result.ok"],
                "where": {
                    "prefix": {
                        "result.test": "/"
                    }
                },
                "format": "table"
            }
        })

        query = unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.testing.query, data=query)
            if response.status_code != 200:
                error(response)
        result = json2value(utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #45
    def test_chunk_timing(self):
        if self.not_real_service():
            return

        test = wrap({"query": {
            "from": {
                "type": "elasticsearch",
                "settings": {
                    "host": ES_CLUSTER_LOCATION,
                    "index": "unittest",
                    "type": "test_result"
                }
            },
            "select": {"value": "run.stats.duration", "aggregate": "average"},
            "edges": [
                {"name": "chunk", "value": ["run.suite", "run.chunk"]}
            ],
            "where": {"and": [
                {"term": {"etl.id": 0}},
                {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
            ]},
            "format": "cube",
            "samples": {
                "limit": 30
            }
        }})

        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = self.utils.try_till_response(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #46
 def loop(is_ready, please_stop):
     with locker:
         while not got_signal:
             locker.wait(till=Till(seconds=0.01))
             is_ready.go()
             Log.note("{{thread}} is ready",
                      thread=Thread.current().name)
         Log.note("outside loop")
         locker.wait()
         Log.note("thread is expected to get here")
Example #47
 def _worker(self, please_stop):
     try:
         # MAIN EXECUTION LOOP
         while not please_stop:
             command_item = self.queue.pop(till=please_stop)
             if command_item is None:
                 break
             self._process_command_item(command_item)
     except Exception as e:
         e = Except.wrap(e)
         if not please_stop:
             Log.warning("Problem with sql", cause=e)
     finally:
         self.closed = True
         DEBUG and Log.note("Database is closed")
         self.db.close()
Example #48
    def query(self, sql, param=None, stream=False, row_tuples=False):
        """
        RETURN A LIST OF dicts

        :param sql:  SQL TEMPLATE TO SEND
        :param param: PARAMETERS TO INJECT INTO SQL TEMPLATE
        :param stream: STREAM OUTPUT
        :param row_tuples: DO NOT RETURN dicts
        """
        if not self.cursor:  # ALLOW NON-TRANSACTIONAL READS
            Log.error("must perform all queries inside a transaction")
        self._execute_backlog()

        try:
            if isinstance(sql, SQL):
                sql = text(sql)
            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            if row_tuples:
                if stream:
                    result = self.cursor
                else:
                    result = wrap(list(self.cursor))
            else:
                columns = tuple(utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, []))
                def streamer():
                    for row in self.cursor:
                        output = Data()
                        for c, v in zip(columns, row):
                            output[c] = v
                        yield output

                if stream:
                    result = streamer()
                else:
                    result = wrap(streamer())

            return result
        except Exception as e:
            e = Except.wrap(e)
            if "InterfaceError" in e:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
Example #49
def _test_mode_wait(query):
    """
    WAIT FOR METADATA TO ARRIVE ON INDEX
    :param query: dict() OF REQUEST BODY
    :return: nothing
    """
    try:
        m = meta.singlton
        now = Date.now()
        end_time = now + MINUTE

        # MARK COLUMNS DIRTY
        m.meta.columns.update({
            "clear": ["partitions", "count", "cardinality", "last_updated"],
            "where": {
                "eq": {
                    "es_index": join_field(split_field(query["from"])[0:1])
                }
            }
        })

        # BE SURE THEY ARE ON THE todo QUEUE FOR RE-EVALUATION
        cols = [
            c for c in m.get_columns(table_name=query["from"], force=True)
            if c.type not in STRUCT
        ]
        for c in cols:
            Log.note("Mark {{column}} dirty at {{time}}",
                     column=c.names["."],
                     time=now)
            c.last_updated = now - TOO_OLD
            m.todo.push(c)

        while end_time > now:
            # GET FRESH VERSIONS
            cols = [
                c for c in m.get_columns(table_name=query["from"])
                if c.type not in STRUCT
            ]
            for c in cols:
                if not c.last_updated or c.cardinality == None:
                    Log.note(
                        "wait for column (table={{col.es_index}}, name={{col.es_column}}) metadata to arrive",
                        col=c)
                    break
            else:
                break
            Till(seconds=1).wait()
        for c in cols:
            Log.note(
                "fresh column name={{column.name}} updated={{column.last_updated|date}} parts={{column.partitions}}",
                column=c)
    except Exception as e:
        Log.warning("could not pickup columns", cause=e)
Example #50
    def _execute_backlog(self):
        if not self.backlog: return

        backlog, self.backlog = self.backlog, []
        for i, g in jx.chunk(backlog, size=MAX_BATCH_SIZE):
            sql = self.preamble + ";\n".join(g)
            try:
                self.debug and Log.note(
                    "Execute block of SQL:\n{{sql|indent}}", sql=sql)
                self.cursor.execute(sql)
                self.cursor.close()
                self.cursor = self.db.cursor()
            except Exception as e:
                Log.error("Problem executing SQL:\n{{sql|indent}}",
                          sql=sql,
                          cause=e,
                          stack_depth=1)
Example #51
        def inner(changeset_id):
            # ALWAYS TRY ES FIRST
            moves = _get_changeset_from_es(self.moves,
                                           changeset_id).changeset.moves
            if moves:
                return moves

            url = URL(revision.branch.url) / "raw-rev" / changeset_id
            DEBUG and Log.note("get unified diff from {{url}}", url=url)
            try:
                # THE ENCODING DOES NOT MATTER BECAUSE WE ONLY USE THE '+', '-' PREFIXES IN THE DIFF
                moves = http.get(url).content.decode("latin1")
                return diff_to_moves(text(moves))
            except Exception as e:
                Log.warning("could not get unified diff from {{url}}",
                            url=url,
                            cause=e)
Example #52
def test_backfilling_by_count(clogger):
    clogger.disable_backfilling = False
    clogger.disable_tipfilling = True
    clogger.disable_deletion = True
    clogger.disable_maintenance = True

    num_trys = 50
    wait_time = 2
    num_to_go_back = 10

    oldest_revnum, oldest_rev = clogger.conn.get_one(
        "SELECT min(revnum) AS revnum, revision FROM csetLog")

    new_old_rev = None
    clog_url = HG_URL + clogger.config.hg.branch + '/' + 'json-log/' + oldest_rev
    clog_obj_list = list(clogger._get_clog(clog_url)['changesets'])
    for count, clog_obj in enumerate(clog_obj_list[1:]):
        if count >= num_to_go_back - 1:
            new_old_rev = clog_obj['node'][:12]
            break

    clogger.csets_todo_backwards.add((num_to_go_back, True))

    new_ending = None
    new_revnum = None
    while num_trys > 0:
        with clogger.conn.transaction() as t:
            new_ending = t.get_one(
                "SELECT min(revnum) AS revnum, revision FROM csetLog")[1]
            DEBUG and Log.note("{{data}}",
                               data=(oldest_rev, new_old_rev, new_ending))
            if new_ending == new_old_rev:
                new_revnum = t.get_one(
                    "SELECT revnum FROM csetLog WHERE revision=?",
                    (oldest_rev, ))[0]
                break
        if new_ending != new_old_rev:
            Till(seconds=wait_time).wait()
            num_trys -= 1

    assert num_trys > 0
    assert new_old_rev == new_ending

    # Check that revnum's were properly handled
    expected_revnum = oldest_revnum + num_to_go_back
    assert expected_revnum == new_revnum
Example #53
    def query(self, sql, param=None, stream=False, row_tuples=False):
        """
        RETURN LIST OF dicts
        """
        if not self.cursor:  # ALLOW NON-TRANSACTIONAL READS
            Log.error("must perform all queries inside a transaction")
        self._execute_backlog()

        try:
            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            if row_tuples:
                if stream:
                    result = self.cursor
                else:
                    result = wrap(list(self.cursor))
            else:
                columns = [
                    utf8_to_unicode(d[0])
                    for d in coalesce(self.cursor.description, [])
                ]
                if stream:
                    result = (wrap(
                        {c: utf8_to_unicode(v)
                         for c, v in zip(columns, row)})
                              for row in self.cursor)
                else:
                    result = wrap(
                        [{c: utf8_to_unicode(v)
                          for c, v in zip(columns, row)}
                         for row in self.cursor])

            return result
        except Exception as e:
            e = Except.wrap(e)
            if "InterfaceError" in e:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}",
                      sql=sql,
                      cause=e,
                      stack_depth=1)
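
Hypothetical usage of the query() method above; the db instance, table, and column names are assumptions. Parameters are expanded into the SQL template by expand_template/quote_param, so values are passed as a dict:

rows = db.query(
    "SELECT id, name FROM task WHERE state={{state}}",
    param={"state": "running"},
)
for row in rows:
    print(row.id, row.name)  # wrap()ED ROWS ALLOW ATTRIBUTE ACCESS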
Example #54
0
def _get_file(ref, url):

    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        DEBUG and Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception as e:
        content = None
        Log.error("Could not read file {{filename}}", filename=path, cause=e)

    try:
        new_value = json2value(content,
                               params=ref.query,
                               flexible=True,
                               leaves=True)
    except Exception as e:
        e = Except.wrap(e)
        try:
            new_value = ini2value(content)
        except Exception:
            raise Log.error("Can not read {{file}}", file=path, cause=e)
    new_value = _replace_ref(new_value, ref)
    return new_value
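
The relative-path branch above counts leading dots and strips that many trailing segments from the referring URL's path; a self-contained sketch of the same arithmetic (resolve_relative is a hypothetical name, not the original):

def resolve_relative(base_path, rel_path):
    if not rel_path.startswith("."):
        parent = base_path.rstrip("/").split("/")[:-1]
        return "/".join(parent) + "/" + rel_path
    # EACH LEADING DOT DROPS ONE TRAILING SEGMENT FROM THE BASE PATH
    num_dot = len(rel_path) - len(rel_path.lstrip("."))
    parent = base_path.rstrip("/").split("/")[:-num_dot]
    return "/".join(parent) + rel_path[num_dot:]

resolve_relative("/a/b/config.json", "../other.json")  # RETURNS "/a/other.json"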
Example #55
0
    def get_session(self, session_id):
        now = Date.now().unix
        result = self.db.query(
            sql_query({
                "from": self.table,
                "where": {
                    "eq": {
                        "session_id": session_id
                    }
                }
            }))
        saved_record = first(Data(zip(result.header, r)) for r in result.data)
        if not saved_record or saved_record.expires <= now:
            return Data()
        session = json2value(saved_record.data)

        DEBUG and Log.note("record from db {{session}}", session=saved_record)
        return session
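
Hypothetical usage of get_session() above; the store instance and the session id source are assumptions:

session = store.get_session(incoming_session_id)
if not session:  # AN EMPTY Data() IS FALSEY, SO MISSING AND EXPIRED SESSIONS LAND HERE
    pass  # e.g. CREATE A FRESH SESSION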
Example #56
0
def icompressed2ibytes(source):
    """
    :param source: GENERATOR OF COMPRESSED BYTES
    :return: GENERATOR OF BYTES
    """
    decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
    last_bytes_count = 0  # Track the last byte count, so we do not show too many debug lines
    bytes_count = 0
    for bytes_ in source:

        data = decompressor.decompress(bytes_)

        bytes_count += len(data)
        if mo_math.floor(last_bytes_count, 1000000) != mo_math.floor(
                bytes_count, 1000000):
            last_bytes_count = bytes_count
            DEBUG and Log.note("bytes={{bytes}}", bytes=bytes_count)
        yield data
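
Hypothetical usage: stream a gzip file through icompressed2ibytes() without holding it in memory (the file name is an assumption; the 16 + zlib.MAX_WBITS window size tells zlib to expect a gzip header):

def file_chunks(path, chunk_size=65536):
    # YIELD FIXED-SIZE BLOCKS OF RAW (COMPRESSED) BYTES
    with open(path, "rb") as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                return
            yield chunk

total = sum(len(block) for block in icompressed2ibytes(file_chunks("data.json.gz")))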
Example #57
0
    def _setup_etl_code(self):
        sudo("apt-get install -y python2.7")

        Log.note("5")
        if not fabric_files.exists("/usr/local/bin/pip"):
            run("mkdir -p /home/ubuntu/temp")

            with cd("/home/ubuntu/temp"):
                # INSTALL FROM CLEAN DIRECTORY
                run("wget https://bootstrap.pypa.io/get-pip.py")
                sudo("rm -fr ~/.cache/pip"
                     )  # JUST IN CASE THE DIRECTORY WAS MADE
                sudo("python2.7 get-pip.py")

        Log.note("6")
        if not fabric_files.exists("/home/ubuntu/ActiveData-ETL/README.md"):
            with cd("/home/ubuntu"):
                sudo("apt-get -yf install git-core")
                run('rm -fr /home/ubuntu/ActiveData-ETL')
                run("git clone https://github.com/klahnakoski/ActiveData-ETL.git"
                    )
                run("mkdir -p /home/ubuntu/ActiveData-ETL/results/logs")

        Log.note("7")
        with cd("/home/ubuntu/ActiveData-ETL"):
            run("git checkout etl")
            # pip install -r requirements.txt HAS TROUBLE IMPORTING SOME LIBS
            sudo("rm -fr ~/.cache/pip")  # JUST IN CASE THE DIRECTORY WAS MADE
            sudo("pip install future")
            sudo("pip install BeautifulSoup")
            sudo("pip install MozillaPulse")
            sudo("pip install boto")
            sudo("pip install requests")
            sudo("pip install taskcluster")
            sudo("apt-get install -y python-dev")  # REQUIRED FOR psutil
            sudo("apt-get install -y build-essential")  # REQUIRED FOR psutil
            sudo("pip install psutil")
            sudo("pip install pympler")
            sudo("pip install -r requirements.txt")

            Log.note("8")
            sudo("apt-get -y install python-psycopg2")
Example #58
0
    def monitor(self, please_stop):
        please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
        while not please_stop:
            try:
                if not self.todo:
                    with self.meta.columns.locker:
                        old_columns = [
                            c
                            for c in self.meta.columns
                            if (c.last_updated == None or c.last_updated < Date.now()-TOO_OLD) and c.type not in STRUCT
                        ]
                        if old_columns:
                            if DEBUG:
                                Log.note("Old columns wth dates {{dates|json}}", dates=wrap(old_columns).last_updated)
                            self.todo.extend(old_columns)
                            # TEST CONSISTENCY
                            for c, d in product(list(self.todo.queue), list(self.todo.queue)):
                                if c.es_column == d.es_column and c.es_index == d.es_index and c != d:
                                    Log.error("")
                        else:
                            if DEBUG:
                                Log.note("no more metatdata to update")

                column = self.todo.pop(Till(seconds=(10*MINUTE).seconds))
                if column:
                    if DEBUG:
                        Log.note("update {{table}}.{{column}}", table=column.es_index, column=column.es_column)
                    if column.type in STRUCT:
                        with self.meta.columns.locker:
                            column.last_updated = Date.now()
                        continue
                    elif column.last_updated >= Date.now()-TOO_OLD:
                        continue
                    try:
                        self._update_cardinality(column)
                        if DEBUG and not column.es_index.startswith(TEST_TABLE_PREFIX):
                            Log.note("updated {{column.name}}", column=column)
                    except Exception as e:
                        Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
            except Exception as e:
                Log.warning("problem in cardinality monitor", cause=e)
Example #59
0
    def do_all(self):
        # ENSURE PARENT TRANSACTION IS UP TO DATE
        if self.parent:
            self.parent.do_all()

        c = None  # PREDEFINE SO THE except CLAUSE CAN REFERENCE IT
        try:
            # GET THE REMAINING COMMANDS
            with self.locker:
                todo = self.todo[self.complete:]
                self.complete = len(self.todo)

            # RUN THEM
            for c in todo:
                DEBUG and Log.note(FORMAT_COMMAND, command=c.command)
                self.db.db.execute(c.command)
                if c.command in [COMMIT, ROLLBACK]:
                    Log.error("logic error")
        except Exception as e:
            Log.error("problem running commands", current=c, cause=e)
Example #60
0
    def do_all(self):
        # ENSURE PARENT TRANSACTION IS UP TO DATE
        c = None
        try:
            if self.parent == self:
                Log.warning("Transactions parent is equal to itself.")
            if self.parent:
                self.parent.do_all()
            # GET THE REMAINING COMMANDS
            with self.locker:
                todo = self.todo[self.complete:]
                self.complete = len(self.todo)

            # RUN THEM
            for c in todo:
                DEBUG and Log.note(FORMAT_COMMAND, command=c.command)
                self.db.db.execute(c.command)
        except Exception as e:
            Log.error("problem running commands", current=c, cause=e)