Пример #1
0
def setup_ssl():
    config.flask.ssl_context = None

    if not config.flask.ssl_context:
        return

    ssl_flask = config.flask.copy()
    ssl_flask.debug = False
    ssl_flask.port = 443

    if isinstance(config.flask.ssl_context, Mapping):
        # EXPECTED PEM ENCODED FILE NAMES
        # `load_cert_chain` REQUIRES CONCATENATED LIST OF CERTS
        tempfile = NamedTemporaryFile(delete=False, suffix=".pem")
        try:
            tempfile.write(
                File(ssl_flask.ssl_context.certificate_file).read_bytes())
            if ssl_flask.ssl_context.certificate_chain_file:
                tempfile.write(
                    File(ssl_flask.ssl_context.certificate_chain_file).
                    read_bytes())
            tempfile.flush()
            tempfile.close()

            context = SSLContext(PROTOCOL_SSLv23)
            context.load_cert_chain(
                tempfile.name,
                keyfile=File(ssl_flask.ssl_context.privatekey_file).abspath)

            ssl_flask.ssl_context = context
        except Exception, e:
            Log.error("Could not handle ssl context construction", cause=e)
        finally:
Пример #2
0
def _run_remote(command, name):
    File("./results/temp/" + name +
         ".sh").write("nohup " + command +
                      " >& /dev/null < /dev/null &\nsleep 20")
    put("./results/temp/" + name + ".sh", "" + name + ".sh")
    run("chmod u+x " + name + ".sh")
    run("./" + name + ".sh")
Пример #3
0
def path2fullpath(path):
    fullpath = "file:///" + File(path).abspath.replace("\\", "/")
    if fullpath.find("#") >= 0:
        fullpath = fullpath.replace("#", "#log=" + LOG_DIV + "&")
    else:
        fullpath = fullpath + "#log=" + LOG_DIV
    return fullpath
Пример #4
0
def read_settings(filename=None, defs=None):
    # READ SETTINGS
    if filename:
        settings_file = File(filename)
        if not settings_file.exists:
            Log.error("Can not file settings file {{filename}}", {
                "filename": settings_file.abspath
            })
        settings = ref.get("file:///" + settings_file.abspath)
        if defs:
            settings.args = argparse(defs)
        return settings
    else:
        defs = listwrap(defs)
        defs.append({
            "name": ["--settings", "--settings-file", "--settings_file"],
            "help": "path to JSON file with settings",
            "type": str,
            "dest": "filename",
            "default": "./settings.json",
            "required": False
        })
        args = argparse(defs)
        settings = ref.get("file://" + args.filename.replace(os.sep, "/"))
        settings.args = args
        return settings
Пример #5
0
 def __init__(self, settings):
     self.settings = wrap({"host": "fake", "index": "fake"})
     self.filename = settings.filename
     try:
         self.data = convert.json2value(File(self.filename).read())
     except IOError:
         self.data = Dict()
Пример #6
0
 def __init__(self, filename, host="fake", index="fake", settings=None):
     self.settings = settings
     self.filename = settings.filename
     try:
         self.data = convert.json2value(File(self.filename).read())
     except Exception:
         self.data = Dict()
Пример #7
0
def get_file(ref, url):
    from pyLibrary.env.files import File

    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        if DEBUG:
            _Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception, e:
        content = None
        _Log.error("Could not read file {{filename}}", filename=path, cause=e)
Пример #8
0
def _start_es():
    File("./results/temp/start_es.sh").write(
        "nohup ./bin/elasticsearch >& /dev/null < /dev/null &\nsleep 20")
    with cd("/home/ec2-user/"):
        put("./results/temp/start_es.sh", "start_es.sh")
        run("chmod u+x start_es.sh")

    with cd("/usr/local/elasticsearch/"):
        sudo("/home/ec2-user/start_es.sh")
Пример #9
0
    def __init__(self, file):
        assert file

        from pyLibrary.env.files import File

        self.file = File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock = Lock("file lock for logging")
Пример #10
0
    def extend(self, records):
        """
        JUST SO WE MODEL A Queue
        """
        records = {v["id"]: v["value"] for v in records}

        unwrap(self.data).update(records)

        data_as_json = convert.value2json(self.data, pretty=True)

        File(self.filename).write(data_as_json)
        Log.note("{{num}} documents added", num=len(records))
Пример #11
0
    def test_51586(self):
        debug_settings = {
            "trace": True,
            "cprofile": {
                "enabled": True,
                "filename": "tests/results/test_51586_profile.tab"
            }
        }
        Log.start(debug_settings)

        source_key = "51586_5124145.52"
        content = File("tests/resources/51586_5124145.52.json.gz").read_bytes()
        source = Dict(read_lines=lambda: GzipLines(content))
        with Accumulator(
                File("tests/results/51586_5124145.52.json")) as destination:
            with Timer("ETL file"):
                process_unittest_in_s3(source_key,
                                       source,
                                       destination,
                                       please_stop=None)
        Log.stop()
Пример #12
0
def _upgrade():
    global _upgraded
    _upgraded = True
    try:
        import sys

        sqlite_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
        python_dll = File("pyLibrary/vendor/sqlite/sqlite3.dll")
        if python_dll.read_bytes() != sqlite_dll.read_bytes():
            backup = sqlite_dll.backup()
            File.copy(python_dll, sqlite_dll)
    except Exception, e:
        Log.warning("could not upgrade python's sqlite", cause=e)
Пример #13
0
def write(profile_settings):
    from pyLibrary import convert
    from pyLibrary.env.files import File

    profs = list(profiles.values())
    for p in profs:
        p.stats = p.stats.end()

    stats = [{
        "description": p.description,
        "num_calls": p.stats.count,
        "total_time": p.stats.count * p.stats.mean,
        "total_time_per_call": p.stats.mean
    } for p in profs if p.stats.count > 0]
    stats_file = File(profile_settings.filename,
                      suffix=convert.datetime2string(datetime.now(),
                                                     "_%Y%m%d_%H%M%S"))
    if stats:
        stats_file.write(convert.list2tab(stats))
    else:
        stats_file.write("<no profiles>")

    stats_file2 = File(profile_settings.filename,
                       suffix=convert.datetime2string(datetime.now(),
                                                      "_series_%Y%m%d_%H%M%S"))
    if not profs:
        return

    max_samples = MAX([len(p.samples) for p in profs if p.samples])
    if not max_samples:
        return

    r = range(max_samples)
    profs.insert(0, Dict(description="index", samples=r))
    stats = [{p.description: wrap(p.samples)[i]
              for p in profs if p.samples} for i in r]
    if stats:
        stats_file2.write(convert.list2tab(stats))
Пример #14
0
 def _worker(self, please_stop):
     if Sqlite.canonical:
         self.db = Sqlite.canonical
     else:
         self.db = sqlite3.connect(':memory:')
         try:
             full_path = File(
                 "pyLibrary/vendor/sqlite/libsqlitefunctions.so").abspath
             # self.db.execute("SELECT sqlite3_enable_load_extension(1)")
             self.db.enable_load_extension(True)
             self.db.execute("SELECT load_extension('" + full_path + "')")
         except Exception, e:
             Log.warning(
                 "loading sqlite extension functions failed, doing without. (no SQRT for you!)",
                 cause=e)
Пример #15
0
 def execute_file(filename,
                  host,
                  username,
                  password,
                  schema=None,
                  param=None,
                  ignore_errors=False,
                  settings=None):
     # MySQLdb provides no way to execute an entire SQL file in bulk, so we
     # have to shell out to the commandline client.
     sql = File(filename).read()
     if ignore_errors:
         try:
             MySQL.execute_sql(sql=sql, param=param, settings=settings)
         except Exception, e:
             pass
Пример #16
0
def _get_attr(obj, path):
    if not path:
        return obj

    attr_name = path[0]

    if isinstance(obj, ModuleType):
        if attr_name in obj.__dict__:
            return _get_attr(obj.__dict__[attr_name], path[1:])
        elif attr_name in dir(obj):
            return _get_attr(obj[attr_name], path[1:])

        # TRY FILESYSTEM
        from pyLibrary.env.files import File
        possible_error = None
        if File.new_instance(File(obj.__file__).parent,
                             attr_name).set_extension("py").exists:
            try:
                # THIS CASE IS WHEN THE __init__.py DOES NOT IMPORT THE SUBDIR FILE
                # WE CAN STILL PUT THE PATH TO THE FILE IN THE from CLAUSE
                if len(path) == 1:
                    # GET MODULE OBJECT
                    output = __import__(obj.__name__ + "." + attr_name,
                                        globals(), locals(), [path[0]], 0)
                    return output
                else:
                    # GET VARIABLE IN MODULE
                    output = __import__(obj.__name__ + "." + attr_name,
                                        globals(), locals(), [path[1]], 0)
                    return _get_attr(output, path[1:])
            except Exception, e:
                from pyLibrary.debugs.exceptions import Except
                possible_error = Except.wrap(e)

        # TRY A CASE-INSENSITIVE MATCH
        attr_name = lower_match(attr_name, dir(obj))
        if not attr_name:
            from pyLibrary.debugs.logs import Log
            Log.warning(PATH_NOT_FOUND + ". Returning None.",
                        cause=possible_error)
        elif len(attr_name) > 1:
            from pyLibrary.debugs.logs import Log
            Log.error(AMBIGUOUS_PATH_FOUND + " {{paths}}", paths=attr_name)
        else:
            return _get_attr(obj[attr_name[0]], path[1:])
Пример #17
0
def main():
    try:
        config = startup.read_settings(defs=[{
            "name": ["--file"],
            "help": "file to save backup",
            "type": str,
            "dest": "file",
            "required": True
        }])
        constants.set(config.constants)
        Log.start(config.debug)

        sq = elasticsearch.Index(settings=config.saved_queries)
        result = sq.search({"query": {"match_all": {}}, "size": 200000})

        File(config.args.file).write("".join(
            map(convert.json2value, result.hits.hits)))

    except Exception, e:
        Log.error("Problem with etl", e)
Пример #18
0
def write_profile(profile_settings, stats):
    from pyLibrary import convert
    from pyLibrary.env.files import File

    acc = stats[0]
    for s in stats[1:]:
        acc.add(s)

    stats = [{
        "num_calls": d[1],
        "self_time": d[2],
        "total_time": d[3],
        "self_time_per_call": d[2] / d[1],
        "total_time_per_call": d[3] / d[1],
        "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
        "line": f[1],
        "method": f[2].lstrip("<").rstrip(">")
    } for f, d, in acc.stats.iteritems()]
    stats_file = File(profile_settings.filename,
                      suffix=convert.datetime2string(datetime.now(),
                                                     "_%Y%m%d_%H%M%S"))
    stats_file.write(convert.list2tab(stats))
Пример #19
0
def main():
    """
    CLEAR OUT KEYS FROM BUCKET BY RANGE, OR BY FILE
    """
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--bucket"],
            "help": "bucket to reprocess",
            "type": str,
            "dest": "bucket",
            "required": True
        }, {
            "name": ["--begin", "--start"],
            "help": "lowest key (or prefix) to reprocess",
            "type": str,
            "dest": "start",
            "default": "1",
            "required": False
        }, {
            "name": ["--end", "--stop"],
            "help": "highest key (or prefix) to reprocess",
            "type": str,
            "dest": "end",
            "default": None,
            "required": False
        }, {
            "name": ["--file"],
            "help": "path to file with CR-delimited prefix list",
            "type": str,
            "dest": "file",
            "default": None,
            "required": False
        }])
        Log.start(settings.debug)

        with aws.Queue(settings.work_queue) as work_queue:
            source = Connection(settings.aws).get_bucket(settings.args.bucket)

            if settings.args.file:
                now = Date.now()
                for prefix in File(settings.args.file):
                    all_keys = source.keys(prefix=key_prefix(prefix))
                    for k in all_keys:
                        Log.note("Adding {{key}}", key=k)
                        work_queue.add({
                            "bucket": settings.args.bucket,
                            "key": k,
                            "timestamp": now.unix,
                            "date/time": now.format()
                        })
                return

            if settings.args.end and settings.args.start:
                up_to = str(int(settings.args.end) - 1)
                prefix = strings.common_prefix(settings.args.start, up_to)
            else:
                prefix = None
            start = Version(settings.args.start)
            end = Version(settings.args.end)

            all_keys = source.keys(prefix=prefix)
            with Timer("filtering {{num}} keys", {"num": len(all_keys)}):
                all_keys = [(k, Version(k)) for k in all_keys
                            if k.find("None") == -1]
                all_keys = [(k, p) for k, p in all_keys if start <= p < end]
            with Timer("sorting {{num}} keys", {"num": len(all_keys)}):
                all_keys = qb.sort(all_keys, 1)
            for k, p in all_keys:
                Log.note("Adding {{key}}", key=k)
                now = Date.now()
                work_queue.add({
                    "bucket": settings.args.bucket,
                    "key": k,
                    "timestamp": now.unix,
                    "date/time": now.format()
                })

    except Exception, e:
        Log.error("Problem with etl", e)
Пример #20
0
def make_test_instance(name, settings):
    if settings.filename:
        File(settings.filename).delete()
    return open_test_instance(name, settings)
Пример #21
0
from pyLibrary import convert
from pyLibrary.debugs import startup
from pyLibrary.maths.randoms import Random
from pyLibrary.sql.mysql import MySQL
from pyLibrary.env.files import File
from pyLibrary.debugs.logs import Log
from pyLibrary.queries import qb
from pyLibrary.strings import between
from pyLibrary.dot import coalesce, wrap
from pyLibrary.thread.multithread import Multithread
from pyLibrary.times.timer import Timer

DEBUG = True

TEMPLATE_FILE = File(
    "C:/Users/klahnakoski/git/datazilla-alerts/tests/resources/hg/changeset_nofiles.template"
)


def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url,
                 File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
Пример #22
0
from active_data import record_request, cors_wrapper
from active_data.actions import save_query
from active_data.actions.json import get_raw_json
from active_data.actions.query import query
from active_data.actions.save_query import SaveQueries, find_query
from active_data.actions.static import download
from pyLibrary import convert
from pyLibrary.debugs import constants, startup
from pyLibrary.debugs.logs import Log
from pyLibrary.env import elasticsearch
from pyLibrary.env.files import File
from pyLibrary.queries import containers
from pyLibrary.queries.meta import FromESMetadata
from pyLibrary.thread.threads import Thread

OVERVIEW = File("active_data/public/index.html").read()

app = Flask(__name__)
config = None


@app.route('/', defaults={'path': ''}, methods=['OPTIONS', 'HEAD'])
@app.route('/<path:path>', methods=['OPTIONS', 'HEAD'])
@cors_wrapper
def _head(path):
    return Response(b'', status=200)


app.add_url_rule('/tools/<path:filename>', None, download)
app.add_url_rule('/find/<path:hash>', None, find_query)
app.add_url_rule('/query',
Пример #23
0
    assert settings["class"]

    # IMPORT MODULE FOR HANDLER
    path = settings["class"].split(".")
    class_name = path[-1]
    path = ".".join(path[:-1])
    constructor = None
    try:
        temp = __import__(path, globals(), locals(), [class_name], -1)
        constructor = object.__getattribute__(temp, class_name)
    except Exception, e:
        if settings.stream and not constructor:
            # PROVIDE A DEFAULT STREAM HANLDER
            constructor = TextLog_usingThreadedStream
        else:
            Log.error("Can not find class {{class}}",  {"class": path}, cause=e)

    # IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS
    if settings.filename:
        from pyLibrary.env.files import File

        f = File(settings.filename)
        if not f.parent.exists:
            f.parent.create()

    settings['class'] = None
    params = unwrap(settings)
    log_instance = constructor(**params)
    return log_instance

Пример #24
0
def get_changesets(date_range=None, revision_range=None, repo=None):
    # GET ALL CHANGESET INFO
    args = [
        "hg",
        "log",
        "--cwd",
        File(repo.directory).filename,
        "-v",
        # "-p",   # TO GET PATCH CONTENTS
        "--style",
        TEMPLATE_FILE.filename
    ]

    if date_range is not None:
        if date_range.max == None:
            if date_range.min == None:
                drange = ">0 0"
            else:
                drange = ">" + unicode(convert.datetime2unix(
                    date_range.min)) + " 0"
        else:
            if date_range.min == None:
                drange = "<" + unicode(
                    convert.datetime2unix(date_range.max) - 1) + " 0"
            else:
                drange = unicode(convert.datetime2unix(
                    date_range.min)) + " 0 to " + unicode(
                        convert.datetime2unix(date_range.max) - 1) + " 0"

        args.extend(["--date", drange])

    if revision_range is not None:
        args.extend(
            ["-r",
             str(revision_range.min) + ":" + str(revision_range.max)])

    proc = subprocess.Popen(args,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            bufsize=-1)

    def iterator():
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    if line == '':
                        proc.wait()
                        if proc.returncode:
                            Log.error(
                                "Unable to pull hg log: return code {{return_code}}",
                                {"return_code": proc.returncode})
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)

                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                (date, node, rev, author, branches, files, file_adds,
                 file_dels, p1rev, p1node, parents, children, tags,
                 desc) = (urllib.unquote(c) for c in line.split("\t"))

                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                files = set(files.split("\n")) - set()
                doc = {
                    "repos":
                    repo.name,
                    "date":
                    convert.unix2datetime(
                        convert.value2number(date.split(" ")[0])),
                    "node":
                    node,
                    "revision":
                    rev,
                    "author":
                    author,
                    "branches":
                    set(branches.split("\n")) - {""},
                    "file_changes":
                    files - file_adds - file_dels - {""},
                    "file_adds":
                    file_adds,
                    "file_dels":
                    file_dels,
                    "parents":
                    set(parents.split("\n")) - {""} | {p1rev + ":" + p1node},
                    "children":
                    set(children.split("\n")) - {""},
                    "tags":
                    set(tags.split("\n")) - {""},
                    "description":
                    desc
                }
                doc = elasticsearch.scrub(doc)
                yield doc
        except Exception, e:
            if isinstance(
                    e, ValueError) and e.message.startswith("need more than "):
                Log.error("Problem iterating through log ({{message}})",
                          {"message": line}, e)

            Log.error("Problem iterating through log", e)
Пример #25
0
def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url,
                 File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error(
                            "Can not clone {{repos.url}}, because {{problem}}",
                            {
                                "repos": repo,
                                "problem": line
                            })
                    if line == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()

    else:
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd",
                 File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error(
                    "Problem pulling repos, try \"hg recover\"\n{{reason|indent}}",
                    {"reason": output})
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}",
                          {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}",
                     {"pull_results": output})
Пример #26
0
 def execute_file(self, filename, param=None):
     content = File(filename).read()
     self.execute(content, param)
Пример #27
0
from pyLibrary.debugs.exceptions import Except
from pyLibrary.debugs.logs import Log
from pyLibrary.debugs.profiles import CProfiler
from pyLibrary.dot import coalesce, join_field, split_field, wrap, listwrap
from pyLibrary.env.files import File
from pyLibrary.maths import Math
from pyLibrary.queries import jx, meta, wrap_from
from pyLibrary.queries.containers import Container, STRUCT
from pyLibrary.queries.meta import TOO_OLD
from pyLibrary.strings import expand_template
from pyLibrary.thread.threads import Thread
from pyLibrary.times.dates import Date
from pyLibrary.times.durations import MINUTE
from pyLibrary.times.timer import Timer

BLANK = convert.unicode2utf8(File("active_data/public/error.html").read())
QUERY_SIZE_LIMIT = 10 * 1024 * 1024


@cors_wrapper
def query(path):
    with CProfiler():
        try:
            with Timer("total duration") as query_timer:
                preamble_timer = Timer("preamble")
                with preamble_timer:
                    if flask.request.headers.get("content-length",
                                                 "") in ["", "0"]:
                        # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                        return Response(BLANK,
                                        status=400,