Example #1
def setup_ssl():
    config.flask.ssl_context = None

    if not config.flask.ssl_context:
        return

    ssl_flask = config.flask.copy()
    ssl_flask.debug = False
    ssl_flask.port = 443

    if isinstance(config.flask.ssl_context, Mapping):
        # EXPECTED PEM ENCODED FILE NAMES
        # `load_cert_chain` REQUIRES CONCATENATED LIST OF CERTS
        tempfile = NamedTemporaryFile(delete=False, suffix=".pem")
        try:
            tempfile.write(
                File(ssl_flask.ssl_context.certificate_file).read_bytes())
            if ssl_flask.ssl_context.certificate_chain_file:
                tempfile.write(
                    File(ssl_flask.ssl_context.certificate_chain_file).
                    read_bytes())
            tempfile.flush()
            tempfile.close()

            context = SSLContext(PROTOCOL_SSLv23)
            context.load_cert_chain(
                tempfile.name,
                keyfile=File(ssl_flask.ssl_context.privatekey_file).abspath)

            ssl_flask.ssl_context = context
        except Exception, e:
            Log.error("Could not handle ssl context construction", cause=e)
        finally:
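The non-obvious step in this example is concatenating the server certificate and its chain into one temporary PEM file, because SSLContext.load_cert_chain reads the whole chain from a single certfile. A minimal standard-library sketch of the same technique; the helper name and file-name arguments are placeholders, not part of the original code:

from ssl import SSLContext, PROTOCOL_SSLv23
from tempfile import NamedTemporaryFile

def build_ssl_context(cert_file, chain_file, key_file):
    # sketch only: standalone version of the concatenation step above
    # load_cert_chain() expects certfile to hold the leaf certificate followed
    # by any intermediates, so write them into one PEM file first
    pem = NamedTemporaryFile(delete=False, suffix=".pem")
    try:
        with open(cert_file, "rb") as f:
            pem.write(f.read())
        if chain_file:
            with open(chain_file, "rb") as f:
                pem.write(f.read())
        pem.flush()
    finally:
        pem.close()

    context = SSLContext(PROTOCOL_SSLv23)
    context.load_cert_chain(pem.name, keyfile=key_file)
    return context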
Example #2
def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name":repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url, File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1
            )
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error("Can not clone {{repos.url}}, because {{problem}}", {
                            "repos": repo,
                            "problem": line
                        })
                    if line == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()


    else:
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name":repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd", File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1
            )
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error("Problem pulling repos, try \"hg recover\"\n{{reason|indent}}", {"reason": output})
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}", {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}", {"pull_results": output})
Example #3
    def __init__(self, file):
        assert file

        from pyLibrary.env.files import File

        self.file = File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock = Lock("file lock for logging")
Example #4
def _upgrade():
    global _upgraded
    _upgraded = True
    try:
        import sys

        sqlite_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
        python_dll = File("pyLibrary/vendor/sqlite/sqlite3.dll")
        if python_dll.read_bytes() != sqlite_dll.read_bytes():
            backup = sqlite_dll.backup()
            File.copy(python_dll, sqlite_dll)
    except Exception, e:
        Log.warning("could not upgrade python's sqlite", cause=e)
Example #5
    def test_read_home(self):
        file = "~/___test_file.json"
        source = "tests/resources/json_ref/simple.json"
        File.copy(File(source), File(file))
        content = jsons.ref.get("file://"+file)

        try:
            self.assertEqual(
                content,
                {"test_key": "test_value"}
            )
        finally:
            File(file).delete()
Example #6
def read_settings(filename=None, defs=None):
    # READ SETTINGS
    if filename:
        settings_file = File(filename)
        if not settings_file.exists:
            Log.error("Can not file settings file {{filename}}", {
                "filename": settings_file.abspath
            })
        settings = ref.get("file:///" + settings_file.abspath)
        if defs:
            settings.args = argparse(defs)
        return settings
    else:
        defs = listwrap(defs)
        defs.append({
            "name": ["--settings", "--settings-file", "--settings_file"],
            "help": "path to JSON file with settings",
            "type": str,
            "dest": "filename",
            "default": "./settings.json",
            "required": False
        })
        args = argparse(defs)
        settings = ref.get("file://" + args.filename.replace(os.sep, "/"))
        settings.args = args
        return settings
Example #7
def path2fullpath(path):
    fullpath = "file:///" + File(path).abspath.replace("\\", "/")
    if fullpath.find("#") >= 0:
        fullpath = fullpath.replace("#", "#log=" + LOG_DIV + "&")
    else:
        fullpath = fullpath + "#log=" + LOG_DIV
    return fullpath
Example #8
    def __init__(self, settings):
        self.settings = wrap({"host": "fake", "index": "fake"})
        self.filename = settings.filename
        try:
            self.data = convert.json2value(File(self.filename).read())
        except IOError:
            self.data = Dict()
Example #9
    def __init__(self, filename, host="fake", index="fake", settings=None):
        self.settings = settings
        self.filename = settings.filename
        try:
            self.data = convert.json2value(File(self.filename).read())
        except Exception:
            self.data = Dict()
Example #10
def get_file(ref, url):
    from pyLibrary.env.files import File

    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        if DEBUG:
            _Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception, e:
        content = None
        _Log.error("Could not read file {{filename}}", filename=path, cause=e)
Example #11
def _run_remote(command, name):
    File("./results/temp/" + name +
         ".sh").write("nohup " + command +
                      " >& /dev/null < /dev/null &\nsleep 20")
    put("./results/temp/" + name + ".sh", "" + name + ".sh")
    run("chmod u+x " + name + ".sh")
    run("./" + name + ".sh")
Example #12
def _get_attr(obj, path):
    if not path:
        return obj

    attr_name = path[0]

    if isinstance(obj, ModuleType):
        if attr_name in obj.__dict__:
            return _get_attr(obj.__dict__[attr_name], path[1:])
        elif attr_name in dir(obj):
            return _get_attr(obj[attr_name], path[1:])

        # TRY FILESYSTEM
        from pyLibrary.env.files import File
        possible_error = None
        if File.new_instance(File(obj.__file__).parent,
                             attr_name).set_extension("py").exists:
            try:
                # THIS CASE IS WHEN THE __init__.py DOES NOT IMPORT THE SUBDIR FILE
                # WE CAN STILL PUT THE PATH TO THE FILE IN THE from CLAUSE
                if len(path) == 1:
                    # GET MODULE OBJECT
                    output = __import__(obj.__name__ + "." + attr_name,
                                        globals(), locals(), [path[0]], 0)
                    return output
                else:
                    # GET VARIABLE IN MODULE
                    output = __import__(obj.__name__ + "." + attr_name,
                                        globals(), locals(), [path[1]], 0)
                    return _get_attr(output, path[1:])
            except Exception, e:
                from pyLibrary.debugs.exceptions import Except
                possible_error = Except.wrap(e)

        # TRY A CASE-INSENSITIVE MATCH
        attr_name = lower_match(attr_name, dir(obj))
        if not attr_name:
            from pyLibrary.debugs.logs import Log
            Log.warning(PATH_NOT_FOUND + ". Returning None.",
                        cause=possible_error)
        elif len(attr_name) > 1:
            from pyLibrary.debugs.logs import Log
            Log.error(AMBIGUOUS_PATH_FOUND + " {{paths}}", paths=attr_name)
        else:
            return _get_attr(obj[attr_name[0]], path[1:])
Example #13
def _start_es():
    File("./results/temp/start_es.sh").write(
        "nohup ./bin/elasticsearch >& /dev/null < /dev/null &\nsleep 20")
    with cd("/home/ec2-user/"):
        put("./results/temp/start_es.sh", "start_es.sh")
        run("chmod u+x start_es.sh")

    with cd("/usr/local/elasticsearch/"):
        sudo("/home/ec2-user/start_es.sh")
Example #14
    def __init__(self, file):
        assert file

        from pyLibrary.env.files import File

        self.file = File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock = Lock("file lock for logging")
Example #15
    def run_job(self, job):
        process = Process(name=job.name,
                          params=job.command,
                          cwd=job.directory,
                          env=job.environment)

        # DIRECT OUTPUT TO FILES
        self.add_file(
            process.stdout,
            coalesce(job.stdout,
                     File.newInstance(self.settings.log.directory, job.name)))
Example #16
    def test_51586(self):
        debug_settings = {
            "trace": True,
            "cprofile": {
                "enabled": True,
                "filename": "tests/results/test_51586_profile.tab"
            }
        }
        Log.start(debug_settings)

        source_key = "51586_5124145.52"
        content = File("tests/resources/51586_5124145.52.json.gz").read_bytes()
        source = Dict(read_lines=lambda: GzipLines(content))
        with Accumulator(
                File("tests/results/51586_5124145.52.json")) as destination:
            with Timer("ETL file"):
                process_unittest_in_s3(source_key,
                                       source,
                                       destination,
                                       please_stop=None)
        Log.stop()
Example #17
    def extend(self, records):
        """
        JUST SO WE MODEL A Queue
        """
        records = {v["id"]: v["value"] for v in records}

        unwrap(self.data).update(records)

        data_as_json = convert.value2json(self.data, pretty=True)

        File(self.filename).write(data_as_json)
        Log.note("{{num}} documents added", num=len(records))
Example #18
def main(settings):
    current_time = datetime.utcnow()
    time_file = File(settings.param.last_replication_time)

    # SYNCH WITH source ES INDEX
    source = Index(settings.source)
    destination = Cluster(settings.destination).get_or_create_index(settings.destination)

    # GET LAST UPDATED
    from_file = None
    if time_file.exists:
        from_file = convert.milli2datetime(convert.value2int(time_file.read()))
    from_es = get_last_updated(destination) - timedelta(hours=1)
    last_updated = MIN(coalesce(from_file, convert.milli2datetime(0)), from_es)
    Log.note("updating records with modified_ts>={{last_updated}}", {"last_updated": last_updated})

    pending = get_pending(source, last_updated)
    with ThreadedQueue(destination, batch_size=1000) as data_sink:
        replicate(source, data_sink, pending, last_updated)

    # RECORD LAST UPDATED
    time_file.write(unicode(convert.datetime2milli(current_time)))
Example #19
def write_profile(profile_settings, stats):
    from pyLibrary import convert
    from pyLibrary.env.files import File

    acc = stats[0]
    for s in stats[1:]:
        acc.add(s)

    stats = [{
        "num_calls": d[1],
        "self_time": d[2],
        "total_time": d[3],
        "self_time_per_call": d[2] / d[1],
        "total_time_per_call": d[3] / d[1],
        "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
        "line": f[1],
        "method": f[2].lstrip("<").rstrip(">")
    } for f, d, in acc.stats.iteritems()]
    stats_file = File(profile_settings.filename,
                      suffix=convert.datetime2string(datetime.now(),
                                                     "_%Y%m%d_%H%M%S"))
    stats_file.write(convert.list2tab(stats))
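The f and d tuples here come straight out of pstats: the key is (file, line, method) and the value begins with (call_count, num_calls, self_time, total_time). A hedged sketch of producing such a stats object with the standard profiler; do_work is a placeholder, not part of the original code:

import cProfile
import pstats

def do_work():
    # placeholder for the code being profiled
    sum(i * i for i in range(1000))

profiler = cProfile.Profile()
profiler.enable()
do_work()
profiler.disable()

acc = pstats.Stats(profiler)
# acc.stats maps (file, line, method) -> (cc, num_calls, self_time, total_time, callers),
# which is the shape write_profile() iterates over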
Example #20
def write_profile(profile_settings, stats):
    from pyLibrary import convert
    from pyLibrary.env.files import File

    acc = stats[0]
    for s in stats[1:]:
        acc.add(s)

    stats = [{
        "num_calls": d[1],
        "self_time": d[2],
        "total_time": d[3],
        "self_time_per_call": d[2] / d[1],
        "total_time_per_call": d[3] / d[1],
        "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
        "line": f[1],
        "method": f[2].lstrip("<").rstrip(">")
    }
        for f, d, in acc.stats.iteritems()
    ]
    stats_file = File(profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
    stats_file.write(convert.list2tab(stats))
Example #21
def _read_file(filename):
    try:
        file = File.new_instance(STATIC_DIRECTORY, filename)
        if not file.abspath.startswith(STATIC_DIRECTORY.abspath):
            return "", 404, "text/html"

        Log.note("Read {{file}}", file=file.abspath)
        mimetype, encoding = mimetypes.guess_type(file.extension)
        if not mimetype:
            mimetype = "text/html"
        return file.read_bytes(), 200, mimetype
    except Exception:
        return "", 404, "text/html"
Example #22
def write(profile_settings):
    from pyLibrary import convert
    from pyLibrary.env.files import File

    profs = list(profiles.values())
    for p in profs:
        p.stats = p.stats.end()

    stats = [{
        "description": p.description,
        "num_calls": p.stats.count,
        "total_time": p.stats.count * p.stats.mean,
        "total_time_per_call": p.stats.mean
    } for p in profs if p.stats.count > 0]
    stats_file = File(profile_settings.filename,
                      suffix=convert.datetime2string(datetime.now(),
                                                     "_%Y%m%d_%H%M%S"))
    if stats:
        stats_file.write(convert.list2tab(stats))
    else:
        stats_file.write("<no profiles>")

    stats_file2 = File(profile_settings.filename,
                       suffix=convert.datetime2string(datetime.now(),
                                                      "_series_%Y%m%d_%H%M%S"))
    if not profs:
        return

    max_samples = MAX([len(p.samples) for p in profs if p.samples])
    if not max_samples:
        return

    r = range(max_samples)
    profs.insert(0, Dict(description="index", samples=r))
    stats = [{p.description: wrap(p.samples)[i]
              for p in profs if p.samples} for i in r]
    if stats:
        stats_file2.write(convert.list2tab(stats))
Example #23
    def _worker(self, please_stop):
        if Sqlite.canonical:
            self.db = Sqlite.canonical
        else:
            self.db = sqlite3.connect(':memory:')
            try:
                full_path = File("pyLibrary/vendor/sqlite/libsqlitefunctions.so").abspath
                # self.db.execute("SELECT sqlite3_enable_load_extension(1)")
                self.db.enable_load_extension(True)
                self.db.execute("SELECT load_extension('" + full_path + "')")
            except Exception, e:
                Log.warning(
                    "loading sqlite extension functions failed, doing without. (no SQRT for you!)",
                    cause=e)
Example #24
    def execute_file(filename,
                     host,
                     username,
                     password,
                     schema=None,
                     param=None,
                     ignore_errors=False,
                     settings=None):
        # MySQLdb provides no way to execute an entire SQL file in bulk, so we
        # have to shell out to the commandline client.
        sql = File(filename).read()
        if ignore_errors:
            try:
                MySQL.execute_sql(sql=sql, param=param, settings=settings)
            except Exception, e:
                pass
Example #25
def _upgrade():
    global _upgraded
    _upgraded = True
    try:
        import sys

        sqlite_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
        python_dll = File("pyLibrary/vendor/sqlite/sqlite3.dll")
        if python_dll.read_bytes() != sqlite_dll.read_bytes():
            backup = sqlite_dll.backup()
            File.copy(python_dll, sqlite_dll)
    except Exception, e:
        Log.warning("could not upgrade python's sqlite", cause=e)
Example #26
    def __init__(self, _file):
        """
        file - USES FILE FOR PERSISTENCE
        """
        self.file = File.new_instance(_file)
        self.lock = Lock("lock for persistent queue using file " +
                         self.file.name)
        self.please_stop = Signal()
        self.db = Dict()
        self.pending = []

        if self.file.exists:
            for line in self.file:
                try:
                    delta = convert.json2value(line)
                    apply_delta(self.db, delta)
                except:
                    pass
            if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
                self.db.status.start = 0
            self.start = self.db.status.start

            # SCRUB LOST VALUES
            lost = 0
            for k in self.db.keys():
                try:
                    if k != "status" and int(k) < self.start:
                        self.db[k] = None
                        lost += 1
                except Exception:
                    pass  # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
            if lost:
                Log.warning("queue file had {{num}} items lost", num=lost)

            if DEBUG:
                Log.note("Persistent queue {{name}} found with {{num}} items",
                         name=self.file.abspath,
                         num=len(self))
        else:
            self.db.status = Dict(start=0, end=0)
            self.start = self.db.status.start
            if DEBUG:
                Log.note("New persistent queue {{name}}",
                         name=self.file.abspath)
Example #27
    def __init__(self, _file):
        """
        file - USES FILE FOR PERSISTENCE
        """
        self.file = File.new_instance(_file)
        self.lock = Lock("lock for persistent queue using file " + self.file.name)
        self.please_stop = Signal()
        self.db = Dict()
        self.pending = []

        if self.file.exists:
            for line in self.file:
                try:
                    delta = convert.json2value(line)
                    apply_delta(self.db, delta)
                except:
                    pass
            if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
                self.db.status.start = 0
            self.start = self.db.status.start

            # SCRUB LOST VALUES
            lost = 0
            for k in self.db.keys():
                try:
                    if k!="status" and int(k) < self.start:
                        self.db[k] = None
                        lost += 1
                except Exception:
                    pass  # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
            if lost:
                Log.warning("queue file had {{num}} items lost",  num= lost)

            if DEBUG:
                Log.note("Persistent queue {{name}} found with {{num}} items", name=self.file.abspath, num=len(self))
        else:
            self.db.status = Dict(
                start=0,
                end=0
            )
            self.start = self.db.status.start
            if DEBUG:
                Log.note("New persistent queue {{name}}", name=self.file.abspath)
Example #28
def _get_attr(obj, path):
    if not path:
        return obj

    attr_name = path[0]

    if isinstance(obj, ModuleType):
        if attr_name in obj.__dict__:
            return _get_attr(obj.__dict__[attr_name], path[1:])
        elif attr_name in dir(obj):
            return _get_attr(obj[attr_name], path[1:])

        # TRY FILESYSTEM
        from pyLibrary.env.files import File

        if File.new_instance(File(obj.__file__).parent, attr_name).set_extension("py").exists:
            try:
                # THIS CASE IS WHEN THE __init__.py DOES NOT IMPORT THE SUBDIR FILE
                # WE CAN STILL PUT THE PATH TO THE FILE IN THE from CLAUSE
                if len(path) == 1:
                    # GET MODULE OBJECT
                    output = __import__(obj.__name__ + "." + attr_name, globals(), locals(), [path[0]], 0)
                    return output
                else:
                    # GET VARIABLE IN MODULE
                    output = __import__(obj.__name__ + "." + attr_name, globals(), locals(), [path[1]], 0)
                    return _get_attr(output, path[1:])
            except Exception, e:
                pass

        # TRY A CASE-INSENSITIVE MATCH
        attr_name = lower_match(attr_name, dir(obj))
        if not attr_name:
            from pyLibrary.debugs.logs import Log

            Log.error(PATH_NOT_FOUND)
        elif len(attr_name) > 1:
            from pyLibrary.debugs.logs import Log

            Log.error(AMBIGUOUS_PATH_FOUND + " {{paths}}", paths=attr_name)
        else:
            return _get_attr(obj[attr_name[0]], path[1:])
Example #29
def main():
    try:
        config = startup.read_settings(defs=[{
            "name": ["--file"],
            "help": "file to save backup",
            "type": str,
            "dest": "file",
            "required": True
        }])
        constants.set(config.constants)
        Log.start(config.debug)

        sq = elasticsearch.Index(settings=config.saved_queries)
        result = sq.search({"query": {"match_all": {}}, "size": 200000})

        File(config.args.file).write("".join(
            map(convert.value2json, result.hits.hits)))

    except Exception, e:
        Log.error("Problem with etl", e)
Example #30
def write(profile_settings):
    from pyLibrary import convert
    from pyLibrary.env.files import File

    profs = list(profiles.values())
    for p in profs:
        p.stats = p.stats.end()

    stats = [
        {
            "description": p.description,
            "num_calls": p.stats.count,
            "total_time": p.stats.count * p.stats.mean,
            "total_time_per_call": p.stats.mean,
        }
        for p in profs
        if p.stats.count > 0
    ]
    stats_file = File(profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
    if stats:
        stats_file.write(convert.list2tab(stats))
    else:
        stats_file.write("<no profiles>")

    stats_file2 = File(
        profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_series_%Y%m%d_%H%M%S")
    )
    if not profs:
        return

    max_samples = MAX([len(p.samples) for p in profs if p.samples])
    if not max_samples:
        return

    r = range(max_samples)
    profs.insert(0, Dict(description="index", samples=r))
    stats = [{p.description: wrap(p.samples)[i] for p in profs if p.samples} for i in r]
    if stats:
        stats_file2.write(convert.list2tab(stats))
Example #31
class TextLog_usingFile(TextLog):
    def __init__(self, file):
        assert file

        from pyLibrary.env.files import File

        self.file = File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock = Lock("file lock for logging")

    def write(self, template, params):
        with self.file_lock:
            self.file.append(expand_template(template, params))
Example #32
class TextLog_usingFile(TextLog):
    def __init__(self, file):
        assert file

        from pyLibrary.env.files import File

        self.file = File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock = Lock("file lock for logging")

    def write(self, template, params):
        with self.file_lock:
            self.file.append(expand_template(template, params))
Example #33
    def execute_file(self, filename, param=None):
        content = File(filename).read()
        self.execute(content, param)
Example #34
def main():
    global BATCH_SIZE

    current_time = Date.now()
    time_file = File(config.last_replication_time)

    # SYNCH WITH source ES INDEX
    source = elasticsearch.Index(config.source)
    destination = elasticsearch.Cluster(config.destination).get_or_create_index(config.destination)

    # GET LAST UPDATED
    if config.since != None:
        last_updated = Date(config.since).unix
    else:
        last_updated = get_last_updated(destination)

    if config.batch_size:
        BATCH_SIZE = config.batch_size

    Log.note("updating records with {{primary_field}}>={{last_updated}}", last_updated=last_updated,
             primary_field=config.primary_field)

    please_stop = Signal()
    done = Signal()

    def worker(please_stop):
        pending = Queue("pending ids", max=BATCH_SIZE*3, silent=False)

        pending_thread = Thread.run(
            "get pending",
            get_pending,
            source=source,
            since=last_updated,
            pending_bugs=pending,
            please_stop=please_stop
        )
        diff_thread = Thread.run(
            "diff",
            diff,
            source,
            destination,
            pending,
            please_stop=please_stop
        )
        replication_thread = Thread.run(
            "replication",
            replicate,
            source,
            destination,
            pending,
            config.fix,
            please_stop=please_stop
        )
        pending_thread.join()
        diff_thread.join()
        pending.add(Thread.STOP)
        replication_thread.join()
        done.go()
        please_stop.go()

    Thread.run("wait for replication to finish", worker, please_stop=please_stop)
    Thread.wait_for_shutdown_signal(please_stop=please_stop)

    if done:
        Log.note("done all")
        # RECORD LAST UPDATED, IF WE DID NOT CANCEL OUT
        time_file.write(unicode(current_time.milli))
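The worker wires the stages together through a bounded queue and shuts the consumer down by pushing a STOP sentinel only after the producers have joined. The same shape with the standard library; the stage bodies are placeholders, not the real replication code:

import threading
try:
    import queue            # Python 3
except ImportError:
    import Queue as queue   # Python 2

STOP = object()  # sentinel meaning "no more work"

def producer(q):
    for item in range(10):  # placeholder for "get pending ids"
        q.put(item)

def consumer(q):
    while True:
        item = q.get()
        if item is STOP:
            break
        # placeholder for "replicate one batch"

q = queue.Queue(maxsize=30)
p = threading.Thread(target=producer, args=(q,))
c = threading.Thread(target=consumer, args=(q,))
p.start()
c.start()
p.join()       # producer finished...
q.put(STOP)    # ...so tell the consumer to stop
c.join()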
Example #35
                        finally:
                            signal.go()
                    else:
                        try:
                            self.db.execute(command)
                        except Exception, e:
                            e = Except.wrap(e)
                            e.cause = Except(
                                type=ERROR,
                                template="Bad call to Sqlite",
                                trace=trace
                            )
                            Log.warning("Failure to execute", cause=e)

        except Exception, e:
            Log.error("Problem with sql thread", e)
        finally:
            self.db.close()


try:
    import sys

    sqlite_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
    python_dll = File("pyLibrary/vendor/sqlite/sqlite3.dll")
    if python_dll.read_bytes() != sqlite_dll.read_bytes():
        backup = sqlite_dll.backup()
        File.copy(python_dll, sqlite_dll)
except Exception, e:
    Log.warning("could not upgrade python's sqlite", cause=e)
Example #36
def main():
    """
    CLEAR OUT KEYS FROM BUCKET BY RANGE, OR BY FILE
    """
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--bucket"],
            "help": "bucket to reprocess",
            "type": str,
            "dest": "bucket",
            "required": True
        }, {
            "name": ["--begin", "--start"],
            "help": "lowest key (or prefix) to reprocess",
            "type": str,
            "dest": "start",
            "default": "1",
            "required": False
        }, {
            "name": ["--end", "--stop"],
            "help": "highest key (or prefix) to reprocess",
            "type": str,
            "dest": "end",
            "default": None,
            "required": False
        }, {
            "name": ["--file"],
            "help": "path to file with CR-delimited prefix list",
            "type": str,
            "dest": "file",
            "default": None,
            "required": False
        }])
        Log.start(settings.debug)

        with aws.Queue(settings.work_queue) as work_queue:
            source = Connection(settings.aws).get_bucket(settings.args.bucket)

            if settings.args.file:
                now = Date.now()
                for prefix in File(settings.args.file):
                    all_keys = source.keys(prefix=key_prefix(prefix))
                    for k in all_keys:
                        Log.note("Adding {{key}}", key=k)
                        work_queue.add({
                            "bucket": settings.args.bucket,
                            "key": k,
                            "timestamp": now.unix,
                            "date/time": now.format()
                        })
                return

            if settings.args.end and settings.args.start:
                up_to = str(int(settings.args.end) - 1)
                prefix = strings.common_prefix(settings.args.start, up_to)
            else:
                prefix = None
            start = Version(settings.args.start)
            end = Version(settings.args.end)

            all_keys = source.keys(prefix=prefix)
            with Timer("filtering {{num}} keys", {"num": len(all_keys)}):
                all_keys = [(k, Version(k)) for k in all_keys
                            if k.find("None") == -1]
                all_keys = [(k, p) for k, p in all_keys if start <= p < end]
            with Timer("sorting {{num}} keys", {"num": len(all_keys)}):
                all_keys = qb.sort(all_keys, 1)
            for k, p in all_keys:
                Log.note("Adding {{key}}", key=k)
                now = Date.now()
                work_queue.add({
                    "bucket": settings.args.bucket,
                    "key": k,
                    "timestamp": now.unix,
                    "date/time": now.format()
                })

    except Exception, e:
        Log.error("Problem with etl", e)
Example #37
def make_test_instance(name, settings):
    if settings.filename:
        File(settings.filename).delete()
    return open_test_instance(name, settings)
Example #38
from pyLibrary import convert
from pyLibrary.debugs import startup
from pyLibrary.maths.randoms import Random
from pyLibrary.sql.mysql import MySQL
from pyLibrary.env.files import File
from pyLibrary.debugs.logs import Log
from pyLibrary.queries import qb
from pyLibrary.strings import between
from pyLibrary.dot import coalesce, wrap
from pyLibrary.thread.multithread import Multithread
from pyLibrary.times.timer import Timer

DEBUG = True

TEMPLATE_FILE = File(
    "C:/Users/klahnakoski/git/datazilla-alerts/tests/resources/hg/changeset_nofiles.template"
)


def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url,
                 File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
Example #39
    assert settings["class"]

    # IMPORT MODULE FOR HANDLER
    path = settings["class"].split(".")
    class_name = path[-1]
    path = ".".join(path[:-1])
    constructor = None
    try:
        temp = __import__(path, globals(), locals(), [class_name], -1)
        constructor = object.__getattribute__(temp, class_name)
    except Exception, e:
        if settings.stream and not constructor:
            # PROVIDE A DEFAULT STREAM HANDLER
            constructor = TextLog_usingThreadedStream
        else:
            Log.error("Can not find class {{class}}", {"class": path}, cause=e)

    # IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS
    if settings.filename:
        from pyLibrary.env.files import File

        f = File(settings.filename)
        if not f.parent.exists:
            f.parent.create()

    settings['class'] = None
    params = unwrap(settings)
    log_instance = constructor(**params)
    return log_instance

Example #40
def get_changesets(date_range=None, revision_range=None, repo=None):
    # GET ALL CHANGESET INFO
    args = [
        "hg",
        "log",
        "--cwd",
        File(repo.directory).filename,
        "-v",
        # "-p",   # TO GET PATCH CONTENTS
        "--style",
        TEMPLATE_FILE.filename
    ]

    if date_range is not None:
        if date_range.max == None:
            if date_range.min == None:
                drange = ">0 0"
            else:
                drange = ">" + unicode(convert.datetime2unix(
                    date_range.min)) + " 0"
        else:
            if date_range.min == None:
                drange = "<" + unicode(
                    convert.datetime2unix(date_range.max) - 1) + " 0"
            else:
                drange = unicode(convert.datetime2unix(
                    date_range.min)) + " 0 to " + unicode(
                        convert.datetime2unix(date_range.max) - 1) + " 0"

        args.extend(["--date", drange])

    if revision_range is not None:
        args.extend(
            ["-r",
             str(revision_range.min) + ":" + str(revision_range.max)])

    proc = subprocess.Popen(args,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            bufsize=-1)

    def iterator():
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    if line == '':
                        proc.wait()
                        if proc.returncode:
                            Log.error(
                                "Unable to pull hg log: return code {{return_code}}",
                                {"return_code": proc.returncode})
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)

                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                (date, node, rev, author, branches, files, file_adds,
                 file_dels, p1rev, p1node, parents, children, tags,
                 desc) = (urllib.unquote(c) for c in line.split("\t"))

                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                files = set(files.split("\n")) - {""}
                doc = {
                    "repos": repo.name,
                    "date": convert.unix2datetime(convert.value2number(date.split(" ")[0])),
                    "node": node,
                    "revision": rev,
                    "author": author,
                    "branches": set(branches.split("\n")) - {""},
                    "file_changes": files - file_adds - file_dels - {""},
                    "file_adds": file_adds,
                    "file_dels": file_dels,
                    "parents": set(parents.split("\n")) - {""} | {p1rev + ":" + p1node},
                    "children": set(children.split("\n")) - {""},
                    "tags": set(tags.split("\n")) - {""},
                    "description": desc
                }
                doc = elasticsearch.scrub(doc)
                yield doc
        except Exception, e:
            if isinstance(
                    e, ValueError) and e.message.startswith("need more than "):
                Log.error("Problem iterating through log ({{message}})",
                          {"message": line}, e)

            Log.error("Problem iterating through log", e)
Example #41
def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url,
                 File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error(
                            "Can not clone {{repos.url}}, because {{problem}}",
                            {
                                "repos": repo,
                                "problem": line
                            })
                    if line == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()

    else:
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd",
                 File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error(
                    "Problem pulling repos, try \"hg recover\"\n{{reason|indent}}",
                    {"reason": output})
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}",
                          {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}",
                     {"pull_results": output})
Example #42
from flask import Flask, Response

from active_data import record_request, cors_wrapper
from active_data.actions import save_query
from active_data.actions.json import get_raw_json
from active_data.actions.query import query
from active_data.actions.save_query import SaveQueries, find_query
from active_data.actions.static import download
from pyLibrary import convert
from pyLibrary.debugs import constants, startup
from pyLibrary.debugs.logs import Log
from pyLibrary.env import elasticsearch
from pyLibrary.env.files import File
from pyLibrary.queries import containers
from pyLibrary.queries.meta import FromESMetadata
from pyLibrary.thread.threads import Thread

OVERVIEW = File("active_data/public/index.html").read()

app = Flask(__name__)
config = None


@app.route('/', defaults={'path': ''}, methods=['OPTIONS', 'HEAD'])
@app.route('/<path:path>', methods=['OPTIONS', 'HEAD'])
@cors_wrapper
def _head(path):
    return Response(b'', status=200)


app.add_url_rule('/tools/<path:filename>', None, download)
app.add_url_rule('/find/<path:hash>', None, find_query)
app.add_url_rule('/query',