def setup_ssl(): config.flask.ssl_context = None if not config.flask.ssl_context: return ssl_flask = config.flask.copy() ssl_flask.debug = False ssl_flask.port = 443 if isinstance(config.flask.ssl_context, Mapping): # EXPECTED PEM ENCODED FILE NAMES # `load_cert_chain` REQUIRES CONCATENATED LIST OF CERTS tempfile = NamedTemporaryFile(delete=False, suffix=".pem") try: tempfile.write( File(ssl_flask.ssl_context.certificate_file).read_bytes()) if ssl_flask.ssl_context.certificate_chain_file: tempfile.write( File(ssl_flask.ssl_context.certificate_chain_file). read_bytes()) tempfile.flush() tempfile.close() context = SSLContext(PROTOCOL_SSLv23) context.load_cert_chain( tempfile.name, keyfile=File(ssl_flask.ssl_context.privatekey_file).abspath) ssl_flask.ssl_context = context except Exception, e: Log.error("Could not handle ssl context construction", cause=e) finally:
def _run_remote(command, name):
    """
    Wrap `command` in a small shell script, upload it to the remote host,
    and launch it detached (nohup, I/O redirected), waiting 20s for startup.
    """
    local_script = "./results/temp/" + name + ".sh"
    remote_script = "" + name + ".sh"

    # script runs the command in the background, then sleeps so the
    # fabric connection stays open long enough for it to start
    File(local_script).write("nohup " + command + " >& /dev/null < /dev/null &\nsleep 20")

    put(local_script, remote_script)
    run("chmod u+x " + name + ".sh")
    run("./" + name + ".sh")
def path2fullpath(path):
    """
    Convert a local file path to a file:/// URL, appending the log divider
    as a fragment (merged into any fragment already present).
    """
    url = "file:///" + File(path).abspath.replace("\\", "/")
    if "#" in url:
        # an existing fragment: inject the log divider before it
        return url.replace("#", "#log=" + LOG_DIV + "&")
    return url + "#log=" + LOG_DIV
def read_settings(filename=None, defs=None):
    """
    Read application settings from a JSON file.

    :param filename: explicit path to the settings file; when None, the path
        is taken from the --settings command-line argument (default
        ./settings.json)
    :param defs: extra argparse argument definitions
    :return: settings structure, with parsed command-line args on `.args`
    """
    # READ SETTINGS
    if filename:
        settings_file = File(filename)
        if not settings_file.exists:
            # FIXED: message was garbled ("Can not file ... {(unknown)}");
            # now a valid template naming the missing file
            Log.error("Can not find settings file {{filename}}", {
                "filename": settings_file.abspath
            })
        settings = ref.get("file:///" + settings_file.abspath)
        if defs:
            settings.args = argparse(defs)
        return settings
    else:
        defs = listwrap(defs)
        defs.append({
            "name": ["--settings", "--settings-file", "--settings_file"],
            "help": "path to JSON file with settings",
            "type": str,
            "dest": "filename",
            "default": "./settings.json",
            "required": False
        })
        args = argparse(defs)
        # NOTE(review): this branch uses "file://" while the branch above
        # uses "file:///" — confirm both resolve identically in ref.get
        settings = ref.get("file://" + args.filename.replace(os.sep, "/"))
        settings.args = args
        return settings
def __init__(self, settings):
    """
    Fake index backed by a local JSON file named in settings.filename;
    host/index are placeholders.
    """
    self.settings = wrap({"host": "fake", "index": "fake"})
    self.filename = settings.filename
    try:
        content = File(self.filename).read()
        self.data = convert.json2value(content)
    except IOError:
        # no backing file yet: start with empty data
        self.data = Dict()
def __init__(self, filename, host="fake", index="fake", settings=None):
    # NOTE(review): the `filename`, `host` and `index` parameters are never
    # read — the backing file path comes from settings.filename. Confirm the
    # extra parameters exist only for signature compatibility with callers.
    self.settings = settings
    self.filename = settings.filename
    try:
        # load any existing data from the backing JSON file
        self.data = convert.json2value(File(self.filename).read())
    except Exception:
        # missing or unreadable file: start with empty data
        self.data = Dict()
def get_file(ref, url):
    """
    Resolve `ref.path` (expanding "~" and relative paths against `url.path`)
    and read the referenced file's content.

    :param ref: URL-like object whose .path is mutated to its resolved form
    :param url: URL-like object supplying the base path for relative refs
    """
    from pyLibrary.env.files import File

    if ref.path.startswith("~"):
        # EXPAND "~" TO THE USER'S HOME DIRECTORY (normalized to "/" separators)
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]
        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            # count leading dots: each "." walks one level up from url.path
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1
            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    # on Windows, drop the leading "/" and switch to backslashes
    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        if DEBUG:
            _Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception as e:  # FIXED: Python-3-compatible except syntax
        content = None
        # FIXED: template was garbled ("{(unknown)}"); now matches the
        # `filename=` keyword argument
        _Log.error("Could not read file {{filename}}", filename=path, cause=e)
def _start_es():
    """
    Upload a launcher script to the remote host and use it to start
    elasticsearch detached from the session.
    """
    launcher = "nohup ./bin/elasticsearch >& /dev/null < /dev/null &\nsleep 20"
    File("./results/temp/start_es.sh").write(launcher)

    # stage the script in the ec2-user home directory
    with cd("/home/ec2-user/"):
        put("./results/temp/start_es.sh", "start_es.sh")
        run("chmod u+x start_es.sh")

    # run it from the elasticsearch install directory, as root
    with cd("/usr/local/elasticsearch/"):
        sudo("/home/ec2-user/start_es.sh")
def __init__(self, file):
    """
    File-backed log target: any existing log file is backed up and removed
    so each run starts with a fresh file. Writes are serialized by a lock.
    """
    assert file
    from pyLibrary.env.files import File

    self.file = File(file)
    if self.file.exists:
        # preserve the previous run's log before starting over
        self.file.backup()
        self.file.delete()
    self.file_lock = Lock("file lock for logging")
def extend(self, records):
    """
    JUST SO WE MODEL A Queue
    """
    # index incoming records by id, then merge into the in-memory data
    by_id = {}
    for rec in records:
        by_id[rec["id"]] = rec["value"]
    unwrap(self.data).update(by_id)

    # persist the whole dataset back to the backing file
    File(self.filename).write(convert.value2json(self.data, pretty=True))
    Log.note("{{num}} documents added", num=len(by_id))
def test_51586(self):
    """ETL the gzipped unittest log for bug 51586 with profiling enabled."""
    # trace + cprofile so the run leaves a profile file behind
    Log.start({
        "trace": True,
        "cprofile": {
            "enabled": True,
            "filename": "tests/results/test_51586_profile.tab"
        }
    })
    key = "51586_5124145.52"
    gz_content = File("tests/resources/51586_5124145.52.json.gz").read_bytes()
    fake_source = Dict(read_lines=lambda: GzipLines(gz_content))

    with Accumulator(File("tests/results/51586_5124145.52.json")) as destination:
        with Timer("ETL file"):
            process_unittest_in_s3(key, fake_source, destination, please_stop=None)
    Log.stop()
def _upgrade():
    """
    Replace the interpreter's sqlite3.dll with the copy bundled in
    pyLibrary (backing up the original first). Best-effort: failures are
    logged as warnings, not raised.
    """
    global _upgraded
    _upgraded = True
    try:
        import sys

        sqlite_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
        python_dll = File("pyLibrary/vendor/sqlite/sqlite3.dll")
        if python_dll.read_bytes() != sqlite_dll.read_bytes():
            # FIXED: return value was bound to an unused local; the backup
            # side effect is all that matters
            sqlite_dll.backup()
            File.copy(python_dll, sqlite_dll)
    except Exception as e:  # FIXED: Python-3-compatible except syntax
        Log.warning("could not upgrade python's sqlite", cause=e)
def write(profile_settings):
    """
    Write accumulated profile statistics to two timestamped tab-delimited
    files: a per-profile summary and a per-sample series.
    """
    from pyLibrary import convert
    from pyLibrary.env.files import File

    profs = list(profiles.values())
    for p in profs:
        p.stats = p.stats.end()

    # SUMMARY: one row per profile that was actually called
    summary = [
        {
            "description": p.description,
            "num_calls": p.stats.count,
            "total_time": p.stats.count * p.stats.mean,
            "total_time_per_call": p.stats.mean
        }
        for p in profs
        if p.stats.count > 0
    ]
    stats_file = File(
        profile_settings.filename,
        suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S")
    )
    if summary:
        stats_file.write(convert.list2tab(summary))
    else:
        stats_file.write("<no profiles>")

    # SERIES: one row per sample index, one column per profile
    stats_file2 = File(
        profile_settings.filename,
        suffix=convert.datetime2string(datetime.now(), "_series_%Y%m%d_%H%M%S")
    )
    if not profs:
        return
    max_samples = MAX([len(p.samples) for p in profs if p.samples])
    if not max_samples:
        return
    r = range(max_samples)
    # prepend a synthetic "index" profile so the first column is the row number
    profs.insert(0, Dict(description="index", samples=r))
    series = [
        {p.description: wrap(p.samples)[i] for p in profs if p.samples}
        for i in r
    ]
    if series:
        stats_file2.write(convert.list2tab(series))
def _worker(self, please_stop):
    """
    Bind this worker to a sqlite connection (the shared canonical one if it
    exists, otherwise a fresh in-memory db) and try to load the extension
    math functions. Missing extensions are tolerated with a warning.
    """
    if Sqlite.canonical:
        self.db = Sqlite.canonical
    else:
        self.db = sqlite3.connect(':memory:')
    try:
        full_path = File(
            "pyLibrary/vendor/sqlite/libsqlitefunctions.so").abspath
        # self.db.execute("SELECT sqlite3_enable_load_extension(1)")
        self.db.enable_load_extension(True)
        self.db.execute("SELECT load_extension('" + full_path + "')")
    except Exception as e:  # FIXED: Python-3-compatible except syntax
        Log.warning(
            "loading sqlite extension functions failed, doing without. (no SQRT for you!)",
            cause=e)
def execute_file(filename, host, username, password, schema=None, param=None, ignore_errors=False, settings=None):
    # MySQLdb provides no way to execute an entire SQL file in bulk, so we
    # have to shell out to the commandline client.
    sql = File(filename).read()
    if ignore_errors:
        try:
            MySQL.execute_sql(sql=sql, param=param, settings=settings)
        except Exception, e:
            # NOTE(review): errors are deliberately swallowed when
            # ignore_errors=True; `e` is unused — confirm nothing should be
            # logged here. Also: no visible branch executes the SQL when
            # ignore_errors is False — this view may be truncated.
            pass
def _get_attr(obj, path):
    # Recursively resolve `path` (a list of attribute names) against `obj`.
    # For modules, falls back to importing submodule files from disk, then to
    # a case-insensitive attribute match.
    # NOTE(review): indentation reconstructed from mangled source — the
    # filesystem and case-insensitive fallbacks are placed inside the
    # ModuleType branch because they use obj.__file__/obj.__name__; confirm.
    if not path:
        return obj

    attr_name = path[0]

    if isinstance(obj, ModuleType):
        if attr_name in obj.__dict__:
            return _get_attr(obj.__dict__[attr_name], path[1:])
        elif attr_name in dir(obj):
            return _get_attr(obj[attr_name], path[1:])

        # TRY FILESYSTEM
        from pyLibrary.env.files import File
        possible_error = None
        # does a sibling <attr_name>.py exist next to this module's file?
        if File.new_instance(File(obj.__file__).parent, attr_name).set_extension("py").exists:
            try:
                # THIS CASE IS WHEN THE __init__.py DOES NOT IMPORT THE SUBDIR FILE
                # WE CAN STILL PUT THE PATH TO THE FILE IN THE from CLAUSE
                if len(path) == 1:
                    # GET MODULE OBJECT
                    output = __import__(obj.__name__ + "." + attr_name, globals(), locals(), [path[0]], 0)
                    return output
                else:
                    # GET VARIABLE IN MODULE
                    output = __import__(obj.__name__ + "." + attr_name, globals(), locals(), [path[1]], 0)
                    return _get_attr(output, path[1:])
            except Exception, e:
                # remember the import failure; it becomes the cause if the
                # case-insensitive fallback below also finds nothing
                from pyLibrary.debugs.exceptions import Except
                possible_error = Except.wrap(e)

        # TRY A CASE-INSENSITIVE MATCH
        attr_name = lower_match(attr_name, dir(obj))
        if not attr_name:
            from pyLibrary.debugs.logs import Log
            Log.warning(PATH_NOT_FOUND + ". Returning None.", cause=possible_error)
        elif len(attr_name) > 1:
            # more than one attribute differs only by case: refuse to guess
            from pyLibrary.debugs.logs import Log
            Log.error(AMBIGUOUS_PATH_FOUND + " {{paths}}", paths=attr_name)
        else:
            return _get_attr(obj[attr_name[0]], path[1:])
def main():
    """
    Dump all saved queries from the configured elasticsearch index to the
    file named by --file.
    """
    try:
        config = startup.read_settings(defs=[{
            "name": ["--file"],
            "help": "file to save backup",
            "type": str,
            "dest": "file",
            "required": True
        }])
        constants.set(config.constants)
        Log.start(config.debug)

        sq = elasticsearch.Index(settings=config.saved_queries)
        result = sq.search({"query": {"match_all": {}}, "size": 200000})

        # NOTE(review): convert.json2value parses JSON text into objects,
        # which cannot be string-joined — this looks like it should be
        # convert.value2json; confirm against the hits' actual type
        File(config.args.file).write("".join(
            map(convert.json2value, result.hits.hits)))
    except Exception as e:  # FIXED: Python-3-compatible except syntax
        Log.error("Problem with etl", e)
def write_profile(profile_settings, stats):
    """
    Merge a list of cProfile stats objects and write per-call-site rows to a
    timestamped tab-delimited file.
    """
    from pyLibrary import convert
    from pyLibrary.env.files import File

    # fold all stats into the first one
    acc = stats[0]
    for s in stats[1:]:
        acc.add(s)

    rows = []
    for f, d in acc.stats.iteritems():
        rows.append({
            "num_calls": d[1],
            "self_time": d[2],
            "total_time": d[3],
            "self_time_per_call": d[2] / d[1],
            "total_time_per_call": d[3] / d[1],
            # "~" marks builtins (no source file)
            "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
            "line": f[1],
            "method": f[2].lstrip("<").rstrip(">")
        })

    stats_file = File(
        profile_settings.filename,
        suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S")
    )
    stats_file.write(convert.list2tab(rows))
def main():
    """
    CLEAR OUT KEYS FROM BUCKET BY RANGE, OR BY FILE

    Queues every matching bucket key onto the work queue for reprocessing,
    either from a prefix file (--file) or a [start, end) version range.
    """
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--bucket"],
            "help": "bucket to reprocess",
            "type": str,
            "dest": "bucket",
            "required": True
        }, {
            "name": ["--begin", "--start"],
            "help": "lowest key (or prefix) to reprocess",
            "type": str,
            "dest": "start",
            "default": "1",
            "required": False
        }, {
            "name": ["--end", "--stop"],
            "help": "highest key (or prefix) to reprocess",
            "type": str,
            "dest": "end",
            "default": None,
            "required": False
        }, {
            "name": ["--file"],
            "help": "path to file with CR-delimited prefix list",
            "type": str,
            "dest": "file",
            "default": None,
            "required": False
        }])
        Log.start(settings.debug)

        with aws.Queue(settings.work_queue) as work_queue:
            source = Connection(settings.aws).get_bucket(settings.args.bucket)

            if settings.args.file:
                # MODE 1: queue every key under each prefix listed in the file
                now = Date.now()
                for prefix in File(settings.args.file):
                    all_keys = source.keys(prefix=key_prefix(prefix))
                    for k in all_keys:
                        Log.note("Adding {{key}}", key=k)
                        work_queue.add({
                            "bucket": settings.args.bucket,
                            "key": k,
                            "timestamp": now.unix,
                            "date/time": now.format()
                        })
                return

            # MODE 2: queue keys whose Version falls in [start, end)
            if settings.args.end and settings.args.start:
                # narrow the bucket listing with the common key prefix
                up_to = str(int(settings.args.end) - 1)
                prefix = strings.common_prefix(settings.args.start, up_to)
            else:
                prefix = None
            start = Version(settings.args.start)
            end = Version(settings.args.end)

            all_keys = source.keys(prefix=prefix)
            with Timer("filtering {{num}} keys", {"num": len(all_keys)}):
                all_keys = [(k, Version(k)) for k in all_keys
                            if k.find("None") == -1]
                all_keys = [(k, p) for k, p in all_keys if start <= p < end]
            with Timer("sorting {{num}} keys", {"num": len(all_keys)}):
                all_keys = qb.sort(all_keys, 1)
            for k, p in all_keys:
                Log.note("Adding {{key}}", key=k)
                now = Date.now()
                work_queue.add({
                    "bucket": settings.args.bucket,
                    "key": k,
                    "timestamp": now.unix,
                    "date/time": now.format()
                })
    except Exception as e:  # FIXED: Python-3-compatible except syntax
        Log.error("Problem with etl", e)
def make_test_instance(name, settings):
    """
    Open a test instance on a clean slate: any existing backing file is
    removed first.
    """
    if settings.filename:
        File(settings.filename).delete()
    return open_test_instance(name, settings)
from pyLibrary import convert from pyLibrary.debugs import startup from pyLibrary.maths.randoms import Random from pyLibrary.sql.mysql import MySQL from pyLibrary.env.files import File from pyLibrary.debugs.logs import Log from pyLibrary.queries import qb from pyLibrary.strings import between from pyLibrary.dot import coalesce, wrap from pyLibrary.thread.multithread import Multithread from pyLibrary.times.timer import Timer DEBUG = True TEMPLATE_FILE = File( "C:/Users/klahnakoski/git/datazilla-alerts/tests/resources/hg/changeset_nofiles.template" ) def pull_repo(repo): if not File(os.path.join(repo.directory, ".hg")).exists: File(repo.directory).delete() # REPO DOES NOT EXIST, CLONE IT with Timer("Clone hg log for {{name}}", {"name": repo.name}): proc = subprocess.Popen( ["hg", "clone", repo.url, File(repo.directory).filename], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
from active_data import record_request, cors_wrapper from active_data.actions import save_query from active_data.actions.json import get_raw_json from active_data.actions.query import query from active_data.actions.save_query import SaveQueries, find_query from active_data.actions.static import download from pyLibrary import convert from pyLibrary.debugs import constants, startup from pyLibrary.debugs.logs import Log from pyLibrary.env import elasticsearch from pyLibrary.env.files import File from pyLibrary.queries import containers from pyLibrary.queries.meta import FromESMetadata from pyLibrary.thread.threads import Thread OVERVIEW = File("active_data/public/index.html").read() app = Flask(__name__) config = None @app.route('/', defaults={'path': ''}, methods=['OPTIONS', 'HEAD']) @app.route('/<path:path>', methods=['OPTIONS', 'HEAD']) @cors_wrapper def _head(path): return Response(b'', status=200) app.add_url_rule('/tools/<path:filename>', None, download) app.add_url_rule('/find/<path:hash>', None, find_query) app.add_url_rule('/query',
assert settings["class"] # IMPORT MODULE FOR HANDLER path = settings["class"].split(".") class_name = path[-1] path = ".".join(path[:-1]) constructor = None try: temp = __import__(path, globals(), locals(), [class_name], -1) constructor = object.__getattribute__(temp, class_name) except Exception, e: if settings.stream and not constructor: # PROVIDE A DEFAULT STREAM HANLDER constructor = TextLog_usingThreadedStream else: Log.error("Can not find class {{class}}", {"class": path}, cause=e) # IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS if settings.filename: from pyLibrary.env.files import File f = File(settings.filename) if not f.parent.exists: f.parent.create() settings['class'] = None params = unwrap(settings) log_instance = constructor(**params) return log_instance
def get_changesets(date_range=None, revision_range=None, repo=None): # GET ALL CHANGESET INFO args = [ "hg", "log", "--cwd", File(repo.directory).filename, "-v", # "-p", # TO GET PATCH CONTENTS "--style", TEMPLATE_FILE.filename ] if date_range is not None: if date_range.max == None: if date_range.min == None: drange = ">0 0" else: drange = ">" + unicode(convert.datetime2unix( date_range.min)) + " 0" else: if date_range.min == None: drange = "<" + unicode( convert.datetime2unix(date_range.max) - 1) + " 0" else: drange = unicode(convert.datetime2unix( date_range.min)) + " 0 to " + unicode( convert.datetime2unix(date_range.max) - 1) + " 0" args.extend(["--date", drange]) if revision_range is not None: args.extend( ["-r", str(revision_range.min) + ":" + str(revision_range.max)]) proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=-1) def iterator(): try: while True: try: line = proc.stdout.readline() if line == '': proc.wait() if proc.returncode: Log.error( "Unable to pull hg log: return code {{return_code}}", {"return_code": proc.returncode}) return except Exception, e: Log.error("Problem getting another line", e) if line.strip() == "": continue Log.note(line) # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n" # branch = "{branch}%0A" # parent = "{parent}%0A" # tag = "{tag}%0A" # child = "{child}%0A" (date, node, rev, author, branches, files, file_adds, file_dels, p1rev, p1node, parents, children, tags, desc) = (urllib.unquote(c) for c in line.split("\t")) file_adds = set(file_adds.split("\n")) - {""} file_dels = set(file_dels.split("\n")) - {""} files = set(files.split("\n")) - set() doc = { "repos": repo.name, "date": convert.unix2datetime( convert.value2number(date.split(" ")[0])), "node": node, "revision": rev, "author": author, "branches": set(branches.split("\n")) - {""}, "file_changes": files 
- file_adds - file_dels - {""}, "file_adds": file_adds, "file_dels": file_dels, "parents": set(parents.split("\n")) - {""} | {p1rev + ":" + p1node}, "children": set(children.split("\n")) - {""}, "tags": set(tags.split("\n")) - {""}, "description": desc } doc = elasticsearch.scrub(doc) yield doc except Exception, e: if isinstance( e, ValueError) and e.message.startswith("need more than "): Log.error("Problem iterating through log ({{message}})", {"message": line}, e) Log.error("Problem iterating through log", e)
def pull_repo(repo):
    # Ensure a local mercurial clone of `repo` exists and is up to date:
    # clone when the .hg directory is missing, otherwise pull, recovering
    # from a missing default path by deleting and re-cloning.
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()
        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url, File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
            try:
                # stream clone progress; stop at EOF
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error(
                            "Can not clone {{repos.url}}, because {{problem}}",
                            {
                                "repos": repo,
                                "problem": line
                            })
                    if line == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()
    else:
        # make sure the repo has a default remote path so "hg pull" works
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd", File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                # broken remote config: rebuild the clone from scratch
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error(
                    "Problem pulling repos, try \"hg recover\"\n{{reason|indent}}",
                    {"reason": output})
                # NOTE(review): if Log.error raises (as its name suggests),
                # the delete/retry below is unreachable — confirm intent
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}",
                          {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}",
                     {"pull_results": output})
def execute_file(self, filename, param=None):
    """Read the SQL in `filename` and execute it with optional parameters."""
    sql = File(filename).read()
    self.execute(sql, param)
from pyLibrary.debugs.exceptions import Except from pyLibrary.debugs.logs import Log from pyLibrary.debugs.profiles import CProfiler from pyLibrary.dot import coalesce, join_field, split_field, wrap, listwrap from pyLibrary.env.files import File from pyLibrary.maths import Math from pyLibrary.queries import jx, meta, wrap_from from pyLibrary.queries.containers import Container, STRUCT from pyLibrary.queries.meta import TOO_OLD from pyLibrary.strings import expand_template from pyLibrary.thread.threads import Thread from pyLibrary.times.dates import Date from pyLibrary.times.durations import MINUTE from pyLibrary.times.timer import Timer BLANK = convert.unicode2utf8(File("active_data/public/error.html").read()) QUERY_SIZE_LIMIT = 10 * 1024 * 1024 @cors_wrapper def query(path): with CProfiler(): try: with Timer("total duration") as query_timer: preamble_timer = Timer("preamble") with preamble_timer: if flask.request.headers.get("content-length", "") in ["", "0"]: # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK return Response(BLANK, status=400,