def pull_repo(repo):
    """
    Ensure a local mercurial clone of `repo` exists and is current.

    :param repo: expected to carry .directory, .url and .name attributes
                 (project-defined structure -- confirm against callers)

    Side effects: runs `hg` as a subprocess; may delete and re-create
    repo.directory; logs progress.  NOTE(review): Log.error presumably
    raises an exception (pyLibrary convention) -- TODO confirm.
    """
    if not File(os.path.join(repo.directory, ".hg")).exists:
        # no .hg metadata: wipe any partial/non-hg content before cloning
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name":repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url, File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # merge stderr so "abort:" lines show up on stdout
                bufsize=-1
            )
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error("Can not clone {{repos.url}}, because {{problem}}", {
                            "repos": repo,
                            "problem": line
                        })
                    if line == '':  # EOF: the clone process closed its stdout
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()  # always reap the child, even when Log.error raises
    else:
        # make sure a default pull path exists so `hg pull` knows where to go
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name":repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd", File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1
            )
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                # clone is unusable: wipe it and retry from scratch
                # NOTE(review): no retry limit -- a persistent failure recurses forever
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error("Problem pulling repos, try \"hg recover\"\n{{reason|indent}}", {"reason": output})
                # NOTE(review): if Log.error raises, the recovery below never runs -- confirm intent
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}", {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}", {"pull_results": output})
def write_profile(profile_settings, stats):
    """
    Merge a list of profile accumulators and write a tab-delimited summary.

    :param profile_settings: has .filename, the base path for the output file
    :param stats: non-empty list of pstats-style accumulators; the first one
                  absorbs the rest via .add()
    """
    from pyLibrary import convert
    from pyLibrary.env.files import File

    combined = stats[0]
    for extra in stats[1:]:
        combined.add(extra)

    rows = []
    # each key is (file, line, method), each value is the pstats counts tuple
    for location, counts in combined.stats.iteritems():
        source_file = "" if location[0] == "~" else location[0]
        rows.append({
            "num_calls": counts[1],
            "self_time": counts[2],
            "total_time": counts[3],
            "self_time_per_call": counts[2] / counts[1],
            "total_time_per_call": counts[3] / counts[1],
            "file": source_file.replace("\\", "/"),
            "line": location[1],
            "method": location[2].lstrip("<").rstrip(">")
        })

    timestamp = convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S")
    File(profile_settings.filename, suffix=timestamp).write(convert.list2tab(rows))
def write_profile(profile_settings, stats):
    """
    Combine several profile accumulators into one and dump a tab-delimited
    report to a timestamped file.

    :param profile_settings: provides .filename for the output location
    :param stats: non-empty list of pstats-style accumulators
    """
    from pyLibrary import convert
    from pyLibrary.env.files import File

    merged = stats[0]
    for more in stats[1:]:
        merged.add(more)

    # func is (file, line, method); counts is the pstats tuple (cc, nc, tt, ct, ...)
    summary = [
        {
            "num_calls": counts[1],
            "self_time": counts[2],
            "total_time": counts[3],
            "self_time_per_call": counts[2] / counts[1],
            "total_time_per_call": counts[3] / counts[1],
            "file": ("" if func[0] == "~" else func[0]).replace("\\", "/"),
            "line": func[1],
            "method": func[2].lstrip("<").rstrip(">")
        }
        for func, counts in merged.stats.iteritems()
    ]

    out = File(
        profile_settings.filename,
        suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S")
    )
    out.write(convert.list2tab(summary))
def main(settings):
    """
    One-shot replication: copy records modified since the last run from the
    source ES index to the destination, then persist the new high-water mark.

    :param settings: project config object with .source, .destination and
                     .param.last_replication_time
    """
    run_started = datetime.utcnow()
    marker_file = File(settings.param.last_replication_time)

    # SYNCH WITH source ES INDEX
    src = Index(settings.source)
    dest = Cluster(settings.destination).get_or_create_index(settings.destination)

    # GET LAST UPDATED -- take the earlier of the file marker and what the
    # destination reports, with a one-hour safety overlap
    file_marker = (
        convert.milli2datetime(convert.value2int(marker_file.read()))
        if marker_file.exists
        else None
    )
    dest_marker = get_last_updated(dest) - timedelta(hours=1)
    last_updated = MIN(coalesce(file_marker, convert.milli2datetime(0)), dest_marker)
    Log.note("updating records with modified_ts>={{last_updated}}", {"last_updated": last_updated})

    todo = get_pending(src, last_updated)
    with ThreadedQueue(dest, batch_size=1000) as sink:
        replicate(src, sink, todo, last_updated)

    # RECORD LAST UPDATED
    marker_file.write(unicode(convert.datetime2milli(run_started)))
def write(profile_settings):
    """
    Dump all accumulated profiles (from the module-level `profiles` registry)
    to two timestamped files: a per-profile summary and a sample series table.

    :param profile_settings: provides .filename, the base path for both files
    """
    from pyLibrary import convert
    from pyLibrary.env.files import File

    snapshots = list(profiles.values())
    for prof in snapshots:
        prof.stats = prof.stats.end()  # finalize running statistics

    summary = []
    for prof in snapshots:
        if prof.stats.count > 0:
            summary.append({
                "description": prof.description,
                "num_calls": prof.stats.count,
                "total_time": prof.stats.count * prof.stats.mean,
                "total_time_per_call": prof.stats.mean,
            })

    summary_file = File(
        profile_settings.filename,
        suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S")
    )
    summary_file.write(convert.list2tab(summary) if summary else "<no profiles>")

    series_file = File(
        profile_settings.filename,
        suffix=convert.datetime2string(datetime.now(), "_series_%Y%m%d_%H%M%S")
    )
    if not snapshots:
        return
    longest = MAX([len(prof.samples) for prof in snapshots if prof.samples])
    if not longest:
        return

    indexes = range(longest)
    # prepend a synthetic "index" column so rows carry their sample position
    snapshots.insert(0, Dict(description="index", samples=indexes))
    series = [
        {prof.description: wrap(prof.samples)[i] for prof in snapshots if prof.samples}
        for i in indexes
    ]
    if series:
        series_file.write(convert.list2tab(series))
def write(profile_settings):
    """
    Write every profile in the module-level `profiles` registry to disk:
    a summary table plus a per-sample series file, both timestamped.

    :param profile_settings: provides .filename for the output files
    """
    from pyLibrary import convert
    from pyLibrary.env.files import File

    collected = list(profiles.values())
    for item in collected:
        item.stats = item.stats.end()  # close out the running stats

    rows = [
        {
            "description": item.description,
            "num_calls": item.stats.count,
            "total_time": item.stats.count * item.stats.mean,
            "total_time_per_call": item.stats.mean
        }
        for item in collected
        if item.stats.count > 0
    ]
    report = File(profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
    if rows:
        report.write(convert.list2tab(rows))
    else:
        report.write("<no profiles>")

    series_report = File(profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_series_%Y%m%d_%H%M%S"))
    if not collected:
        return
    sample_count = MAX([len(item.samples) for item in collected if item.samples])
    if not sample_count:
        return

    positions = range(sample_count)
    # synthetic "index" profile gives each output row its sample number
    collected.insert(0, Dict(description="index", samples=positions))
    table = [{item.description: wrap(item.samples)[i] for item in collected if item.samples} for i in positions]
    if table:
        series_report.write(convert.list2tab(table))
def pull_repo(repo):
    """
    Clone `repo.url` into `repo.directory` when no clone exists, otherwise
    pull updates into the existing clone.

    :param repo: expected to have .directory, .url and .name attributes
                 (project type -- TODO confirm against callers)

    Side effects: spawns `hg`; may delete and rebuild repo.directory; logs.
    NOTE(review): Log.error presumably raises (pyLibrary convention) -- confirm.
    """
    if not File(os.path.join(repo.directory, ".hg")).exists:
        # no .hg metadata present: clear any stale content before cloning
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url, File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # fold stderr into stdout so "abort:" is visible below
                bufsize=-1)
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error(
                            "Can not clone {{repos.url}}, because {{problem}}", {
                                "repos": repo,
                                "problem": line
                            })
                    if line == '':  # EOF from the clone process
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()  # reap the child no matter how the loop exits
    else:
        # ensure hgrc names a default path so `hg pull` has a source
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd", File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                # broken clone: remove and retry from scratch
                # NOTE(review): unbounded recursion if the failure persists
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error(
                    "Problem pulling repos, try \"hg recover\"\n{{reason|indent}}",
                    {"reason": output})
                # NOTE(review): unreachable if Log.error raises -- confirm intent
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}", {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}", {"pull_results": output})
def main():
    """
    Threaded replication driver: stream pending ids from the source ES index,
    diff and replicate them into the destination, then (on clean completion)
    persist the start time as the new replication high-water mark.

    Relies on module-level `config`, `BATCH_SIZE`, and pyLibrary threading
    primitives (Thread, Signal, Queue) -- all defined outside this block.
    """
    global BATCH_SIZE

    current_time = Date.now()
    time_file = File(config.last_replication_time)

    # SYNCH WITH source ES INDEX
    source = elasticsearch.Index(config.source)
    destination = elasticsearch.Cluster(config.destination).get_or_create_index(config.destination)

    # GET LAST UPDATED
    if config.since != None:
        # explicit override from configuration
        last_updated = Date(config.since).unix
    else:
        last_updated = get_last_updated(destination)

    if config.batch_size:
        BATCH_SIZE = config.batch_size

    Log.note("updating records with {{primary_field}}>={{last_updated}}", last_updated=last_updated, primary_field=config.primary_field)

    please_stop = Signal()
    done = Signal()

    def worker(please_stop):
        # bounded queue so the producer cannot run far ahead of the consumers
        pending = Queue("pending ids", max=BATCH_SIZE*3, silent=False)

        pending_thread = Thread.run(
            "get pending",
            get_pending,
            source=source,
            since=last_updated,
            pending_bugs=pending,
            please_stop=please_stop
        )
        diff_thread = Thread.run(
            "diff",
            diff,
            source,
            destination,
            pending,
            please_stop=please_stop
        )
        replication_thread = Thread.run(
            "replication",
            replicate,
            source,
            destination,
            pending,
            config.fix,
            please_stop=please_stop
        )
        # wait for producer and diff before signalling end-of-queue,
        # then let replication drain the remainder
        pending_thread.join()
        diff_thread.join()
        pending.add(Thread.STOP)
        replication_thread.join()
        done.go()        # mark clean completion before requesting shutdown
        please_stop.go()

    Thread.run("wait for replication to finish", worker, please_stop=please_stop)
    Thread.wait_for_shutdown_signal(please_stop=please_stop)

    if done:
        Log.note("done all")
        # RECORD LAST UPDATED, IF WE DID NOT CANCEL OUT
        time_file.write(unicode(current_time.milli))