def summarize_hits(f):
    f = Path(f)
    cs = f.basename().split('.')
    query = cs[0]
    outfile = f.dirname() / (f.basename() + '.tsv')
    alns = parse(open(f).read())
    if not alns:
        print('No content: ' + f)
        return None
    print(outfile)
    with open(outfile, 'w') as csvfile:
        writer = csv.writer(csvfile, delimiter='\t')
        for each in alns:
            q, t, e = each[0][:3]
            tid, tseq, ts, te = t
            qid, qseq, qs, qe = q
            qid = qid.split()[0].replace('lcl|', '')
            tid = tid.split()[0].replace('lcl|', '')
            if tid.startswith('UniRef'):
                continue
            if tid.startswith('pfam'):
                continue
            if tid.startswith('up_'):
                continue
            writer.writerow((tid, e))
def __get_source_info_base(source: Path) -> SourceInfo:
    return SourceInfo(
        dir_name=source.dirname(),
        base_name=source.basename(),
        size=source.getsize(),
        mtime=int(source.getmtime()),
    )
def download_file(downloaded_file_path, url, max_connections=2, max_concurrent=5):
    """Download file to specified location."""
    file_path = Path(downloaded_file_path)
    assert file_path.isabs(), "download file path must be absolute, not relative"
    if file_path.exists():
        log("{} already downloaded".format(file_path))
        return
    log("Downloading: {}\n".format(url))
    aria2c = Command("aria2c")
    aria2c = aria2c("--max-connection-per-server={}".format(max_connections))
    aria2c = aria2c("--max-concurrent-downloads={}".format(max_concurrent))
    try:
        aria2c("--dir={}".format(file_path.dirname()),
               "--out={}.part".format(file_path.basename()),
               url).run()
    except CommandError:
        raise DownloadError(
            "Failed to download {}. Re-running may fix the problem.".format(url))
    shutil.move(file_path + ".part", file_path)
def _get_best_models(args):
    outdir, tag, group, models_list = args
    outdir = Path(outdir)
    logfile = models_list[0][1].dirname().joinpath(
        '{complex_id}-min.log'.format(complex_id=tag))
    _setup_logger(logfile)
    tag = int(tag)
    try:
        best_idx = group.sort_values('prediction', ascending=False).index[0]
        best_path = models_list[best_idx][1]
        with open(best_path, 'r') as f:
            lines = f.readlines()
        best_path.write_lines(lines + ['END\n'])
        best_nmin, best_psf = prepare_pdb22(best_path, best_path.stripext())
        minimized, nmin, psf = minimization.minimize_energy(
            best_nmin,
            out=best_nmin[:-8] + 'min.pdb',
            psf=best_psf,
            clean=True)
        minimized, nmin, psf = Path(minimized), Path(nmin), Path(psf)
        minimized.move(outdir)
        nmin.move(outdir)
        psf.move(outdir)
        return tag, outdir.joinpath(minimized.basename())
    except Exception as e:
        logging.error('Error minimizing model for line %i' % tag)
        logging.exception(e)
        return tag, None
def cache_document(src, dest=None):
    "Cache a document, return filename of the dest file."
    # TODO: use a staging area in case something breaks in the middle of adding.
    src = Path(src)
    if dest is None:
        # Find a reasonable filename if dest isn't specified
        if is_url(src):
            dest = CACHE / secure_filename(src)
        else:
            dest = CACHE / src.basename()
    else:
        dest = CACHE / dest
    if dest.exists():
        # TODO: Suggest update methods or renaming the file
        raise SkidFileExists(dest)
    if is_url(src):
        cache_url(src, dest)
    elif src.exists():
        # is this something on disk?
        src.copy2(dest)
        print('copy:', src, '->', dest)
    else:
        raise SkidError(
            "cache_document doesn't know what to do with source %r\n"
            "Trying to add a nonexistent file?" % str(src))
    return dest
def copyfiles(self, skip_sky=False, skip_pattern=False, skip_opt=False):
    d = self.tempdir
    if os.path.exists(self.scene):
        fn = Path(self.scene)
        fn.copy(d / fn.basename())
    else:
        fn = d / 'cscene.can'
        fn.write_text(self.scene)
    self.scene = Path(fn.basename())
    if not skip_sky:
        if os.path.exists(self.sky):
            fn = Path(self.sky)
            fn.copy(d / fn.basename())
        else:
            fn = d / 'sky.light'
            fn.write_text(self.sky)
        self.sky = Path(fn.basename())
    if not skip_pattern:
        if self.infinity:
            if os.path.exists(self.pattern):
                fn = Path(self.pattern)
                fn.copy(d / fn.basename())
            else:
                fn = d / 'pattern.8'
                fn.write_text(self.pattern)
            self.pattern = Path(fn.basename())
    if self.sensor is not None:
        if os.path.exists(self.sensor):
            fn = Path(self.sensor)
            fn.copy(d / fn.basename())
        else:
            fn = d / 'sensor.can'
            fn.write_text(self.sensor)
        self.sensor = Path(fn.basename())
    if not skip_opt:
        optn = [name + '.opt' for name in _safe_iter(self.optnames)]
        try:
            # _safe_iter avoids iterating over the characters of a single
            # optfile name when only one optfile is given
            for i, opt in enumerate(_safe_iter(self.opticals)):
                if os.path.exists(opt):
                    fn = Path(opt)
                    fn.copy(d / optn[i])
                else:
                    fn = d / optn[i]
                    fn.write_text(opt)
            self.opticals = [Path(name) for name in _safe_iter(optn)]
        except IndexError:
            raise CaribuOptionError(
                "Optnames list must be None or as long as optfiles list")
def build(self, dockerfile):
    dockerfile = Path(dockerfile)
    tag = 'rf-' + dockerfile.basename().replace('.dkf', '')
    dockerfile.copy('redfish-client/tests/Dockerfile')
    response = [line for line in self.cli.build(
        path='redfish-client/tests', tag=tag, rm=True)]
    return response
def build(self, dockerfile):
    dockerfile = Path(dockerfile)
    tag = 'rf' + dockerfile.basename().replace('Dockerfile.', '')
    dockerfile.copy('redfish-client/tests/Dockerfile')
    response = [line for line in self.cli.build(
        path='redfish-client/tests', tag=tag, rm=True)]
    return response
def copyfiles(self, skip_sky=False, skip_pattern=False, skip_opt=False):
    d = self.tempdir
    if os.path.exists(self.scene):
        fn = Path(self.scene)
        fn.copy(d / fn.basename())
    else:
        fn = d / 'cscene.can'
        fn.write_text(self.scene)
    self.scene = Path(fn.basename())
    if not skip_sky:
        if os.path.exists(self.sky):
            fn = Path(self.sky)
            fn.copy(d / fn.basename())
        else:
            fn = d / 'sky.light'
            fn.write_text(self.sky)
        self.sky = Path(fn.basename())
    if not skip_pattern:
        if self.infinity:
            if os.path.exists(self.pattern):
                fn = Path(self.pattern)
                fn.copy(d / fn.basename())
            else:
                fn = d / 'pattern.8'
                fn.write_text(self.pattern)
            self.pattern = Path(fn.basename())
    if self.sensor is not None:
        if os.path.exists(self.sensor):
            fn = Path(self.sensor)
            fn.copy(d / fn.basename())
        else:
            fn = d / 'sensor.can'
            fn.write_text(self.sensor)
        self.sensor = Path(fn.basename())
    if not skip_opt:
        optn = [name + '.opt' for name in _safe_iter(self.optnames)]
        try:
            # _safe_iter avoids iterating over the characters of a single
            # optfile name when only one optfile is given
            for i, opt in enumerate(_safe_iter(self.opticals)):
                if os.path.exists(opt):
                    fn = Path(opt)
                    fn.copy(d / optn[i])
                else:
                    fn = d / optn[i]
                    fn.write_text(opt)
            self.opticals = [Path(name) for name in _safe_iter(optn)]
        except IndexError:
            raise CaribuOptionError("Optnames list must be None or as long as optfiles list")
def add_file(self, file: Path):
    if file.exists() and file.ext == ".png":
        tmp = Image.open(file.abspath())
        size = tmp.size
        tmp.close()
        self.files.insert("", "end", text=file.basename(),
                          values=("{}x{}".format(*size),
                                  "{}".format(file.abspath())))
def main():
    parser = argparse.ArgumentParser(
        description='cleanup a filename: replace . with spaces, delete some '
                    'common shit at the end of a filename, '
                    'ex: "actual.file.name-1080p.BluRay.x264-GECKOS[rarbg]" -> "actual filename"')
    parser.add_argument('filenames', nargs='+',
                        help='list of files and folders to cleanup')
    args = parser.parse_args()
    for f in args.filenames:
        p = Path(f.strip())
        newname = rename(p.basename(), p.isdir())
        p = p.rename(Path.joinpath(p.dirname(), newname))
        print(p.abspath(), end='')
def __init__(self, data_dir, transform, max_num_instances):
    self.transform = transform
    self.max_num_instances = max_num_instances
    img_dir = Path(data_dir)
    seg_dir = Path(os.path.join(img_dir.dirname().parent, 'segmentation',
                                img_dir.basename()))
    flo_dir = [Path(os.path.join(img_dir.dirname().parent, 'flow_f',
                                 img_dir.basename())),
               Path(os.path.join(img_dir.dirname().parent, 'flow_b',
                                 img_dir.basename()))]
    intrinsics = np.genfromtxt(img_dir / 'cam.txt').astype(np.float32).reshape((3, 3))
    imgs = sorted(img_dir.files('*.jpg'))
    flof = sorted(flo_dir[0].files('*.flo'))  # 00: src, 01: tgt
    flob = sorted(flo_dir[1].files('*.flo'))  # 00: tgt, 01: src
    segm = sorted(seg_dir.files('*.npy'))
    sequence_set = []
    for i in range(len(imgs) - 1):
        # samples will be processed when __getitem__() is called
        sample = {'intrinsics': intrinsics,
                  'img0': imgs[i], 'img1': imgs[i + 1],
                  'flof': flof[i], 'flob': flob[i],
                  'seg0': segm[i], 'seg1': segm[i + 1]}
        sequence_set.append(sample)
    self.samples = sequence_set
def set2rank2(args):
    rnkfile = Path(args['rnkfile']).expanduser().abspath()
    gsetfile = Path(args['gsetfile']).expanduser().abspath()
    cachedir = Path(args['cachedir']).expanduser().abspath()
    gseajar = Path(args['gseajar']).expanduser().abspath()

    def fix_rv(rv):
        rv['gset'] = str(gsetfile.basename()).replace(".grp", '').replace(".list", '')
        rv['gset_type'] = str(gsetfile.dirname().basename())
        rv['rank'] = str(rnkfile.basename()).replace(".rnk", '')
        rv['rank_type'] = str(rnkfile.dirname().basename())
        del rv['cachedir']
        return rv

    if os.path.exists(cachedir):
        rv = _check_gsea(cachedir)
        if rv is False:
            cachedir.rmtree()
        else:
            assert isinstance(rv, pd.DataFrame)
            return fix_rv(rv)

    cachedir.makedirs_p()
    if '.list' in gsetfile:
        grpfile = cachedir / (gsetfile.basename().replace('.list', '.grp'))
        os.symlink(gsetfile, grpfile)
        gsetfile = grpfile

    cl = ("""-cp %s -Xmx2048m xtools.gsea.GseaPreranked -gmx %s
             -collapse false -mode Max_probe -norm meandiv -nperm 1000
             -rnk %s -scoring_scheme weighted -rpt_label my_analysis
             -include_only_symbols true -make_sets true -plot_top_x 1
             -rnd_seed timestamp -set_max 9999 -set_min 4 -zip_report false
             -out %s -gui false
          """ % (gseajar, gsetfile, rnkfile, cachedir)).split()

    import sh
    try:
        java(*cl)  # optionally: _out=str(cachedir / 'gsea.out'), _err=str(cachedir / 'gsea.err')
    except sh.ErrorReturnCode_1:
        return False
    return fix_rv(_check_gsea(cachedir))
def set2rank2(args):
    rnkfile = Path(args['rnkfile']).expanduser().abspath()
    gsetfile = Path(args['gsetfile']).expanduser().abspath()
    cachedir = Path(args['cachedir']).expanduser().abspath()
    gseajar = Path(args['gseajar']).expanduser().abspath()

    def fix_rv(rv):
        rv['gset'] = str(gsetfile.basename()).replace(".grp", '').replace(".list", '')
        rv['gset_type'] = str(gsetfile.dirname().basename())
        rv['rank'] = str(rnkfile.basename()).replace(".rnk", '')
        rv['rank_type'] = str(rnkfile.dirname().basename())
        del rv['cachedir']
        return rv

    if os.path.exists(cachedir):
        rv = _check_gsea(cachedir)
        if rv is False:
            cachedir.rmtree()
        else:
            assert isinstance(rv, pd.DataFrame)
            return fix_rv(rv)

    cachedir.makedirs_p()
    if '.list' in gsetfile:
        grpfile = cachedir / (gsetfile.basename().replace('.list', '.grp'))
        os.symlink(gsetfile, grpfile)
        gsetfile = grpfile

    cl = ("""-cp %s -Xmx2048m xtools.gsea.GseaPreranked -gmx %s
             -collapse false -mode Max_probe -norm meandiv -nperm 1000
             -rnk %s -scoring_scheme weighted -rpt_label my_analysis
             -include_only_symbols true -make_sets true -plot_top_x 1
             -rnd_seed timestamp -set_max 9999 -set_min 4 -zip_report false
             -out %s -gui false
          """ % (gseajar, gsetfile, rnkfile, cachedir)).split()

    import sh
    try:
        java(*cl)  # optionally: _out=str(cachedir / 'gsea.out'), _err=str(cachedir / 'gsea.err')
    except sh.ErrorReturnCode_1:
        return False
    return fix_rv(_check_gsea(cachedir))
def summarize_ssearch_summary(f):
    f = Path(f)
    cs = f.basename().split('.')
    query = cs[0]
    outfile = f.dirname() / (f.basename() + '.tsv')
    r = []
    started = False
    with open(f) as ifh:
        for l in ifh:
            if l.startswith("The best scores are:"):
                started = True
                continue
            if started:
                cs = l.strip().split()
                if cs:
                    e = cs[-1]
                    tid = cs[0]
                    tid = tid.replace('lcl|', '')
                    if tid.startswith('UniRef'):
                        continue
                    if tid.startswith('pfam'):
                        continue
                    if tid.startswith('up_'):
                        continue
                    r.append([tid, e])
                else:
                    break
    if not r:
        print('No content: ' + f)
        return None
    print(outfile)
    with open(outfile, 'w') as csvfile:
        writer = csv.writer(csvfile, delimiter='\t')
        for row in r:
            writer.writerow(row)
def load_test_results(directory: Path) -> TestResults:
    """Takes path to directory with test results generated by network-tests2
    as argument. Returns an instance of TestResults generated from it."""
    hostnames = tuple(read_benchmark_hostnames(
        directory.joinpath("network_hosts.txt")))
    kwargs = {
        arg: load_nc_file_as_tables(hostnames, directory.joinpath(filename))
        for (arg, filename) in [
            ("medians", "network_median.nc"),
            ("means", "network_average.nc"),
            ("std_dev", "network_deviation.nc"),
        ]
    }
    return TestResults(name=directory.basename(), hostnames=hostnames, **kwargs)
def IdentFile(dir_layout, fn, suffix):
    fn = Path(fn)
    if dir_layout == 'clean':
        # '{pre_dir}/_spiper/{pre_base}.{suffix}'
        lst = [fn.dirname() + '/_spiper/' + fn.basename(), suffix]
    elif dir_layout == 'flat':
        # '{prefix}.{suffix}'
        lst = [fn, suffix]
    else:
        assert 0, ("dir_layout", dir_layout)
    input_ident_file = '.'.join(lst)
    return Path(input_ident_file)
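A minimal usage sketch for IdentFile above, assuming path.py's Path and hypothetical inputs (a file 'data/sample.fastq' and the suffix 'cache'); it only illustrates how the two layouts place the ident file:

IdentFile('clean', 'data/sample.fastq', 'cache')  # -> Path('data/_spiper/sample.fastq.cache')
IdentFile('flat', 'data/sample.fastq', 'cache')   # -> Path('data/sample.fastq.cache')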
def load_test_results(directory: Path) -> TestResults:
    """Takes path to directory with test results generated by network-tests2
    as argument. Returns an instance of TestResults generated from it."""
    hostnames = tuple(read_benchmark_hostnames(
        directory.joinpath("network_hosts.txt")))
    kwargs = {
        arg: load_nc_file_as_tables(hostnames, directory.joinpath(filename))
        for (arg, filename) in [("medians", "network_median.nc"),
                                ("means", "network_average.nc"),
                                ("std_dev", "network_deviation.nc")]
    }
    return TestResults(name=directory.basename(), hostnames=hostnames, **kwargs)
def main(results_path):
    results_path = Path(results_path)
    for csv_path in glob.glob(results_path / "**" / "*.csv", recursive=True):
        csv_path = Path(csv_path)
        csv_basename = csv_path.basename()[:-4]
        csv_parent = csv_path.parent
        data = pd.read_csv(csv_path)
        columns = data.columns
        no2_columns = [c for c in columns if "NO2" in c]
        o3_columns = [c for c in columns if "O3" in c]
        base_columns = set(columns) - set(o3_columns) - set(no2_columns) - {"Unnamed: 0"}
        with open(csv_parent / ("%s-no2.tex" % csv_basename), 'w') as fp:
            fp.write(data[list(base_columns) + no2_columns].to_latex())
        with open(csv_parent / ("%s-o3.tex" % csv_basename), 'w') as fp:
            fp.write(data[list(base_columns) + o3_columns].to_latex())
def save(self, list_path='grids.list', grid_ids=None):
    list_path = Path(list_path)
    grids_dx = {}
    with open(list_path, 'w') as f:
        f.write('%i\n' % self.grid.shape[0])
        for i, chan in enumerate(self.grid):
            if grid_ids is not None and i not in grid_ids:
                continue
            if self.names is None:
                name = list_path.basename().stripext() + f'.{i}.dx'
            else:
                name = f'{self.names[i]}.dx'
            grid_dx = _Grid(grid=chan, origin=self.origin, delta=self.delta)
            grid_dx.export(list_path.dirname() / name)
            grids_dx[name] = name
            f.write(name + '\n')
def bootstrap(ctx, branch, projectpath='', projectname='', taskspath='tasks',
              configpath='config', utilspath='utils', virtualenv=True,
              upgradereqs=False):
    cwd = Path.getcwd()
    if projectpath:
        projectpath = Path(projectpath)
        os.chdir(projectpath)
    elif INITIAL_PATH.basename() == 'tasks':
        projectpath = INITIAL_PATH.parent()
        os.chdir(projectpath)
    else:
        projectpath = INITIAL_PATH
    if not projectname:
        projectname = str(projectpath.basename())
    Config.project_name = projectname
    Config.virtualenv = virtualenv
    # TODO: parse local.cfg into Config if it exists?
    Config.get_tasks = True
    Config.get_config = True
    Config.requirements = True  # Install?
    get_tasks(ctx, taskspath)
    get_config(ctx, configpath, branch=branch)
    activate_virtualenv(ctx, projectname)
    install_requirements(ctx, upgrade=upgradereqs)
    clone(ctx, 'config/base.cfg')
    fetch(ctx)
    # SAO
    sao_install(ctx)
    os.chdir(cwd)
    sao_grunt(ctx)
    if Path.getcwd() != INITIAL_PATH:
        os.chdir(INITIAL_PATH)
def parse_hits(self, f, max_evalue=10.0):
    f = Path(f)
    query_name = f.basename().split('.seq')[0]
    query = self[query_name]
    if query.size == 1:
        return None
    lines = open(f).read().splitlines()
    if not lines:
        print('No content: ' + f)
        return None
    aln_num = 0
    hits = Hits()
    tids = set()
    for each in lines[::-1]:
        tid, e = each.split()
        e = float(e)
        tid = tid.split()[0].replace('lcl|', '')
        if query.id == tid:
            continue
        if e > max_evalue:
            continue
        if tid in tids:
            continue
        else:
            tids.add(tid)
        target = self[tid]
        hits.append(Hit(query, target, e,
                        standard=self.standard,
                        db=getattr(self, "pair2JG_judge", None)))
    hits.seqlen = query.seqlen
    hits.id = query.id
    hits.superfamily = query.superfamily
    hits.fold = query.fold
    hits.family = query.family
    hits.size = query.size
    return hits
def abyss(log, cfg, r1, r2, out):
    out = Path(out)
    dir = out.dirname()
    f1 = dir.relpathto(r1)
    f2 = dir.relpathto(r2)
    prefix = out.basename().split('-')[0]
    abyss_log = os.path.join(dir, "abyss.log")
    kmer = cfg.assembly.kmer
    cmd_template = r"abyss-pe in='{f1} {f2}' name={prefix} k={kmer} -C {dir} > {abyss_log} 2>&1"
    cmd = cmd_template.format(f1=f1, f2=f2, prefix=prefix, kmer=kmer,
                              dir=dir, abyss_log=abyss_log)
    log.write('\nRunning:\n{command}\nwriting log to {abyss_log}'.format(
        command=cmd, abyss_log=abyss_log))
    os.system(cmd)
def step_copytree_to_workdir(ctx, source_dir):
    """Copy a directory :param:`source_dir` -> workdir:basename(source_dir)/"""
    ensure_workdir_exists(ctx)
    source_dir = Path(source_dir).normpath()
    dest_dir = source_dir.basename()
    step_copytree_to_workdir_with_destdir(ctx, source_dir, dest_dir)
def main():
    global SOURCE
    global OUTPUT
    global STYLE_FILE
    global STYLE_IMAGES
    global CHM
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--style', required=True, help="Style sheet for docs")
    parser.add_argument('docs_source', help="QT docs source directory")
    parser.add_argument('docs_out', help="Output directory for chm files")
    args = parser.parse_args()
    SOURCE = Path(args.docs_source).abspath()
    OUTPUT = Path(args.docs_out).abspath()
    STYLE_FILE = Path(args.style).abspath()
    with open(SOURCE / 'qtdoc' / 'qtdoc.index', encoding='utf-8') as r:
        qt_version = re.search(r'<INDEX.*version="(.*?)"', r.read())
    if not qt_version:
        raise RuntimeError("Failed to parse QT Docs version")
    qt_version = qt_version.group(1)
    STYLE_IMAGES = (
        "ico_out.png", "ico_note.png", "ico_note_attention.png",
        "btn_prev.png", "btn_next.png", "home.png", "arrow_bc.png",
        "bgrContent.png", "bullet_dn.png", "bullet_sq.png", "logo.png",
    )
    CHM = chm.DocChm(f'Qt-{qt_version}',
                     default_topic='qtdoc/index.html',
                     title=f'Qt {qt_version}')
    OUTPUT.mkdir_p()
    images_dir = OUTPUT / 'images'
    images_dir.mkdir_p()
    for image in STYLE_IMAGES:
        shutil.copy(SOURCE / 'qtdoc' / 'images' / image, images_dir / image)
    # put qtdoc first
    process_module(SOURCE / 'qtdoc')
    excluded_dirs = ('config', 'global', 'qtdoc')
    for module in SOURCE.dirs():
        if module.basename() not in excluded_dirs:
            process_module(module)
    shutil.copy(STYLE_FILE, OUTPUT / STYLE_FILE.basename())
    for image in STYLE_IMAGES:
        CHM.append(f"images\\{image}")
    CHM.append(STYLE_FILE.basename())
    with OUTPUT:
        CHM.save()
    print(f"QT Docs v.{qt_version} are ready for CHM compilation")
num_cpus = args.cpus
ref_path = args.reference
patient_zero = args.reference_name

# fix fasta header names
fa_fps = bs.get_filepaths(seqs_dir, data_fmt='fasta', data_type='', tech='')
for fa_fp in fa_fps['IonXpress']:
    seq = SeqIO.read(fa_fp, 'fasta')
    seq.id = fa_fp.split('/')[-1].split('.')[0]
    SeqIO.write(seq, fa_fp, 'fasta')

msa_dir = out_dir / 'msa'
if not Path.isdir(msa_dir):
    Path.mkdir(msa_dir)

# copy reference sequence into folder containing fasta files
copy(ref_path, seqs_dir)
seqs_fp = bs.concat_fasta(seqs_dir, msa_dir / out_dir.basename())
msa_fn = seqs_fp.split('/')[-1].split('.')[0] + '_aligned.fa'
msa_fp = Path(seqs_fp).parent / msa_fn
if not Path.isfile(Path(msa_fp)):
    msa_fp = bs.align_fasta(seqs_fp, msa_fp, num_cpus=num_cpus)

# load multiple sequence alignment
msa_data = bs.load_fasta(msa_fp, is_aligned=True)

muts_dir = out_dir / 'mutations'
if not Path.isdir(muts_dir):
    Path.mkdir(muts_dir)

# identify insertions
insertions = bm.identify_insertions(msa_data,
                                    meta_fp=None,
                                    data_src='jordan',
                                    patient_zero=patient_zero,
                                    min_ins_len=1)
num_samples_missing_coverage = ans[ans['percent_coverage_cds'].isna()].shape[0]
# compute number of samples below 90% coverage
low_coverage_samples = ans[ans["percent_coverage_cds"] < 90]
# ignore samples below 90% coverage
ans = ans[ans["percent_coverage_cds"] >= 90]
# generate concatenated consensus sequences
if not dry_run:
    # Transfer files
    transfer_files(ans, out_dir, include_bams=include_bams, ncpus=num_cpus)
    msa_dir = out_dir / 'msa'
    if not Path.isdir(msa_dir):
        Path.mkdir(msa_dir)
    seqs_dir = Path(out_dir / 'fa')
    copy(ref_path, seqs_dir)
    seqs_fp = concat_fasta(seqs_dir, msa_dir / out_dir.basename())
    # load concatenated sequences
    cns_seqs = SeqIO.parse(msa_dir / out_dir.basename() + '.fa', 'fasta')
    cns_seqs = list(cns_seqs)
    # generate files containing metadata for Github, GISAID, GenBank
    create_github_meta(ans, released_samples_fpath, git_meta_cols)
    create_gisaid_meta(ans, gisaid_meta_cols)
    assemble_genbank_release(cns_seqs, ans, genbank_meta_cols, out_dir / 'genbank')
    sra_dir = out_dir / 'sra'
    if not Path.isdir(sra_dir):
        Path.mkdir(sra_dir)
    input(f"\n Have you received the BioSample.txt files and placed them inside {sra_dir}? \n Press Enter to continue...")
    create_sra_meta(ans, sra_dir)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from path import Path

ercc = {}
for cf in snakemake.input.counts:
    cf = Path(cf)
    name = cf.basename().replace('.count', '')
    if cf.getsize() == 0:
        ercc[name] = pd.Series()
    else:
        try:
            c = pd.read_csv(cf, sep="\t", index_col=0, header=None)[1]
            ercc[name] = c
        except:
            print("Error reading %s" % cf)
            exit(-1)

ercc = pd.DataFrame(ercc).fillna(0)
try:
    ercc = ercc.astype(int)
except:
    print(ercc.iloc[:5, :5])
    print(ercc.iloc[-5:, -5:])
    raise

ercc.index.name = 'ercc'
ercc_rpm = 1e6 * (ercc / ercc.sum())
groups = pd.read_csv('group.tsv', sep="\t", index_col=0, names=['group'])
#!/usr/bin/python
import sys

from PIL import Image, ImageFilter
from path import Path

# Take the path to the image file as an argument
imgPath = Path(sys.argv[1])
print(imgPath, imgPath.basename())

# We first apply the CONTOUR filter to get the contours,
# then convert the image to black and white and
# traverse the pixels to get max and min positions of black
img = Image.open(imgPath)
contourImg = img.filter(ImageFilter.CONTOUR)
binaryImg = contourImg.convert("1")
rows, cols = binaryImg.size
imgData = binaryImg.load()
edgeDataX = []
edgeDataY = []
print("Image size = (%d,%d)" % (rows, cols))
for x in range(rows):
    for y in range(cols):
        if imgData[x, y] == 0:
            edgeDataX.append(x)
            edgeDataY.append(y)
print("\nEdge Points of cropped image: ")
import numpy as np
# import matplotlib.pyplot as plt
import pandas as pd
from path import Path

ercc = {}
for cf in snakemake.input.counts:
    cf = Path(cf)
    name = cf.basename().replace('.count', '')
    if cf.getsize() == 0:
        ercc[name] = pd.Series()
    else:
        try:
            c = pd.read_csv(cf, sep="\t", index_col=0, header=None)[1]
            ercc[name] = c
        except:
            print("Error reading %s" % cf)
            exit(-1)

ercc = pd.DataFrame(ercc).fillna(0)
try:
    ercc = ercc.astype(int)
except:
    print(ercc.iloc[:5, :5])
    print(ercc.iloc[-5:, -5:])
    raise

ercc.index.name = 'ercc'
ercc_rpm = 1e6 * (ercc / ercc.sum())
groups = pd.read_csv('group.tsv', sep="\t", index_col=0, names=['group'])
erccg = ercc.T
#!/usr/bin/python
import sys

from PIL import Image, ImageFilter
from path import Path

# Take the path to the image file as an argument
imgPath = Path(sys.argv[1])
print(imgPath, imgPath.basename())

# We first apply the CONTOUR filter to get the contours,
# then convert the image to black and white and
# traverse the pixels to get max and min positions of black
img = Image.open(imgPath)
contourImg = img.filter(ImageFilter.CONTOUR)
binaryImg = contourImg.convert("1")
rows, cols = binaryImg.size
imgData = binaryImg.load()
edgeDataX = []
edgeDataY = []
print("Image size = (%d,%d)" % (rows, cols))
for x in range(rows):
    for y in range(cols):
        if imgData[x, y] == 0:
            edgeDataX.append(x)
            edgeDataY.append(y)
print("\nEdge Points of cropped image: ")
def get_template(self):
    """ Find, copy and load the template """
    if '~' in self.template:
        self.template = str(Path(self.template).expanduser())

    if Path(self.template).isdir():
        if self.transient:
            lg.error("must specify a template name or file, not a dir")
            lg.error("when in transient mode")
            exit(-1)
        k3dir = Path(self.template) / 'k3'
        subdirs = k3dir.dirs() if k3dir.exists() else []
        if len(subdirs) == 0:
            lg.warning("No template found")
            exit(-1)
        if len(subdirs) > 1:
            lg.error("Multiple templates found: %s",
                     ", ".join([x.basename() for x in subdirs]))
            lg.error("Please specify one")
            exit()
        self.name = subdirs[0].basename()
        lg.info('template "%s" found in %s', self.name, self.template)
        if not self.template == '.':
            # not pointing at the current folder
            # copy the template (& argument file)
            template_file = Path(self.template) / 'k3' / self.name / 'template.k3'
            if template_file != self.template_file:
                template_file.copy(self.template_file)
    elif not self.template.endswith('.k3') and \
            re.match(r'[A-Za-z_]\w*', self.template):
        if (Path('k3') / self.template / 'template.k3').exists():
            if self.transient:
                lg.error("Not possible to use k3/template")
                lg.error("when in transient mode")
                exit(-1)
            # the template is present
            self.name = self.template
        else:
            template_file = Path(self.app.conf['default_template_dir'])\
                .expanduser() / ('%s.k3' % self.template)
            if not template_file.exists():
                lg.error("Cannot find template")
                exit(-1)
            self.name = self.template
            self.retrieve_template_file(template_file)
    elif Path(self.template).exists() and self.template.endswith('.k3'):
        # template points to a file - get it
        lg.debug("Found template file: %s", self.template)
        template_file = Path(self.template)
        self.name = template_file.basename().replace('.k3', '')
        self.retrieve_template_file(template_file)
    else:
        raise NotImplementedError("Need other source for templates: %s",
                                  self.template)

    lg.debug("Template name is: %s", self.name)
    if not self.template_file.exists():
        lg.error("No valid template found in %s", self.workdir)
        exit(-1)
    lg.debug("Found template: %s", self.name)
    self.ctx['template']['name'] = self.name
def plotMem(massifFile,
            filter,        # filter by timer name
            minTimeDiff,   # filter by difference in beginning and end
            minMemDiff,    # filter by change in memory usage
            shortTimers,   # exclude short timers
            memUnit='MB',  # unit for memory
            displayTimers=True):
    # first parse the log file valgrind created for us
    data = msparser.parse_file(massifFile)
    massifFile = Path(massifFile)
    cmd = data['cmd']
    timeUnit = data['time_unit']
    snapshots = data['snapshots']
    times = []
    memHeap = []
    for s in snapshots:
        try:
            times.append(s['time'])
            memHeap.append(formatMem(s['mem_heap'], memUnit))
        except:
            pass

    # now parse all the snapshot pairs we took in the timers
    # (We compile MueLu with MueLu_TIMEMONITOR_MASSIF_SNAPSHOTS=1)
    snapshotPairs = []
    for f in Path('.').glob(massifFile + "*start.out"):
        fEnd = f.replace('start.out', 'stop.out')
        label = Path(f).basename().stripext().replace('.start', '')
        label = label.replace(massifFile.basename() + '.', '')
        try:
            label, counter = label.rsplit('.', 1)
        except:
            pass
        try:
            data = msparser.parse_file(f)
            dataEnd = msparser.parse_file(fEnd)
            assert data['time_unit'] == timeUnit
            assert dataEnd['time_unit'] == timeUnit
            data = data['snapshots']
            dataEnd = dataEnd['snapshots']
            assert len(data) == 1
            assert len(dataEnd) == 1
            assert data[0]['time'] <= dataEnd[0]['time'], f
            data[0]['label'] = label
            data[0]['counter'] = counter
            data[0]['mem_heap'] = formatMem(data[0]['mem_heap'], memUnit)
            dataEnd[0]['mem_heap'] = formatMem(dataEnd[0]['mem_heap'], memUnit)
            times.append(data[0]['time'])
            times.append(dataEnd[0]['time'])
            memHeap.append(data[0]['mem_heap'])
            memHeap.append(dataEnd[0]['mem_heap'])
            snapshotPairs += [(data[0], dataEnd[0])]
        except FileNotFoundError:
            print(f)

    # sort the snapshots
    times = np.array(times)
    memHeap = np.array(memHeap)
    idx = np.argsort(times)
    print('maximum heap memory usage: {}'.format(memHeap.max()))
    times = times[idx]
    memHeap = memHeap[idx]
    times = times[memHeap > minMemDiff]
    memHeap = memHeap[memHeap > minMemDiff]
    assert len(times) > 0

    # plot the curve of memory usage
    plt.plot(times, memHeap)

    if displayTimers:
        # now, filter through the snapshot pairs
        # otherwise, the plot becomes very messy
        filter = re.compile(filter)
        told = (-2 * minTimeDiff, -2 * minTimeDiff)
        snapshotPairsNew = []
        for i, pair in enumerate(sorted(snapshotPairs, key=lambda x: x[0]['time'])):
            if (filter.search(pair[0]['label'])
                    and abs(pair[0]['mem_heap'] - pair[1]['mem_heap']) > minMemDiff):
                t = [pair[0]['time'], pair[1]['time']]
                if (abs(t[0] - told[0]) < minTimeDiff
                        and abs(t[1] - told[1]) < minTimeDiff):
                    print('Timers "{}" and "{}" seem to coincide'.format(
                        nameold, pair[0]['label']))
                    continue
                if t[1] - t[0] < shortTimers:
                    continue
                told = t
                nameold = pair[0]['label']
                snapshotPairsNew.append(pair)
        snapshotPairs = snapshotPairsNew

        # stack the snapshot pairs
        height = max(memHeap) / len(snapshotPairs)
        for i, pair in enumerate(sorted(snapshotPairs, key=lambda x: x[0]['time'])):
            plt.gca().add_patch(
                patches.Rectangle((pair[0]['time'], i * height),
                                  pair[1]['time'] - pair[0]['time'],
                                  height,
                                  alpha=0.5,
                                  facecolor='red'))
            plt.text(pair[0]['time'], (i + 0.5) * height,
                     tex_escape(pair[0]['label']))
            # add vertical lines at start and end for each timer
            plt.plot([pair[0]['time'], pair[0]['time']], [0, max(memHeap)],
                     '-', c='grey', alpha=0.5)
            plt.plot([pair[1]['time'], pair[1]['time']], [0, max(memHeap)],
                     '-', c='grey', alpha=0.5)
            # add circles on these lines for memory usage at beginning and end
            plt.scatter([pair[0]['time'], pair[1]['time']],
                        [pair[0]['mem_heap'], pair[1]['mem_heap']], c='r')

    plt.xlabel(timeUnit)
    plt.ylabel(memUnit)
    plt.title(tex_escape(cmd))
import os

from path import Path

##################################################################
# Application configuration
##################################################################
PROJECT_DIR = Path(__file__).abspath().realpath().dirname().parent
PROJECT_NAME = PROJECT_DIR.basename()

ROOT_URLCONF = 'mysite_backend.urls'
WSGI_APPLICATION = 'mysite_backend.wsgi.application'
HOST = os.environ.get('BACKEND_SERVER_URL', 'http://localhost:8000')
DEBUG = True

##################################################################
# Language and timezone
##################################################################
# https://en.wikipedia.org/wiki/List_of_tz_zones_by_name
TIME_ZONE = 'Asia/Taipei'
LANGUAGE_CODE = 'zh-hant'
USE_TZ = False
USE_I18N = True
USE_L10N = True