def is_valid_gzip(fn, lazy=False, use_pigz=False):
    """Report whether ``fn`` looks like an intact gzip file.

    Args:
        fn: Path of the file to check.
        lazy: When true, only try to decompress the first ten lines
            (each capped at 64 KiB) in-process instead of testing the whole
            stream. Fast, but corruption further into the file goes
            unnoticed.
        use_pigz: Run ``pigz -t`` instead of ``gzip -t`` for the full check.
            A bad idea if a number of processes have already been spawned.
            Ignored when ``lazy`` is true.

    Returns:
        True if the (partial or full) check succeeded, False otherwise.

    Note:
        The full check passes the filename as an argument vector, so no
        shell is involved; a missing ``gzip``/``pigz`` binary therefore
        raises ``OSError`` rather than being reported as a corrupted file.
    """
    if lazy:
        # The previous shell pipeline (`gzip -dc f | head &>/dev/null`)
        # reported head's exit status, so it could never detect a bad file.
        import gzip
        import zlib
        try:
            with gzip.open(fn, "rb") as handle:
                for _ in range(10):
                    if not handle.readline(65536):
                        break
        except (OSError, EOFError, zlib.error):
            return False
        return True
    # lazy has already returned. This is the "else".
    prog = "pigz" if use_pigz else "gzip"
    try:
        cc([prog, "-t", fn])
    except CalledProcessError:
        sys.stderr.write("Corrupted file " + fn + ". Delete, try again.\n")
        return False
    sys.stderr.write(fn + " is valid\n")
    return True
def get_gi2tax(folder=FOLDER):
    """Ensure ``<folder>/gi_taxid_nucl.dmp`` exists and return its path.

    If the file is absent, the resource named by ``GI2TAX_MAP_PATH`` is
    fetched with ``curl`` and decompressed with ``gzip -dc``. The output is
    written to a ``.part`` file and only renamed into place once the
    pipeline exits successfully, so an interrupted download is not mistaken
    for a complete map on the next run.

    Args:
        folder: Directory that holds (or will hold) the map; created if
            missing.

    Returns:
        Path of the decompressed map file.

    Raises:
        Whatever ``cc`` raises when the download pipeline exits non-zero;
        the partial file is removed first.
    """
    if not os.path.isdir(folder):
        os.makedirs(folder)
    target = folder + "/gi_taxid_nucl.dmp"
    if not os.path.isfile(target):
        partial = target + ".part"
        try:
            cc("curl %s | gzip -dc > %s" % (GI2TAX_MAP_PATH, partial),
               shell=True)
        except Exception:
            # The shell redirect creates the file even when the pipeline
            # fails; do not leave a truncated map behind.
            if os.path.isfile(partial):
                os.remove(partial)
            raise
        os.rename(partial, target)
    return target
def append_old_to_new(gi2tax_map, newmap, concat_map, folder=FOLDER):
    """Write name-to-taxid lines for the ``.fna`` files in *folder*, then append *newmap*.

    Every ``*.fna`` file found under ``folder`` has its first line split on
    ``|``; field 1 is used as the integer lookup key and field 3 as the
    sequence name. Keys present in ``gi2tax_map`` produce a
    ``name<TAB>taxid`` line in ``concat_map``. Afterwards the contents of
    ``newmap`` are appended to ``concat_map`` with ``cat``.

    Args:
        gi2tax_map: Mapping from integer key to integer taxid.
        newmap: Path of an existing map file to append.
        concat_map: Path of the combined output file (overwritten).
        folder: Directory searched (recursively, via ``find``) for ``.fna`` files.

    Returns:
        Tuple ``(concat_map, found)`` where ``found`` is the set of paths
        whose key was present in ``gi2tax_map``.
    """
    listing = co("find %s -name '*.fna'" % folder, shell=True)
    if isinstance(listing, bytes):
        listing = listing.decode()
    paths = listing.split()
    print("Length of accepted things: %i" % len(gi2tax_map))
    found, missing = set(), set()
    # The context manager closes the output even if a header fails to parse.
    with open(concat_map, "w") as ofh:
        for path in paths:
            sys.stderr.write("Processing path %s\n" % path)
            ptoks = xfirstline(path).split("|")
            name = ptoks[3]
            key = int(ptoks[1])
            try:
                val = gi2tax_map[key]
            except KeyError:
                missing.add(key)
                continue
            ofh.write("%s\t%i\n" % (name, val))
            found.add(path)
    print("Missing: " + str(missing))
    # Append only after the handle is closed so buffered lines are flushed first.
    cc("cat %s >> %s" % (newmap, concat_map), shell=True)
    return concat_map, found
def fetch_genomes(folder, names=NAMES):
    """Mirror the ``.fna`` / ``.fna.gz`` files of each named archive into *folder*.

    For every entry of ``names`` a ``wget`` mirror of
    ``ARCHIVE_BASE/<name>`` is stored under ``<folder>/<name>``. Once all
    names are processed, ``fetch_i100`` is invoked for the same folder.

    Args:
        folder: Destination root directory.
        names: Iterable of archive sub-directory names to download.
    """
    wget_template = ("wget -N -m -np -nd -e robots=off -P"
                     " %s/%s -A .fna,.fna.gz %s")
    for archive in names:
        source = "/".join([ARCHIVE_BASE, archive])
        command = wget_template % (folder, archive, source)
        cc(command, shell=True)
    fetch_i100(folder)
def main():
    """Download vendored client scripts and install yarn packages.

    Changes into the directory containing this file, fetches each vendor
    script through ``get_url``, then runs ``yarn add`` once in ``client``
    and once in ``server`` with their respective package lists.
    """
    os.chdir(Path(__file__).resolve().parent)

    # ----- Client -----
    vendor_assets = (
        ('client/static/vendor/mithril.min.js',
         'https://cdnjs.cloudflare.com/ajax/libs/mithril/2.0.0-rc.4/mithril.min.js'),
        ('client/static/vendor/lodash.min.js',
         'https://unpkg.com/lodash/lodash.min.js'),
        ('client/static/vendor/msgpack-lite.min.js',
         'https://cdnjs.cloudflare.com/ajax/libs/msgpack-lite/0.1.26/msgpack.min.js'),
        ('client/static/vendor/system.min.js',
         'https://raw.githubusercontent.com/systemjs/systemjs/master/dist/system.min.js'),
    )
    for destination, url in vendor_assets:
        get_url(destination, url)

    client_packages = [
        '@types/lodash',
        '@types/mithril',
        '@types/msgpack-lite',
    ]
    cc(['yarn', 'add'] + client_packages, cwd="client")

    # ----- Server -----
    server_packages = [
        '@types/node',
        'lodash', '@types/lodash',
        # Hyperscript for HTML templating
        'hyperscript', '@types/hyperscript',
        # Express-related modules
        'express', '@types/express',
        'body-parser', '@types/body-parser',
        'cookie-parser', '@types/cookie-parser',
        'express-session', '@types/express-session',
        'memorystore',  # no '@types/memorystore' is installed
        'csurf', '@types/csurf',
        # I always end up using websockets for something
        'ws', '@types/ws',
        'msgpack-lite', '@types/msgpack-lite',
    ]
    cc(['yarn', 'add'] + server_packages, cwd="server")
def get_acceptable_taxids(taxmap, path=PATH):
    """Collect the taxids reachable from every entry of a downloaded summary table.

    The resource at ``path`` is saved to ``tax_summary.txt`` with ``curl``.
    For each data row the fourth whitespace-separated field is read as an
    integer taxid; unless it is already in the result, the set returned by
    ``fill_set_from_tax(tax, taxmap)`` is merged into it.

    Args:
        taxmap: Taxonomy structure forwarded unchanged to ``fill_set_from_tax``.
        path: URL (or anything ``curl`` accepts) of the summary table.

    Returns:
        Set of accepted taxids.
    """
    ret = set()
    cc("curl %s > tax_summary.txt" % path, shell=True)
    with open("tax_summary.txt") as summary:
        for line in summary:
            toks = line.split()
            # Skip blank lines (previously an IndexError) and the header row.
            if not toks or toks[0] == "Accession":
                continue
            tax = int(toks[3])
            if tax not in ret:
                ret |= fill_set_from_tax(tax, taxmap)
    print("Size of ret: %i" % len(ret))
    print("Acceptable:", ret)
    # tax_summary.txt is intentionally left on disk.
    return ret
def make_output(paths, outpath):
    """Concatenate the files in *paths* into one file and delete the inputs if it validates.

    When ``outpath`` is empty, the name is derived from the first input:
    same directory, with the third ``_``-separated token of the file name
    replaced by ``LALL`` and the last token by ``ALL.fastq.gz``.

    Args:
        paths: Input file paths, concatenated in the given order with ``cat``.
        outpath: Destination path, or a falsy value to derive one.

    Returns:
        The output path, or None when no output path was given and
        ``paths`` is empty.
    """
    import os  # local import keeps this block self-contained

    if not outpath:
        if not paths:
            print("Missing input in folder")
            return None
        folder, filename = os.path.split(paths[0])
        toks = filename.split("_")
        toks[2] = "LALL"
        toks[-1] = "ALL.fastq.gz"
        # os.path.join keeps a bare file name relative; plain "/" concatenation
        # used to place the output in the filesystem root in that case.
        outpath = os.path.join(folder, "_".join(toks))
    cstr = "cat " + " ".join(paths) + " > " + outpath
    cc(cstr, shell=True)
    # Only discard the inputs once the combined archive passes validation.
    if is_valid_gzip(outpath):
        for path in paths:
            cc("rm -f " + path, shell=True)
    return outpath
def retry_cc(tup):
    """Run a shell command, retrying up to ten times on a non-zero exit.

    Args:
        tup: Pair ``(cstr, die)``. ``cstr`` is the shell command line;
            ``die`` selects what happens once every attempt has failed.

    Raises:
        Exception: If all attempts fail and ``die`` is truthy. Otherwise a
            message is written to stderr and the function returns None.
    """
    cstr, die = tup
    RETRY_LIMIT = 10
    for attempt in range(RETRY_LIMIT):
        print(cstr, file=sys.stderr)
        try:
            cc(cstr, shell=True)
        except CalledProcessError:
            print("retry number", attempt, file=sys.stderr)
        else:
            return
    if not die:
        sys.stderr.write("Could not download %s even after %i attempts"
                         % (cstr, RETRY_LIMIT))
        return
    raise Exception("Could not download via %s "
                    "even after %i attempts." % (cstr, RETRY_LIMIT))
def main():
    """Build the combined name-to-taxid map and record which genome paths were matched.

    Steps: obtain the gi-to-taxid map, optionally download genomes, compute
    the acceptable taxids, merge old and new maps, de-duplicate the merged
    file in place, and write the matched paths to ``args.found`` (or
    ``found_paths.txt`` when that option is empty).

    Returns:
        0 on completion.
    """
    args = getopts()
    gi2tax = get_gi2tax(args.folder)
    if args.no_download is False:
        fetch_genomes(args.folder)

    print("Getting acceptable taxids")
    taxmap = build_full_taxmap(args.taxonomy)
    acceptable_taxids = get_acceptable_taxids(taxmap)

    print("Appending old to new")
    accepted_map = parse_gi2tax(gi2tax, acceptable_taxids)
    concat, found = append_old_to_new(accepted_map,
                                      args.new_refseq_nameid_map,
                                      args.combined_nameid_map,
                                      args.folder)

    # Sort and de-duplicate via a temporary file, then move it back into place.
    dedup_cmd = "sort {0} | uniq > tmp.zomg && mv tmp.zomg {0}".format(concat)
    cc(dedup_cmd, shell=True)

    wc_fields = co("wc -l %s" % concat, shell=True).decode().split()
    nl = int(wc_fields[0])
    sys.stderr.write("Concatenated file of total lines "
                     "%i is written to %s.\n" % (nl, concat))

    found_path = args.found or "found_paths.txt"
    with open(found_path, "w") as f:
        f.writelines(path + "\n" for path in found)
    return 0
def _taxdump_command(tax_path, ref):
    """Return the shell command that fetches the taxonomy dump and moves nodes.dmp into *ref*."""
    return ("curl {tax_path} -o {ref}/"
            "taxdump.tgz && tar -zxvf {ref}/taxdump.tgz"
            " && mv nodes.dmp {ref}/nodes.dmp").format(tax_path=tax_path,
                                                      ref=ref)


def main():
    """Download the assemblies of the requested clades and write a name-to-id map.

    With ``nodes`` as the first command-line argument only ``nodes.dmp`` is
    fetched (if missing). Otherwise, for every selected clade:

    1. its ``assembly_summary.txt`` is downloaded unless already present,
    2. existing local files are checked with ``check_path`` (or
       ``check_path_lazy`` when ``args.lazy`` is set),
    3. files that are not present are downloaded through ``retry_cc``,
    4. file-name keys in the clade's id map are replaced by the first token
       of each file's first line (minus its leading character).

    The merged map is written to ``<ref>/<args.idmap>``.

    Returns:
        ``0`` for the ``nodes`` shortcut, otherwise ``ExitCodes.EXIT_SUCCESS``.
        Exits with ``ExitCodes.EXIT_FAILURE`` on an unknown clade name.
    """
    tax_path = TAX_PATH
    args = getopts()
    ref = args.ref if args.ref else "ref"
    # Create the reference directory first: the "nodes" shortcut below also
    # writes into it via `curl -o`.
    if not os.path.isdir(ref):
        os.makedirs(ref)
    if argv[1:] and argv[1] == "nodes":
        if not os.path.isfile("%s/nodes.dmp" % ref):
            cc(_taxdump_command(tax_path, ref), shell=True)
        return 0
    clades = args.clades if args.clades else DEFAULT_CLADES
    for clade in clades:
        # Explicit check instead of `assert`, which is stripped under -O.
        if clade not in ALL_CLADES_MAP and clade not in ("all", "default"):
            print("Clade %s not 'all', 'default', or one of the valid "
                  "clades: %s" % (clade, ALL_CLADES_STR), file=sys.stderr)
            sys.exit(ExitCodes.EXIT_FAILURE)
    to_dl = get_clade_map(clades)
    print("About to download clades %s" % ", ".join(to_dl), file=sys.stderr)
    nameidmap = {}
    checker = check_path_lazy if args.lazy else check_path
    for clade in to_dl:
        cladeidmap = {}
        clade_dir = ref + "/" + clade
        if not os.path.isdir(clade_dir):
            os.makedirs(clade_dir)
        summary = "%s/%s/as.%s.txt" % (ref, clade, clade)
        if not os.path.isfile(summary):
            cstr = ("curl %s/assembly_summary.txt "
                    "-o %s/%s/as.%s.txt") % (to_dl[clade], ref, clade, clade)
            print(cstr)
            cc(cstr, shell=True)
        to_dl[clade] = parse_assembly(summary, cladeidmap)
        # (remote source, local destination) for every assembly of the clade.
        targets = [(s, "/".join([ref, clade, s.split("/")[-1]]))
                   for s in to_dl[clade]]
        pool = multiprocessing.Pool(args.threads)
        try:
            # The checkers may delete invalid local files, so the list of
            # missing files is computed only after this pass.
            pool.map(checker, [local for _, local in targets])
            cstrs = ["curl %s -o %s" % (remote, local)
                     for remote, local in targets
                     if not os.path.isfile(local)]
            # If nodes.dmp hasn't been downloaded, grab it.
            if not os.path.isfile("%s/nodes.dmp" % ref):
                cstrs.append(_taxdump_command(tax_path, ref))
            pool.map(retry_cc, [(cs, args.die) for cs in cstrs])
        finally:
            # Release the worker processes before moving to the next clade.
            pool.close()
            pool.join()
        # Replace pathnames with seqids
        for fn in list(cladeidmap.keys()):
            try:
                header = xfirstline("/".join([ref, clade, fn]))
            except FileNotFoundError:
                if args.die:
                    raise
                continue
            seqid = header.decode().split()[0][1:]
            cladeidmap[seqid] = cladeidmap.pop(fn)
        nameidmap.update(cladeidmap)
    print("Done with all clades", file=sys.stderr)
    with open(ref + "/" + args.idmap, "w") as f:
        fw = f.write
        for k, v in nameidmap.items():
            fw(k + "\t" + str(v) + "\n")
    return ExitCodes.EXIT_SUCCESS
def check_path(fn, lazy=False):
    """Delete ``fn`` if it exists but fails the gzip validity check.

    Args:
        fn: Path of the file to inspect. A path that is not an existing
            regular file is left alone.
        lazy: Forwarded to ``is_valid_gzip``.
    """
    print("Checking path " + fn)
    if os.path.isfile(fn) and not is_valid_gzip(fn, lazy=lazy):
        # Remove in-process: no shell is spawned and file names containing
        # spaces or shell metacharacters are handled correctly.
        os.remove(fn)
def set_clipboard(test_str):
    """Run the program named by ``CB`` with ``test_str`` as its single argument."""
    command = [CB, test_str]
    cc(command)
_INDEX_PAGE_HEAD = '<!DOCTYPE html><html><head><meta content="charset=UTF-8"/><title>pi-ton</title></head><style>td {padding: 3px;}</style><body><center><div id="over"style="display:none;position:fixed;top:0%;left:0%;width:100%;height:100%;background-color:black;-moz-opacity:0.8;opacity:.80;filter:alpha(opacity=80);"><p id="result"style="color:red;margin-top:20%;font-weight:bolder;font-size:25px;">...</p></div><table style="margin-top:8%;"><th>Archivo</th><th>Tamaño</th><th style="width:150px;text-align: right;">Fecha modif.</th>'

_INDEX_ROW = '<tr><td style="text-align:left;"><a href="/xml/{}" download>{}</a></td><td style="text-align:right;">{} kB</td><td style="text-align:right;">{}</td></tr>'

_INDEX_PAGE_TAIL = '''<tr><td style="text-align:center;padding-top:25px;"><button onclick="go('/daily');">Daily</button></td><td></td><td style="text-align:center;padding-top:25px;"><button onclick="go('/hourly');">Hourly</button></td></tr></table></center><script type="text/javascript">function changetext(text){over=document.querySelector("#over");document.querySelector("#result").textContent=text;setTimeout(function(){over.style.display="none";location.reload();},2e3);}function go(cual){document.querySelector("#over").style.display="block";var xmlhttp=new XMLHttpRequest();xmlhttp.open("GET",cual);xmlhttp.onreadystatechange=function(){if(xmlhttp.readyState==4&&xmlhttp.status==200){changetext(xmlhttp.responseText);}else{changetext(xmlhttp.statusText+" "+xmlhttp.status);}};xmlhttp.send(null);}</script></body></html>'''


def _login_form(action):
    """Return the login page HTML with the form posting to *action*."""
    return ('<!DOCTYPE html><html><head><meta content="charset=UTF-8"/><title>pi-ton</title></head><body><center><form action="'
            + action
            + '"method="post"><input name="session"type="text"size="10"placeholder="And you are...?"style="margin-top:20%;text-align:center"autofocus required><input type="submit"value="Submit"style="display:none"></form></center></body></html>')


def _redirect(start_response, location, extra_headers=()):
    """Start a ``302 Found`` response to *location* and return its placeholder body."""
    start_response('302 Found', list(extra_headers) + [('Location', location)])
    # WSGI response bodies must be byte strings (PEP 3333).
    return [b'1']


def application(environ, start_response):
    """WSGI entry point: file index, login/logout, playlists and cron triggers.

    Routes (``its_me`` means the ``session`` cookie holds an owner value):

    * ``/``                 -- HTML index of the ``xml`` data directory (owner only).
    * ``/login``            -- POST sets the session cookie; otherwise shows the form.
    * ``/nextgp``           -- plain-text ``mcal().nextgptext`` (owner only).
    * ``/report``           -- append the request body to ``report.log``, or return
      the log newest-first when the body cannot be read (owner only).
    * ``/xml/<file>``       -- serve a file from the ``xml`` data directory (owner only).
    * ``/pls/<show>.pls``   -- playlist for the owner; recognised media players must
      send the expected ``Authorization`` header.
    * ``/pls/<show>.txt``   -- the same playlist as plain text (owner only).
    * ``/daily``, ``/hourly`` -- run the matching cron runner script (owner only).
    * ``/logout``           -- expire the session cookie.

    Anything else redirects to ``/`` (owner) or to the login page.

    Args:
        environ: WSGI environment dictionary.
        start_response: WSGI ``start_response`` callable.

    Returns:
        A list containing the response body as bytes.
    """
    # NOTE(review): the accepted session values and the Basic-auth token
    # below are hard-coded shared secrets, and the cookie value *is* the
    # password. Flagged for review; not changed here.
    its_me = False
    xiia = False
    auth = False
    vlc = False
    response_body = None
    path = os.path.normpath(environ['PATH_INFO'])
    data_dir = os.environ['OPENSHIFT_DATA_DIR']
    files = sorted(os.listdir(data_dir + 'xml'),
                   key=lambda x: (x.split('.')[-1], x.lower()))
    shows = getpls(None).allpro
    redirect = None
    leaf = path.split('/')[-1]

    if 'HTTP_COOKIE' in environ:
        rcookie = SimpleCookie(environ['HTTP_COOKIE'])
        # Membership test is grouped explicitly: the old `a and b or c` form
        # raised KeyError for cookies without a `session` entry.
        if 'session' in rcookie and rcookie['session'].value in ('ItsMe', 'itsme'):
            its_me = True

    if 'HTTP_USER_AGENT' in environ:
        agent = environ['HTTP_USER_AGENT']
        authorized = (environ.get('HTTP_AUTHORIZATION', '').split(' ')[-1]
                      == 'cGktdG9uOmVsY2Fsb3JldA==')
        if 'Dalvik/' in agent or 'Lavf/' in agent:
            xiia = True
            auth = authorized
        elif 'LibVL' in agent:
            xiia = True
            vlc = authorized

    query = environ.get('QUERY_STRING', '')
    if query.startswith('redirect='):
        redirect = os.path.normpath(query)

    if path == '/' and its_me is True:
        rows = []
        for f in files:
            if f.startswith('.'):
                continue
            info = os.stat(data_dir + 'xml/' + f)
            rows.append(_INDEX_ROW.format(
                f, f, round(info.st_size / 1024, 1),
                strftime('%-d/%m at %H:%M', localtime(info.st_mtime))))
        response_body = ''.join([_INDEX_PAGE_HEAD] + rows + [_INDEX_PAGE_TAIL])
        ctype = 'text/html; charset=UTF-8'
    elif path == '/login' and its_me is False:
        try:
            length = int(environ['CONTENT_LENGTH'])
            pwd = environ['wsgi.input'].read(length).decode().replace('session=', '')
            if pwd in ('ItsMe', 'itsme', 'malonso'):
                cookie = SimpleCookie()
                cookie['session'] = pwd
                cookie['session']['path'] = '/'
                cookie['session']['max-age'] = '864000'
                if pwd == 'malonso':
                    location = '/nextgp'
                elif redirect is None or redirect == '/':
                    location = '/'
                else:
                    # NOTE(review): the target comes straight from the query
                    # string (open redirect); left as-is.
                    location = '{}'.format(parse_qs(redirect)['redirect'][0])
                return _redirect(start_response, location,
                                 [('Set-Cookie', cookie['session'].OutputString())])
        except Exception:
            # No usable POST body (e.g. a plain GET) or a malformed redirect
            # parameter: fall back to showing the login form.
            pass
        response_body = _login_form('')
        ctype = 'text/html; charset=UTF-8'
    elif path == '/nextgp' and its_me is True:
        response_body = mcal().nextgptext
        ctype = 'text/plain; charset=UTF-8'
    elif path == '/report' and its_me is True:
        log_path = os.environ['OPENSHIFT_LOG_DIR'] + 'report.log'
        try:
            length = int(environ['CONTENT_LENGTH'])
            with open(log_path, 'a') as log:
                log.write(environ['wsgi.input'].read(length).decode() + '\n')
            response_body = 'ok'
        except Exception:
            # Nothing could be appended: return the log instead, newest first.
            with open(log_path, 'r') as log:
                response_body = ''.join(reversed(log.readlines()))
        ctype = 'text/plain; charset=UTF-8'
    elif path.startswith('/xml/') and leaf in files and its_me is True:
        with open(data_dir + 'xml/' + leaf, 'r') as handle:
            response_body = handle.read()
        ctypes = {'json': 'application/json; charset=UTF-8',
                  'xml': 'application/xml; charset=UTF-8'}
        # Other extensions used to raise KeyError; serve them as plain text.
        ctype = ctypes.get(path.split('.')[-1], 'text/plain; charset=UTF-8')
    elif path.startswith('/pls/') and path.endswith('.pls') and leaf.replace('.pls', '') in shows:
        show = leaf.replace('.pls', '')
        if its_me is True:
            ctype = 'audio/x-scpls'
            response_body = getpls(show).joinedpls
        elif xiia is True:
            if auth is True:
                # Redirect straight to the stream URL on the second playlist line.
                location = getpls(show).joinedpls.split('\n')[1].replace('File1=', '')
                return _redirect(start_response, location)
            elif vlc is True:
                response_body = getpls(show).joinedpls
                start_response('200 OK', [('Content-Type', 'audio/x-scpls')])
                return [response_body.encode()]
            else:
                response_body = _login_form('/login')
                payload = response_body.encode('utf8')
                response_headers = [('content-type', 'text/html; charset=UTF-8'),
                                    ('content-length', str(len(payload))),
                                    ('WWW-Authenticate', 'Basic realm="pls@pi-ton"')]
                start_response('401 Unauthorized', response_headers)
                return [payload]
        else:
            if redirect is None:
                return _redirect(start_response, '/')
            return _redirect(start_response, '/login?redirect={}'.format(path))
    elif path.startswith('/pls/') and path.endswith('.txt') and leaf.replace('.txt', '') in shows:
        if its_me is True:
            ctype = 'text/plain; charset=UTF-8'
            response_body = getpls(leaf.replace('.txt', '')).joinedpls
        else:
            if redirect is None:
                return _redirect(start_response, '/')
            return _redirect(start_response, '/login?redirect={}'.format(path))
    elif path in ('/daily', '/hourly') and its_me is True:
        # The owner check now covers both paths; `a or b and c` previously
        # let anyone trigger /daily.
        # NOTE(review): if `cc` is subprocess.check_call, a failing runner
        # raises instead of returning non-zero, so 'fail' is never sent.
        sp = cc(['sh',
                 './app-root/repo/.openshift/cron/{}/runner'.format(path.replace('/', '')),
                 'echo'])
        response_body = 'fail'
        if sp == 0:
            response_body = 'ok'
        ctype = 'text/plain; charset=UTF-8'
    elif path == '/logout':
        if 'HTTP_COOKIE' in environ:
            dcookie = SimpleCookie(environ['HTTP_COOKIE'])
            if 'session' in dcookie and dcookie['session'].value in ('ItsMe', 'itsme', 'malonso'):
                # The attribute value must be the date only; the serialiser
                # adds the "expires=" prefix itself.
                dcookie['session']['expires'] = 'Thu, 01 Jan 1970 00:00:00 GMT'
                return _redirect(start_response, '/login',
                                 [('Set-Cookie', dcookie['session'].OutputString())])
        # No recognised session to clear: send the client to the login page
        # rather than falling through without a response.
        return _redirect(start_response, '/login')
    else:
        if its_me is True:
            if redirect is None:
                return _redirect(start_response, '/')
            return _redirect(start_response,
                             '{}'.format(parse_qs(redirect)['redirect'][0]))
        if path == '/':
            return _redirect(start_response, '/login')
        return _redirect(start_response, '/login?redirect={}'.format(path))

    # Every branch that reaches this point has set response_body and ctype.
    status = '200 OK'
    payload = response_body.encode('utf8')
    if ctype == 'audio/x-scpls':
        # Playlists are sent without a Content-Length header.
        start_response(status, [('Content-Type', ctype)])
        return [payload]
    response_headers = [('Content-Type', ctype),
                        ('Content-Length', str(len(payload)))]
    start_response(status, response_headers)
    return [payload]
def fetch_i100(folder):
    """Mirror the ``.gz`` files of the simulated-data site into ``<folder>/i100``.

    Args:
        folder: Parent directory; the ``i100`` sub-directory is created if
            it does not exist yet.
    """
    destination = folder + "/i100"
    if not os.path.isdir(destination):
        os.makedirs(destination)
    wget_template = ("wget -N -m -np -nd -e robots=off -P %s/i100 -A .gz "
                     "http://www.bork.embl.de/~mende/simulated_data/")
    cc(wget_template % folder, shell=True)