def download(self):
    """ Downloads the zipped data archive and extracts it into the data directory """
    import zipfile
    import urllib
    dfile = config.path("..", "data", self.datafile.upper(),
                        self.datafile.upper() + ".zip")
    urllib.urlretrieve(config.download % self.datafile.lower(), dfile)
    with zipfile.ZipFile(dfile) as zf:
        zf.extractall(config.path("..", "data", self.datafile.upper(), "data"))
def visualize_with_json(args, vis_coords, sequences, strand, result_coords,
                        fasta_sequence, targets):  # new function
    cutcoords = get_coordinates_for_json_visualization(args, vis_coords,
                                                       sequences, strand,
                                                       result_coords)

    info = open(f"{args.output_dir}/run.info", 'w')
    info.write("%s\t%s\t%s\t%s\t%s\n" % ("".join(args.targets), args.genome,
                                         args.program_mode,
                                         args.unique_method_cong,
                                         args.guide_size))
    info.close()

    if args.bed:
        print_bed(args.program_mode, vis_coords, cutcoords,
                  f"{args.output_dir}/results.bed",
                  vis_coords[0]["name"] if args.fasta else args.targets)

    if args.gen_bank:
        if args.fasta:
            seq = fasta_sequence
            chrom = vis_coords[0]["name"]
            start = 0
            finish = len(fasta_sequence)
        else:
            # targets min-max (with introns)
            regions = targets
            chrom = regions[0][0:regions[0].rfind(':')]
            start = []
            finish = []
            targets = []
            for region in regions:
                start_r = int(region[region.rfind(':') + 1:region.rfind('-')])
                start_r = max(start_r, 0)
                start.append(start_r)
                finish_r = int(region[region.rfind('-') + 1:])
                finish.append(finish_r)
                targets.append([chrom, start_r, finish_r])
            start = min(start)
            finish = max(finish)

            prog = subprocess.Popen("%s -seq=%s -start=%d -end=%d %s/%s.2bit stdout 2> %s/twoBitToFa.err" % (
                config.path("TWOBITTOFA"), chrom, start, finish,
                config.path("TWOBIT_INDEX_DIR") if not config.isoforms else config.path("ISOFORMS_INDEX_DIR"),
                args.genome, args.output_dir), stdout=subprocess.PIPE, shell=True)
            output = prog.communicate()
            if prog.returncode != 0:
                sys.stderr.write("Running twoBitToFa failed when creating GenBank file\n")
                sys.exit(EXIT['TWOBITTOFA_ERROR'])

            output = output[0].decode()
            output = output.split("\n")
            seq = ''.join(output[1:]).upper()

        print_genbank(args.program_mode, chrom if args.fasta else args.targets,
                      seq, [] if args.fasta else targets, cutcoords, chrom,
                      start, finish, strand, f"{args.output_dir}/results.gb",
                      "CHOPCHOP results")
def __init__(self, datafile=""):
    self.datafile = datafile
    self.features = {}
    self.tags = []
    self.costs = []
    self.categorical = []
    self.continuous = []
    self.parseCodebook()
    self.varTables = config.get(config.path("..", "data", datafile, "data", "varTables.p"),
                                gf.read_tables, datafile=datafile)
    self.titleMap = config.get(config.path("..", "data", datafile, "data", "table_map.p"),
                               self.writeTables)
    self.filterIDS()
    self.writeDataCSV()
    self.getCostFeatures()
def writeDataCSV(self):
    """ Downloads the data if needed and converts the fixed-width .dat file to CSV """
    def download(self):
        import zipfile
        import urllib
        dfile = config.path("..", "data", self.datafile.upper(),
                            self.datafile.upper() + ".zip")
        urllib.urlretrieve(config.download % self.datafile.lower(), dfile)
        with zipfile.ZipFile(dfile) as zf:
            zf.extractall(config.path("..", "data", self.datafile.upper(), "data"))

    path = config.path("..", "data", self.datafile, "data", self.datafile.lower())
    if not config.os.path.exists(path + ".dat"):
        download(self)
    if config.os.path.exists(path + ".csv"):
        return

    # one "%s" per character, grouped by field start columns and joined with
    # commas: formatting a fixed-width line against this string yields CSV
    indices = [self.features[tag][0] for tag in self.tags]
    printFormat = "".join(["%s" * (high - low) + ","
                           for low, high in zip(indices, indices[1:])])

    # Categorical Mapper Path
    with open(path + ".csv", 'wb') as g:
        with open(path + ".dat", 'rb') as f:
            # the first line is consumed here to size the trailing field
            format_ = printFormat + "%s" * (len(f.readline().strip()) - indices[-1] + 1)
            for line in f:
                values = (format_ % (tuple(line.strip()))).split(",")
                for i, value in enumerate(values):
                    # normalise numeric fields; leave everything else as the raw string
                    try:
                        values[i] = str(float(values[i]))
                    except:
                        values[i] = str(values[i])
                g.write(",".join(values) + "\n")
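# A minimal sketch of the fixed-width trick used by writeDataCSV, with
# hypothetical field offsets (not from the real codebook): one "%s" per
# character, grouped per field and joined by commas, so formatting against
# tuple(line) emits a CSV row.
indices = [0, 2, 5]                       # assumed field start columns
fmt = "".join(["%s" * (high - low) + "," for low, high in zip(indices, indices[1:])])
fmt += "%s" * (8 - indices[-1])           # trailing field for an 8-char record
print(fmt % tuple("AB123XYZ"))            # -> "AB,123,XYZ"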
def read_tables(datafile):
    """
    From get_features.py
    Parses the HTML as plain text
    Returns dictionary of {titles: variables}
    """
    # assumed imports: re and BeautifulSoup (as Soup) from bs4, since the
    # find_all / find_previous_sibling calls below are the bs4 API
    import re
    from bs4 import BeautifulSoup as Soup

    path = config.path("..", "data", datafile, "data", "tables.txt")
    if not config.os.path.exists(path):
        page = download(datafile)
    else:
        with open(path, 'rb') as f:
            page = f.read()

    # narrow the page to the variable-source crosswalk section
    start = page.find("<a name=\"DVariable\">")
    if start == -1:
        start = page.rfind("Variable-Source Crosswalk</a>")
    page = page[start:]
    end = page.rfind("<a name=\"Appendix")

    soup = Soup(page[:end])
    titles, tables = [], []
    found_tables = soup.find_all("table",
                                 summary=re.compile("This table identifies the variable .*"))
    for table in found_tables:
        title = table.caption if table.caption != None else \
            table.find_previous_sibling("p", {"class": "contentStyle"})
        titles.append(title.text.encode("utf-8"))
        tables.append([var.text.encode("utf-8") for var in table.find_all("th")[3:]])

    if not (len(titles) == len(tables) and titles != [] and [] not in tables):
        return False
    return dict(zip(titles, tables))
def download(datafile):
    """
    From get_features.py
    Downloads the documentation as text file from HTML
    """
    try:
        page = urllib2.urlopen(config.tables.format(datafile.lower())).read()
    except:
        print "HTTP Failed: Check your connection to the internet or check the name of the datafile"
        sys.exit()
    with open(config.path("..", "data", datafile, "data", "tables.txt"), 'wb') as f:
        f.write(page)
    return page
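# Hedged usage sketch: read_tables caches the crosswalk HTML on disk (via
# download above) and maps each table title to its variable tags; "H147" is
# the MEPS datafile name the other snippets reference.
tables = read_tables("H147")
if tables:
    for title, variables in tables.items():
        print("%s: %d variables" % (title, len(variables)))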
def writeFeatureImportance(model, trainFeature, datafile):
    """
    Formats and prints the importance of each feature
    author: Jazmin
    TODO: right now it gets the actual name of the features in a
          HORRIBLE NOT EFFICIENT WAY; make it better
    """
    importances = zip(range(trainFeature.shape[1]), model.feature_importances_)
    importances.sort(key=itemgetter(1))
    with open(config.path("..", "data", datafile, "featureImportance.py"), "wb") as f:
        f.write("importance = ")
        for featureIndex, importance in importances[::-1]:
            variable = config.feature_dict["H147"][featureIndex]
            f.write(" " + str(variable) + " " + str(importance) + " " +
                    str(dc.getData(datafile)[0][variable][0]) + " \n")
def set_from_config(kwargs):
    if kwargs["config"] is None:
        config_path = config.path()
    else:
        config_path = kwargs["config"]

    kwargs["config_path"] = config_path
    kwargs["config"] = config.read(kwargs["config_path"])

    keys = {"paths": [("prefs", "prefs_root", True),
                      ("run_info", "run_info", True)],
            "web-platform-tests": [("remote_url", "remote_url", False),
                                   ("branch", "branch", False),
                                   ("sync_path", "sync_path", True)],
            "SSL": [("openssl_binary", "openssl_binary", True),
                    ("certutil_binary", "certutil_binary", True),
                    ("ca_cert_path", "ca_cert_path", True),
                    ("host_cert_path", "host_cert_path", True),
                    ("host_key_path", "host_key_path", True)]}

    for section, values in keys.iteritems():
        for config_value, kw_value, is_path in values:
            if kw_value in kwargs and kwargs[kw_value] is None:
                if not is_path:
                    new_value = kwargs["config"].get(section, config.ConfigDict({})).get(config_value)
                else:
                    new_value = kwargs["config"].get(section, config.ConfigDict({})).get_path(config_value)
                kwargs[kw_value] = new_value

    kwargs["test_paths"] = get_test_paths(kwargs["config"])

    if kwargs["tests_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["tests_path"] = kwargs["tests_root"]

    if kwargs["metadata_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["metadata_path"] = kwargs["metadata_root"]

    if kwargs.get("manifest_path"):
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["manifest_path"] = kwargs["manifest_path"]

    kwargs["suite_name"] = kwargs["config"].get("web-platform-tests", {}).get("name", "web-platform-tests")

    check_paths(kwargs)
def set_from_config(kwargs):
    if kwargs["config"] is None:
        config_path = config.path()
    else:
        config_path = kwargs["config"]

    kwargs["config_path"] = config_path
    kwargs["config"] = config.read(kwargs["config_path"])

    kwargs["test_paths"] = OrderedDict()

    keys = {"paths": [("serve", "serve_root", True),
                      ("prefs", "prefs_root", True),
                      ("run_info", "run_info", True)],
            "web-platform-tests": [("remote_url", "remote_url", False),
                                   ("branch", "branch", False),
                                   ("sync_path", "sync_path", True)]}

    for section, values in keys.iteritems():
        for config_value, kw_value, is_path in values:
            if kw_value in kwargs and kwargs[kw_value] is None:
                if not is_path:
                    new_value = kwargs["config"].get(section, {}).get(config_value)
                else:
                    new_value = kwargs["config"].get(section, {}).get_path(config_value)
                kwargs[kw_value] = new_value

    # Set up test_paths
    for section in kwargs["config"].iterkeys():
        if section.startswith("manifest:"):
            manifest_opts = kwargs["config"].get(section)
            url_base = manifest_opts.get("url_base", "/")
            kwargs["test_paths"][url_base] = {
                "tests_path": manifest_opts.get_path("tests"),
                "metadata_path": manifest_opts.get_path("metadata")}

    if kwargs["tests_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["tests_path"] = kwargs["tests_root"]

    if kwargs["metadata_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["metadata_path"] = kwargs["metadata_root"]
def writeFeatures(costFeature, datafile, importance, tags):
    """
    Writes feature importances to file in order of importance
    Saves to pickle file for use in future modelling
    Takes in costFeature index of d.tags
    Takes in the model
    Returns the costFeature, sorted list of feature indices based on importance
    """
    sortedFeatures = sorted(zip(tags, list(importance)), key=(lambda x: -x[1]))
    with open(config.path("..", "data", datafile, "features", "importances",
                          "%s.txt" % (costFeature)), 'wb') as f:
        for feature, importance in sortedFeatures:
            write = "%s#%f\n" % (feature, importance)
            # "#" is a placeholder replaced with padding to align the scores
            f.write(write.replace("#", (24 - len(write)) * " "))
    return sortedFeatures
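# A minimal sketch of the column-alignment trick above: "#" is a throwaway
# placeholder, replaced by just enough spaces to pad each row toward a fixed
# width of 24. "AGE12X" is a hypothetical feature tag used only for illustration.
row = "%s#%f\n" % ("AGE12X", 0.042)
print(row.replace("#", (24 - len(row)) * " "))   # -> "AGE12X        0.042000"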
def writeTables(self):
    """
    In data.py
    Writing tables to file for user to reference
    """
    path = config.path("..", "data", self.datafile, "data", "variables.txt")
    if config.os.path.exists(path):
        return
    with open(path, 'wb') as f:
        f.write("Variables found for data set %s\n" % self.datafile)
        i = 0
        varMap = {}
        for title, tables in self.varTables.items():
            f.write("\n\n=== %s :: %s ===\n" % (string.letters[i].upper(), title))
            f.write("\n".join(["\t%s%s%s" % (tag, (18 - len(tag)) * " ", self.features[tag][1])
                               for tag in tables if tag in self.features]))
            varMap[string.letters[i].upper()] = (title, [tag for tag in tables if tag in self.features])
            i += 1
    return varMap
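# The variables.txt layout the loop above produces, shown for a hypothetical
# first section (the section title and the two tags are illustrative, not
# taken from a real codebook):
#
#   Variables found for data set H147
#
#   === A :: CONDITION VARIABLES ===
#   	AGE12X            Age as of 12/31/12
#   	SEX               Sex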
def set_from_config(kwargs):
    if kwargs["config"] is None:
        kwargs["config"] = config.path()

    kwargs["config"] = config.read(kwargs["config"])

    keys = {"paths": [("tests", "tests_root", True),
                      ("metadata", "metadata_root", True)],
            "web-platform-tests": [("remote_url", "remote_url", False),
                                   ("branch", "branch", False),
                                   ("sync_path", "sync_path", True)]}

    for section, values in keys.iteritems():
        for config_value, kw_value, is_path in values:
            if kw_value in kwargs and kwargs[kw_value] is None:
                if not is_path:
                    new_value = kwargs["config"].get(section, {}).get(config_value, None)
                else:
                    new_value = kwargs["config"].get(section, {}).get_path(config_value)
                kwargs[kw_value] = new_value
def set_from_config(kwargs):
    if kwargs["config"] is None:
        config_path = config.path()
    else:
        config_path = kwargs["config"]

    kwargs["config_path"] = config_path
    kwargs["config"] = config.read(kwargs["config_path"])

    keys = {"paths": [("serve", "serve_root", True),
                      ("prefs", "prefs_root", True),
                      ("run_info", "run_info", True)],
            "web-platform-tests": [("remote_url", "remote_url", False),
                                   ("branch", "branch", False),
                                   ("sync_path", "sync_path", True)]}

    for section, values in keys.iteritems():
        for config_value, kw_value, is_path in values:
            if kw_value in kwargs and kwargs[kw_value] is None:
                if not is_path:
                    new_value = kwargs["config"].get(section, {}).get(config_value)
                else:
                    new_value = kwargs["config"].get(section, {}).get_path(config_value)
                kwargs[kw_value] = new_value

    kwargs["test_paths"] = get_test_paths(kwargs["config"])

    if kwargs["tests_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["tests_path"] = kwargs["tests_root"]

    if kwargs["metadata_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["metadata_path"] = kwargs["metadata_root"]
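# A minimal sketch of the defaulting pattern the set_from_config variants
# above share, using a plain dict in place of wptrunner's ConfigDict (so
# get_path, assumed to also resolve paths relative to the config file, is
# simplified to a plain get here):
def fill_defaults(kwargs, cfg, keys):
    for section, values in keys.iteritems():
        for config_value, kw_value, is_path in values:
            # command-line values win; only None kwargs fall back to the config
            if kw_value in kwargs and kwargs[kw_value] is None:
                kwargs[kw_value] = cfg.get(section, {}).get(config_value)

kwargs = {"branch": None, "sync_path": "/tmp/sync"}
cfg = {"web-platform-tests": {"branch": "master"}}
fill_defaults(kwargs, cfg,
              {"web-platform-tests": [("branch", "branch", False),
                                      ("sync_path", "sync_path", True)]})
# kwargs["branch"] -> "master"; sync_path keeps the value supplied by the CLI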
help = "number of trees to use for decision tree algorithms") parse.add_option("-l", "--lookup", dest = "lookup", default = "", help = "looks up specific variable and prints descriptions and values") parse.add_option("-u", "--use", dest = "model", default = "", help = "use an extracted model to predict costs") parse.add_option("-e", "--extract", dest = "extract", default = "", help = "target cost to create a model for future use") (options, args) = parse.parse_args() output = sys.stdout if options.tables != "none": print "Looking up tables, please wait..." # sys.stdout = open("runOutput.txt", 'wb') d = config.get(config.path("..","data",options.datafile,"data","dHandler.p"), dc.Data, datafile = options.datafile) print "Cost Features:\n%s" % "\n".join([d.tags[tag] for tag in d.costs]) # sys.stdout = output variable_lookup(d, options.tables) sys.exit() if options.lookup != "": import feature_lookup as fl d = config.get(config.path("..","data",options.datafile,"data","dHandler.p"), dc.Data, datafile = options.datafile) print "Looking up feature, please wait..." # sys.stdout = open("runOutput.txt", 'wb') # sys.stdout = output print "=======================================" print fl.getDetails(options.datafile, options.lookup)["Description"] print print fl.getDetails(options.datafile, options.lookup)["Values"]
#!/usr/bin/env python

"""
Jappix Me - Your public profile, anywhere
Pending profile checker

License: AGPL
Author: Valerian Saliou
"""

import xmpp, os, shutil, time, phpserialize, config

BASE_DIR = config.path()


###############
### MESSAGE ###
###############

def message_app_send(session, user, body, app_data):
    url = xmpp.Node("url", payload=[app_data["url"]])
    action = xmpp.Node(
        "action",
        attrs={"type": app_data["type"], "job": app_data["job"], "success": app_data["success"]}
    )
    data = xmpp.Node("data", attrs={"xmlns": "jappix:app:" + app_data["id"]},
                     payload=[action, url])
    name = xmpp.Node("name", attrs={"id": app_data["id"]}, payload=[app_data["name"]])
from redis import Redis
from flask import Flask, session, redirect, url_for, render_template, request

from pushchat.gravatar import get_gravatar
from pushchat.validators import validate_email
from pushchat import publisher

import config

app = Flask(__name__,
            template_folder=config.path('templates'),
            static_folder=config.path('static'))
redis = None


def get_post(post_id):
    """Given a post id, return a dictionary with the body and avatar url."""
    post = redis.get('post:%s' % (post_id,))
    if post:
        user, post = post.split('|', 1)
        post = unicode(post, errors='ignore')
        return dict(body=post, user=get_gravatar(user, size=16))


def set_post(post):
    """Store a post and push it onto the global timeline."""
    if 'email' not in session:
        return redirect(url_for('.login'))
    post_id = redis.incr('last-post-id')
    post = post[:140]  # silently truncate!
    redis.set('post:%d' % (post_id,), '%s|%s' % (session['email'], post))
    redis.lpush('global:timeline', post_id)
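# Hedged sketch of the "email|body" encoding shared by set_post/get_post:
# splitting on the first "|" recovers the poster even when the post body
# itself contains "|" characters (the address here is purely illustrative).
raw = 'alice@example.com|hello|world'
user, body = raw.split('|', 1)
# user == 'alice@example.com'; body == 'hello|world'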
def __str__(self):
    return open(config.path("..", "data", self.datafile, "data",
                            "variables.txt"), 'rb').read()
def parseCodebook(self):
    """
    Given the datafile name, returns the codebook needed
    author: chris
    """
    import urllib2, unicodedata

    def download(path):
        page = urllib2.urlopen(config.codebook.format(self.datafile.lower())).read()
        with open(path, 'wb') as f:
            f.write(page)
        return page

    path = config.path("..", "data", self.datafile, "data", "codebook.txt")
    if not config.os.path.exists(path):
        page = download(path)
    else:
        with open(path, 'rb') as f:
            page = f.read()

    # split the codebook into its four SAS statement sections
    _input = page.find("* INPUT STATEMENTS;")
    _format = page.find("* FORMAT STATEMENTS;")
    _label = page.find("* LABEL STATEMENTS;")
    _value = page.find("* VALUE STATEMENTS;")
    indices = page[_input:_format]
    mapping = page[_format:_label]
    desc = page[_label:_value]
    values = page[_value:]

    # INPUT: record each variable tag and its starting column
    for line in indices.split("\n")[3:]:
        if line.strip() == ";":
            break
        split = line.split()
        self.tags.append(split[-2].strip())
        self.features[split[-2].strip()] = [int(split[-3].strip()[1:])]

    # LABEL: attach the human-readable description to each tag
    for line in desc.split("\n")[1:]:
        if line.strip() == ";":
            break
        split = line.split("=")
        self.features[split[0].strip().split()[-1]].append(split[1].strip())

    # FORMAT: map each format name back to its variable tag
    mapper = {}
    for line in mapping.split("\n")[1:]:
        if line.strip() == ";":
            break
        split = line.split()
        mapper[split[-1].strip()[:-1]] = split[-2].strip()

    # VALUE: collect the value labels and classify each variable as
    # continuous (numeric/dollar ranges) or categorical
    tag = ""
    value_list = []
    count = 0
    cost_tags = [self.tags[cost] for cost in self.costs]
    for line in values.split("\n")[1:]:
        if line.strip() == "":
            continue
        if "VALUE" in line[:6]:
            tag = mapper[line.split()[1].strip()]
            continue
        if "=" in line:
            split = line.split("=")
            value_list.append((split[0].strip(), split[1].strip()))
        if ";" == line.strip()[0]:
            check = value_list[-1][-1]
            if "-" in check and check.split("-")[-1].strip()[0] in \
                    ["$", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]:
                self.continuous.append(self.tags.index(tag))
            else:
                self.categorical.append(self.tags.index(tag))
            self.features[tag].append(value_list)
            value_list = []
            continue
    return
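# A hypothetical sketch of the codebook shape parseCodebook walks: the four
# "* ... STATEMENTS;" markers are the ones the code searches for, but the
# variable lines below are illustrative guesses at SAS-style statements, not
# copied from an actual MEPS codebook (line offsets may differ in real files).
SAMPLE_CODEBOOK = """\
* INPUT STATEMENTS;
  @1   DUID   5.
;
* FORMAT STATEMENTS;
  DUID  DUIDF.
;
* LABEL STATEMENTS;
  DUID = 'DWELLING UNIT ID'
;
* VALUE STATEMENTS;
  VALUE DUIDF
  1 - 99999 = '1 - 99999'
;
"""
# from a fragment like this, the parser would record DUID starting at column
# 1, label it 'DWELLING UNIT ID', and classify it as continuous because its
# value range ends in a digit.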