def create_term_file(self):
    kwargs = {}
    if 'oldCols' in self.options:
        kwargs['names'] = self.options['oldCols'].split(',')
    if 'read_csv' in self.options:
        for kv_str in self.options['read_csv'].split(','):
            kv = kv_str.split('=')
            kwargs[kv[0]] = kv[1]
            if kv[1] == 'None':
                kwargs[kv[0]] = None
            if kv[0].lower() == 'skiprows':
                kwargs[kv[0]] = int(kv[1])
    iter_csv = util.read_csv(self.fn_source, iterator=True,
                             chunksize=self.get_chunksize(), dtype=str, **kwargs)
    term_id_col = 'term_id' if 'term_id' not in self.column_map else self.column_map['term_id']
    term_ids = []
    for chunk in iter_csv:
        term_ids += util.unique(chunk[term_id_col])
    term_ids = util.unique(term_ids)
    with open(self.fn_dest, "w") as myfile:
        wr = csv.writer(myfile)
        wr.writerow(['term_id', 'term_name', 'term_type'])
        wr.writerows([[term_id,
                       self.term_name if self.term_name else term_id,
                       self.options['typeName']]
                      for term_id in term_ids])
def format(data):
    """Takes a list of MetricResults."""
    # Distinct values in data for forest plot variables:
    forestcombinations = util.combinations(
        [util.unique(x, data) for x in config.fgraphs])
    forestcombinations = [dict(zip(config.fgraphs, x)) for x in forestcombinations]
    forestcombinations = filter(lambda x: x["benchmark"] in config.forests,
                                forestcombinations)
    # Distinct values in data for bar chart variables:
    barcombinations = util.combinations(
        [util.unique(x, data) for x in config.graphs])
    barcombinations = [dict(zip(config.graphs, x)) for x in barcombinations]
    barcombinations = filter(lambda x: not x["benchmark"] in config.forests,
                             barcombinations)
    # Filter out omitted configurations.
    allcombinations = forestcombinations + barcombinations
    for omit in config.omit:
        sieve = lambda x: not util.all(
            [x[y] == omit[y] for y in omit.keys() if x.has_key(y)])
        allcombinations = filter(sieve, allcombinations)
    # Create the graph objects.
    figures = []
    for configuration in allcombinations:
        if configuration in forestcombinations:
            graphtype = ForestPlot
        else:
            graphtype = BarChart
        sorteddata = graphtype.sortdata(data, configuration)
        if sorteddata:
            graph = graphtype(sorteddata)
        else:
            continue
        graph.create()
        figures.append(graph)
    return figures
def do_one_chunk(self, chunk):
    rows = []
    data_key_col = self.key_col if self.key_col not in self.column_map else self.column_map[self.key_col]
    data_value_col = self.value_col if self.value_col not in self.column_map else self.column_map[self.value_col]
    tax_id_col = 'tax_id' if 'tax_id' not in self.column_map else self.column_map['tax_id']
    for k, g in chunk.groupby(data_key_col, as_index=False):
        #Tracer()()
        r = {}
        r[self.key_col] = self.get_term_prefix(self.key_col) + str(k)
        allids = self.get_term_prefix(self.value_col) + g[data_value_col].astype(str)
        if self.value_col == 'gid':
            allids = [x for x in util.unique(allids) if str(x).isdigit()]
        else:
            allids = util.unique(allids)
        r[self.new_value_col] = ','.join(allids)
        r['id_count'] = len(allids)
        r['ds'] = self.ds
        if hasattr(g.iloc[0], tax_id_col):
            all_tax_ids = g[tax_id_col].astype(str)
            r['tax_id'] = ','.join(util.unique(all_tax_ids))
        else:
            r['tax_id'] = '9606'
        rows.append(r)
    return rows
def _input_outputs(self, canonicalize=identity):
    """ Find the inputs and outputs of the complete computation """
    allin = map(canonicalize,
                unique(chain(*[c.inputs for c in self.computations])))
    allout = map(canonicalize,
                 unique(chain(*[c.outputs for c in self.computations])))

    inputs = remove(allout.__contains__, allin)
    outputs = remove(allin.__contains__, allout)

    ident_inputs = [i for c in self.computations if isinstance(c, Identity)
                      for i in c.inputs]
    ident_outputs = [o for c in self.computations if isinstance(c, Identity)
                       for o in c.outputs]

    return tuple(inputs + ident_inputs), tuple(outputs + ident_outputs)
def duplicates(self, tuples):
    """
    Takes a list of tuples, and for each tuple that occurs multiple times
    marks all but one of the occurrences (in the mask that is returned).

    :param tuples: A size (batch, k, rank) tensor of integer tuples
    :return: A size (batch, k) mask indicating the duplicates
    """
    b, k, r = tuples.size()

    # unique = ((tuples.float() + 1) ** primes).prod(dim=2)  # unique identifier for each tuple
    unique = util.unique(tuples.view(b * k, r)).squeeze().view(b, k)

    sorted, sort_idx = torch.sort(unique, dim=1)
    _, unsort_idx = torch.sort(sort_idx, dim=1)

    mask = sorted[:, 1:] == sorted[:, :-1]
    # mask = mask.view(b, k - 1)

    zs = torch.zeros(b, 1, dtype=torch.uint8,
                     device='cuda' if tuples.is_cuda else 'cpu')
    mask = torch.cat([zs, mask], dim=1)

    return torch.gather(mask, 1, unsort_idx)
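# A minimal, self-contained sketch of the same duplicate-masking idea, using
# only plain PyTorch: torch.unique(..., return_inverse=True) stands in for
# util.unique, which is assumed to map every (rank,)-tuple to a scalar id.
# This is an illustration, not the project's implementation.
import torch

def duplicate_mask(tuples):
    b, k, r = tuples.size()
    # One integer id per tuple; identical tuples receive identical ids.
    _, ids = torch.unique(tuples.view(b * k, r), dim=0, return_inverse=True)
    ids = ids.view(b, k)
    sorted_ids, sort_idx = torch.sort(ids, dim=1)
    _, unsort_idx = torch.sort(sort_idx, dim=1)
    # After sorting, a duplicate is any element equal to its left neighbour.
    mask = sorted_ids[:, 1:] == sorted_ids[:, :-1]
    zs = torch.zeros(b, 1, dtype=torch.bool, device=tuples.device)
    mask = torch.cat([zs, mask], dim=1)
    return torch.gather(mask, 1, unsort_idx)

# duplicate_mask(torch.tensor([[[1, 2], [1, 2], [3, 4]]]))
# -> tensor([[False,  True, False]])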
def do(sequences):
    L = len(sequences[0])  # length of aligned sequences
    N = len(sequences)     # number of sequences
    weights = []
    for i in range(L):
        aa = check_mutation_position.do(sequences, i)
        freq = util.calc_frequency(aa)
        uniq_aa = util.unique(aa)
        # compute sequence weights for this column
        w = []
        for j in range(N):
            if aa[j] == '-' or aa[j] == 'X':
                w.append(0)
            else:
                if aa[j] == 'B':
                    aa[j] = 'N'
                elif aa[j] == 'Z':
                    aa[j] = 'Q'
                w.append(1.0 / (len(uniq_aa) * freq[aa[j]]))
        weights.append(w)  # L x N matrix: one weight per sequence for each position
    # compute average of w over all positions
    avg_weight = np.zeros(N)
    for i in range(L):
        avg_weight += np.array(weights[i])
    avg_weight *= 1.0 / L
    return avg_weight
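# A compact NumPy restatement of the same position-based (Henikoff-style)
# weighting, shown as a standalone sketch: weight = 1 / (distinct residues in
# the column * count of this residue), averaged over columns. It assumes
# calc_frequency returns per-residue counts (if it returns fractions the
# original weights differ by a constant factor) and skips the gap handling above.
import numpy as np

def position_based_weights(sequences):
    aln = np.array([list(s) for s in sequences])   # N x L character matrix
    n_seqs, length = aln.shape
    weights = np.zeros(n_seqs)
    for col in aln.T:
        residues, counts = np.unique(col, return_counts=True)
        count_of = dict(zip(residues, counts))
        r = len(residues)
        weights += np.array([1.0 / (r * count_of[c]) for c in col])
    return weights / length

# position_based_weights(["AAC", "AAC", "ATC"])
# -> array([0.3055..., 0.3055..., 0.3888...])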
def deduplicated_materials(gltf):
    """
    Remove duplicate materials.
    :param gltf: glTF object
    :return: glTF object with duplicates removed
    """
    gltf = deepcopy(gltf)

    # Deduplicate based on the VRM materials
    vrm = gltf['extensions']['VRM']

    # Map material name -> name of the original (kept) material
    unique_name_map = dict(unique_vrm_materials(vrm['materialProperties']))
    unique_material_name_set = unique(unique_name_map.values())

    # Map material name -> material
    name2materials = {m['name']: m for m in gltf['materials']}
    name2vrm_materials = {m['name']: m for m in vrm['materialProperties']}

    # Deduplicate the materials
    gltf['materials'] = [name2materials[name] for name in unique_material_name_set]
    vrm['materialProperties'] = [name2vrm_materials[name] for name in unique_material_name_set]

    # Deduplicate the materials referenced by primitives
    for mesh in gltf['meshes']:
        for primitive in mesh['primitives']:
            # Replace the primitive's material
            new_name = unique_name_map[primitive['material']['name']]
            primitive['material'] = name2materials[new_name]

    return gltf
def clean_buffer_views(gltf):
    """
    Return a buffer view list with unused buffer views removed.
    :param gltf: glTF object
    :return: new buffer view list
    """
    return unique(list_buffer_views(gltf))
def make_target_map(target_strs):
    """Return mapping from target strings to numeric values."""
    target_map = {}
    unique_target_strs = unique(target_strs)
    # Special case: None always maps to None (absent targets).
    include_none = False
    if None in unique_target_strs:
        unique_target_strs.remove(None)
        include_none = True
    # By convention, always map "O" to 0 (IOB-like tags).
    # TODO: check that unique_target_strs is IOB-like tagging.
    next_idx = 0
    if 'O' in unique_target_strs:
        target_map['O'] = next_idx
        unique_target_strs.remove('O')
        next_idx += 1
    for t in unique_target_strs:
        target_map[t] = next_idx
        next_idx += 1
    # Convert to one-hot
    for k in target_map:
        one_hot = np.zeros(len(target_map))
        one_hot[target_map[k]] = 1
        target_map[k] = one_hot
    if include_none:
        target_map[None] = None
    return Bidict(target_map)
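# The final one-hot conversion is equivalent to indexing into an identity
# matrix. A minimal sketch with hypothetical tag indices (the real map is the
# one built by make_target_map above):
import numpy as np

index_map = {'O': 0, 'B-PER': 1, 'I-PER': 2}      # hypothetical IOB tag indices
eye = np.eye(len(index_map))
one_hot_map = {tag: eye[idx] for tag, idx in index_map.items()}
# one_hot_map['B-PER'] -> array([0., 1., 0.])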
def clean_textures(gltf):
    """
    Return a texture list with unused textures removed.
    :param gltf: glTF object
    :return: new texture list
    """
    return unique(list_textures(gltf))
def renderNonexistingImages(latexCodeList, charheightpx, alignfudge, resfudge, **kw): """ take a list of strings of latex code, render the images that don't already exist. """ latexTemplate = (kw.get('latexTemplate', defaultLatexTemplate) or defaultLatexTemplate) m = re.search(r'\\documentclass\[[^\]]*?(\d+)pt[^\]]*?\]', \ latexTemplate) if m: charsizept = int(m.group(1)) else: charsizept = 10 res = charheightpx*ptperinch/charsizept*resfudge errors = "" codeToRender = filter(lambda x: imageDoesNotExist(x, charheightpx), unique(latexCodeList)) if (not codeToRender): return unifiedCode = re.sub(r'^(\$|\\\()', r'\1|~ ', codeToRender[0]) for code in codeToRender[1:len(codeToRender)]: unifiedCode = unifiedCode + '\n\\newpage\n' + re.sub(r'^(\$|\\\()', r'\1|~ ', code) try: runLatex(unifiedCode, res, charheightpx, latexTemplate) except LatexSyntaxError, data: errors = str(data) log(errors, 'LatexSyntaxError') # FIXME translate latex line number to source line number return escape(errors)
def remap_resources(self, root, local_file_paths=None, desired_file_names=None):
    local_file_paths = local_file_paths if local_file_paths is not None else []
    desired_file_names = desired_file_names if desired_file_names is not None else []

    for child in root:
        if child.tag in ResourceExtractorTreeprocessor.RESOURCE_TAGS:
            attrib = ResourceExtractorTreeprocessor.RESOURCE_TAGS[child.tag]
            file_path = child.attrib[attrib]
            if file_path.startswith("file://"):
                local_file_path = os.path.join(
                    self.configs.get("relative_path", "."),
                    file_path[len("file://"):])
                desired_file_name = unique(os.path.basename(local_file_path),
                                           desired_file_names)
                child.attrib[attrib] = "%s/%s" % (self.configs["resource_dir"],
                                                  desired_file_name)
                local_file_paths.append(local_file_path)
                desired_file_names.append(desired_file_name)

        # Recurse
        self.remap_resources(child, local_file_paths, desired_file_names)

    # Return with the resource dirs prefixed
    return zip(local_file_paths,
               ("%s/%s" % (self.configs["resource_dir"], dfn)
                for dfn in desired_file_names))
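# remap_resources (and allocate_anchors further down) rely on a
# unique(name, existing) helper that returns a name not already present in
# `existing`. The real helper is not shown in these snippets; one plausible
# sketch, offered purely as an assumption about its behaviour:
def unique(name, existing, sep="-"):
    """Return `name`, or `name` with a numeric suffix if it is already taken."""
    if name not in existing:
        return name
    n = 1
    while "%s%s%d" % (name, sep, n) in existing:
        n += 1
    return "%s%s%d" % (name, sep, n)

# unique("logo.png", ["logo.png"]) -> "logo.png-1"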
def do_one_chunk(self, chunk):
    rows = []
    if len(chunk) == 0:
        return rows
    key = [self.key_col]
    if self.key_col == 'term_id' and hasattr(chunk.iloc[0], 'tax_id'):
        key.append('tax_id')
    for k, g in chunk.groupby(key, as_index=False):
        row = {}
        if self.key_col == 'gid':
            row[self.key_col] = k
        else:
            row[self.key_col] = self.get_term_prefix(self.key_col) + k[0]
        allids = self.get_term_prefix(self.value_col) + g[self.value_col].astype(str)
        allids = util.unique(allids)
        row[self.new_value_col] = ','.join(allids)
        row['id_count'] = len(allids)
        row['ds'] = self.ds
        if hasattr(g.iloc[0], 'tax_id'):
            row['tax_id'] = g.iloc[0]['tax_id']
        else:
            row['tax_id'] = None
        row['term_category_id'] = self.get_type_col_value()
        rows.append(row)
    return rows
def clean_accesors(gltf):
    """
    Return an accessor list with unused accessors removed.
    :param gltf: glTF object
    :return: new accessor list
    """
    return unique(list_accessors(gltf))
def deduplicated_materials(gltf):
    """
    Remove duplicate materials.
    :param gltf: glTF object
    :return: glTF object with duplicates removed
    """
    gltf = deepcopy(gltf)
    vrm = gltf['extensions']['VRM']

    # Deduplicate based on the VRM materials:
    # map from material name -> the original (kept) material
    vrm_material_map = dict(unique_materials(vrm['materialProperties']))
    # Deduplicate the VRM materials
    vrm['materialProperties'] = unique(vrm_material_map.values())

    # Map from material name -> name of the original material
    unique_name_map = {k: v['name'] for k, v in vrm_material_map.items()}
    # Map from material name -> material
    materials_name_map = {m['name']: m for m in gltf['materials']}

    # Remove duplicate materials from primitives
    for mesh in gltf['meshes']:
        for primitive in mesh['primitives']:
            # Update with the deduplicated material
            name = primitive['material']['name']
            new_name = unique_name_map[name]
            primitive['material'] = materials_name_map[new_name]

    # Deduplicate the materials (note: keep the same order as the VRM materials)
    gltf['materials'] = [materials_name_map[vm['name']]
                         for vm in vrm['materialProperties']]

    return gltf
def renderNonexistingImages(latexCodeList, charheightpx, alignfudge, resfudge, **kw): """ take a list of strings of latex code, render the images that don't already exist. """ from string import join res = int(round(charheightpx*ptperinch/charsizept*resfudge)) errors = "" latexTemplate = (kw.get('latexTemplate', defaultLatexTemplate) or defaultLatexTemplate) codeToRender = filter(lambda x: imageDoesNotExist(x, charheightpx), unique(latexCodeList)) if (not codeToRender): return # unifiedCode = re.sub(r'^(\$|\\\()', r'\1\cdot ', codeToRender[0]) # for code in codeToRender[1:len(codeToRender)]: # unifiedCode = unifiedCode + '\n\\newpage\n' + re.sub(r'^(\$|\\\()', r'\1\cdot ', code) unifiedCode = codeToRender[0] for code in codeToRender[1:len(codeToRender)]: unifiedCode = unifiedCode + '\n\\newpage\n' + code try: runLatex(unifiedCode, charheightpx, latexTemplate) except LatexSyntaxError, data: errors = str(data) log(errors, 'LatexSyntaxError') return escape(errors)
def get_none_stock_barcode(self, jd):
    """
    1. For stock-in and stock-out operations: the barcode does not exist.
    2. For stock-out operations: the stock quantity for the barcode is 0.
    3. For stock-out operations: the outgoing quantity for a barcode must not
       exceed the stock quantity.
    """
    for barcode in util.unique(jd["barcodeLines"]):
        stock = Stock.query.filter_by(
            barcode=barcode, warehouse_id=jd["warehouse_id"]).first()
        if not stock:
            return {"title": "条形码不存在", "content": barcode}
        if jd["method"] == "flow-out":
            if not stock.quantity:
                return {
                    "title": "库存[%s]数量为0" % stock.name,
                    "content": "条形码:%s" % (barcode)
                }
            elif Counter(jd["barcodeLines"])[barcode] > stock.quantity:
                return {
                    "title": "库存[%s]出库数量不得大于库存数量[%d]" % (stock.name, stock.quantity),
                    "content": "条形码:%s" % (barcode)
                }
def biogrid(self, l_human_only=False): fn_source = os.path.join(SyncDB.DOWNLOAD_DIR(), "BIOGRID-ALL-3.4.134.tab2.txt") if not os.path.exists(fn_source): urllib.urlretrieve( "http://thebiogrid.org/downloads/archives/Release%20Archive/BIOGRID-3.4.134/BIOGRID-ALL-3.4.134.tab2.zip", os.path.join(SyncDB.DOWNLOAD_DIR(), "BIOGRID-ALL-3.4.134.tab2.zip")) cmd = "unzip " + os.path.join(SyncDB.DOWNLOAD_DIR(), "BIOGRID-ALL-3.4.134.tab2.zip" ) + " -d " + SyncDB.DOWNLOAD_DIR() print cmd util.unix(cmd) t = pd.read_table(fn_source, dtype=str) #print t.header() #['#BioGRID Interaction ID', 'Entrez Gene Interactor A', 'Entrez Gene Interactor B', 'BioGRID ID Interactor A', 'BioGRID ID Interactor B', 'Systematic Name Interactor A', 'Systematic Name Interactor B', 'Official Symbol Interactor A', 'Official Symbol Interactor B', 'Synonyms Interactor A', 'Synonyms Interactor B', 'Experimental System', 'Experimental System Type', 'Author', 'Pubmed ID', 'Organism Interactor A', 'Organism Interactor B', 'Throughput', 'Score', 'Modification', 'Phenotypes', 'Qualifications', 'Tags', 'Source Database'] print util.unique(t['Experimental System Type']) #t=t[(t['Organism Interactor A']=='9606') & (t['Organism Interactor B']=='9606')] t.rename2({ 'Entrez Gene Interactor A': 'gid_A', 'Entrez Gene Interactor B': 'gid_B', 'Experimental System Type': 'interaction_category', 'Experimental System': 'interaction_type', 'Pubmed ID': 'support', 'Source Database': 'ds', 'Organism Interactor A': 'tax_id_A', 'Organism Interactor B': 'tax_id_B', 'Score': 'score' }) #print t.header() t['interaction_type_id'] = 2 t = t[[ 'gid_A', 'gid_B', 'tax_id_A', 'tax_id_B', 'interaction_type_id', 'interaction_category', 'interaction_type', 'score', 'support', 'ds' ]] t = t[(t.gid_A != '-') & (t.gid_B != '-')] t['gid_A'] = t.gid_A.astype(int) t['gid_B'] = t.gid_B.astype(int) t['tax_id_A'] = t.tax_id_A.astype(int) t['tax_id_B'] = t.tax_id_B.astype(int) t['ds'] = 'BioGrid' t = t[(t.gid_A != t.gid_B) & (t.tax_id_A == t.tax_id_B) & (t.gid_A > 0) & (t.gid_B > 0)].copy() self.bio = t return self.bio
def get_tissue_specific(self): if not path.isfile( os.path.join(SyncDB.DOWNLOAD_DIR(), "gene2refseq.gz")): urllib.urlretrieve( "ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2refseq.gz", os.path.join(SyncDB.DOWNLOAD_DIR(), "gene2refseq.gz")) ref2gene = util.read_csv( os.path.join(SyncDB.DOWNLOAD_DIR(), "gene2refseq.gz"), skiprows=1, header=None, sep='\t', names=[ "tax_id", "GeneID", "status", "RNA_nucleotide_accession.version", "RNA_nucleotide_gi", "protein_accession.version", "protein_gi", "genomic_nucleotide_accession.version", "genomic_nucleotide_gi", "start_position_on_the_genomic_accession", "end_position_on_the_genomic_accession", "orientation", "assembly", "mature_peptide_accession.version", "mature_peptide_gi", "Symbol" ]).query('tax_id in [9606]') #Tracer()() self.ref2gene_map = {} for i in ref2gene.index: if ref2gene.at[i, 'RNA_nucleotide_accession.version'] != '-': self.ref2gene_map[ref2gene.at[ i, 'RNA_nucleotide_accession.version'].split('.') [0]] = ref2gene.at[i, 'GeneID'] if ref2gene.at[i, 'protein_accession.version'] != '-': self.ref2gene_map[ref2gene.at[i, 'protein_accession.version']. split('.')[0]] = ref2gene.at[i, 'GeneID'] if ref2gene.at[i, 'genomic_nucleotide_accession.version'] != '-': self.ref2gene_map[ref2gene.at[ i, 'genomic_nucleotide_accession.version'].split('.') [0]] = ref2gene.at[i, 'GeneID'] t_tissue = self.tissue_specific() t_tissue.rename2({'Tissue(s)': 'Tissue'}) t_tissue['gene_id'] = t_tissue.RefSeq.apply( lambda x: self.ref2gene_map.get(x, 0)) t_tissue = t_tissue.query('gene_id > 0') data = [] for k, t_v in t_tissue.groupby('gene_id'): if k == 0: continue S = [x for x in t_v['Tissue'] if not pd.isnull(x)] s = " ".join(S) S = util.unique(s.split(" ")) data.append({'gene_id': k, 'Tissues': ";".join(S)}) t_tissue = pd.DataFrame(data) t_tissue['tax_id'] = '9606' #Tracer()() t_tissue.to_csv(self.fn_dest_tissue_specific, index=False) print "%d Tissue-specific Genes Fetched" % len(t_tissue)
def build_go_term_count(self, file):
    goterm_count_map = {}
    gene2go = util.read_csv(file, sep=r'\t',
                            names=['tax_id', 'gene_id', 'term_id', 'type', 'description'])
    for k, g in gene2go.groupby('term_id', as_index=False):
        goterm_count_map[k] = len(util.unique(g['gene_id'].values))
    return goterm_count_map
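# The same per-term counts of distinct gene ids can be cross-checked directly
# with pandas (column names as in the snippet above); a small sketch:
def go_term_counts(gene2go):
    """Map term_id -> number of distinct gene_id values, via pandas."""
    return gene2go.groupby('term_id')['gene_id'].nunique().to_dict()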
def test_unique_recursion(self):
    """
    Reproducing observed recursion error
    :return:
    """
    # tensor of 6 1-tuples
    tuples = torch.tensor([[74], [75], [175], [246], [72], [72]])
    dup = util.unique(tuples)
def do_update(self): print '##############################################################' download_url = 'http://mips.helmholtz-muenchen.de/corum/download/allComplexes.txt.zip' urllib.urlretrieve(download_url, self.fn_data) t = pd.read_table(self.fn_data) #print t.header() #['ComplexID', 'ComplexName', 'Organism', 'Synonyms', 'Cell line', 'subunits(UniProt IDs)', 'subunits(Entrez IDs)', 'Protein complex purification method', 'GO ID', 'GO description', 'FunCat ID', 'FunCat description', 'PubMed ID', 'subunits(Protein name)', 'subunits(Gene name)', 'subunits(Gene name syn)', 'Disease comment', 'Subunits comment', 'Complex comment', 'SWISSPROT organism'] C_TAX = {'Rat': 10116, 'Human': 9606, 'Mouse': 10090} c_gene2tax = self.get_gene2tax() out_term = [] out_gids = [] for i in t.index: id = t.ix[i, 'ComplexID'] s_go = t.ix[i, 'ComplexName'] s_des = t.ix[i, 'Complex comment'] if s_des == 'None': s_des = t.ix[i, 'GO description'] if s_des == 'None': s_des = s_go gids = t.ix[i, 'subunits(Entrez IDs)'] gids = gids.replace(';', ',') S = gids.split(',') S = [x for x in S if x in c_gene2tax] S_tax = [c_gene2tax[x] for x in S] l_new_term = True for s_tax in util.unique(S_tax): S_gid = [S[i] for i, x in enumerate(S_tax) if x == s_tax] n = len(S_gid) if n < 3: #print "Too few proteins: ", id, S continue if l_new_term: out_term.append({ 'term_id': 'CORUM:%d' % id, 'term_name': s_go, 'description': s_des }) l_new_term = False for gid in S_gid: out_gids.append({ 'gid': gid, 'term_id': 'CORUM:%d' % id, 'term_name': s_go, 'type_name': 'CORUM', 'tax_id': s_tax }) #gid, term_id, term_name, type_name, tax_id t_term = pd.DataFrame(out_term) t_term.to_csv(self.fn_dest_go_term, index=False) print "Number of Complexes: %d" % len(t_term) t_gids = pd.DataFrame(out_gids) t_gids.to_csv(self.fn_gene_term_pair, index=False) #print t_gids.header() print util.unique_count(t_gids['tax_id'].values)
def main(): voc = util.Voc(init_from_file="data/voc_b.txt") netR_path = 'output/rf_dis.pkg' netG_path = 'output/net_p' netD_path = 'output/net_d' agent_path = 'output/net_gan_%d_%d_%dx%d' % (SIGMA * 10, BL * 10, BATCH_SIZE, MC) netR = util.Environment(netR_path) agent = model.Generator(voc) agent.load_state_dict(T.load(netG_path + '.pkg')) df = pd.read_table('data/CHEMBL251.txt') df = df[df['PCHEMBL_VALUE'] >= 6.5] data = util.MolData(df, voc) loader = DataLoader(data, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, collate_fn=data.collate_fn) netD = model.Discriminator(VOCAB_SIZE, EMBED_DIM, FILTER_SIZE, NUM_FILTER) if not os.path.exists(netD_path + '.pkg'): Train_dis_BCE(netD, agent, loader, epochs=100, out=netD_path) netD.load_state_dict(T.load(netD_path + '.pkg')) best_score = 0 log = open(agent_path + '.log', 'w') for epoch in range(1000): print('\n--------\nEPOCH %d\n--------' % (epoch + 1)) print('\nPolicy Gradient Training Generator : ') Train_GAN(agent, netD, netR) print('\nAdversarial Training Discriminator : ') Train_dis_BCE(netD, agent, loader, epochs=1) seqs = agent.sample(1000) ix = util.unique(seqs) smiles, valids = util.check_smiles(seqs[ix], agent.voc) scores = netR(smiles) scores[valids == False] = 0 unique = (scores >= 0.5).sum() / 1000 if best_score < unique: T.save(agent.state_dict(), agent_path + '.pkg') best_score = unique print("Epoch+: %d average: %.4f valid: %.4f unique: %.4f" % (epoch, scores.mean(), valids.mean(), unique), file=log) for i, smile in enumerate(smiles): print('%f\t%s' % (scores[i], smile), file=log) for param_group in agent.optim.param_groups: param_group['lr'] *= (1 - 0.01) log.close()
def generate_hall_of_heroes(self):
    # XXX should perhaps be ordered by sum of levels minus number of jobs?
    jobs = data.Job.all().order('-level').fetch(max_results)
    jobs = filter(lambda x: data.Character.by_user(x.owner).get().show_in_hall_of_heroes_p,
                  jobs)
    return map(lambda x: {
        'character': data.Character.by_user(x.owner).get(),
        'archetype': x.archetype._static,
        'primary_class': x,
        'secondary_classes': filter(lambda y: y.key() != x.key(),
                                    data.Job.by_user(x.owner).order('-level').fetch(max_results))
    }, util.unique(jobs, key_fn=lambda x: x.owner)[0:10])
def anygoal(s):
    reifiedgoals = (reify(goal, s) for goal in goals)

    def f(goals):
        for goal in goals:
            try:
                yield goaleval(goal)(s)
            except EarlyGoalError:
                pass

    return unique(interleave(f(reifiedgoals), [EarlyGoalError]),
                  key=dicthash)
def __new__(cls, *computations):
    computations = tuple(unique(computations))
    computations = exhaust(flatten)(computations)
    computations = exhaust(rm_identity)(computations)
    if len(computations) == 1:
        return computations[0]
    else:
        obj = object.__new__(cls)
        obj.computations = tuple(computations)
        return obj
def scrape(xml_path): """Scrape verb prefixed from the MW dictionary.""" upasargas = set(UPASARGAS.splitlines()) labels = ['name', 'prefix_type'] regexp = 'root' rows = [] for i, xml in enumerate(util.iter_mw_xml(xml_path, regexp=regexp)): key1 = xml.find('h/key1') key2 = xml.find('h/key2') entry = key1.text if not (entry.endswith('kf') or entry.endswith('BU')): continue # A root is prefixed iff it has a <root> element. Any matches without # one are almost certainly nominals, which we can disregard. root = key2.find('.//root') if root is None: continue # Remove lingering XML root.clear() key2.tag = None name = ET.tostring(key2) name = re.sub('(<.*?>)|/', '', name) # Remove groups ending in upasargas splits = [x for x in name.split('-') if x] last = splits[-1] if last in upasargas or make_tidy(last) in upasargas: continue # Add prefixes to the proper category name = ''.join(splits) _type = None if name[-1] in ('I', 'U'): _type = 'cvi' elif name.endswith('A'): _type = 'DAc' else: _type = 'other' # 'sampra' is suggested as a prefix. This is wrong. if name == 'sampra': continue rows.append((name, _type)) rows = util.unique(rows, lambda x: x[0]) rows.sort(key=lambda x: util.key_fn(x[0])) print util.make_csv_string(labels, rows)
def is_header(row, l_allow_int=False):
    """Check if a row can be a column header.

    row: list containing the cells of the first row
    l_allow_int: boolean, default False, whether an int can be used as a column name
    return: boolean"""
    if len(row) != len(util.unique(row)):
        return False
    for x in row:
        if type(x) is float or type(x) is complex:
            return False
        if (type(x) is int) and not l_allow_int:
            return False
        if not x:
            return False
    return True
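# A quick check of the intended behaviour (assumes is_header and util.unique
# from this project are importable; the example rows are made up for illustration):
cases = [
    (['gene_id', 'symbol', 'tax_id'], True),    # distinct, non-empty string names
    (['gene_id', 'symbol', 'gene_id'], False),  # duplicate column name
    (['gene_id', 3, 'tax_id'], False),          # int rejected unless l_allow_int=True
    (['gene_id', 1.5, 'tax_id'], False),        # float is never a valid header
    (['gene_id', '', 'tax_id'], False),         # empty cell
]
for row, expected in cases:
    assert is_header(row) == expected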
def find_optimum_threshold(X, y):
    '''Build the candidate thresholds needed to find the optimum threshold.

    Args:
        X: score
        y: label

    Return Values:
        thresholds, tps, tns, fps, fns: candidate thresholds (midpoints between
        consecutive unique scores, plus one just below and one just above the
        score range) and the corresponding confusion-matrix counts
    '''
    # save the data into a 2-d array so that we can sort them on the X values
    data = np.array([X, y])
    data = np.transpose(data)

    # metrics
    tps = []
    tns = []
    fps = []
    fns = []
    thresholds = []

    allthresh = util.unique(list(data[:, 0]))
    allthresh.sort()

    thresh = allthresh[0] - 0.000001
    tp, tn, fp, fn = calc_metrics(data, thresh)
    tps.append(tp)
    tns.append(tn)
    fps.append(fp)
    fns.append(fn)
    thresholds.append(thresh)

    for i in range(len(allthresh) - 1):
        thresh = (allthresh[i] + allthresh[i + 1]) / 2
        tp, tn, fp, fn = calc_metrics(data, thresh)
        tps.append(tp)
        tns.append(tn)
        fps.append(fp)
        fns.append(fn)
        thresholds.append(thresh)

    thresh = allthresh[-1] + 0.000001
    tp, tn, fp, fn = calc_metrics(data, thresh)
    tps.append(tp)
    tns.append(tn)
    fps.append(fp)
    fns.append(fn)
    thresholds.append(thresh)

    return thresholds, tps, tns, fps, fns
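# Typical use of the returned arrays: pick the threshold that maximises
# accuracy (any other metric can be plugged in the same way). This is a
# sketch around the function above, not part of the original module.
import numpy as np

def best_threshold_by_accuracy(thresholds, tps, tns, fps, fns):
    tps, tns = np.asarray(tps, float), np.asarray(tns, float)
    fps, fns = np.asarray(fps, float), np.asarray(fns, float)
    acc = (tps + tns) / (tps + tns + fps + fns)
    best = int(np.argmax(acc))
    return thresholds[best], float(acc[best])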
def get_gene_disease_association(self): print 'Getting GeneGo disease association data' #df = self.fetch("select distinct a.ref as gene_id, a.disid, a.disname, a.note from (select ga.note_id, g17.ref, d.disid, d.disname, d.note from disease_associations_all_v ga, GeneDBS_17 g17, diseases d, geneorgs go1 where d.disid=ga.dis_id and ga.GENE_ID=g17.gene and go1.gene=ga.gene_id and go1.org=1) a"); #Tracer()() fn = self.dir + "/gene_disease_association.csv" if not os.path.exists(fn): df = self.fetch( "select distinct d.disid TERM_ID, d.disname as TERM_NAME, gdb.ref as GID, d.note as description, orgs.taxonomyid as tax_id from gene_netw gn, genediss gd, diseases d, genedbs gdb, geneorgs o, orgs where gn.gene = gd.gene and gd.dis = d.disid and gn.gene=gdb.gene and gn.gene=o.gene and o.org=orgs.orgid and orgs.taxonomyid in (" + ','.join(self.taxidList) + ") and gdb.db=17 and d.rtyp > 0 and d.disname not like 'By %' and gd.dis in (select distinct dismbr from disrelflat where disgrp = -1173899567 and dismbr <> -1173899567)" ) df['TERM_ID'] = 'gDIS' + df['TERM_ID'].map(str) df.rename2({ "TERM_ID": "term_id", "TERM_NAME": "term_name", "GID": "gid", "DESCRIPTION": "description", "TAX_ID": "tax_id" }) df['type_name'] = 'GeneGo Disease Association' #remove disease which has more than 500 genes df = df.drop(df.index[list( it.chain.from_iterable([ g for k, g in df.groupby('term_id').groups.items() if len(g) >= 500 ]))]) df.to_csv(fn, index=False) else: df = util.read_csv(fn) df1 = pd.DataFrame( df.copy())[['gid', 'term_id', 'term_name', 'type_name', 'tax_id']] df2 = pd.DataFrame( df.copy())[['term_id', 'term_name', 'type_name', 'description']] df2 = df2.drop_duplicates() self.disease_gid2term = df1 self.disease_terms = df2 self.disease_done = True data = [] for k, t_v in df.groupby('gid'): S = util.unique([x for x in t_v['term_name'] if not pd.isnull(x)]) data.append({ 'gid': k, 'content': "; ".join(S), 'type_name': t_v['type_name'].values[0], 'annotation_field1': len(S), 'tax_id': str(int(t_v['tax_id'].values[0])) }) self.disease_annotations = pd.DataFrame(data) print 'GeneGo disease association data captured'
def remote_query(rein, user, urls, log, query_type, distinct):
    '''
    Sends a specific query to the registered servers and filters the results for uniqueness.
    '''
    res = []
    for url in urls:
        sel_url = "{0}query?owner={1}&query={2}&testnet={3}"
        data = safe_get(log, sel_url.format(url, user.maddr, query_type, rein.testnet))
        if data is None or query_type not in data or len(data[query_type]) == 0:
            click.echo('None found')
            continue
        res += filter_and_parse_valid_sigs(rein, data[query_type])
    return unique(res, distinct)
def main(): global Epsilon # Vocabulary containing all of the tokens for SMILES construction voc = util.Voc("data/voc.txt") # File path of predictor in the environment environ_path = 'output/RF_cls_ecfp6.pkg' # file path of hidden states in RNN for initialization initial_path = 'output/net_p' # file path of hidden states of optimal exploitation network agent_path = 'output/net_e_%.2f_%.1f_%dx%d' % (Epsilon, Baseline, BATCH_SIZE, MC) # file path of hidden states of exploration network explore_path = 'output/net_p' # Environment (predictor) environ = util.Environment(environ_path) # Agent (generator, exploitation network) agent = model.Generator(voc) agent.load_state_dict(torch.load(initial_path + '.pkg')) # exploration network explore = model.Generator(voc) explore.load_state_dict(torch.load(explore_path + '.pkg')) best_score = 0 log = open(agent_path + '.log', 'w') for epoch in range(1000): print('\n--------\nEPOCH %d\n--------' % (epoch + 1)) print('\nForward Policy Gradient Training Generator : ') Policy_gradient(agent, environ, explore=explore) seqs = agent.sample(1000) ix = util.unique(seqs) smiles, valids = util.check_smiles(seqs[ix], agent.voc) scores = environ(smiles) scores[valids == False] = 0 unique = (scores >= 0.5).sum() / 1000 # The model with best percentage of unique desired SMILES will be persisted on the hard drive. if best_score < unique: torch.save(agent.state_dict(), agent_path + '.pkg') best_score = unique print("Epoch+: %d average: %.4f valid: %.4f unique: %.4f" % (epoch, scores.mean(), valids.mean(), unique), file=log) for i, smile in enumerate(smiles): print('%f\t%s' % (scores[i], smile), file=log) # Learing rate exponential decay for param_group in agent.optim.param_groups: param_group['lr'] *= (1 - 0.01) log.close()
def run(n, x, *goals, **kwargs):
    """ Run a logic program.  Obtain n solutions to satisfy goals.

    n     - number of desired solutions.  See ``take``
            0 for all
            None for a lazy sequence
    x     - Output variable
    goals - a sequence of goals.  All must be true

    >>> from logpy import run, var, eq
    >>> x = var()
    >>> run(1, x, eq(x, 1))
    (1,)
    """
    return take(n, unique(reify(x, s) for s in goaleval(lallearly(*goals))({})))
def run(n, x, *goals):
    """ Run a logic program.  Obtain n solutions to satisfy goals.

    n     - number of desired solutions.  See ``take``
            0 for all
            None for a lazy sequence
    x     - Output variable
    goals - a sequence of goals.  All must be true

    >>> from logpy import run, var, eq
    >>> x = var()
    >>> run(1, x, eq(x, 1))
    (1,)
    """
    return take(n, unique(walkstar(x, s) for s in bindstar(({},), *goals)))
def test_unique(self):
    r = util.unique(
        torch.tensor([[1, 2, 3, 4], [4, 3, 2, 1], [1, 2, 3, 4]]))

    self.assertEqual((3, 1), r.size())
    self.assertEqual(r[0], r[2])
    self.assertNotEqual(r[0], r[1])
    self.assertNotEqual(r[1], r[2])

    r = util.nunique(
        torch.tensor([[[1, 2, 3, 4], [4, 3, 2, 1], [1, 2, 3, 4]]]))

    self.assertEqual((1, 3), r.size())
    self.assertEqual(r[0, 0], r[0, 2])
    self.assertNotEqual(r[0, 0], r[0, 1])
    self.assertNotEqual(r[0, 1], r[0, 2])
def evaluate_binary_labeling(dataitems):
    gold = dataitems.target_strs
    pred = dataitems.prediction_strs
    labels = unique(chain(gold, pred))
    pos = _positive_label(labels)
    res = {}
    res['acc'] = accuracy(gold, pred)
    bcm = evaluate_binary_classification(gold, pred, pos)
    res.update(bcm._asdict())
    res['auc'] = skmetrics.roc_auc_score(dataitems.targets,
                                         dataitems.predictions)
    res['ap'] = skmetrics.average_precision_score(dataitems.targets,
                                                  dataitems.predictions)
    maxfp = max_f_point(dataitems)
    res.update({'maxf-{}'.format(k): v for k, v in maxfp._asdict().items()})
    return res
def run(n, x, *goals, **kwargs):
    """ Run a logic program.  Obtain n solutions to satisfy goals.

    n     - number of desired solutions.  See ``take``
            0 for all
            None for a lazy sequence
    x     - Output variable
    goals - a sequence of goals.  All must be true

    >>> from logpy import run, var, eq
    >>> x = var()
    >>> run(1, x, eq(x, 1))
    (1,)
    """
    return take(n, unique(reify(x, s) for s in goaleval(lallearly(*goals))({})))
def allocate_anchors(self, headings):
    ids = []
    labels = []
    levels = []
    for heading in headings:
        # Pick an ID
        id = unique(slugify(heading.text, "-"), ids)

        # Assign the ID to the heading
        heading.attrib["id"] = id

        # Record it
        ids.append(id)
        labels.append(heading.text)
        levels.append(int(heading.tag[1]))

    return zip(levels, labels, ids)
def token_evaluator(dataset, label=None, writer=None, mapper=None, config=defaults):
    """Return appropriate evaluator callback for dataset."""
    if config.token_level_eval:
        evaluator = TokenLevelEvaluator
    elif is_iob_tagging(unique(dataset.tokens.target_strs)):
        evaluator = ConllEvaluator
    else:
        evaluator = TokenLevelEvaluator    # default
    info('using {} for {}'.format(evaluator.__name__, dataset.name))
    callbacks = []
    callbacks.append(Predictor(dataset.tokens))
    callbacks.append(evaluator(dataset, label=label, writer=writer))
    if mapper is not None:
        # TODO don't assume the mapper expects sentences.
        callbacks.append(PredictionMapper(dataset.sentences, mapper))
        # TODO do we really want a second eval here?
        callbacks.append(evaluator(dataset, label=label, writer=writer))
    return CallbackChain(callbacks)
def __init__(self, repo, parents, text, files, filectxfn, user=None, date=None, extra=None): self._repo = repo self._rev = None self._node = None self._text = text self._date = date and util.parsedate(date) or util.makedate() self._user = user parents = [(p or nullid) for p in parents] p1, p2 = parents self._parents = [changectx(self._repo, p) for p in (p1, p2)] files = util.sort(util.unique(files)) self._status = [files, [], [], [], []] self._filectxfn = filectxfn self._extra = extra and extra.copy() or {} if 'branch' not in self._extra: self._extra['branch'] = 'default' elif self._extra.get('branch') == '': self._extra['branch'] = 'default'
def ParseGetEventSubTreeNoSelections(resp):
    _check_errors(resp)
    allmarkets = []
    markets = []
    # go through each event class in turn; an event class is
    # e.g. 'Rugby Union', 'Formula 1', etc.
    # slight trick here:
    # if we only polled a single event class, then resp[2] is
    # not a list, so we need to convert it to a list
    if isinstance(resp[2], list):
        data = resp[2]
    else:
        data = [resp[2]]
    for evclass in data:
        _ParseEventClassifier(evclass, '', markets)
        allmarkets = allmarkets + markets
    # hack: currently markets are duplicated multiple times (is this
    # an API error?); we want only unique markets here
    umarkets = util.unique(allmarkets)
    return umarkets
def permuteq(a, b, eq2=eq): """ Equality under permutation For example (1, 2, 2) equates to (2, 1, 2) under permutation >>> from logpy import var, run, permuteq >>> x = var() >>> run(0, x, permuteq(x, (1, 2))) ((1, 2), (2, 1)) >>> run(0, x, permuteq((2, 1, x), (2, 1, 2))) (2,) """ if isinstance(a, tuple) and isinstance(b, tuple): if len(a) != len(b): return fail elif set(a) == set(b) and len(set(a)) == len(a): return success else: c, d = a, b try: c, d = tuple(sorted(c)), tuple(sorted(d)) except: pass if len(c) == 1: return (eq2, c[0], d[0]) return condeseq((((eq2, c[i], d[0]), (permuteq, c[0:i] + c[i + 1 :], d[1:], eq2)) for i in range(len(c)))) if isvar(a) and isvar(b): raise EarlyGoalError() if isvar(a) or isvar(b): if isinstance(b, tuple): c, d = a, b elif isinstance(a, tuple): c, d = b, a return (condeseq, ([eq(c, perm)] for perm in unique(it.permutations(d, len(d)))))
def statwalk(self, files=None, match=util.always, unknown=True, ignored=False, badmatch=None, directories=False): ''' walk recursively through the directory tree, finding all files matched by the match function results are yielded in a tuple (src, filename, st), where src is one of: 'f' the file was found in the directory tree 'd' the file is a directory of the tree 'm' the file was only in the dirstate and not in the tree 'b' file was not found and matched badmatch and st is the stat result if the file was found in the directory. ''' # walk all files by default if not files: files = ['.'] dc = self._map.copy() else: files = util.unique(files) dc = self._filter(files) def imatch(file_): if file_ not in dc and self._ignore(file_): return False return match(file_) # TODO: don't walk unknown directories if unknown and ignored are False ignore = self._ignore dirignore = self._dirignore if ignored: imatch = match ignore = util.never dirignore = util.never # self._root may end with a path separator when self._root == '/' common_prefix_len = len(self._root) if not util.endswithsep(self._root): common_prefix_len += 1 normpath = util.normpath listdir = osutil.listdir lstat = os.lstat bisect_left = bisect.bisect_left isdir = os.path.isdir pconvert = util.pconvert join = os.path.join s_isdir = stat.S_ISDIR supported = self._supported _join = self._join known = {'.hg': 1} # recursion free walker, faster than os.walk. def findfiles(s): work = [s] wadd = work.append found = [] add = found.append if directories: add((normpath(s[common_prefix_len:]), 'd', lstat(s))) while work: top = work.pop() entries = listdir(top, stat=True) # nd is the top of the repository dir tree nd = normpath(top[common_prefix_len:]) if nd == '.': nd = '' else: # do not recurse into a repo contained in this # one. use bisect to find .hg directory so speed # is good on big directory. names = [e[0] for e in entries] hg = bisect_left(names, '.hg') if hg < len(names) and names[hg] == '.hg': if isdir(join(top, '.hg')): continue for f, kind, st in entries: np = pconvert(join(nd, f)) if np in known: continue known[np] = 1 p = join(top, f) # don't trip over symlinks if kind == stat.S_IFDIR: if not ignore(np): wadd(p) if directories: add((np, 'd', st)) if np in dc and match(np): add((np, 'm', st)) elif imatch(np): if supported(np, st.st_mode): add((np, 'f', st)) elif np in dc: add((np, 'm', st)) found.sort() return found # step one, find all files that match our criteria files.sort() for ff in files: nf = normpath(ff) f = _join(ff) try: st = lstat(f) except OSError, inst: found = False for fn in dc: if nf == fn or (fn.startswith(nf) and fn[len(nf)] == '/'): found = True break if not found: if inst.errno != errno.ENOENT or not badmatch: self._ui.warn('%s: %s\n' % (self.pathto(ff), inst.strerror)) elif badmatch and badmatch(ff) and imatch(nf): yield 'b', ff, None continue if s_isdir(st.st_mode): if not dirignore(nf): for f, src, st in findfiles(f): yield src, f, st else: if nf in known: continue known[nf] = 1 if match(nf): if supported(ff, st.st_mode, verbose=True): yield 'f', nf, st elif ff in dc: yield 'm', nf, st
def write_prefix_groups(prefixed_roots, unprefixed_roots, upasargas, other, sandhi_rules, out_path): """Parse the prefixes in a prefix root and write out the prefix groups. The procedure is roughly as follows: for each prefixed root in `prefixed_roots`: find (p_1, ..., p_n, r), where p_x is a prefix and r is a root write the prefix group (p_1, ..., p_n) to file. We find (p_1, .., p_n) by using the rules in `sandhi_rules` and verify that `p_x` is a prefix by checking for membership in `upasargas` and `other`. """ # Loading prefixes all_prefixes = set() with util.read_csv(upasargas) as reader: all_prefixes.update([x['name'] for x in reader]) with util.read_csv(other) as reader: all_prefixes.update([x['name'] for x in reader]) # The 's' prefix is used in roots like 'saMskf' and 'parizkf'. Although it # is prefixed to a verb, it is not semantically the same as the other verb # prefixes. Here, though, we treat it as a verb prefix. all_prefixes.add('s') # Some prefixes have alternate forms. prefix_alternates = { 'pi': 'api', 'ut': 'ud', 'Ri': 'ni', 'niz': 'nis', 'iz': 'nis', 'palA': 'parA', 'pali': 'pari', 'z': 's', } all_prefixes.update(prefix_alternates.keys()) # Loading sandhi rules sandhi = make_sandhi_object(sandhi_rules) with util.read_csv(prefixed_roots) as reader: rows = [] for row in reader: # Nibble away at `prefixed_root` until we have all prefixes for the # given root. prefixes = [] prefixed_root = row['prefixed_root'] unprefixed_root = row['unprefixed_root'] last_letter = None q = Queue.PriorityQueue() for remainder in sandhi.split_off(prefixed_root, unprefixed_root): q.put_nowait((0, (), remainder)) while not q.empty(): _, cur_prefixes, remainder = q.get_nowait() # `remainder` is something we recognize: we're done! if remainder in all_prefixes: prefixes = list(cur_prefixes) if remainder: prefixes.append(remainder) last_letter = remainder[-1] break for before, after in sandhi.splits(remainder): # Prevent recursion. As of this comment, the `splits` method # returns the non-split of some term X as (X, ''). In other # words, this conditional will *never* be true. But since the # behavior of various functions is still unsettled, this check # will stay here for the time being. if after == remainder: continue if before in all_prefixes: state = (cur_prefixes + (before,), after) cost = len(after) # Incentivize short vowels. This avoids errors with roots # like "upodgrah" ("upa-ud-grah"). Without the incentive, # we could have "upa-A-ud-grah" instead. if before and before[-1] in 'aiufx': cost -= 1 q.put_nowait((cost,) + state) # Convert 'alternate' prefixes back to their original forms. prefixes = [prefix_alternates.get(x, x) for x in prefixes] if not prefixes: # Occurs if the root's prefix is unrecognized continue # We still don't know the prefix group. We can find it by splitting # off the root and keeping whatever matches `last_letter`. for group in sandhi.split_off(prefixed_root, unprefixed_root): if group[-1] == last_letter: break prefix_string = '-'.join(prefixes) rows.append((group, prefix_string)) labels = ['group', 'prefixes'] with util.write_csv(out_path, labels) as write_row: for row in util.unique(rows): datum = dict(zip(labels, row)) write_row(datum)
def UniqueLines(self):
    self.SelectLines()
    lines = self.GetSelectedText().split("\n")
    lines = unique(lines)
    self.ReplaceSelectionAndSelect("\n".join(lines))
def includes(self):
    return list(unique(sum([c.includes for c in self.computations], [])))
def list_nei_lines(specrange, Te, tau, Te_init=1e4, lldat=False, linefile=False,\ units='angstroms', teunit='K', minepsilon=1e-20, \ datacache=False): """ Gets list of the lines in a given spectral range for a given NEI plasma For speed purposes, this takes the nearest temperature tabulated in the linefile, and applies the exact ionization balance as calculated to this. This is not perfect, but should be good enough. Note that the output from this can be passed directly to print_lines Parameters ---------- specrange : [float,float] spectral range [min,max] to return lines on Te : float electron temperature tau : float electron density * time (cm^-3 s) Te_init : float initial ionization balance temperature lldat : see notes line data linefile : see notes line data file, see notes units : {'A' , 'keV'} units of specrange (default A) teunit : {'K' , 'keV'} units of temperatures (default K) minepsilon : float minimum emissivity (ph cm^3 s^{-1}) for inclusion in linelist Notes ----- The actual line list can be defined in one of several ways: specrange = [10,100] 1. lldat as an actual list of lines:: a = pyfits.open('apec_nei_line.fits') llist = a[30].data l = list_nei_lines(specrange, lldat=llist) 2. lldat as a numpy array of lines:: a = pyfits.open('apec_nei_line.fits') llist = numpy.array(a[30].data) l = list_nei_lines(specrange, lldat=llist) 3. lldat is a BinTableHDU from pyfits:: a = pyfits.open('apec_nei_line.fits') llist = numpy.array(a[30]) l = list_nei_lines(specrange, lldat=llist) 4. lldat is a HDUList from pyfits. In this case index must also be set:: a = pyfits.open('apec_nei_line.fits') index = 30 l = list_nei_lines(specrange, lldat=a, index=index) 5. lldat NOT set, linefile contains apec_line.fits file location, index identifies the HDU:: linefile = 'mydir/apec_v3.0.2_nei_line.fits' index = 30 l = list_nei_lines(specrange, linefile=linefile, index=index) 6. lldat NOT set & linefile NOT set, linefile is set to $ATOMDB/apec_line.fits. index identifies the HDU:: index = 30 l = list_nei_lines(specrange, Te, tau) Returns ------- linelist : dtype=([('Lambda', '>f4'), \ ('Lambda_Err', '>f4'), \ ('Epsilon', '>f4'), \ ('Epsilon_Err', '>f4'), \ ('Element', '>i4'), \ ('Elem_drv', '>i4'), \ ('Ion', '>i4'), \ ('Ion_drv', '>i4'), \ ('UpperLev', '>i4'), \ ('LowerLev', '>i4')]) A line list filtered by the various elements. """ # History # ------- # Version 0.1 - initial release # Adam Foster November 02nd 2015 # # check the units if units.lower()=='kev': specrange = [const.HC_IN_KEV_A/specrange[1], const.HC_IN_KEV_A/specrange[0]] elif units.lower() in ['a', 'angstrom', 'angstroms']: specrange = specrange else: print "*** ERROR: unknown unit %s, Must be keV or A. Exiting ***"%\ (units) if teunit.lower() == 'kev': kT = Te*1.0 elif teunit.lower() == 'ev': kT = Te/1000.0 elif teunit.lower() == 'k': kT = Te*const.KBOLTZ else: print "*** ERROR: unknown teunit %s, Must be keV or K. Exiting ***"%\ (teunit) if Te_init != False: if teunit.lower() == 'kev': kT_init = Te_init*1.0 elif teunit.lower() == 'ev': kT_init = Te_init/1000.0 elif teunit.lower() == 'k': kT_init = Te_init*const.KBOLTZ else: print "*** ERROR: unknown teunit %s, Must be keV or K. Exiting ***"%\ (teunit) # sort out the line file... if lldat != False: #options here: # (1) This is a line list, i.e. 
the ldata[index].data from a file, # either in original pyfits format or a numpy array # # (2) This is an hdu from a file # # (3) This is a _line.fits file, and requires an index to make sense of it if type(lldat) == pyfits.hdu.hdulist.HDUList: # go get the index te_index = get_index(kT, filename=lldat, \ teunits='keV', logscale=True) llist = numpy.array(lldat[te_index].data) elif type(lldat) == pyfits.hdu.table.BinTableHDU: # no need to get index llist = numpy.array(lldat.data) elif type(lldat) in [pyfits.fitsrec.FITS_rec, numpy.ndarray]: llist = numpy.array(lldat) else: # no line data supplied. if linefile==False: linefile = os.path.expandvars('$ATOMDB/apec_nei_line.fits') if not os.path.isfile(linefile): print "*** ERROR. Linefile %s is "%(linefile), print " not a file. Exiting" else: lldat = pyfits.open(os.path.expandvars(linefile)) te_index = get_index(kT, filename=lldat, \ teunits='keV', logscale=True) llist= numpy.array(lldat[te_index].data) # get filtered line list llist = llist[(llist['Lambda']>= specrange[0]) &\ (llist['Lambda']<= specrange[1]) &\ (llist['Epsilon'] >= minepsilon)] # get the index # get list of all the elements present Zlist = util.unique(llist['Element']) # Calculate the ionization balance. ionbal ={} for Z in Zlist: ionbal[Z] = apec.solve_ionbal_eigen(Z, kT, tau, Te_init = kT_init,\ teunit='keV', datacache=datacache) # multiply everything by the appropriate ionization fraction if 'Elem_drv' in llist.dtype.names: for il in llist: il['Epsilon'] *= ionbal[il['Elem_drv']][il['Ion_drv']-1] else: for il in llist: il['Epsilon'] *= ionbal[il['Element_drv']][il['Ion_drv']-1] # filter again based on new epsilon values llist=llist[llist['Epsilon']>minepsilon] print "done" # at this point, we have data return llist
def unique_stations(self, channel_filter=None):
    return unique((channel.station
                   for channel in self.unique_channels(channel_filter)),
                  lambda s: s.station_id)
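# Many of these snippets call a unique(iterable, key) helper that keeps only
# the first item seen per key while preserving order. The exact helper differs
# from project to project; an order-preserving sketch under that assumption
# (wrap the result in list() where a list is expected):
def unique(iterable, key=lambda x: x):
    """Yield each item whose key has not been seen before, preserving order."""
    seen = set()
    for item in iterable:
        k = key(item)
        if k not in seen:
            seen.add(k)
            yield item

# list(unique([3, 1, 3, 2, 1])) -> [3, 1, 2]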
def variables(self):
    return tuple(unique(chain(self.inputs, self.outputs)))
def make_spectrum(bins, index, linefile="$ATOMDB/apec_line.fits",\ cocofile="$ATOMDB/apec_coco.fits",\ binunits='keV', broadening=False, broadenunits='keV', \ elements=False, abund=False, dummyfirst=False,\ dolines = True, docont=True, dopseudo=True): r""" make_spectrum is the most generic "make me a spectrum" routine. It returns the emissivity in counts cm^3 s^-1 bin^-1. Parameters ---------- bins : array(float) The bin edges for the spectrum to be calculated on, in \ units of keV or Angstroms. Must be monotonically\ increasing. Spectrum will return len(bins)-1 values. index : int The index to plot the spectrum from. note that the AtomDB files\ the emission starts in hdu number 2. So for the first block, you\ set index=2 linefile : str The file containing all the line emission. Defaults to \ "$ATOMDB/apec_line.fits" cocofile : str The file containing all the continuum emission. Defaults to \ "$ATOMDB/apec_coco.fits" binunits : {'keV','A'} The energy units for bins. "keV" or "A". Default keV. broadening : float Line broadening to be applied broadenunits : {'keV','A'} Units of line broadening "keV" or "A". Default keV. elements : iterable of int Elements to include, listed by atomic number. if not set, include all. abund : iterable of float, length same as elements. If set, and array of length (elements) with the abundances of each\ element relative to the Andres and Grevesse values. Otherwise, assumed to\ be 1.0 for all elements dummyfirst : bool If true, add a "0" to the beginning of the return array so it is of the same length as bins (can be useful for plotting results) dolines : bool Include lines in the spectrum docont : bool Include the continuum in the spectrum dopseudo : bool Include the pseudocontinuum in the spectrum. Returns ------- array of floats Emissivity in counts cm^3 s^-1 bin^-1. """ # History # ------- # Version 0.1 - initial release # Adam Foster July 17th 2015 # # Version 0.2 # Added dummyfirst keyword # Adam Foster July 21st 2015 # # set up the bins if (sum((bins[1:]-bins[:-1])<0) > 0): print "*** ERROR: bins must be monotonically increasing. Exiting ***" return -1 if binunits.lower()=='kev': ebins = bins*1.0 elif binunits.lower() in ['a', 'angstrom', 'angstroms']: ebins = const.HC_IN_KEV_A/bins[::-1] else: print "*** ERROR: unknown binning unit %s, Must be keV or A. Exiting ***"%\ (binunits) if util.keyword_check(linefile): # ok, we should do something with this # if it is a string, look for the file name if isinstance(linefile, basestring): lfile = os.path.expandvars(linefile) if not os.path.isfile(lfile): print "*** ERROR: no such file %s. Exiting ***" %(lfile) return -1 ldat = pyfits.open(lfile) elif isinstance(linefile, pyfits.hdu.hdulist.HDUList): # no need to do anything, file is already open ldat = linefile else: print "Unknown data type for linefile. Please pass a string or an HDUList" return -1 if util.keyword_check(cocofile): if isinstance(cocofile, basestring): cfile = os.path.expandvars(cocofile) if not os.path.isfile(cfile): print "*** ERROR: no such file %s. Exiting ***" %(cfile) return -1 cdat = pyfits.open(cfile) elif isinstance(cocofile, pyfits.hdu.hdulist.HDUList): # no need to do anything, file is already open cdat = cocofile else: print "Unknown data type for cocofile. Please pass a string or an HDUList" return # lfile = os.path.expandvars(linefile) # cfile = os.path.expandvars(cocofile) # if not os.path.isfile(lfile): # print "*** ERROR: no such file %s. 
Exiting ***" %(lfile) # return -1 # if not os.path.isfile(cfile): # print "*** ERROR: no such file %s. Exiting ***" %(cfile) # return -1 # open the files # ldat = pyfits.open(lfile) # cdat = pyfits.open(cfile) # get the index if ((index < 2) | (index > len(ldat))): print "*** ERRROR: Index must be in range %i to %i"%(2, len(ldat)-1) return -1 lldat = ldat[index].data ccdat = cdat[index].data if not util.keyword_check(elements): Zl = util.unique(lldat['element']) Zc = util.unique(ccdat['Z']) Zlist = util.unique(numpy.append(Zl,Zc)) else: Zlist = elements if not util.keyword_check(abund): abund= numpy.ones(len(Zlist)) lspectrum = numpy.zeros(len(bins)-1, dtype=float) cspectrum = numpy.zeros(len(bins)-1, dtype=float) if dolines: for iZ, Z in enumerate(Zlist): # ADD LINES lspectrum += add_lines(Z, abund[iZ], lldat, ebins, broadening=broadening, broadenunits=broadenunits) if docont | dopseudo: for iZ, Z in enumerate(Zlist): # ADD CONTINUUM cspectrum += make_ion_index_continuum(ebins, Z, cocofile=ccdat,\ binunits=binunits, no_coco=-docont,\ no_pseudo=-dopseudo)*abund[iZ] # broaden the continuum if required: if broadening: cspectrum = broaden_continuum(ebins, cspectrum, binunits = binunits, \ broadening=broadening,\ broadenunits=broadenunits) if dummyfirst: return numpy.append([0], cspectrum+lspectrum) else: return cspectrum+lspectrum
def unique_channels(self, channel_filter=None):
    return unique((channel
                   for lineup_map in self
                   for channel in lineup_map.channels
                   if channel_filter is None or channel.channel in channel_filter),
                  lambda c: c.get_unique_id())
def get_program_ids(self):
    return unique(broadcast.program_id
                  for schedule in self
                  for broadcast in schedule.broadcasts)
def process_markdown(input_markdown, output_name, latex_img_dir = "./", input_path = "./", thumb_size=64): """ Produces the html file, toc file, meta file and a list of (local_file, target_name) pairs where local_file is a file on the local system and target_name is the name of the file when placed in [output_name]/*. """ md = markdown.Markdown( extensions=[ 'meta' , 'codehilite' , 'tables' , 'def_list' , 'footnotes' , 'resourceextractor' , 'abstractextractor' , 'tocextractor' , 'mathjax' , 'latex' ] , extension_configs = { "resourceextractor": ( ("resource_dir",output_name) , ("relative_path",input_path) ), "latex": ( ("latex_img_dir",latex_img_dir) , ("input_path", input_path) ), } ) # Basic HTML conversion html = md.convert(input_markdown) # Generate table of contents toc = md.toc # Choose document title (default to the output name) title = output_name # Use the first heading if possible if len(toc) > 0: title = toc[0][1] # Better yet, get the explicitly given metadata title = md.Meta.get("title", [title])[0] # Choose document subtitle (only available from metadata) subtitle = md.Meta.get("subtitle", [None])[0] # Get the image from the metadata img = md.Meta.get("img", [None])[0] img_alt = md.Meta.get("img_alt", [title])[0] # The abstract should be taken to be the first paragraph. abstract = md.abstract if md.abstract is not None else "" # Get the list of tags tags = md.Meta.get("tags", []) # Get the show option show = md.Meta.get("show", ["True"])[0] == "True" files = md.resources # Add the article image to the list of files and create a thumbnail if # possible. if img is not None and img.startswith("file://"): img = os.path.join(input_path, img[len("file://"):]) img_output_name = "%s/%s"%(output_name, unique(os.path.basename(img), [f.split("/")[-1] for (_,f) in files])) img_thumbnail = "%s.thumb.png"%img p = Popen( ["convert" , img , "-thumbnail", "%dx%d"%(thumb_size,thumb_size) , img_thumbnail] , stdin = None , stdout = sys.stderr , stderr = sys.stderr ) if p.wait() != 0: raise Exception("Creating img thumbnail failed.") files.append((img_thumbnail, img_output_name)) img = img_output_name # Generate meta-data meta_data = { "url" : output_name, "title" : title, "subtitle" : subtitle, "img" : img, "img_alt" : img_alt, "abstract" : abstract, "tags" : tags, "show" : show, } return html, toc, meta_data, files
def libs(self):
    return list(unique(sum([c.libs for c in self.computations], [])))
fl = repo.file(f) lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]]) except: lr = None err(lr, _("in manifest but not in changeset"), f) ui.status(_("checking files\n")) storefiles = {} for f, f2, size in repo.store.datafiles(): if not f: err(None, _("cannot decode filename '%s'") % f2) elif size > 0: storefiles[f] = True files = util.sort(util.unique(filenodes.keys() + filelinkrevs.keys())) for f in files: lr = filelinkrevs[f][0] try: fl = repo.file(f) except error.RevlogError, e: err(lr, _("broken revlog! (%s)") % e, f) continue for ff in fl.files(): try: del storefiles[ff] except KeyError: err(lr, _("missing revlog!"), ff) checklog(fl, f)
def allgoal(s):
    g = goaleval(reify(goals[0], s))
    return unique(interleave(
                      goaleval(reify((lall,) + tuple(goals[1:]), ss))(ss)
                      for ss in g(s)),
                  key=dicthash)