Example #1
    def create_term_file(self):
        kwargs = {}
        if 'oldCols' in self.options:
            kwargs['names'] = self.options['oldCols'].split(',')
        if 'read_csv' in self.options:
            for kv_str in self.options['read_csv'].split(','):
                kv = kv_str.split('=')
                kwargs[kv[0]] = kv[1]
                if kv[1] == 'None':
                    kwargs[kv[0]] = None
                if kv[0].lower() == 'skiprows':
                    kwargs[kv[0]] = int(kv[1])

        iter_csv = util.read_csv(self.fn_source,
                                 iterator=True,
                                 chunksize=self.get_chunksize(),
                                 dtype=str,
                                 **kwargs)
        term_id_col = 'term_id' if 'term_id' not in self.column_map else self.column_map[
            'term_id']
        term_ids = []
        for chunk in iter_csv:
            term_ids += util.unique(chunk[term_id_col])
        term_ids = util.unique(term_ids)

        with open(self.fn_dest, "w") as myfile:
            wr = csv.writer(myfile)
            wr.writerow(['term_id', 'term_name', 'term_type'])
            wr.writerows([[
                term_id, self.term_name if self.term_name else term_id,
                self.options['typeName']
            ] for term_id in term_ids])
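
Every example in this collection calls a util.unique (or bare unique) helper whose implementation is not included here. For list-like inputs, as in the chunked term_id collection above, the callers appear to rely on order-preserving deduplication; later examples also pass key functions, tensors, or a second "already taken" argument. As a rough sketch only, under that order-preserving assumption, such a helper could look like this:

def unique(items):
    """Return the distinct elements of items in first-seen order.

    A minimal stand-in for the list-style util.unique used above; the real
    helper may also accept pandas Series, key functions, or other forms.
    """
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result

# unique(['GO:1', 'GO:2', 'GO:1']) -> ['GO:1', 'GO:2']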
Example #2
def format(data):
    """Takes a list of MetricResults."""
    # Distinct values in data for forest plot variables:
    forestcombinations = util.combinations(
        [util.unique(x, data) for x in config.fgraphs])
    forestcombinations = [
        dict(zip(config.fgraphs, x)) for x in forestcombinations
    ]
    forestcombinations = filter(lambda x: x["benchmark"] in config.forests,
                                forestcombinations)
    # Distinct values in data for bar chart variables:
    barcombinations = util.combinations(
        [util.unique(x, data) for x in config.graphs])
    barcombinations = [dict(zip(config.graphs, x)) for x in barcombinations]
    barcombinations = filter(lambda x: not x["benchmark"] in config.forests,
                             barcombinations)
    # Filter out omitted configurations.
    allcombinations = forestcombinations + barcombinations
    for omit in config.omit:
        sieve = lambda x: not util.all(
            [x[y] == omit[y] for y in omit.keys() if x.has_key(y)])
        allcombinations = filter(sieve, allcombinations)
    # Create the graph objects.
    figures = []
    for configuration in allcombinations:
        if configuration in forestcombinations:
            graphtype = ForestPlot
        else:
            graphtype = BarChart
        sorteddata = graphtype.sortdata(data, configuration)
        if sorteddata: graph = graphtype(sorteddata)
        else: continue
        graph.create()
        figures.append(graph)
    return figures
Example #3
    def do_one_chunk(self, chunk):
        rows = []
        data_key_col = self.key_col if self.key_col not in self.column_map else self.column_map[
            self.key_col]
        data_value_col = self.value_col if self.value_col not in self.column_map else self.column_map[
            self.value_col]
        tax_id_col = 'tax_id' if 'tax_id' not in self.column_map else self.column_map[
            'tax_id']

        for k, g in chunk.groupby(data_key_col, as_index=False):
            #Tracer()()
            r = {}
            r[self.key_col] = self.get_term_prefix(self.key_col) + str(k)
            allids = self.get_term_prefix(
                self.value_col) + g[data_value_col].astype(str)
            if self.value_col == 'gid':
                allids = [x for x in util.unique(allids) if str(x).isdigit()]
            else:
                allids = util.unique(allids)
            r[self.new_value_col] = ','.join(allids)
            r['id_count'] = len(allids)
            r['ds'] = self.ds
            if hasattr(g.iloc[0], tax_id_col):
                all_tax_ids = g[tax_id_col].astype(str)
                r['tax_id'] = ','.join(util.unique(all_tax_ids))
            else:
                r['tax_id'] = '9606'
            rows.append(r)

        return rows
Example #4
def format(data):
    """Takes a list of MetricResults."""
    # Distinct values in data for forest plot variables:
    forestcombinations = util.combinations([util.unique(x, data) for x in config.fgraphs])
    forestcombinations = [dict(zip(config.fgraphs, x)) for x in forestcombinations]
    forestcombinations = filter(lambda x: x["benchmark"] in config.forests, forestcombinations)
    # Distinct values in data for bar chart variables:
    barcombinations = util.combinations([util.unique(x, data) for x in config.graphs])
    barcombinations = [dict(zip(config.graphs, x)) for x in barcombinations]
    barcombinations = filter(lambda x: not x["benchmark"] in config.forests, barcombinations)
    # Filter out omitted configurations.
    allcombinations = forestcombinations + barcombinations
    for omit in config.omit:
        sieve = lambda x: not util.all([x[y] == omit[y] for y in omit.keys() if x.has_key(y)])
        allcombinations = filter(sieve, allcombinations)
    # Create the graph objects.
    figures = []
    for configuration in allcombinations:
        if configuration in forestcombinations:
            graphtype = ForestPlot
        else:
            graphtype = BarChart
        sorteddata = graphtype.sortdata(data, configuration)
        if sorteddata:
            graph = graphtype(sorteddata)
        else:
            continue
        graph.create()
        figures.append(graph)
    return figures
Example #5
    def _input_outputs(self, canonicalize=identity):
        """ Find the inputs and outputs of the complete computation """
        allin = map(canonicalize, unique(chain(*[c.inputs
                                                for c in self.computations])))
        allout = map(canonicalize, unique(chain(*[c.outputs
                                                for c in self.computations])))

        inputs  = remove(allout.__contains__, allin)
        outputs = remove(allin.__contains__, allout)
        ident_inputs  = [i for c in self.computations if isinstance(c, Identity)
                           for i in c.inputs]
        ident_outputs = [o for c in self.computations if isinstance(c, Identity)
                           for o in c.outputs]
        return tuple(inputs + ident_inputs), tuple(outputs + ident_outputs)
Example #6
    def duplicates(self, tuples):
        """
        Takes a list of tuples, and for each tuple that occurs multiple times
        marks all but one of the occurrences (in the mask that is returned).

        :param tuples: A size (batch, k, rank) tensor of integer tuples
        :return: A size (batch, k) mask indicating the duplicates
        """
        b, k, r = tuples.size()

        # unique = ((tuples.float() + 1) ** primes).prod(dim=2)  # unique identifier for each tuple
        unique = util.unique(tuples.view(b * k, r)).squeeze().view(b, k)

        sorted, sort_idx = torch.sort(unique, dim=1)
        _, unsort_idx = torch.sort(sort_idx, dim=1)

        mask = sorted[:, 1:] == sorted[:, :-1]
        # mask = mask.view(b, k - 1)

        zs = torch.zeros(b,
                         1,
                         dtype=torch.uint8,
                         device='cuda' if tuples.is_cuda else 'cpu')
        mask = torch.cat([zs, mask], dim=1)

        return torch.gather(mask, 1, unsort_idx)
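
The docstring above states the contract, but the sort/unsort construction is easy to misread. The sketch below is not the author's implementation: it rebuilds an equivalent (batch, k) duplicate mask with torch.unique(dim=0, return_inverse=True), flagging every occurrence after the first (whereas the version above keeps one arbitrary occurrence per duplicate group).

import torch

def duplicate_mask(tuples):
    # tuples: (batch, k, rank) integer tensor.
    # Returns a (batch, k) boolean mask, True for every repeat after the first.
    b, k, r = tuples.size()
    mask = torch.zeros(b, k, dtype=torch.bool)
    for i in range(b):
        # inverse[j] is the index of row j among the distinct rows of batch i.
        _, inverse = torch.unique(tuples[i], dim=0, return_inverse=True)
        seen = set()
        for j, idx in enumerate(inverse.tolist()):
            mask[i, j] = idx in seen
            seen.add(idx)
    return mask

# duplicate_mask(torch.tensor([[[1, 2], [3, 4], [1, 2]]]))
# -> tensor([[False, False,  True]])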
Example #7
def do(sequences):

    L = len(sequences[0])  # length of aligned sequences
    N = len(sequences)  # number of sequences

    weights = []
    for i in range(L):
        aa = check_mutation_position.do(sequences, i)
        freq = util.calc_frequency(aa)

        uniq_aa = util.unique(aa)

        # compute sequence weights
        w = []
        for i in range(N):
            if aa[i] == '-' or aa[i] == 'X':
                w.append(0)
            else:
                if aa[i] == 'B':
                    aa[i] = 'N'
                elif aa[i] == 'Z':
                    aa[i] = 'Q'
                w.append(1.0 / (len(uniq_aa) * freq[aa[i]]))

        weights.append(w)  # N x L matrix, for each position find the weights

    # compute average of w over all positions
    avg_weight = np.zeros(N)
    for i in range(L):
        avg_weight += np.array(weights[i])
    avg_weight *= 1.0 / L

    return avg_weight
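
The inner loop is a position-based (Henikoff-style) weighting: at each column, a sequence receives weight 1 / (r * n), where r is the number of distinct symbols in that column and n is how many sequences carry the same residue there. A self-contained sketch of the per-column step, assuming util.calc_frequency returns per-residue counts (the B->N and Z->Q substitutions are left out):

from collections import Counter

def column_weights(column):
    # column: the residues at one alignment position, one per sequence.
    counts = Counter(column)
    r = len(counts)                      # distinct symbols in the column
    weights = []
    for aa in column:
        if aa in ('-', 'X'):             # gaps and unknowns get zero weight
            weights.append(0.0)
        else:
            weights.append(1.0 / (r * counts[aa]))
    return weights

# column_weights(['A', 'A', 'C']) -> [0.25, 0.25, 0.5]   (r = 2; A occurs twice, C once)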
Example #8
def deduplicated_materials(gltf):
    """
    Remove duplicate materials.
    :param gltf: glTF object
    :return: the glTF object with duplicates removed
    """
    gltf = deepcopy(gltf)

    # Deduplicate based on the VRM materials
    vrm = gltf['extensions']['VRM']
    # Map: material name -> name of the canonical (deduplicated) material
    unique_name_map = dict(unique_vrm_materials(vrm['materialProperties']))
    unique_material_name_set = unique(unique_name_map.values())

    # Map: material name -> material
    name2materials = {m['name']: m for m in gltf['materials']}
    name2vrm_materials = {m['name']: m for m in vrm['materialProperties']}

    # Deduplicate the glTF materials
    gltf['materials'] = [
        name2materials[name] for name in unique_material_name_set
    ]
    vrm['materialProperties'] = [
        name2vrm_materials[name] for name in unique_material_name_set
    ]

    # Deduplicate the materials referenced by primitives
    for mesh in gltf['meshes']:
        for primitive in mesh['primitives']:
            # Replace the primitive's material with the canonical one
            new_name = unique_name_map[primitive['material']['name']]
            primitive['material'] = name2materials[new_name]

    return gltf
Example #9
def clean_buffer_views(gltf):
    """
    Return a buffer view list with unused buffer views removed.
    :param gltf: glTF object
    :return: the new buffer view list
    """
    return unique(list_buffer_views(gltf))
Example #10
def make_target_map(target_strs):
    """Return mapping from target strings to numeric values."""
    target_map = {}
    unique_target_strs = unique(target_strs)
    # Special case: None always maps to None (absent targets).
    include_none = False
    if None in unique_target_strs:
        unique_target_strs.remove(None)
        include_none = True
    # By convention, always map "O" to 0 (IOB-like tags).
    # TODO: check that unique_target_strs is IOB-like tagging.
    next_idx = 0
    if 'O' in unique_target_strs:
        target_map['O'] = next_idx
        unique_target_strs.remove('O')
        next_idx += 1
    for t in unique_target_strs:
        target_map[t] = next_idx
        next_idx += 1
    # Convert to one-hot
    for k in target_map:
        one_hot = np.zeros(len(target_map))
        one_hot[target_map[k]] = 1
        target_map[k] = one_hot
    if include_none:
        target_map[None] = None
    return Bidict(target_map)
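
A standalone sketch of the mapping this function builds, assuming unique() preserves first-seen order and that Bidict behaves like a dict with reverse lookup (a plain dict is returned here just to show the values):

import numpy as np

def one_hot_target_map(target_strs):
    labels = []
    for t in target_strs:
        if t is not None and t not in labels:
            labels.append(t)
    if 'O' in labels:                    # 'O' conventionally maps to index 0
        labels.remove('O')
        labels.insert(0, 'O')
    mapping = {t: np.eye(len(labels))[i] for i, t in enumerate(labels)}
    if None in target_strs:
        mapping[None] = None             # absent targets stay None
    return mapping

# one_hot_target_map(['O', 'B-PER', 'I-PER', 'O', None])
# -> {'O': [1, 0, 0], 'B-PER': [0, 1, 0], 'I-PER': [0, 0, 1], None: None}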
Example #11
def clean_textures(gltf):
    """
    Return a texture list with unused textures removed.
    :param gltf: glTF object
    :return: the new texture list
    """
    return unique(list_textures(gltf))
Example #12
def renderNonexistingImages(latexCodeList, charheightpx, alignfudge, resfudge, **kw):
    """ take a list of strings of latex code, render the
    images that don't already exist.
    """
    latexTemplate = (kw.get('latexTemplate', defaultLatexTemplate) or
                     defaultLatexTemplate)
    m = re.search(r'\\documentclass\[[^\]]*?(\d+)pt[^\]]*?\]', \
        latexTemplate)
    if m:
        charsizept = int(m.group(1))
    else:
        charsizept = 10
    res = charheightpx*ptperinch/charsizept*resfudge
    errors = ""
    codeToRender = filter(lambda x: imageDoesNotExist(x, charheightpx), unique(latexCodeList))
    if (not codeToRender): return
    unifiedCode = re.sub(r'^(\$|\\\()', r'\1|~ ', codeToRender[0])
    for code in codeToRender[1:len(codeToRender)]:
        unifiedCode = unifiedCode + '\n\\newpage\n' + re.sub(r'^(\$|\\\()', r'\1|~ ', code)
    try:
       runLatex(unifiedCode, res, charheightpx, latexTemplate)
    except LatexSyntaxError, data:
       errors = str(data)
       log(errors, 'LatexSyntaxError')
       # FIXME translate latex line number to source line number
       return escape(errors)
Example #13
	def remap_resources(self, root, local_file_paths = None, desired_file_names = None):
		local_file_paths   = local_file_paths   if local_file_paths   is not None else []
		desired_file_names = desired_file_names if desired_file_names is not None else []
		
		for child in root:
			if child.tag in ResourceExtractorTreeprocessor.RESOURCE_TAGS:
				attrib = ResourceExtractorTreeprocessor.RESOURCE_TAGS[child.tag]
				file_path = child.attrib[attrib]
				if file_path.startswith("file://"):
					local_file_path = os.path.join( self.configs.get("relative_path",".")
					                              , file_path[len("file://"):]
					                              )
					desired_file_name = unique(os.path.basename(local_file_path), desired_file_names)
					
					child.attrib[attrib] = "%s/%s"%(self.configs["resource_dir"],desired_file_name)
					
					local_file_paths.append(local_file_path)
					desired_file_names.append(desired_file_name)
			
			# Recurse
			self.remap_resources(child, local_file_paths, desired_file_names)
		
		
		# Return with the resource dirs prefixed
		return zip(local_file_paths,
		           ( "%s/%s"%(self.configs["resource_dir"], dfn)
		             for dfn in desired_file_names)
		           )
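
Note that unique() here (and in Example #41 below) is called with two arguments, a candidate name plus the list of names already taken, and the result is expected to be a collision-free name rather than a deduplicated list. The exact suffixing scheme is not shown anywhere in this collection, so the sketch below only illustrates that assumed contract:

def unique(name, existing):
    # Assumed behaviour: return name unchanged if it is free, otherwise
    # append a numeric suffix until the result no longer collides.
    if name not in existing:
        return name
    n = 2
    while "%s-%d" % (name, n) in existing:
        n += 1
    return "%s-%d" % (name, n)

# unique("introduction", ["introduction"]) -> "introduction-2"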
Example #14
    def do_one_chunk(self, chunk):
        rows = []
        if len(chunk) == 0:
            return rows

        key = [self.key_col]
        if self.key_col == 'term_id' and hasattr(chunk.iloc[0], 'tax_id'):
            key.append('tax_id')

        for k, g in chunk.groupby(key, as_index=False):

            row = {}
            if self.key_col == 'gid':
                row[self.key_col] = k
            else:
                row[self.key_col] = self.get_term_prefix(self.key_col) + k[0]
            allids = self.get_term_prefix(
                self.value_col) + g[self.value_col].astype(str)
            allids = util.unique(allids)
            row[self.new_value_col] = ','.join(allids)
            row['id_count'] = len(allids)
            row['ds'] = self.ds
            if hasattr(g.iloc[0], 'tax_id'):
                row['tax_id'] = g.iloc[0]['tax_id']
            else:
                row['tax_id'] = None
            row['term_category_id'] = self.get_type_col_value()
            rows.append(row)
        return rows
Example #15
def clean_accesors(gltf):
    """
    Return an accessor list with unused accessors removed.
    :param gltf: glTF object
    :return: the new accessor list
    """
    return unique(list_accessors(gltf))
Example #16
def deduplicated_materials(gltf):
    """
    Remove duplicate materials.
    :param gltf: glTF object
    :return: the glTF object with duplicates removed
    """
    gltf = deepcopy(gltf)
    vrm = gltf['extensions']['VRM']

    # Deduplicate based on the VRM materials
    # Map: material name -> canonical (deduplicated) material
    vrm_material_map = dict(unique_materials(vrm['materialProperties']))
    # Deduplicate the VRM materials
    vrm['materialProperties'] = unique(vrm_material_map.values())

    # Map: material name -> canonical material name
    unique_name_map = {k: v['name'] for k, v in vrm_material_map.items()}
    # Map: material name -> material
    materials_name_map = {m['name']: m for m in gltf['materials']}
    # Remove duplicate materials referenced by primitives
    for mesh in gltf['meshes']:
        for primitive in mesh['primitives']:
            # Update with the deduplicated material
            name = primitive['material']['name']
            new_name = unique_name_map[name]
            primitive['material'] = materials_name_map[new_name]

    # Deduplicate the glTF materials (note: keep the same order as the VRM materials)
    gltf['materials'] = [materials_name_map[vm['name']] for vm in vrm['materialProperties']]

    return gltf
Example #17
def renderNonexistingImages(latexCodeList, charheightpx, alignfudge, resfudge, **kw):
    """ take a list of strings of latex code, render the
    images that don't already exist.
    """
    from string import join

    res = int(round(charheightpx*ptperinch/charsizept*resfudge))
    errors = ""
    latexTemplate = (kw.get('latexTemplate', defaultLatexTemplate) or
                     defaultLatexTemplate)
    
    codeToRender = filter(lambda x: imageDoesNotExist(x, charheightpx), unique(latexCodeList))
    
    if (not codeToRender): return

#    unifiedCode = re.sub(r'^(\$|\\\()', r'\1\cdot ', codeToRender[0])
#    for code in codeToRender[1:len(codeToRender)]:
#        unifiedCode = unifiedCode + '\n\\newpage\n' + re.sub(r'^(\$|\\\()', r'\1\cdot ', code)

    unifiedCode = codeToRender[0]
    for code in codeToRender[1:len(codeToRender)]:
        unifiedCode = unifiedCode + '\n\\newpage\n' + code

    try:
       runLatex(unifiedCode, charheightpx, latexTemplate)
    except LatexSyntaxError, data:
       errors = str(data)
       log(errors, 'LatexSyntaxError')
       return escape(errors)
Example #18
 def get_none_stock_barcode(self, jd):
     """
     1. [Inbound] and [outbound] operations: the barcode does not exist
     2. [Outbound] operations: the stock quantity for the barcode is 0
     3. [Outbound] operations: the outbound quantity for a barcode must not exceed its stock quantity
     """
     for barcode in util.unique(jd["barcodeLines"]):
         stock = Stock.query.filter_by(
             barcode=barcode, warehouse_id=jd["warehouse_id"]).first()
         if not stock:
             return {"title": "条形码不存在", "content": barcode}
         if jd["method"] == "flow-out":
             if not stock.quantity:
                 return {
                     "title": "库存[%s]数量为0" % stock.name,
                     "content": "条形码:%s" % (barcode)
                 }
             elif Counter(jd["barcodeLines"])[barcode] > stock.quantity:
                 return {
                     "title":
                     "库存[%s]出库数量不得大于库存数量[%d]" %
                     (stock.name, stock.quantity),
                     "content":
                     "条形码:%s" % (barcode)
                 }
Example #19
    def biogrid(self, l_human_only=False):
        fn_source = os.path.join(SyncDB.DOWNLOAD_DIR(),
                                 "BIOGRID-ALL-3.4.134.tab2.txt")
        if not os.path.exists(fn_source):
            urllib.urlretrieve(
                "http://thebiogrid.org/downloads/archives/Release%20Archive/BIOGRID-3.4.134/BIOGRID-ALL-3.4.134.tab2.zip",
                os.path.join(SyncDB.DOWNLOAD_DIR(),
                             "BIOGRID-ALL-3.4.134.tab2.zip"))
            cmd = "unzip " + os.path.join(SyncDB.DOWNLOAD_DIR(),
                                          "BIOGRID-ALL-3.4.134.tab2.zip"
                                          ) + " -d " + SyncDB.DOWNLOAD_DIR()
            print cmd
            util.unix(cmd)

        t = pd.read_table(fn_source, dtype=str)
        #print t.header()
        #['#BioGRID Interaction ID', 'Entrez Gene Interactor A', 'Entrez Gene Interactor B', 'BioGRID ID Interactor A', 'BioGRID ID Interactor B', 'Systematic Name Interactor A', 'Systematic Name Interactor B', 'Official Symbol Interactor A', 'Official Symbol Interactor B', 'Synonyms Interactor A', 'Synonyms Interactor B', 'Experimental System', 'Experimental System Type', 'Author', 'Pubmed ID', 'Organism Interactor A', 'Organism Interactor B', 'Throughput', 'Score', 'Modification', 'Phenotypes', 'Qualifications', 'Tags', 'Source Database']
        print util.unique(t['Experimental System Type'])
        #t=t[(t['Organism Interactor A']=='9606') & (t['Organism Interactor B']=='9606')]
        t.rename2({
            'Entrez Gene Interactor A': 'gid_A',
            'Entrez Gene Interactor B': 'gid_B',
            'Experimental System Type': 'interaction_category',
            'Experimental System': 'interaction_type',
            'Pubmed ID': 'support',
            'Source Database': 'ds',
            'Organism Interactor A': 'tax_id_A',
            'Organism Interactor B': 'tax_id_B',
            'Score': 'score'
        })
        #print t.header()
        t['interaction_type_id'] = 2
        t = t[[
            'gid_A', 'gid_B', 'tax_id_A', 'tax_id_B', 'interaction_type_id',
            'interaction_category', 'interaction_type', 'score', 'support',
            'ds'
        ]]
        t = t[(t.gid_A != '-') & (t.gid_B != '-')]
        t['gid_A'] = t.gid_A.astype(int)
        t['gid_B'] = t.gid_B.astype(int)
        t['tax_id_A'] = t.tax_id_A.astype(int)
        t['tax_id_B'] = t.tax_id_B.astype(int)
        t['ds'] = 'BioGrid'
        t = t[(t.gid_A != t.gid_B) & (t.tax_id_A == t.tax_id_B) & (t.gid_A > 0)
              & (t.gid_B > 0)].copy()
        self.bio = t
        return self.bio
Example #20
    def get_tissue_specific(self):
        if not path.isfile(
                os.path.join(SyncDB.DOWNLOAD_DIR(), "gene2refseq.gz")):
            urllib.urlretrieve(
                "ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2refseq.gz",
                os.path.join(SyncDB.DOWNLOAD_DIR(), "gene2refseq.gz"))

        ref2gene = util.read_csv(
            os.path.join(SyncDB.DOWNLOAD_DIR(), "gene2refseq.gz"),
            skiprows=1,
            header=None,
            sep='\t',
            names=[
                "tax_id", "GeneID", "status",
                "RNA_nucleotide_accession.version", "RNA_nucleotide_gi",
                "protein_accession.version", "protein_gi",
                "genomic_nucleotide_accession.version",
                "genomic_nucleotide_gi",
                "start_position_on_the_genomic_accession",
                "end_position_on_the_genomic_accession", "orientation",
                "assembly", "mature_peptide_accession.version",
                "mature_peptide_gi", "Symbol"
            ]).query('tax_id in [9606]')

        #Tracer()()
        self.ref2gene_map = {}
        for i in ref2gene.index:
            if ref2gene.at[i, 'RNA_nucleotide_accession.version'] != '-':
                self.ref2gene_map[ref2gene.at[
                    i, 'RNA_nucleotide_accession.version'].split('.')
                                  [0]] = ref2gene.at[i, 'GeneID']

            if ref2gene.at[i, 'protein_accession.version'] != '-':
                self.ref2gene_map[ref2gene.at[i, 'protein_accession.version'].
                                  split('.')[0]] = ref2gene.at[i, 'GeneID']

            if ref2gene.at[i, 'genomic_nucleotide_accession.version'] != '-':
                self.ref2gene_map[ref2gene.at[
                    i, 'genomic_nucleotide_accession.version'].split('.')
                                  [0]] = ref2gene.at[i, 'GeneID']

        t_tissue = self.tissue_specific()
        t_tissue.rename2({'Tissue(s)': 'Tissue'})
        t_tissue['gene_id'] = t_tissue.RefSeq.apply(
            lambda x: self.ref2gene_map.get(x, 0))
        t_tissue = t_tissue.query('gene_id > 0')
        data = []
        for k, t_v in t_tissue.groupby('gene_id'):
            if k == 0: continue
            S = [x for x in t_v['Tissue'] if not pd.isnull(x)]
            s = " ".join(S)
            S = util.unique(s.split(" "))
            data.append({'gene_id': k, 'Tissues': ";".join(S)})
        t_tissue = pd.DataFrame(data)
        t_tissue['tax_id'] = '9606'
        #Tracer()()
        t_tissue.to_csv(self.fn_dest_tissue_specific, index=False)
        print "%d Tissue-specific Genes Fetched" % len(t_tissue)
Example #21
 def build_go_term_count(self, file):
     goterm_count_map = {}
     gene2go = util.read_csv(file, sep=r'\t', 
             names=['tax_id','gene_id','term_id','type','description'])
       
     for k,g in gene2go.groupby('term_id', as_index=False):
         goterm_count_map[k] = len(util.unique(g['gene_id'].values))
     
     return goterm_count_map
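
Since gene2go supports groupby like a pandas DataFrame, the same per-term count of distinct gene ids can also be expressed with groupby(...).nunique(); a small self-contained sketch with toy data:

import pandas as pd

gene2go = pd.DataFrame({
    'term_id': ['GO:0001', 'GO:0001', 'GO:0001', 'GO:0002'],
    'gene_id': [10, 10, 11, 20],
})
goterm_count_map = gene2go.groupby('term_id')['gene_id'].nunique().to_dict()
# -> {'GO:0001': 2, 'GO:0002': 1}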
Example #22
    def test_unique_recursion(self):
        """
        Reproducing observed recursion error
        :return:
        """

        # tensor of 6 1-tuples
        tuples = torch.tensor([[74], [75], [175], [246], [72], [72]])
        dup = util.unique(tuples)
Example #23
    def do_update(self):
        print '##############################################################'
        download_url = 'http://mips.helmholtz-muenchen.de/corum/download/allComplexes.txt.zip'
        urllib.urlretrieve(download_url, self.fn_data)
        t = pd.read_table(self.fn_data)
        #print t.header()
        #['ComplexID', 'ComplexName', 'Organism', 'Synonyms', 'Cell line', 'subunits(UniProt IDs)', 'subunits(Entrez IDs)', 'Protein complex purification method', 'GO ID', 'GO description', 'FunCat ID', 'FunCat description', 'PubMed ID', 'subunits(Protein name)', 'subunits(Gene name)', 'subunits(Gene name syn)', 'Disease comment', 'Subunits comment', 'Complex comment', 'SWISSPROT organism']
        C_TAX = {'Rat': 10116, 'Human': 9606, 'Mouse': 10090}
        c_gene2tax = self.get_gene2tax()

        out_term = []
        out_gids = []
        for i in t.index:
            id = t.ix[i, 'ComplexID']
            s_go = t.ix[i, 'ComplexName']
            s_des = t.ix[i, 'Complex comment']
            if s_des == 'None':
                s_des = t.ix[i, 'GO description']
                if s_des == 'None':
                    s_des = s_go
            gids = t.ix[i, 'subunits(Entrez IDs)']
            gids = gids.replace(';', ',')
            S = gids.split(',')
            S = [x for x in S if x in c_gene2tax]
            S_tax = [c_gene2tax[x] for x in S]
            l_new_term = True
            for s_tax in util.unique(S_tax):
                S_gid = [S[i] for i, x in enumerate(S_tax) if x == s_tax]
                n = len(S_gid)
                if n < 3:
                    #print "Too few proteins: ", id, S
                    continue
                if l_new_term:
                    out_term.append({
                        'term_id': 'CORUM:%d' % id,
                        'term_name': s_go,
                        'description': s_des
                    })
                    l_new_term = False
                for gid in S_gid:
                    out_gids.append({
                        'gid': gid,
                        'term_id': 'CORUM:%d' % id,
                        'term_name': s_go,
                        'type_name': 'CORUM',
                        'tax_id': s_tax
                    })
        #gid, term_id, term_name, type_name, tax_id

        t_term = pd.DataFrame(out_term)
        t_term.to_csv(self.fn_dest_go_term, index=False)
        print "Number of Complexes: %d" % len(t_term)
        t_gids = pd.DataFrame(out_gids)
        t_gids.to_csv(self.fn_gene_term_pair, index=False)
        #print t_gids.header()
        print util.unique_count(t_gids['tax_id'].values)
Example #24
def main():
    voc = util.Voc(init_from_file="data/voc_b.txt")
    netR_path = 'output/rf_dis.pkg'
    netG_path = 'output/net_p'
    netD_path = 'output/net_d'
    agent_path = 'output/net_gan_%d_%d_%dx%d' % (SIGMA * 10, BL * 10,
                                                 BATCH_SIZE, MC)

    netR = util.Environment(netR_path)

    agent = model.Generator(voc)
    agent.load_state_dict(T.load(netG_path + '.pkg'))

    df = pd.read_table('data/CHEMBL251.txt')
    df = df[df['PCHEMBL_VALUE'] >= 6.5]
    data = util.MolData(df, voc)
    loader = DataLoader(data,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        drop_last=True,
                        collate_fn=data.collate_fn)

    netD = model.Discriminator(VOCAB_SIZE, EMBED_DIM, FILTER_SIZE, NUM_FILTER)
    if not os.path.exists(netD_path + '.pkg'):
        Train_dis_BCE(netD, agent, loader, epochs=100, out=netD_path)
    netD.load_state_dict(T.load(netD_path + '.pkg'))

    best_score = 0
    log = open(agent_path + '.log', 'w')
    for epoch in range(1000):
        print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
        print('\nPolicy Gradient Training Generator : ')
        Train_GAN(agent, netD, netR)

        print('\nAdversarial Training Discriminator : ')
        Train_dis_BCE(netD, agent, loader, epochs=1)

        seqs = agent.sample(1000)
        ix = util.unique(seqs)
        smiles, valids = util.check_smiles(seqs[ix], agent.voc)
        scores = netR(smiles)
        scores[valids == False] = 0
        unique = (scores >= 0.5).sum() / 1000
        if best_score < unique:
            T.save(agent.state_dict(), agent_path + '.pkg')
            best_score = unique
        print("Epoch+: %d average: %.4f valid: %.4f unique: %.4f" %
              (epoch, scores.mean(), valids.mean(), unique),
              file=log)
        for i, smile in enumerate(smiles):
            print('%f\t%s' % (scores[i], smile), file=log)

        for param_group in agent.optim.param_groups:
            param_group['lr'] *= (1 - 0.01)

    log.close()
Example #25
 def generate_hall_of_heroes(self):
     # XXX should perhaps be ordered by sum of levels minus number of jobs?
     jobs = data.Job.all().order('-level').fetch(max_results)
     jobs = filter(lambda x: data.Character.by_user(x.owner).get().show_in_hall_of_heroes_p, jobs)
     return map(lambda x: { 'character': data.Character.by_user(x.owner).get(),
                            'archetype': x.archetype._static,
                            'primary_class': x,
                            'secondary_classes': filter(lambda y: y.key() != x.key(),
                                                        data.Job.by_user(x.owner).order('-level').fetch(max_results)) },
                util.unique(jobs,key_fn=lambda x: x.owner)[0:10])
Example #26
 def anygoal(s):
     reifiedgoals = (reify(goal, s) for goal in goals)
     def f(goals):
         for goal in goals:
             try:
                 yield goaleval(goal)(s)
             except EarlyGoalError:
                 pass
     return unique(interleave(f(reifiedgoals), [EarlyGoalError]),
                   key=dicthash)
Example #27
 def __new__(cls, *computations):
     computations = tuple(unique(computations))
     computations = exhaust(flatten)(computations)
     computations = exhaust(rm_identity)(computations)
     if len(computations) == 1:
         return computations[0]
     else:
         obj = object.__new__(cls)
         obj.computations = tuple(computations)
         return obj
Example #28
def scrape(xml_path):
    """Scrape verb prefixed from the MW dictionary."""

    upasargas = set(UPASARGAS.splitlines())
    labels = ['name', 'prefix_type']
    regexp = 'root'

    rows = []
    for i, xml in enumerate(util.iter_mw_xml(xml_path, regexp=regexp)):
        key1 = xml.find('h/key1')
        key2 = xml.find('h/key2')
        entry = key1.text
        if not (entry.endswith('kf') or entry.endswith('BU')):
            continue

        # A root is prefixed iff it has a <root> element. Any matches without
        # one are almost certainly nominals, which we can disregard.
        root = key2.find('.//root')
        if root is None:
            continue

        # Remove lingering XML
        root.clear()
        key2.tag = None
        name = ET.tostring(key2)
        name = re.sub('(<.*?>)|/', '', name)

        # Remove groups ending in upasargas
        splits = [x for x in name.split('-') if x]
        last = splits[-1]
        if last in upasargas or make_tidy(last) in upasargas:
            continue


        # Add prefixes to the proper category
        name = ''.join(splits)
        _type = None

        if name[-1] in ('I', 'U'):
            _type = 'cvi'
        elif name.endswith('A'):
            _type = 'DAc'
        else:
            _type = 'other'

        # 'sampra' is suggested as a prefix. This is wrong.
        if name == 'sampra':
            continue

        rows.append((name, _type))

    rows = util.unique(rows, lambda x: x[0])
    rows.sort(key=lambda x: util.key_fn(x[0]))
    print util.make_csv_string(labels, rows)
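
Here util.unique is given a key function as a second positional argument (rows are deduplicated by prefix name); Example #25 passes it as key_fn= instead. A minimal sketch of that keyed behaviour, assuming the helper keeps the first item seen for each key:

def unique_by(items, key):
    # Keep the first item for each key(item) value, preserving order.
    seen = set()
    out = []
    for item in items:
        k = key(item)
        if k not in seen:
            seen.add(k)
            out.append(item)
    return out

# unique_by([('ati', 'cvi'), ('ati', 'other'), ('aBi', 'DAc')], key=lambda r: r[0])
# -> [('ati', 'cvi'), ('aBi', 'DAc')]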
Example #29
 def is_header(row, l_allow_int=False):
     """Check if a row can be column header
     row: list, containing the cells of the first row
     l_allow_int: boolean, default False, whether int can be used as a column name
     return boolean"""
     if len(row) != len(util.unique(row)):
         return False
     for x in row:
         if type(x) is float or type(x) is complex: return False
         if (type(x) is int) and not l_allow_int: return False
         if not x: return False
     return True
Example #30
def scrape(xml_path):
    """Scrape verb prefixed from the MW dictionary."""

    upasargas = set(UPASARGAS.splitlines())
    labels = ['name', 'prefix_type']
    regexp = 'root'

    rows = []
    for i, xml in enumerate(util.iter_mw_xml(xml_path, regexp=regexp)):
        key1 = xml.find('h/key1')
        key2 = xml.find('h/key2')
        entry = key1.text
        if not (entry.endswith('kf') or entry.endswith('BU')):
            continue

        # A root is prefixed iff it has a <root> element. Any matches without
        # one are almost certainly nominals, which we can disregard.
        root = key2.find('.//root')
        if root is None:
            continue

        # Remove lingering XML
        root.clear()
        key2.tag = None
        name = ET.tostring(key2)
        name = re.sub('(<.*?>)|/', '', name)

        # Remove groups ending in upasargas
        splits = [x for x in name.split('-') if x]
        last = splits[-1]
        if last in upasargas or make_tidy(last) in upasargas:
            continue

        # Add prefixes to the proper category
        name = ''.join(splits)
        _type = None

        if name[-1] in ('I', 'U'):
            _type = 'cvi'
        elif name.endswith('A'):
            _type = 'DAc'
        else:
            _type = 'other'

        # 'sampra' is suggested as a prefix. This is wrong.
        if name == 'sampra':
            continue

        rows.append((name, _type))

    rows = util.unique(rows, lambda x: x[0])
    rows.sort(key=lambda x: util.key_fn(x[0]))
    print util.make_csv_string(labels, rows)
Example #31
    def anygoal(s):
        reifiedgoals = (reify(goal, s) for goal in goals)

        def f(goals):
            for goal in goals:
                try:
                    yield goaleval(goal)(s)
                except EarlyGoalError:
                    pass

        return unique(interleave(f(reifiedgoals), [EarlyGoalError]),
                      key=dicthash)
Example #32
def find_optimum_threshold(X, y):
    '''Find candidate decision thresholds and the classification metrics at each.

    Args:
        X: scores
        y: labels

    Returns:
        thresholds, tps, tns, fps, fns: the candidate thresholds together with
        the true/false positive/negative counts computed at each threshold
    '''

    # save the data into 2-d array so that we can sort them
    # on the X values
    data = np.array([X, y])
    data = np.transpose(data)

    # metrics
    tps = []
    tns = []
    fps = []
    fns = []
    thresholds = []
    allthresh = util.unique(list(data[:, 0]))
    allthresh.sort()

    thresh = allthresh[0] - 0.000001
    tp, tn, fp, fn = calc_metrics(data, thresh)
    tps.append(tp)
    tns.append(tn)
    fps.append(fp)
    fns.append(fn)
    thresholds.append(thresh)

    for i in range(len(allthresh) - 1):
        thresh = (allthresh[i] + allthresh[i + 1]) / 2
        tp, tn, fp, fn = calc_metrics(data, thresh)
        tps.append(tp)
        tns.append(tn)
        fps.append(fp)
        fns.append(fn)
        thresholds.append(thresh)

    thresh = allthresh[-1] + 0.000001
    tp, tn, fp, fn = calc_metrics(data, thresh)
    tps.append(tp)
    tns.append(tn)
    fps.append(fp)
    fns.append(fn)
    thresholds.append(thresh)

    return thresholds, tps, tns, fps, fns
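
The candidate thresholds above are: one value just below the smallest score, the midpoint between every pair of adjacent distinct scores, and one value just above the largest. The same grid can be built directly with numpy; a self-contained sketch (calc_metrics is omitted because its definition is not part of this example):

import numpy as np

def candidate_thresholds(scores, eps=1e-6):
    s = np.unique(np.asarray(scores, dtype=float))   # sorted distinct scores
    mids = (s[:-1] + s[1:]) / 2.0                     # midpoints between neighbours
    return np.concatenate(([s[0] - eps], mids, [s[-1] + eps]))

# candidate_thresholds([0.2, 0.8, 0.2, 0.5]) -> [0.199999, 0.35, 0.65, 0.800001]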
Example #33
    def get_gene_disease_association(self):
        print 'Getting GeneGo disease association data'
        #df = self.fetch("select distinct a.ref as gene_id, a.disid, a.disname, a.note from (select ga.note_id, g17.ref, d.disid, d.disname, d.note from disease_associations_all_v ga, GeneDBS_17 g17, diseases d, geneorgs go1 where d.disid=ga.dis_id and ga.GENE_ID=g17.gene and go1.gene=ga.gene_id and go1.org=1) a");
        #Tracer()()
        fn = self.dir + "/gene_disease_association.csv"
        if not os.path.exists(fn):
            df = self.fetch(
                "select distinct d.disid TERM_ID, d.disname as TERM_NAME, gdb.ref as GID, d.note as description, orgs.taxonomyid as tax_id from gene_netw gn, genediss gd, diseases d, genedbs gdb, geneorgs o, orgs where gn.gene = gd.gene and gd.dis = d.disid and gn.gene=gdb.gene and gn.gene=o.gene and o.org=orgs.orgid and orgs.taxonomyid in ("
                + ','.join(self.taxidList) +
                ") and gdb.db=17 and d.rtyp > 0 and d.disname not like 'By %' and gd.dis in (select distinct dismbr from disrelflat where disgrp = -1173899567 and dismbr <> -1173899567)"
            )
            df['TERM_ID'] = 'gDIS' + df['TERM_ID'].map(str)
            df.rename2({
                "TERM_ID": "term_id",
                "TERM_NAME": "term_name",
                "GID": "gid",
                "DESCRIPTION": "description",
                "TAX_ID": "tax_id"
            })
            df['type_name'] = 'GeneGo Disease Association'
            #remove disease which has more than 500 genes
            df = df.drop(df.index[list(
                it.chain.from_iterable([
                    g for k, g in df.groupby('term_id').groups.items()
                    if len(g) >= 500
                ]))])
            df.to_csv(fn, index=False)
        else:
            df = util.read_csv(fn)
        df1 = pd.DataFrame(
            df.copy())[['gid', 'term_id', 'term_name', 'type_name', 'tax_id']]
        df2 = pd.DataFrame(
            df.copy())[['term_id', 'term_name', 'type_name', 'description']]
        df2 = df2.drop_duplicates()
        self.disease_gid2term = df1
        self.disease_terms = df2
        self.disease_done = True

        data = []
        for k, t_v in df.groupby('gid'):
            S = util.unique([x for x in t_v['term_name'] if not pd.isnull(x)])
            data.append({
                'gid': k,
                'content': "; ".join(S),
                'type_name': t_v['type_name'].values[0],
                'annotation_field1': len(S),
                'tax_id': str(int(t_v['tax_id'].values[0]))
            })

        self.disease_annotations = pd.DataFrame(data)

        print 'GeneGo disease association data captured'
Example #34
def remote_query(rein, user, urls, log, query_type, distinct):
    '''
    Sends specific query to registered servers and filters for uniqueness
    '''
    res = []
    for url in urls:
        sel_url = "{0}query?owner={1}&query={2}&testnet={3}"
        data = safe_get(log, sel_url.format(url, user.maddr, query_type, rein.testnet))
        if data is None or query_type not in data or len(data[query_type]) == 0:
            click.echo('None found')
            continue
        res += filter_and_parse_valid_sigs(rein, data[query_type])
    return unique(res, distinct)
Example #35
def main():
    global Epsilon
    # Vocabulary containing all of the tokens for SMILES construction
    voc = util.Voc("data/voc.txt")
    # File path of predictor in the environment
    environ_path = 'output/RF_cls_ecfp6.pkg'
    # file path of hidden states in RNN for initialization
    initial_path = 'output/net_p'
    # file path of hidden states of optimal exploitation network
    agent_path = 'output/net_e_%.2f_%.1f_%dx%d' % (Epsilon, Baseline,
                                                   BATCH_SIZE, MC)
    # file path of hidden states of exploration network
    explore_path = 'output/net_p'

    # Environment (predictor)
    environ = util.Environment(environ_path)
    # Agent (generator, exploitation network)
    agent = model.Generator(voc)
    agent.load_state_dict(torch.load(initial_path + '.pkg'))

    # exploration network
    explore = model.Generator(voc)
    explore.load_state_dict(torch.load(explore_path + '.pkg'))

    best_score = 0
    log = open(agent_path + '.log', 'w')

    for epoch in range(1000):
        print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
        print('\nForward Policy Gradient Training Generator : ')
        Policy_gradient(agent, environ, explore=explore)
        seqs = agent.sample(1000)
        ix = util.unique(seqs)
        smiles, valids = util.check_smiles(seqs[ix], agent.voc)
        scores = environ(smiles)
        scores[valids == False] = 0
        unique = (scores >= 0.5).sum() / 1000
        # The model with best percentage of unique desired SMILES will be persisted on the hard drive.
        if best_score < unique:
            torch.save(agent.state_dict(), agent_path + '.pkg')
            best_score = unique
        print("Epoch+: %d average: %.4f valid: %.4f unique: %.4f" %
              (epoch, scores.mean(), valids.mean(), unique),
              file=log)
        for i, smile in enumerate(smiles):
            print('%f\t%s' % (scores[i], smile), file=log)

        # Learning rate exponential decay
        for param_group in agent.optim.param_groups:
            param_group['lr'] *= (1 - 0.01)
    log.close()
Example #36
def run(n, x, *goals, **kwargs):
    """ Run a logic program.  Obtain n solutions to satisfy goals.

    n     - number of desired solutions.  See ``take``
            0 for all
            None for a lazy sequence
    x     - Output variable
    goals - a sequence of goals.  All must be true

    >>> from logpy import run, var, eq
    >>> run(1, x, eq(x, 1))
    (1,)
    """
    return take(n, unique(reify(x, s) for s in goaleval(lallearly(*goals))({})))
Example #37
def run(n, x, *goals):
    """ Run a logic program.  Obtain n solutions to satisfy goals.

    n     - number of desired solutions.  See ``take``
            0 for all
            None for a lazy sequence
    x     - Output variable
    goals - a sequence of goals.  All must be true

    >>> from logpy import run, var, eq
    >>> run(1, x, eq(x, 1))
    (1,)
    """
    return take(n, unique(walkstar(x, s) for s in bindstar(({},), *goals)))
Example #38
    def test_unique(self):
        r = util.unique(
            torch.tensor([[1, 2, 3, 4], [4, 3, 2, 1], [1, 2, 3, 4]]))

        self.assertEqual((3, 1), r.size())
        self.assertEqual(r[0], r[2])
        self.assertNotEqual(r[0], r[1])
        self.assertNotEqual(r[1], r[2])

        r = util.nunique(
            torch.tensor([[[1, 2, 3, 4], [4, 3, 2, 1], [1, 2, 3, 4]]]))

        self.assertEqual((1, 3), r.size())
        self.assertEqual(r[0, 0], r[0, 2])
        self.assertNotEqual(r[0, 0], r[0, 1])
        self.assertNotEqual(r[0, 1], r[0, 2])
Example #39
def evaluate_binary_labeling(dataitems):
    gold = dataitems.target_strs
    pred = dataitems.prediction_strs
    labels = unique(chain(gold, pred))
    pos = _positive_label(labels)
    res = {}
    res['acc'] = accuracy(gold, pred)
    bcm = evaluate_binary_classification(gold, pred, pos)
    res.update(bcm._asdict())
    res['auc'] = skmetrics.roc_auc_score(dataitems.targets,
                                         dataitems.predictions)
    res['ap'] = skmetrics.average_precision_score(dataitems.targets,
                                                  dataitems.predictions)
    maxfp = max_f_point(dataitems)
    res.update({'maxf-{}'.format(k): v for k, v in maxfp._asdict().items()})
    return res
Example #40
def run(n, x, *goals, **kwargs):
    """ Run a logic program.  Obtain n solutions to satisfy goals.

    n     - number of desired solutions.  See ``take``
            0 for all
            None for a lazy sequence
    x     - Output variable
    goals - a sequence of goals.  All must be true

    >>> from logpy import run, var, eq
    >>> x = var()
    >>> run(1, x, eq(x, 1))
    (1,)
    """
    return take(n,
                unique(reify(x, s) for s in goaleval(lallearly(*goals))({})))
Example #41
	def allocate_anchors(self, headings):
		ids = []
		labels = []
		levels = []
		
		for heading in headings:
			# Pick an ID
			id = unique(slugify(heading.text, "-"), ids)
			
			# Assign the ID to the heading
			heading.attrib["id"] = id
			
			# Record it
			ids.append(id)
			labels.append(heading.text)
			levels.append(int(heading.tag[1]))
		
		return zip(levels, labels, ids)
Example #42
def token_evaluator(dataset, label=None, writer=None, mapper=None,
                    config=defaults):
    """Return appropriate evaluator callback for dataset."""
    if config.token_level_eval:
        evaluator = TokenLevelEvaluator
    elif is_iob_tagging(unique(dataset.tokens.target_strs)):
        evaluator = ConllEvaluator
    else:
        evaluator = TokenLevelEvaluator    # default
    info('using {} for {}'.format(evaluator.__name__, dataset.name))

    callbacks = []
    callbacks.append(Predictor(dataset.tokens))
    callbacks.append(evaluator(dataset, label=label, writer=writer))
    if mapper is not None:
        # TODO don't assume the mapper expects sentences.
        callbacks.append(PredictionMapper(dataset.sentences, mapper))
        # TODO do we really want a second eval here?
        callbacks.append(evaluator(dataset, label=label, writer=writer))
    return CallbackChain(callbacks)
Example #43
    def __init__(self, repo, parents, text, files, filectxfn, user=None,
                 date=None, extra=None):
        self._repo = repo
        self._rev = None
        self._node = None
        self._text = text
        self._date = date and util.parsedate(date) or util.makedate()
        self._user = user
        parents = [(p or nullid) for p in parents]
        p1, p2 = parents
        self._parents = [changectx(self._repo, p) for p in (p1, p2)]
        files = util.sort(util.unique(files))
        self._status = [files, [], [], [], []]
        self._filectxfn = filectxfn

        self._extra = extra and extra.copy() or {}
        if 'branch' not in self._extra:
            self._extra['branch'] = 'default'
        elif self._extra.get('branch') == '':
            self._extra['branch'] = 'default'
Example #44
def ParseGetEventSubTreeNoSelections(resp):

    _check_errors(resp)
    
    allmarkets = []
    markets = []
    # go through each event class in turn, an event class is
    # e.g. 'Rugby Union','Formula 1', etc.
    # slight trick here:
    # if we only polled a single event class, then resp[2] is
    # not a list, so we need to convert it to a list
    if isinstance(resp[2], list):
        data = resp[2]
    else:
        data = [resp[2]]
    for evclass in data:
        _ParseEventClassifier(evclass,'', markets)
        allmarkets = allmarkets + markets
    # hack: currently markets are duplicated multiple times (is this
    # an API error?); we want only unique markets here
    umarkets = util.unique(allmarkets)
    return umarkets
Example #45
def permuteq(a, b, eq2=eq):
    """ Equality under permutation

    For example (1, 2, 2) equates to (2, 1, 2) under permutation
    >>> from logpy import var, run, permuteq
    >>> x = var()
    >>> run(0, x, permuteq(x, (1, 2)))
    ((1, 2), (2, 1))

    >>> run(0, x, permuteq((2, 1, x), (2, 1, 2)))
    (2,)
    """
    if isinstance(a, tuple) and isinstance(b, tuple):
        if len(a) != len(b):
            return fail
        elif set(a) == set(b) and len(set(a)) == len(a):
            return success
        else:
            c, d = a, b
            try:
                c, d = tuple(sorted(c)), tuple(sorted(d))
            except:
                pass
            if len(c) == 1:
                return (eq2, c[0], d[0])
            return condeseq((((eq2, c[i], d[0]), (permuteq, c[0:i] + c[i + 1 :], d[1:], eq2)) for i in range(len(c))))

    if isvar(a) and isvar(b):
        raise EarlyGoalError()

    if isvar(a) or isvar(b):
        if isinstance(b, tuple):
            c, d = a, b
        elif isinstance(a, tuple):
            c, d = b, a

        return (condeseq, ([eq(c, perm)] for perm in unique(it.permutations(d, len(d)))))
Example #46
    def statwalk(self, files=None, match=util.always, unknown=True,
                 ignored=False, badmatch=None, directories=False):
        '''
        walk recursively through the directory tree, finding all files
        matched by the match function

        results are yielded in a tuple (src, filename, st), where src
        is one of:
        'f' the file was found in the directory tree
        'd' the file is a directory of the tree
        'm' the file was only in the dirstate and not in the tree
        'b' file was not found and matched badmatch

        and st is the stat result if the file was found in the directory.
        '''

        # walk all files by default
        if not files:
            files = ['.']
            dc = self._map.copy()
        else:
            files = util.unique(files)
            dc = self._filter(files)

        def imatch(file_):
            if file_ not in dc and self._ignore(file_):
                return False
            return match(file_)

        # TODO: don't walk unknown directories if unknown and ignored are False
        ignore = self._ignore
        dirignore = self._dirignore
        if ignored:
            imatch = match
            ignore = util.never
            dirignore = util.never

        # self._root may end with a path separator when self._root == '/'
        common_prefix_len = len(self._root)
        if not util.endswithsep(self._root):
            common_prefix_len += 1

        normpath = util.normpath
        listdir = osutil.listdir
        lstat = os.lstat
        bisect_left = bisect.bisect_left
        isdir = os.path.isdir
        pconvert = util.pconvert
        join = os.path.join
        s_isdir = stat.S_ISDIR
        supported = self._supported
        _join = self._join
        known = {'.hg': 1}

        # recursion free walker, faster than os.walk.
        def findfiles(s):
            work = [s]
            wadd = work.append
            found = []
            add = found.append
            if directories:
                add((normpath(s[common_prefix_len:]), 'd', lstat(s)))
            while work:
                top = work.pop()
                entries = listdir(top, stat=True)
                # nd is the top of the repository dir tree
                nd = normpath(top[common_prefix_len:])
                if nd == '.':
                    nd = ''
                else:
                    # do not recurse into a repo contained in this
                    # one. use bisect to find .hg directory so speed
                    # is good on big directory.
                    names = [e[0] for e in entries]
                    hg = bisect_left(names, '.hg')
                    if hg < len(names) and names[hg] == '.hg':
                        if isdir(join(top, '.hg')):
                            continue
                for f, kind, st in entries:
                    np = pconvert(join(nd, f))
                    if np in known:
                        continue
                    known[np] = 1
                    p = join(top, f)
                    # don't trip over symlinks
                    if kind == stat.S_IFDIR:
                        if not ignore(np):
                            wadd(p)
                            if directories:
                                add((np, 'd', st))
                        if np in dc and match(np):
                            add((np, 'm', st))
                    elif imatch(np):
                        if supported(np, st.st_mode):
                            add((np, 'f', st))
                        elif np in dc:
                            add((np, 'm', st))
            found.sort()
            return found

        # step one, find all files that match our criteria
        files.sort()
        for ff in files:
            nf = normpath(ff)
            f = _join(ff)
            try:
                st = lstat(f)
            except OSError, inst:
                found = False
                for fn in dc:
                    if nf == fn or (fn.startswith(nf) and fn[len(nf)] == '/'):
                        found = True
                        break
                if not found:
                    if inst.errno != errno.ENOENT or not badmatch:
                        self._ui.warn('%s: %s\n' %
                                      (self.pathto(ff), inst.strerror))
                    elif badmatch and badmatch(ff) and imatch(nf):
                        yield 'b', ff, None
                continue
            if s_isdir(st.st_mode):
                if not dirignore(nf):
                    for f, src, st in findfiles(f):
                        yield src, f, st
            else:
                if nf in known:
                    continue
                known[nf] = 1
                if match(nf):
                    if supported(ff, st.st_mode, verbose=True):
                        yield 'f', nf, st
                    elif ff in dc:
                        yield 'm', nf, st
Example #47
def write_prefix_groups(prefixed_roots, unprefixed_roots, upasargas, other,
                        sandhi_rules, out_path):
    """Parse the prefixes in a prefix root and write out the prefix groups.

    The procedure is roughly as follows:

        for each prefixed root in `prefixed_roots`:
            find (p_1, ..., p_n, r), where p_x is a prefix and r is a root
            write the prefix group (p_1, ..., p_n) to file.

    We find (p_1, .., p_n) by using the rules in `sandhi_rules` and verify
    that `p_x` is a prefix by checking for membership in `upasargas` and
    `other`.
    """

    # Loading prefixes
    all_prefixes = set()
    with util.read_csv(upasargas) as reader:
        all_prefixes.update([x['name'] for x in reader])
    with util.read_csv(other) as reader:
        all_prefixes.update([x['name'] for x in reader])

    # The 's' prefix is used in roots like 'saMskf' and 'parizkf'. Although it
    # is prefixed to a verb, it is not semantically the same as the other verb
    # prefixes. Here, though, we treat it as a verb prefix.
    all_prefixes.add('s')

    # Some prefixes have alternate forms.
    prefix_alternates = {
        'pi': 'api',
        'ut': 'ud',
        'Ri': 'ni',
        'niz': 'nis',
        'iz': 'nis',
        'palA': 'parA',
        'pali': 'pari',
        'z': 's',
    }
    all_prefixes.update(prefix_alternates.keys())

    # Loading sandhi rules
    sandhi = make_sandhi_object(sandhi_rules)

    with util.read_csv(prefixed_roots) as reader:
        rows = []
        for row in reader:
            # Nibble away at `prefixed_root` until we have all prefixes for the
            # given root.
            prefixes = []
            prefixed_root = row['prefixed_root']
            unprefixed_root = row['unprefixed_root']
            last_letter = None

            q = Queue.PriorityQueue()
            for remainder in sandhi.split_off(prefixed_root, unprefixed_root):
                q.put_nowait((0, (), remainder))

            while not q.empty():
                _, cur_prefixes, remainder = q.get_nowait()

                # `remainder` is something we recognize: we're done!
                if remainder in all_prefixes:
                    prefixes = list(cur_prefixes)
                    if remainder:
                        prefixes.append(remainder)
                        last_letter = remainder[-1]
                    break

                for before, after in sandhi.splits(remainder):
                    # Prevent recursion. As of this comment, the `splits` method
                    # returns the non-split of some term X as (X, ''). In other
                    # words, this conditional will *never* be true. But since the
                    # behavior of various functions is still unsettled, this check
                    # will stay here for the time being.
                    if after == remainder:
                        continue

                    if before in all_prefixes:
                        state = (cur_prefixes + (before,), after)
                        cost = len(after)

                        # Incentivize short vowels. This avoids errors with roots
                        # like "upodgrah" ("upa-ud-grah"). Without the incentive,
                        # we could have "upa-A-ud-grah" instead.
                        if before and before[-1] in 'aiufx':
                            cost -= 1
                        q.put_nowait((cost,) + state)

            # Convert 'alternate' prefixes back to their original forms.
            prefixes = [prefix_alternates.get(x, x) for x in prefixes]
            if not prefixes:
                # Occurs if the root's prefix is unrecognized
                continue

            # We still don't know the prefix group. We can find it by splitting
            # off the root and keeping whatever matches `last_letter`.
            for group in sandhi.split_off(prefixed_root, unprefixed_root):
                if group[-1] == last_letter:
                    break
            prefix_string = '-'.join(prefixes)
            rows.append((group, prefix_string))

    labels = ['group', 'prefixes']
    with util.write_csv(out_path, labels) as write_row:
        for row in util.unique(rows):
            datum = dict(zip(labels, row))
            write_row(datum)
Example #48
 def UniqueLines(self):
     self.SelectLines()
     lines = self.GetSelectedText().split("\n")
     lines = unique(lines)
     self.ReplaceSelectionAndSelect("\n".join(lines))
Example #49
0
 def includes(self):
     return list(unique(sum([c.includes for c in self.computations], [])))
Example #50
0
def list_nei_lines(specrange, Te, tau, Te_init=1e4,  lldat=False, linefile=False,\
              units='angstroms', teunit='K', minepsilon=1e-20, \
              datacache=False):
  """
  Gets list of the lines in a given spectral range for a given NEI plasma
  
  For speed purposes, this takes the nearest temperature tabulated in the
  linefile, and applies the exact ionization balance as calculated to this.
  This is not perfect, but should be good enough.
  
  Note that the output from this can be passed directly to print_lines
  
  
  Parameters
  ----------
  specrange : [float,float]
    spectral range [min,max] to return lines on
  Te : float
    electron temperature
  tau : float
    electron density * time (cm^-3 s)
  Te_init : float
    initial ionization balance temperature
  lldat : see notes
    line data
  linefile : see notes
    line data file, see notes
  units : {'A' , 'keV'}
    units of specrange (default A)
  teunit : {'K' , 'keV', 'eV'}
    units of temperatures (default K)
  minepsilon : float
    minimum emissivity (ph cm^3 s^{-1}) for inclusion in linelist
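  datacache : dict, optional
    cache of previously loaded atomic data; passed through to the
    ionization balance calculation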
  
  Notes
  -----
  The actual line list can be defined in one of several ways:

  specrange = [10,100]

  1. lldat as an actual list of lines::

       a = pyfits.open('apec_nei_line.fits')
       llist = a[30].data
       l = list_nei_lines(specrange, lldat=llist)

  2. lldat as a numpy array of lines::

       a = pyfits.open('apec_nei_line.fits')
       llist = numpy.array(a[30].data)
       l = list_nei_lines(specrange, lldat=llist)

  3. lldat is a BinTableHDU from pyfits::

       a = pyfits.open('apec_nei_line.fits')
       llist = numpy.array(a[30])
       l = list_nei_lines(specrange, lldat=llist)

  4. lldat is a HDUList from pyfits. In this case index must also be set::

       a = pyfits.open('apec_nei_line.fits')
       index = 30
       l = list_nei_lines(specrange, lldat=a, index=index)

  5. lldat NOT set, linefile contains the NEI line file location, index
     identifies the HDU::

       linefile = 'mydir/apec_v3.0.2_nei_line.fits'
       index = 30
       l = list_nei_lines(specrange, linefile=linefile, index=index)

  6. lldat NOT set & linefile NOT set, linefile defaults to
     $ATOMDB/apec_nei_line.fits. index identifies the HDU::
       
       index = 30
       l = list_nei_lines(specrange, Te, tau)

  Returns
  -------
  linelist : dtype=([('Lambda', '>f4'), \
           ('Lambda_Err', '>f4'), \
           ('Epsilon', '>f4'), \
           ('Epsilon_Err', '>f4'), \
           ('Element', '>i4'), \
           ('Elem_drv', '>i4'), \
           ('Ion', '>i4'), \
           ('Ion_drv', '>i4'), \
           ('UpperLev', '>i4'), \
           ('LowerLev', '>i4')])
     The line list, restricted to the requested spectral range and minimum
     emissivity, with Epsilon scaled by the NEI ionization balance.
  """

#  History
#  -------
#  Version 0.1 - initial release
#    Adam Foster November 02nd 2015
#
           
  # check the units
  
  if units.lower()=='kev':
    specrange = [const.HC_IN_KEV_A/specrange[1], const.HC_IN_KEV_A/specrange[0]]
  elif units.lower() in ['a', 'angstrom', 'angstroms']:
    specrange = specrange
  else:
    print "*** ERROR: unknown unit %s, Must be keV or A. Exiting ***"%\
          (units)

  if teunit.lower() == 'kev':
    kT = Te*1.0
  elif teunit.lower() == 'ev':
    kT = Te/1000.0
  elif teunit.lower() == 'k':
    kT = Te*const.KBOLTZ
  else:
    print "*** ERROR: unknown teunit %s, Must be keV or K. Exiting ***"%\
          (teunit)


  if Te_init != False:
    if teunit.lower() == 'kev':
      kT_init = Te_init*1.0
    elif teunit.lower() == 'ev':
      kT_init = Te_init/1000.0
    elif teunit.lower() == 'k':
      kT_init = Te_init*const.KBOLTZ
    else:
      print "*** ERROR: unknown teunit %s, Must be keV or K. Exiting ***"%\
          (teunit)


  # sort out the line file...
    
  if lldat != False:
    #options here:
    # (1) This is a line list, i.e. the ldata[index].data from a file,
    #     either in original pyfits format or a numpy array
    #
    # (2) This is an hdu from a file
    #
    # (3) This is a _line.fits file, and requires an index to make sense of it
    
    if type(lldat) == pyfits.hdu.hdulist.HDUList:
      # go get the index
      te_index = get_index(kT, filename=lldat, \
              teunits='keV', logscale=True)
      llist = numpy.array(lldat[te_index].data)
    elif type(lldat) == pyfits.hdu.table.BinTableHDU:
      # no need to get index
      llist = numpy.array(lldat.data)
    elif type(lldat) in [pyfits.fitsrec.FITS_rec, numpy.ndarray]:
      llist = numpy.array(lldat)
  else:
    # no line data supplied.
    if linefile==False:
      linefile = os.path.expandvars('$ATOMDB/apec_nei_line.fits')
    if not os.path.isfile(linefile):
      print "*** ERROR. Linefile %s is "%(linefile),
      print " not a file. Exiting"
    else:
      lldat = pyfits.open(os.path.expandvars(linefile))
      te_index = get_index(kT, filename=lldat, \
              teunits='keV', logscale=True)
      llist= numpy.array(lldat[te_index].data)

  # get filtered line list

  llist = llist[(llist['Lambda']>= specrange[0]) &\
                (llist['Lambda']<= specrange[1]) &\
                (llist['Epsilon'] >= minepsilon)]

  # get the index

  # get list of all the elements present
  Zlist = util.unique(llist['Element'])
  # Calculate the ionization balance.
  ionbal ={}
  for Z in Zlist:
    ionbal[Z] = apec.solve_ionbal_eigen(Z, kT, tau, Te_init = kT_init,\
                                        teunit='keV', datacache=datacache)
 
  # multiply everything by the appropriate ionization fraction
  if 'Elem_drv' in llist.dtype.names:
    
    for il in llist:
      il['Epsilon'] *= ionbal[il['Elem_drv']][il['Ion_drv']-1]
  else: 
    for il in llist:
      il['Epsilon'] *= ionbal[il['Element_drv']][il['Ion_drv']-1]
      
  # filter again based on new epsilon values
  llist=llist[llist['Epsilon']>minepsilon]
  # at this point, we have data
  return llist
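A minimal usage sketch, assuming pyatomdb is installed and $ATOMDB points at the AtomDB data files; the temperature and tau values are illustrative only:

# lines between 10 and 20 Angstroms for a 1e7 K plasma that has been
# ionizing for n_e * t = 1e11 cm^-3 s
llist = list_nei_lines([10.0, 20.0], 1e7, 1e11, units='angstroms', teunit='K')
# print_lines(llist)  # as noted in the docstring, the result can go straight to print_lines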
Example #51
0
 def unique_stations(self, channel_filter=None):
     return unique((channel.station
                    for channel in self.unique_channels(channel_filter)), lambda s: s.station_id)
Example #52
0
 def variables(self):
     return tuple(unique(chain(self.inputs, self.outputs)))
Example #53
0
def make_spectrum(bins, index, linefile="$ATOMDB/apec_line.fits",\
                  cocofile="$ATOMDB/apec_coco.fits",\
                  binunits='keV', broadening=False, broadenunits='keV', \
                  elements=False, abund=False, dummyfirst=False,\
                  dolines = True, docont=True, dopseudo=True):

  r"""
  make_spectrum is the most generic "make me a spectrum" routine.
  
  It returns the emissivity in counts cm^3 s^-1 bin^-1.
  
  Parameters
  ----------
  bins : array(float)
       The bin edges for the spectrum to be calculated on, in units of keV
       or Angstroms. Must be monotonically increasing. Spectrum will return
       len(bins)-1 values.
  index : int
       The HDU index to calculate the spectrum from. Note that in the AtomDB
       files the emission data starts in HDU number 2, so for the first block
       set index=2.
  linefile : str
       The file containing all the line emission. Defaults to
       "$ATOMDB/apec_line.fits"
  cocofile : str
       The file containing all the continuum emission. Defaults to
       "$ATOMDB/apec_coco.fits"
  binunits : {'keV','A'}
       The energy units for bins. "keV" or "A". Default keV.
  broadening : float
       Line broadening to be applied
  broadenunits : {'keV','A'}
       Units of line broadening, "keV" or "A". Default keV.
  elements : iterable of int
       Elements to include, listed by atomic number. If not set, include all.
  abund : iterable of float, length same as elements
       If set, an array of the same length as elements giving the abundance of
       each element relative to the Anders and Grevesse values. Otherwise
       assumed to be 1.0 for all elements.
  dummyfirst : bool
       If true, add a "0" to the beginning of the return array so it is of the 
       same length as bins (can be useful for plotting results)
  dolines : bool
       Include lines in the spectrum
  docont : bool
       Include the continuum in the spectrum
  dopseudo : bool
       Include the pseudocontinuum in the spectrum.
  
  Returns
  -------
  array of floats
      Emissivity in counts cm^3 s^-1 bin^-1.
  
  """  
#  History
#  -------    
#  Version 0.1 - initial release
#    Adam Foster July 17th 2015
#  
#  Version 0.2
#    Added dummyfirst keyword
#    Adam Foster July 21st 2015
# 


  # set up the bins
  if (sum((bins[1:]-bins[:-1])<0) > 0):
    print "*** ERROR: bins must be monotonically increasing. Exiting ***"
    return -1
  
  if binunits.lower()=='kev':
    ebins = bins*1.0
  elif binunits.lower() in ['a', 'angstrom', 'angstroms']:
    ebins = const.HC_IN_KEV_A/bins[::-1]
  else:
    print "*** ERROR: unknown binning unit %s, Must be keV or A. Exiting ***"%\
          (binunits)





  if util.keyword_check(linefile):
    # ok, we should do something with this
    # if it is a string, look for the file name
    if isinstance(linefile, basestring):
      lfile = os.path.expandvars(linefile)
      if not os.path.isfile(lfile):
        print "*** ERROR: no such file %s. Exiting ***" %(lfile)
        return -1
      ldat = pyfits.open(lfile)
    elif isinstance(linefile, pyfits.hdu.hdulist.HDUList):
      # no need to do anything, file is already open
      ldat = linefile
    else:
      print "Unknown data type for linefile. Please pass a string or an HDUList"
      return -1
   
  if util.keyword_check(cocofile):
    if isinstance(cocofile, basestring):
      cfile = os.path.expandvars(cocofile)
      if not os.path.isfile(cfile):
        print "*** ERROR: no such file %s. Exiting ***" %(cfile)
        return -1
      cdat = pyfits.open(cfile)
    elif isinstance(cocofile, pyfits.hdu.hdulist.HDUList):
      # no need to do anything, file is already open
      cdat = cocofile
    else:
      print "Unknown data type for cocofile. Please pass a string or an HDUList"
      return -1
 
 
 




#  lfile = os.path.expandvars(linefile)
#  cfile = os.path.expandvars(cocofile)
#  if not os.path.isfile(lfile):
#    print "*** ERROR: no such file %s. Exiting ***" %(lfile)
#    return -1
#  if not os.path.isfile(cfile):
#    print "*** ERROR: no such file %s. Exiting ***" %(cfile)
#    return -1
  
  # open the files
#  ldat = pyfits.open(lfile)
#  cdat = pyfits.open(cfile)
      
  # get the index
  if ((index < 2) | (index > len(ldat)-1)):
    print "*** ERROR: Index must be in range %i to %i"%(2, len(ldat)-1)
    return -1
    
  lldat = ldat[index].data
  ccdat = cdat[index].data
  
  
  if not util.keyword_check(elements):
    Zl = util.unique(lldat['element'])
    Zc = util.unique(ccdat['Z'])
    Zlist = util.unique(numpy.append(Zl,Zc))
  
  else:
    Zlist = elements
  
  if not util.keyword_check(abund):
    abund= numpy.ones(len(Zlist))

  lspectrum = numpy.zeros(len(bins)-1, dtype=float)
  cspectrum = numpy.zeros(len(bins)-1, dtype=float)

  if dolines:  
    for iZ, Z in enumerate(Zlist):
      # ADD  LINES
      lspectrum += add_lines(Z, abund[iZ], lldat, ebins, broadening=broadening, broadenunits=broadenunits)
  
  if docont | dopseudo:
    for iZ, Z in enumerate(Zlist):
    # ADD  CONTINUUM
      cspectrum += make_ion_index_continuum(ebins, Z, cocofile=ccdat,\
                                           binunits=binunits, no_coco=-docont,\
                                           no_pseudo=-dopseudo)*abund[iZ]
  
  # broaden the continuum if required:
  if broadening:
    cspectrum = broaden_continuum(ebins, cspectrum, binunits = binunits, \
                      broadening=broadening,\
                      broadenunits=broadenunits)
  if dummyfirst:
    return numpy.append([0],   cspectrum+lspectrum)
  else:
    return cspectrum+lspectrum
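A minimal usage sketch, again assuming pyatomdb and the $ATOMDB data files are available; the bin grid and HDU index are illustrative only:

import numpy

ebins = numpy.linspace(0.3, 10.0, 1001)   # 1001 edges, i.e. 1000 bins from 0.3 to 10 keV
spec = make_spectrum(ebins, 2)            # index=2 is the first emission block
# spec has len(ebins) - 1 entries, in counts cm^3 s^-1 bin^-1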
Example #54
0
 def unique_channels(self, channel_filter=None):
     return unique((channel
                    for lineup_map in self
                    for channel in lineup_map.channels
                    if channel_filter is None or channel.channel in channel_filter), lambda c: c.get_unique_id())
Example #55
0
 def get_program_ids(self):
     return unique(broadcast.program_id for schedule in self for broadcast in schedule.broadcasts)
Example #56
0
def process_markdown(input_markdown, output_name, latex_img_dir = "./", input_path = "./", thumb_size=64):
	"""
	Produces the html file, toc file, meta file and a list of (local_file,
	target_name) pairs where local_file is a file on the local system and
	target_name is the name of the file when placed in [output_name]/*.
	"""
	md = markdown.Markdown( extensions=[ 'meta'
	                                   , 'codehilite'
	                                   , 'tables'
	                                   , 'def_list'
	                                   , 'footnotes'
	                                   , 'resourceextractor'
	                                   , 'abstractextractor'
	                                   , 'tocextractor'
	                                   , 'mathjax'
	                                   , 'latex'
	                                   ]
	                      , extension_configs = {
	                          "resourceextractor":
	                            ( ("resource_dir",output_name)
	                            , ("relative_path",input_path)
	                            ),
	                          "latex":
	                            ( ("latex_img_dir",latex_img_dir)
	                            , ("input_path", input_path)
	                            ),
	                        }
	                      )
	
	# Basic HTML conversion
	html = md.convert(input_markdown)
	
	# Generate table of contents
	toc  = md.toc
	
	# Choose document title (default to the output name)
	title = output_name
	# Use the first heading if possible
	if len(toc) > 0:
		title = toc[0][1]
	# Better yet, get the explicitly given metadata
	title = md.Meta.get("title", [title])[0]
	
	# Choose document subtitle (only available from metadata)
	subtitle = md.Meta.get("subtitle", [None])[0]
	
	# Get the image from the metadata
	img = md.Meta.get("img", [None])[0]
	img_alt = md.Meta.get("img_alt", [title])[0]
	
	# The abstract should be taken to be the first paragraph.
	abstract = md.abstract if md.abstract is not None else ""
	
	# Get the list of tags
	tags = md.Meta.get("tags", [])
	
	# Get the show option
	show = md.Meta.get("show", ["True"])[0] == "True"
	
	files = md.resources
	
	# Add the article image to the list of files and create a thumbnail if
	# possible.
	if img is not None and img.startswith("file://"):
		img = os.path.join(input_path, img[len("file://"):])
		img_output_name = "%s/%s"%(output_name,
		                           unique(os.path.basename(img),
		                                  [f.split("/")[-1] for (_,f) in files]))
		
		img_thumbnail = "%s.thumb.png"%img
		
		p = Popen( ["convert"
		           , img
		           , "-thumbnail", "%dx%d"%(thumb_size,thumb_size)
		           , img_thumbnail]
		         , stdin  = None
		         , stdout = sys.stderr
		         , stderr = sys.stderr
		         )
		if p.wait() != 0:
			raise Exception("Creating img thumbnail failed.")
		
		files.append((img_thumbnail, img_output_name))
		img = img_output_name
	
	# Generate meta-data
	meta_data = {
		"url" : output_name,
		"title" : title,
		"subtitle" : subtitle,
		"img" : img,
		"img_alt" : img_alt,
		"abstract" : abstract,
		"tags" : tags,
		"show" : show,
	}
	
	return html, toc, meta_data, files
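A hypothetical call, purely for illustration (the file and directory names are placeholders, and the custom markdown extensions configured above must be importable):

# with open("articles/my-post.md") as f:
#     html, toc, meta_data, files = process_markdown(
#         f.read(), "my-post", latex_img_dir="latex/", input_path="articles/")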
Example #57
0
 def libs(self):
     return list(unique(sum([c.libs for c in self.computations], [])))
Example #58
0
                try:
                    fl = repo.file(f)
                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                except:
                    lr = None
                err(lr, _("in manifest but not in changeset"), f)

    ui.status(_("checking files\n"))

    storefiles = {}
    for f, f2, size in repo.store.datafiles():
        if not f:
            err(None, _("cannot decode filename '%s'") % f2)
        elif size > 0:
            storefiles[f] = True

    files = util.sort(util.unique(filenodes.keys() + filelinkrevs.keys()))
    for f in files:
        lr = filelinkrevs[f][0]
        try:
            fl = repo.file(f)
        except error.RevlogError, e:
            err(lr, _("broken revlog! (%s)") % e, f)
            continue

        for ff in fl.files():
            try:
                del storefiles[ff]
            except KeyError:
                err(lr, _("missing revlog!"), ff)

        checklog(fl, f)
Example #59
0
 def allgoal(s):
     g = goaleval(reify(goals[0], s))
     return unique(interleave(
                     goaleval(reify((lall,) + tuple(goals[1:]), ss))(ss)
                     for ss in g(s)),
                   key=dicthash)