def read_tmp_out(tmp_out=None, file_str=None, sample=None):
    gj.printFuncRun('read_tmp_out')
    gj.printFuncArgs()
    fa_dict = read_fa()
    tx_base_pos_dict = nested_dict(2, list)  # {tx:{'A':[pos1,pos2],'T':[]}}
    base_enrich_dict = nested_dict(1, int)
    with open(tmp_out, 'r') as TMP_OUT:
        for line in TMP_OUT:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            arr = line.split('\t')
            transcript_id = arr[0]
            transcript_len = int(arr[1])
            if transcript_len != len(fa_dict[transcript_id]):
                print "transcript length not consistent with reference: %s, tmp_out len: %s, reference len: %s" % (transcript_id, transcript_len, len(fa_dict[transcript_id]))
                sys.exit()
            for n, base_enrichment_score in enumerate(arr[4:]):
                score = base_enrichment_score.split(',')[0]
                # if score != "NULL" and float(score) != 0 and float(score) >= 0.3:
                if score != "NULL" and float(score) != 0:
                    base = fa_dict[transcript_id][n]
                    tx_base_pos_dict[transcript_id][base].append(n)
                    base_enrich_dict[base.upper()] += 1
    print base_enrich_dict
    # val_ls = [base_enrich_dict[i] for i in ['A','T','C','G']]
    # gj.plot_ls_pie(labels=['A','T','C','G'], val=val_ls, dic="", title_str="", file_str=file_str)
    TXT = open(file_str, 'w')
    for i, j in base_enrich_dict.items():
        print >>TXT, i + '\t' + str(j)
    TXT.close()
    gj.printFuncRun('read_tmp_out')
def degree_hist(dg_txt=None):
    if dg_txt is None:
        dg_txt = '/Share/home/zhangqf7/gongjing/zebrafish/data/paris/shi-zp-5-rep-combine/27-DG.txt'
    dg_dict = read_dg_txt(dg_txt)
    trans_dict = loadTransGtfBed2()
    RRI_dict = nested_dict(2, list)
    for i, j in dg_dict.items():
        if j['lchr'] != j['rchr']:
            RRI_dict[j['RRI_type']][j['lchr']].append(j['rchr'])
            RRI_dict[j['RRI_type']][j['rchr']].append(j['lchr'])
        else:
            RRI_dict[j['RRI_type']][j['lchr']].append(j['rchr'])
    for i in ['inter', 'intra']:
        savefn = dg_txt.replace('.txt', '.%s.degree.txt' % (i))
        degree_ls_ls = [[], [], []]
        with open(savefn, 'w') as SAVEFN:
            for k, v in RRI_dict[i].items():
                print >>SAVEFN, '\t'.join(map(str, [k, trans_dict[k]['type'], len(v), len(set(v)), ','.join(list(set(v)))]))
                degree_ls_ls[0].append(len(set(v)))
                if trans_dict[k]['type'] == 'mRNA':
                    degree_ls_ls[1].append(len(set(v)))
                if trans_dict[k]['type'] == 'lncRNA':
                    degree_ls_ls[2].append(len(set(v)))
        degree_mean_ls = [np.mean(i) for i in degree_ls_ls]
        gj.cumulate_dist_plot(ls_ls=degree_ls_ls,
                              ls_ls_label=['%s, mean=%.2f' % (i, j) for i, j in zip(['all', 'mRNA', 'lncRNA'], degree_mean_ls)],
                              bins=40, title='degree distribution', ax=None,
                              savefn=savefn.replace('.txt', '.pdf'),
                              xlabel='log2(# of interacting partners)', ylabel=None,
                              add_vline=None, add_hline=None, log2transform=1,
                              xlim=None, ylim=None)
def read_dg_txt(dg_txt=None, support=3, filter_rRNA=True, only_mRNA_lncRNA=True):
    if dg_txt is None:
        dg_txt = '/Share/home/zhangqf7/gongjing/zebrafish/data/paris/shi-zp-4/7-DG.txt'
    dg_dict = nested_dict()
    with open(dg_txt, 'r') as DG:
        for line in DG:
            line = line.strip()
            if line.startswith('#'):
                header_ls = line.replace('#', '').split('\t')
                continue
            if not line:
                continue
            arr = line.split('\t')
            if int(arr[9]) < support:
                continue
            if (filter_rRNA and arr[13] == 'rRNA') or (filter_rRNA and arr[14] == 'rRNA'):
                continue
            if only_mRNA_lncRNA and arr[13] not in ['mRNA', 'lncRNA']:
                continue
            if only_mRNA_lncRNA and arr[14] not in ['mRNA', 'lncRNA']:
                continue
            for i, j in zip(header_ls, arr):
                dg_dict[arr[0]][i] = j
            if arr[1] == arr[5]:
                dg_dict[arr[0]]['RRI_type'] = 'intra'
            else:
                dg_dict[arr[0]]['RRI_type'] = 'inter'
    print "DG num: %s, file: %s" % (len(dg_dict), dg_txt)
    return dg_dict.to_dict()
def get_merchant_db_data(store, numdays):
    start, end = get_sliding_date_range(numdays)
    merchant_report = MerchantDbReport(store, ["marketingSource", "keyword"],
                                       ["revenue", "order_count"], ["add", "add"],
                                       None, time_aggregation=None)
    merchant_data = merchant_report.get_data(mode=None, length=None, start_date=start, end_date=end)
    entries = merchant_data["entries"]
    # vurve_sales = filter(lambda x: x["dimensions"]["marketingSource"]["value"] == "Vurve", entries)
    # nonvurve_sales = filter(lambda x: x["dimensions"]["marketingSource"]["value"] != "Vurve", entries)
    keyword_rev_data = nested_dict()
    for e in entries:
        for kw in e['dimensions']['keyword']['value']:
            keyword_rev_data[kw.lower()]['marketingSource'] = e["dimensions"]["marketingSource"]["value"]
            keyword_rev_data[kw.lower()]['revenue'] = e["metrics"]["revenue"]["value"]
            keyword_rev_data[kw.lower()]['order_count'] = e["metrics"]["order_count"]["value"]
    """
    for entry in vurve_sales:
        for kw in entry["dimensions"]["keyword"]["value"]:
            keyword_rev_data[kw.lower()]["vurve"] = entry["metrics"]["revenue"]["value"]
    for entry in nonvurve_sales:
        for kw in entry["dimensions"]["keyword"]["value"]:
            keyword_rev_data[kw.lower()]["nonvurve"] = entry["metrics"]["revenue"]["value"]
    """
    return keyword_rev_data
def readDGFrameFile(filename, interRRI_norRNA=1, support_read=3):
    fn_stat_dict = nested_dict()
    inter, intra = 0, 0
    with open(filename, 'r') as TXT:
        for line in TXT:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            arr = line.split('\t')
            if arr[1] == arr[5]:
                intra += 1
            else:
                inter += 1
    fn_stat_dict['inter'] = inter
    fn_stat_dict['intra'] = intra
    fn_stat_dict['all'] = intra + inter
    df = pd.read_csv(filename, header=0, sep='\t')
    df['type'] = ['intra' if i == j else 'inter' for i, j in zip(df['lchr'], df['rchr'])]
    df_inter_RRI = df[df['type'] == 'inter']
    nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr')
    fn_stat_dict['uniq RRI'] = len(nx_inter_RRI.edges())
    if interRRI_norRNA:
        df_inter_RRI = df_inter_RRI[(df_inter_RRI['ltype'].isin(['mRNA', 'lncRNA'])) & (df_inter_RRI['rtype'].isin(['mRNA', 'lncRNA']))]
    df_inter_RRI = df_inter_RRI[df_inter_RRI['support'] >= support_read]
    nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr')
    nx_inter_RRI_info_dict, G_largest = RRI_network_property2(nx_inter_RRI)
    for i, j in nx_inter_RRI_info_dict.items():
        fn_stat_dict[i] = j
    # fn_stat_df['sampling'] = ''
    fn_stat_df = pd.DataFrame(fn_stat_dict, index=[0])
    return fn_stat_df
def main():
    # sample_ls = ['egg_cell1', 'cell1_cell4', 'cell4_cell64', 'cell64_sphere', 'sphere_shield']
    sample_ls = ['egg_cell1_egg_cell1', 'cell1_cell4_cell1_cell4', 'cell4_cell64_cell4_cell64', 'cell64_sphere_cell64_sphere', 'sphere_shield_sphere_shield']
    all_sample = generate_sample(sample_ls)
    all_sample_d = nested_dict(2, list)
    # save_dir = '/Share/home/zhangqf7/gongjing/zebrafish/result/dynamic_merge_region/005_005_new/abs/mergepeaks_d10'
    save_dir = '/Share/home/zhangqf7/gongjing/zebrafish/result/dynamic_merge_region/005_005_new/abs/new_mergepeaks_d10'
    for sample in all_sample:
        bed = '%s/%s' % (save_dir, sample)
        d = read_bed(bed)
        for i, j in d.items():
            for m, n in j.items():
                for tx_start_end in n:
                    all_sample_d[i][m].append(tx_start_end)
    print len(all_sample_d['egg_cell1/window-anno.bed']), len(all_sample_d['egg_cell1/window-anno.bed'][1]), len(all_sample_d['egg_cell1/window-anno.bed'][4])
    for i, j in all_sample_d.items():
        savefn = '%s/separate/%s.bed' % (save_dir, i.split('/')[0])
        with open(savefn, 'w') as SAVEFN:
            for m, n in j.items():
                for tx_start_end in n:
                    print >>SAVEFN, tx_start_end.replace('|', '\t')
    for way in range(1, len(sample_ls) + 1):
        way_ls = []
        for i, j in all_sample_d.items():
            for m, n in j.items():
                if m == way:
                    for tx_start_end in n:
                        way_ls.append(tx_start_end)
        savefn = '%s/separate/way%s.bed' % (save_dir, way)
        with open(savefn, 'w') as SAVEFN:
            for tx_start_end in set(way_ls):
                print >>SAVEFN, tx_start_end.replace('|', '\t')
def buildTree(fname, skipdash=False):
    aTree = nested_dict()
    hdl = open(fname, 'r')
    for i, line in enumerate(hdl):
        cols = line.strip().split('\t')
        cid = cols.pop(0)
        if skipdash:
            cols = filter(lambda x: x != '-', cols)
        else:
            cols = map(lambda x: 'null' if x == '-' else x, cols)
        if len(cols) == 0:
            continue
        branch = {'id': cid, 'depth': len(cols)}
        for c in reversed(cols):
            branch = {c: branch}
        aTree.update(nested_dict(branch))
    hdl.close()
    return aTree.to_dict(), i
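# Usage sketch for buildTree; the file name and rows below are invented for
# illustration. Each tab-separated row starts with an identifier followed by
# the labels along its path; '-' entries are dropped when skipdash=True.
with open('tree.tsv', 'w') as demo:
    demo.write('n1\tanimal\tcat\n')
    demo.write('n2\tplant\t-\toak\n')
tree, last_row_index = buildTree('tree.tsv', skipdash=True)
# tree -> {'animal': {'cat': {'id': 'n1', 'depth': 2}},
#          'plant':  {'oak': {'id': 'n2', 'depth': 2}}}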
def make_gff3(base_name, args, mapping_dict): """make gff3 output of blast hits for jbrowse display""" DMP_dict = nested_dict() for context in ['cg','chg','chh']: file = os.path.join(args.outputdir,'%s.%s.pickle' % (base_name, context)) with open(file) as input_handle: dict_entry = nested_dict(pickle.load(input_handle)) DMP_dict.update(dict_entry) gff3_dir = os.path.join(args.outputdir,'gff3') if not os.path.exists(gff3_dir): os.mkdir(gff3_dir) gff3_output = open(os.path.join(gff3_dir, base_name.replace('(','').replace(')','').replace('.','') + '.gff3'), 'w') gff3_output.write('##gff-version 3.2.1\n') for gene, subdict in DMP_dict.items(): contig = None for position,subdict in sorted(subdict.items()): if subdict['original_contig'] != contig: contig = subdict['original_contig'] concat_contig, concat_start_pos, concat_end_pos = mapping_dict[contig] gff3_output.write('##sequence-region %s %s %s\n' % (concat_contig, concat_start_pos, concat_end_pos)) out_line = [] out_line.append('%(concatenated_contig)s' % subdict) #1 seqid out_line.append('RnBeads_%(context)s' % subdict) #2 source out_line.append('5_methylcytosine') #3 type 5_methylcytosine see http://www.sequenceontology.org/browser/current_svn/term/SO:0001918 out_line.append(str(int('%(concat_contig_pos)s' % subdict))) #4 start out_line.append(str(int('%(concat_contig_pos)s' % subdict))) #5 end out_line.append('%(diffmeth.p.val)s' % subdict) #6 score out_line.append('%(Strand)s' % subdict) #6 strand out_line.append('0') #6 Phase . or 0 attributes = 'ID=%(context)s_%(combinedRank)s;' % subdict subdict['diffmeth.p.val'] = '%.2e' % float(subdict['diffmeth.p.val']) subdict['diffmeth.p.adj.fdr'] = '%.2e' % float(subdict['diffmeth.p.adj.fdr']) attributes += 'Name=%(mean.diff)s p-value:%(diffmeth.p.val)s str:(%(Strand)s);' % (subdict) #7 Attributes, start with unique ID attributes += 'Description=FDR-adjusted p-value:%(diffmeth.p.adj.fdr)s;' % subdict #7 Attributes, start with unique ID attributes += 'Alias=%(concatenated_contig)s_%(concat_contig_pos)s;' % subdict #7 Attributes, start with unique ID attributes += 'Ontology_term=SO:0001918;' % subdict #7 Attributes, start with unique ID for k,v in subdict.items(): k = k[0].upper() + k[1:] attributes += '%s=%s;' % (k,v) attributes = attributes[:-1] out_line.append(attributes) #6 Phase . or 0 gff3_output.write('\t'.join(out_line) + '\n') os.system('bgzip -f %s' % (os.path.join(gff3_dir, base_name.replace('(','').replace(')','').replace('.','') + '.gff3')) ) os.system('tabix -p gff %s.gz' % (os.path.join(gff3_dir, base_name.replace('(','').replace(')','').replace('.','') + '.gff3')) )
def update_leaf(initial_dict, update_dict):
    """Smart update of dictionaries, by leaf.

    TODO Not implemented yet. Returning simple dict update.

    IDEA: Make tuple dict where tuple is made of keys and value is the last
    node. It will update recursively, only when the leaf is found.

    For example:
        a = {'a':1, 'b':{'c':2, 'd':'3'}}
        b = {'a':2, 'b':{'c':3, 'e':'3'}}
        c = update_leafs(a,b)
        c = {'a':2, 'b':{'c':3,'e':'3','d':'3'}}
    """
    import nested_dict as nd
    nd_initial_dict = nd.nested_dict(initial_dict)
    nd_update_dict = nd.nested_dict(update_dict)
    nd_initial_dict.update(nd_update_dict)
    return nd_initial_dict.to_dict()
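# A possible leaf-wise merge along the lines of the docstring's IDEA, sketched
# with nested_dict's flat iteration. This is a hypothetical helper, not the
# current behaviour of update_leaf above.
import nested_dict as nd

def update_leaf_by_leaf(initial_dict, update_dict):
    merged = nd.nested_dict()
    # replay the initial leaves first, then overwrite/extend with the update's
    # leaves, so siblings like 'd' in the docstring example are preserved
    for source in (initial_dict, update_dict):
        for keys, value in nd.nested_dict(source).items_flat():
            node = merged
            for k in keys[:-1]:
                node = node[k]  # auto-vivifies intermediate levels
            node[keys[-1]] = value
    return merged.to_dict()

# a = {'a': 1, 'b': {'c': 2, 'd': '3'}}
# b = {'a': 2, 'b': {'c': 3, 'e': '3'}}
# update_leaf_by_leaf(a, b) -> {'a': 2, 'b': {'c': 3, 'd': '3', 'e': '3'}}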
def bed_coordinate_conversion(genome_coordinate_bed=None, trans_bed=None, genome_bed=None, write_wig=1):
    if genome_bed is None:
        genome_bed = trans_bed.replace('.bed', '.genome.bed')
    print "ref genome bed: %s" % (genome_coordinate_bed)
    print "input trans bed: %s" % (trans_bed)
    print "output genome bed: %s" % (genome_bed)
    Parser = ParseTransClass(genomeCoorBedFile=genome_coordinate_bed)
    GENOME_BED = open(genome_bed, 'w')
    convert_dict = nested_dict(2, list)  # chr:['start'],['end'],['score']
    with open(trans_bed, 'r') as TRANS_BED:
        for line in TRANS_BED:
            line = line.strip()
            if not line or line.startswith(('#', 'track')):
                continue
            arr = line.split('\t')
            trans_id = arr[0]
            start = int(arr[1]) + 1  # need 1-based
            end = int(arr[2])
            score = float(arr[4])
            # print trans_id, start
            convert_ls = Parser.transCoor2geneCoor(trans_id, start, end)  # output also 1-based
            """
            >>> Parser.transCoor2geneCoor("ENSDART00000168451", 1, 1)
            [['1', 18716, 18716, 'ENSDARG00000102097', 1, 1]]
            """
            """
            for i in convert_ls:
                arr[0] = i[0]
                arr[1] = i[1]
                arr[2] = i[2]
                arr[3] = i[3]
                print >>GENOME_BED, '\t'.join(map(str, arr))
            """
            arr[0] = convert_ls[0][0]
            arr[1] = convert_ls[0][1] - 1
            arr[2] = convert_ls[0][2]
            arr[3] = ','.join([i[3] for i in convert_ls])
            print >>GENOME_BED, '\t'.join(map(str, arr))
            convert_dict[arr[0]]['start'].append(arr[1])
            convert_dict[arr[0]]['end'].append(arr[2])
            convert_dict[arr[0]]['score'].append(arr[4])
    GENOME_BED.close()
    if write_wig:
        wig = trans_bed.replace('.bed', '.genome.wig')
        with open(wig, 'w') as WIG:
            print >>WIG, 'track type=wiggle_0'
            for i, j in convert_dict.items():
                print >>WIG, 'variableStep chrom=%s span=1' % (i)
                start_ls, score_ls = zip(*sorted(zip(j['start'], j['score'])))
                for start, score in zip(start_ls, score_ls):
                    print >>WIG, '\t'.join(map(str, [start + 1, score]))
def read_dir(dir='/Share/home/zhangqf7/gongjing/zebrafish/data/paris/shi-zp-5-rep-combine/downsampling_N', to_dgframe=0, get_inter_intra=1, read_nx=1, interRRI_norRNA=1, support_read=3): fn_ls = os.listdir(dir) # print fn_ls fn_stat_dict = nested_dict() downsampling_N_draw = dir + '.subnetwork.draw.pdf' fig,ax=plt.subplots(10,1) for n,fn in enumerate(fn_ls): print "process: %s"%(fn) dfFile = dir + '/' + fn + '/' + '27-DG' frameFile = dfFile + '.txt' if to_dgframe: paris_dg2frame.DG2Frame(dfFile=dfFile, frameFile=frameFile) if get_inter_intra: inter, intra = 0, 0 with open(frameFile, 'r') as TXT: for line in TXT: line = line.strip() if not line or line.startswith('#'): continue arr = line.split('\t') if arr[1] == arr[5]: intra += 1 else: inter += 1 fn_stat_dict[fn]['inter'] = inter fn_stat_dict[fn]['intra'] = intra fn_stat_dict[fn]['all'] = intra + inter if read_nx: df = pd.read_csv(frameFile, header=0, sep='\t') df['type'] = ['intra' if i == j else 'inter' for i,j in zip(df['lchr'], df['rchr'])] df_inter_RRI = df[df['type']=='inter'] nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr') fn_stat_dict[fn]['uniq RRI'] = len(nx_inter_RRI.edges()) if interRRI_norRNA: df_inter_RRI = df_inter_RRI[(df_inter_RRI['ltype'].isin(['mRNA', 'lncRNA'])) & (df_inter_RRI['rtype'].isin(['mRNA', 'lncRNA']))] df_inter_RRI = df_inter_RRI[df_inter_RRI['support']>=support_read] nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr') nx_inter_RRI_info_dict, G_largest = RRI_network_property2(nx_inter_RRI) for i,j in nx_inter_RRI_info_dict.items(): fn_stat_dict[fn][i] = j # fn_stat_dict[fn]['uniq RRI'] = len(nx_inter_RRI.edges()) if n < 10: draw_graph(G_largest, ax=ax[n]) plt.savefig(downsampling_N_draw) savefn = dir + '.stat.txt' fn_stat_df = pd.DataFrame.from_dict(fn_stat_dict) fn_stat_df = fn_stat_df.T fn_stat_df['sampling'] = fn_stat_df.index print fn_stat_df.head() fn_stat_df.to_csv(savefn, header=True, index=False, sep='\t') return fn_stat_df
def match_model(edit):
    '''Given an edit sequence, look up the likelihood'''
    modelmatch = nested_dict(pickle.load(open("resources/similarity_model.p", "rb")))
    keys = {'delete': 0.0, 'insert': 0.0, 'replace': 0.0}
    d = dict(Counter([e[0] for e in edit]))
    for k, v in d.iteritems():
        keys[k] = v
    p = modelmatch[keys['delete']][keys['insert']][keys['replace']]
    if isinstance(p, float):
        return p
    else:
        return 0
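# Illustration only: an edit sequence here is a list whose elements start with
# the operation name ('delete', 'insert' or 'replace'); the per-operation
# counts become the three nested lookup keys into the pickled model. The tuple
# contents below are hypothetical.
# edit = [('replace', 'a', 'b'), ('delete', 'c', ''), ('replace', 'd', 'e')]
# match_model(edit)  # looks up the likelihood for 1 delete, 0 inserts, 2 replaces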
def get_adwords_data(store, numdays):
    start, end = get_sliding_date_range(numdays)
    adwords_report = AdwordsReport(store, ["kwSite"], ["cost", "qualityScore"],
                                   ["add", "average"], None, time_aggregation=None)
    report = adwords_report.get_data(mode=None, length=None, start_date=start, end_date=end)
    spend_data = nested_dict()
    for entry in report["entries"]:
        kw = entry["dimensions"]["kwSite"]["value"].lower()
        spend_data[kw]['cost'] = entry["metrics"]["cost"]["value"]
        spend_data[kw]['quality'] = entry["metrics"]["qualityScore"]["value"]
    return spend_data
def get_groups(args):
    """get groups defined in sample file"""
    group_dict = nested_dict()
    with open(args.samples) as handle:
        header = handle.readline().rstrip('\n').split(',')
        for line in handle:
            split_line = line.rstrip('\n').split(',')
            sample = split_line[0]
            for name, item in zip(header[2:], split_line[2:]):
                try:
                    group_dict[name][item].append(sample)
                except AttributeError:
                    group_dict[name][item] = [sample]
    return group_dict
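# Sketch of the sample file get_groups expects (a hypothetical CSV; only the
# layout matters): column 1 is the sample name, the first two columns are
# skipped, and every column from the third onwards is a grouping factor.
#
# samples.csv:
#   sample,file,treatment,tissue
#   s1,s1.bam,control,leaf
#   s2,s2.bam,drought,leaf
#   s3,s3.bam,drought,root
#
# get_groups(args) then yields roughly:
#   {'treatment': {'control': ['s1'], 'drought': ['s2', 's3']},
#    'tissue':    {'leaf': ['s1', 's2'], 'root': ['s3']}}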
def all_gini(RPKM_combine=None, mode='gini', null_pct=1): if RPKM_combine is None: RPKM_combine = '/Share/home/zhangqf7/gongjing/zebrafish/result/RPKMCorrelationPairwiseNew/2018_03_12_gini/RPKM_combine.merge.txt' df = pd.read_csv(RPKM_combine, header=None, index_col=0, sep='\t') print df.head() trans_dict = loadTransGtfBed2('/Share/home/zhangqf7/gongjing/zebrafish/data/reference/transcriptome/danRer10.refSeq.transcriptome.trans.bed2') sample_ls = ['egg', '1cell', '4cell', '64cell', 'sphere', 'shield'] sample_path = ['/Share/home/zhangqf7/gongjing/zebrafish/data/icSHAPE_final_out_new_win/%s.icshape.w200.s30.T2.t200.out'%(i) for i in sample_ls] # norm by window sample_ic_dict = nested_dict() for i,j in zip(sample_path, sample_ls): print "read icshape: %s"%(i) sample_ic_dict[j] = readIc(i) t_cutoff = sample_path[0].split('.')[-2] savefn = RPKM_combine.replace('.txt', '.%s.%s.null%s.txt'%(t_cutoff, mode, int(null_pct*100))) SAVEFN = open(savefn, 'w') print >>SAVEFN, '\t'.join(['%s(transcript)\t%s(UTR5)\t%s(CDS)\t%s(UTR3)'%(i,i,i,i) for i in sample_ls]) for tx in df.index: gini_ls = [tx] for i in sample_ls: if sample_ic_dict[i].has_key(tx) and trans_dict.has_key(tx): utr_5_start, utr_5_end, cds_start, cds_end, utr_3_start, utr_3_end = [int(trans_dict[tx][g]) for g in ['utr_5_start', 'utr_5_end', 'cds_start', 'cds_end', 'utr_3_start', 'utr_3_end']] if utr_5_start == 0: utr_5_gini = 'NULL' else: utr_5_gini = gini(sample_ic_dict[i][tx][(utr_5_start-1):(utr_5_end)], mode=mode, null_pct=null_pct) if utr_5_gini < 0: utr_5_gini = 'NULL' if utr_3_start == 0: utr_3_gini = 'NULL' else: utr_3_gini = gini(sample_ic_dict[i][tx][(utr_3_start-1):(utr_3_end)], mode=mode, null_pct=null_pct) if utr_3_gini < 0: utr_3_gini = 'NULL' cds_gini = gini(sample_ic_dict[i][tx][(cds_start-1):(cds_end)], mode=mode, null_pct=null_pct) if cds_gini < 0: cds_gini = 'NULL' transcript_gini = gini(sample_ic_dict[i][tx][0:], mode=mode, null_pct=null_pct) if transcript_gini < 0: transcript_gini = 'NULL' sample_gini_ls = [transcript_gini, utr_5_gini, cds_gini, utr_3_gini] else: sample_gini_ls = ['NULL','NULL', 'NULL', 'NULL'] gini_ls += sample_gini_ls print >> SAVEFN, '\t'.join(map(str, gini_ls)) SAVEFN.close()
def loadTransGtfBed2(ref_bed='/Share/home/zhangqf7/gongjing/zebrafish/data/reference/gtf/ref_GRCz10_top_level.trans.bed.2'):
    H = open(ref_bed)
    line = H.readline()
    trans_dict = nested_dict()
    header_ls = ['tx', 'gene', 'type', 'length', 'utr_5_start', 'utr_5_end',
                 'cds_start', 'cds_end', 'utr_3_start', 'utr_3_end']
    while line:
        if line.startswith('#'):
            line = H.readline(); continue
        arr = line.strip().split()
        for i, j in zip(header_ls, arr):
            trans_dict[arr[0]][i] = j
        line = H.readline()
    H.close()
    print "read: %s, n=%s" % (ref_bed, len(trans_dict))
    return trans_dict.to_dict()
def read_maternal(maternal_list_file=None):
    if maternal_list_file is None:
        maternal = '/Share/home/zhangqf7/gongjing/zebrafish/data/maternal_gene/maternal-decay.txt'
    else:
        maternal = maternal_list_file
    maternal_gene_dict = nested_dict(1, int)
    with open(maternal, 'r') as IN:
        for line in IN:
            line = line.strip()
            if not line or line.startswith('value'):
                continue
            arr = line.split('\t')
            maternal_gene_dict[arr[0]] += 1
    return maternal_gene_dict.to_dict()
def read_rpkm(rpkm_txt=None):
    if rpkm_txt is None:
        rpkm_txt = '/Share/home/zhangqf7/gongjing/zebrafish/data/RPKM/DMSO_1cell_rep1'
    rpkm_dict = nested_dict()
    with open(rpkm_txt, 'r') as TXT:
        for line in TXT:
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                header = line.replace('#', '').split('\t')
                continue
            arr = line.split('\t')
            for i, j in zip(header, arr):
                rpkm_dict[arr[0]][i] = j
    return rpkm_dict.to_dict()
def loadTransGtfBed2(ref_bed='/Share/home/zhangqf7/gongjing/mes/ref/mm10.transCoor.bed.2'):
    H = open(ref_bed)
    line = H.readline()
    trans_dict = nested_dict()
    header_ls = ['tx', 'gene', 'type', 'length', 'utr_5_start', 'utr_5_end',
                 'cds_start', 'cds_end', 'utr_3_start', 'utr_3_end']
    while line:
        if line.startswith('#'):
            line = H.readline(); continue
        arr = line.strip().split('\t')
        gene = arr[1].split('=')[0].split()[0]
        for i, j in zip(header_ls, arr):
            trans_dict[arr[0]][i] = j
        line = H.readline()
    H.close()
    print "read: %s, n=%s" % (ref_bed, len(trans_dict))
    return trans_dict.to_dict()
def read_bed(bed):
    print "load: %s" % (bed)
    d = nested_dict(2, list)
    with open(bed, 'r') as BED:
        for line in BED:
            if not line or line.startswith('#'):
                continue
            arr = line.split('\t')
            tx_start_end = '|'.join(arr[1:4])
            sample_ls = arr[6].split('|')
            for sample in sample_ls:
                d[sample][len(sample_ls)].append(tx_start_end)
    d = d.to_dict()
    for sample in sample_ls:
        print sample, len(d[sample][len(sample_ls)]), len(sample_ls)
    return d
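# Illustration of the window-anno bed line read_bed expects (the line below is
# invented): columns 2-4 hold the transcript window and column 7 lists the
# supporting samples joined by '|'.
#
#   chr1<TAB>ENSDART0001<TAB>100<TAB>200<TAB>.<TAB>+<TAB>egg_cell1|cell1_cell4
#
# For that line the window 'ENSDART0001|100|200' is recorded under each listed
# sample, keyed by the number of supporting samples (here 2):
#   d['egg_cell1'][2]   -> ['ENSDART0001|100|200']
#   d['cell1_cell4'][2] -> ['ENSDART0001|100|200']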
def file_info(file_dir=None, result_dir=None):
    if file_dir is None:
        file_dir = '/Share/home/zhangqf7/gongjing/zebrafish/data/RT'
    if result_dir is None:
        result_dir = '/Share/home/zhangqf7/gongjing/zebrafish/result/RTCorrelationPairwise'
    files = os.listdir(file_dir)
    NAI_files = [i for i in files if i.startswith('NAI')]
    DMSO_files = [i for i in files if i.startswith('DMSO')]
    paths = [file_dir + '/' + i for i in files]
    file_info_dict = nested_dict()
    file_info_dict['file_dir'] = file_dir
    file_info_dict['files'] = files
    file_info_dict['paths'] = paths
    file_info_dict['result_dir'] = result_dir
    return file_info_dict.to_dict()
def pickle_gff3_entry(in_file, out_file, args, mapping_dict):
    """pickle gff3 file entry with DMPs for merging"""
    DMP_dict = nested_dict()
    with open(in_file, 'r') as in_handle:
        header = in_handle.readline()[:-1].split('\t')
        context = in_file.split('.')[-2]
        for line in in_handle:
            split_line = line[:-1].split('\t')
            content = {'context': context}
            for k, v in zip(header, split_line):
                if ',' in v:
                    v = float(v.replace(',', '.'))
                content[k] = v
            try:
                if content['diffmeth.p.val'] <= float(args.treshold):
                    DMP_dict[content['concatenated_contig']][int(content['concat_contig_pos'])] = content
            except ValueError:
                continue
    with open(out_file, 'wb') as out_handle:
        pickle.dump(DMP_dict.to_dict(), out_handle, 2)
def wur_buxton_diff(header, split_line):
    """calculates average difference in meth between WUR and buxton"""
    meth_values = nested_dict()
    for k, v in zip(header, split_line):
        try:
            location, sample, type = k.split('_')
            if sample in groups:
                meth_values[groups[sample]][location][sample][type] = v
        except ValueError:
            pass
    for group in meth_values.keys():
        buxton_meth_values = []
        for ind, meth_dict in meth_values[group]['BUXTON'].items():
            try:
                buxton_meth_values.append(int(meth_dict['methylated']) / float(meth_dict['total']))
            except TypeError:
                buxton_meth_values.append(None)
            except ValueError:
                buxton_meth_values.append(None)
        wur_meth_values = []
        for ind, meth_dict in meth_values[group]['WUR'].items():
            try:
                wur_meth_values.append(int(meth_dict['methylated']) / float(meth_dict['total']))
            except TypeError:
                wur_meth_values.append(None)
            except ValueError:
                wur_meth_values.append(None)
        diff = [a - b for a, b in zip(buxton_meth_values, wur_meth_values) if a and b]
        if diff == []:
            abs_diff = None
            rel_diff = None
        else:
            abs_diff = sum([abs(v) for v in diff]) / float(len(diff))
            rel_diff = sum(diff) / float(len(diff))
        meth_values[group]['abs_diff'] = abs_diff
        meth_values[group]['rel_diff'] = rel_diff
    return meth_values
def json2VerticaTable(tableName, jsonData, cur, keep_keys, extend_cols, extend_vals):
    """insert chosen {key,value} pairs of data into Vertica for each batch"""
    i = 0
    for r in jsonData:
        # since >=2 level, the 2nd level's data will be a problem
        columns = []
        values = []
        nd = nested_dict(r)
        for keys_as_tuple, value in nd.items_flat():
            con_keys = "_".join(str(x) for x in keys_as_tuple)
            if con_keys in keep_keys:
                columns.append(con_keys)
                values.append(str(value).replace("'", "''"))
        columns.extend(extend_cols)
        values.extend(extend_vals)
        # print("**************this is the columns******************")
        # print columns
        # print("**************this is the values******************")
        # print values
        if i == 0:
            # CREATE TABLE foo (numbs int, names varchar(30))
            createTableSQL = "CREATE TABLE IF NOT EXISTS " + tableName + " ("
            for k in columns:
                createTableSQL = createTableSQL + k + " varchar(200),"
            # remove the last , and add )
            createTableSQL = createTableSQL[:-1] + ")"
            # print(createTableSQL)
            cur.execute(createTableSQL)
            i = i + 1
        else:
            columnsStr = ','.join(columns)
            # print(columnsStr)
            # TypeError: sequence item 6: expected string or Unicode, bool found
            valuesStr = ','.join(["'" + str(x) + "'" for x in values])
            # print(valuesStr)
            insertSQL = "INSERT INTO %s (%s) VALUES (%s)" % (tableName, columnsStr, valuesStr)
            # print(insertSQL)
            cur.execute(insertSQL)
            i = i + 1
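# Small illustration of the key flattening json2VerticaTable relies on; the
# record and keys below are invented. Nested JSON keys are joined with '_' so
# keep_keys can select them by their flattened column name.
from nested_dict import nested_dict

record = {"user": {"id": 42, "name": "alice"}, "event": "click"}
for keys_as_tuple, value in nested_dict(record).items_flat():
    print("%s = %s" % ("_".join(str(x) for x in keys_as_tuple), value))
# prints (in no particular order):
#   user_id = 42
#   user_name = alice
#   event = click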
def get_stats(args):
    """get stats based on bam file"""
    mapping_dict = nested_dict.nested_dict()
    try:
        handle = pysam.AlignmentFile(args.input, 'rb')
    except OSError:
        print 'error'
    # Samples can be added from several lanes, which will result in different read groups.
    # In order to only account for samples here, make a dict mapping RG_ID to sample.
    RG_to_sample = dict([(r['ID'], r['SM']) for r in handle.header['RG']])
    count = 0
    for read in handle:
        count += 1
        if not count % 1000000:
            print '%s reads processed' % count
        if not read.is_duplicate and not read.is_qcfail:
            # make dict of read tag objects
            tag_dict = dict(read.tags)
            sample = RG_to_sample[tag_dict['RG']]
            # add count of valid read to total for this sample
            try:
                mapping_dict['total'][sample] += 1
            except TypeError:
                mapping_dict['total'][sample] = 1
            if 'mono' in sample:
                if sample.replace(' ', '_') not in read.reference_name:
                    try:
                        if read.reference_name not in mapping_dict['discard']:
                            mapping_dict['discard'].append(read.reference_name)
                    except AttributeError:
                        mapping_dict['discard'] = [read.reference_name]
                    except KeyError:
                        mapping_dict['discard'] = [read.reference_name]
            try:
                mapping_dict[read.reference_name][sample] += 1
            except TypeError:
                mapping_dict[read.reference_name][sample] = 1
    return mapping_dict
def test_default(self):
    """
    test a range of nested_dict
    """
    from nested_dict import nested_dict
    nd = nested_dict()
    nd['new jersey']['mercer county']['plumbers'] = 3
    nd['new jersey']['mercer county']['programmers'] = 81
    nd['new jersey']['middlesex county']['programmers'] = 81
    nd['new jersey']['middlesex county']['salesmen'] = 62
    nd['new york']['queens county']['plumbers'] = 9
    nd['new york']['queens county']['salesmen'] = 36
    expected_result = sorted([(('new jersey', 'mercer county', 'plumbers'), 3),
                              (('new jersey', 'mercer county', 'programmers'), 81),
                              (('new jersey', 'middlesex county', 'programmers'), 81),
                              (('new jersey', 'middlesex county', 'salesmen'), 62),
                              (('new york', 'queens county', 'plumbers'), 9),
                              (('new york', 'queens county', 'salesmen'), 36)])
    all = sorted(tup for tup in nd.iteritems_flat())
    self.assertEqual(all, expected_result)
    all = sorted(tup for tup in nd.items_flat())
    self.assertEqual(all, expected_result)
def classify_using_naive_bays(titles, dict_titles_testing, dict_priors, words_dict): title_score_dict = nd.nested_dict() # counter_right = 0 # counter_wrong = 0 for title in titles: score_dict = {} words = regex_filtering(title.lower()) if dict_priors['story'] == 0: score_story = float('-inf') else: score_story = math.log10(dict_priors['story']) if dict_priors['ask_hn'] == 0: score_ask_hn = float('-inf') else: score_ask_hn = math.log10(dict_priors['ask_hn']) if dict_priors['show_hn'] == 0: score_show_hn = float('-inf') else: score_show_hn = math.log10(dict_priors['show_hn']) if dict_priors['poll'] == 0: score_poll = float('-inf') else: score_poll = math.log10(dict_priors['poll']) for word in words: if word in words_dict.keys( ): # should we do something if word is not in model or just skip it? score_story += math.log10(words_dict[word]['P(w|story)']) score_ask_hn += math.log10(words_dict[word]['P(w|ask_hn)']) score_show_hn += math.log10(words_dict[word]['P(w|show_hn)']) score_poll += math.log10(words_dict[word]['P(w|poll)']) score_dict['story'] = score_story score_dict['ask_hn'] = score_ask_hn score_dict['show_hn'] = score_show_hn score_dict['poll'] = score_poll max_value = max(score_dict.values()) # maximum value max_keys = [k for k, v in score_dict.items() if v == max_value ] # getting all keys containing the `maximum` estimated_post_type = max_keys[0] title_score_dict[title]['estimation'] = estimated_post_type if estimated_post_type == dict_titles_testing[title]: title_score_dict[title][ 'original_post_type'] = dict_titles_testing[title] + ' right' # counter_right += 1 else: title_score_dict[title][ 'original_post_type'] = dict_titles_testing[title] + ' wrong' # counter_wrong += 1 title_score_dict[title]['score_story'] = score_story title_score_dict[title]['score_ask_hn'] = score_ask_hn title_score_dict[title]['score_show_hn'] = score_show_hn title_score_dict[title]['score_poll'] = score_poll # print(counter_right) # print(counter_wrong) return title_score_dict
def create_synaptic_input(self, **keywords): """ Calculate synaptic input of populations and areas using the spike data. Uses function ah.pop_synaptic_input. If the synaptic inputs have previously been stored with the same parameters, they are loaded from file. Parameters ---------- t_min : float, optional Minimal time in ms of the simulation to take into account for the calculation. Defaults to 500 ms. t_max : float, optional Maximal time in ms of the simulation to take into account for the calculation. Defaults to the simulation time. areas : list, optional Which areas to include in the calculcation. Defaults to all loaded areas. pops : list or {'complete'}, optional Which populations to include in the calculation. If set to 'complete', all populations the respective areas are included. Defaults to 'complete'. kernel : {'gauss_time_window', 'alpha_time_window', 'rect_time_window'}, optional Convolution kernel for the calculation of the underlying firing rates. Defaults to 'binned' which corresponds to a simple histogram. resolution: float, optional Width of the convolution kernel. Specifically it correponds to: - 'binned' : bin width of the histogram - 'gauss_time_window' : sigma - 'alpha_time_window' : time constant of the alpha function - 'rect_time_window' : width of the moving rectangular function """ default_dict = { 'areas': self.areas_loaded, 'pops': 'complete', 'resolution': 1., 'kernel': 'binned' } params = ah._create_parameter_dict(default_dict, self.T, **keywords) # Check if synaptic inputs have been stored with the same parameters iterator_areas = ah.model_iter(mode='single', areas=params['areas'], pops=None) iterator_pops = ah.model_iter(mode='single', areas=params['areas'], pops=params['pops']) fp = os.path.join(self.output_dir, 'synaptic_input') self.synaptic_input = ah._check_stored_data(fp, copy(iterator_areas), params) fp = os.path.join(self.output_dir, 'synaptic_input_pops') self.synaptic_input_pops = ah._check_stored_data( fp, copy(iterator_pops), params) if self.synaptic_input is None: print('Computing rate time series') if 'rate_time_series' not in inspect.getmembers(self): self.create_rate_time_series(**params) d_pops = nested_dict() d_pops['Parameters'] = params for area, pop in copy(iterator_pops): if pop in self.network.structure[area]: if 'I' in pop: tau_syn = self.network.params['neuron_params'][ 'single_neuron_dict']['tau_syn_in'] else: tau_syn = self.network.params['neuron_params'][ 'single_neuron_dict']['tau_syn_ex'] time_series = ah.synaptic_output( self.rate_time_series_pops[area][pop], tau_syn, params['t_min'], params['t_max'], resolution=params['resolution']) d_pops[area][pop] = time_series self.synaptic_output_pops = d_pops.to_dict() d_pops = nested_dict() d_pops['Parameters'] = params d_pops['Parameters'] = params for area, pop in iterator_pops: if pop in self.network.structure[area]: time_series = np.zeros( int((params['t_max'] - params['t_min']) / params['resolution'])) for source_area, source_pop in ah.model_iter( mode='single', areas=self.areas_loaded): if source_pop in self.network.structure[source_area]: weight = self.network.W[area][pop][source_area][ source_pop] time_series += ( self.synaptic_output_pops[source_area] [source_pop] * abs(weight) * self.network.K[area][pop][source_area] [source_pop]) d_pops[area][pop] = time_series d = nested_dict() d['Parameters'] = params for area in params['areas']: d[area] = np.zeros( int((params['t_max'] - params['t_min']) / params['resolution'])) for pop in self.network.structure[area]: d[area] += 
d_pops[area][pop] * self.network.N[area][pop] d[area] /= self.network.N[area]['total'] self.synaptic_input = d.to_dict() self.synaptic_input_pops = d_pops.to_dict()
def add_events(self, company_id, events, worker, allow_locked_tasks=False) -> Tuple[int, int, dict]: actions = [] task_ids = set() task_iteration = defaultdict(lambda: 0) task_last_scalar_events = nested_dict( 3, dict) # task_id -> metric_hash -> variant_hash -> MetricEvent task_last_events = nested_dict( 3, dict) # task_id -> metric_hash -> event_type -> MetricEvent errors_per_type = defaultdict(int) valid_tasks = self._get_valid_tasks( company_id, task_ids={ event["task"] for event in events if event.get("task") is not None }, allow_locked_tasks=allow_locked_tasks, ) for event in events: # remove spaces from event type event_type = event.get("type") if event_type is None: errors_per_type["Event must have a 'type' field"] += 1 continue event_type = event_type.replace(" ", "_") if event_type not in EVENT_TYPES: errors_per_type[f"Invalid event type {event_type}"] += 1 continue task_id = event.get("task") if task_id is None: errors_per_type["Event must have a 'task' field"] += 1 continue if task_id not in valid_tasks: errors_per_type["Invalid task id"] += 1 continue event["type"] = event_type # @timestamp indicates the time the event is written, not when it happened event["@timestamp"] = es_factory.get_es_timestamp_str() # for backward bomba-tavili-tea if "ts" in event: event["timestamp"] = event.pop("ts") # set timestamp and worker if not sent if "timestamp" not in event: event["timestamp"] = es_factory.get_timestamp_millis() if "worker" not in event: event["worker"] = worker # force iter to be a long int iter = event.get("iter") if iter is not None: iter = int(iter) event["iter"] = iter # used to have "values" to indicate array. no need anymore if "values" in event: event["value"] = event["values"] del event["values"] event["metric"] = event.get("metric") or "" event["variant"] = event.get("variant") or "" index_name = get_index_name(company_id, event_type) es_action = { "_op_type": "index", # overwrite if exists with same ID "_index": index_name, "_source": event, } # for "log" events, don't assing custom _id - whatever is sent, is written (not overwritten) if event_type != EventType.task_log.value: es_action["_id"] = self._get_event_id(event) else: es_action["_id"] = dbutils.id() task_ids.add(task_id) if (iter is not None and event.get("metric") not in self._skip_iteration_for_metric): task_iteration[task_id] = max(iter, task_iteration[task_id]) self._update_last_metric_events_for_task( last_events=task_last_events[task_id], event=event, ) if event_type == EventType.metrics_scalar.value: self._update_last_scalar_events_for_task( last_events=task_last_scalar_events[task_id], event=event) actions.append(es_action) action: Dict[dict] plot_actions = [ action["_source"] for action in actions if action["_source"]["type"] == EventType.metrics_plot.value ] if plot_actions: self.validate_and_compress_plots( plot_actions, validate_json=config.get("services.events.validate_plot_str", False), compression_threshold=config.get( "services.events.plot_compression_threshold", 100_000), ) added = 0 if actions: chunk_size = 500 with translate_errors_context(), TimingContext( "es", "events_add_batch"): # TODO: replace it with helpers.parallel_bulk in the future once the parallel pool leak is fixed with closing( helpers.streaming_bulk( self.es, actions, chunk_size=chunk_size, # thread_count=8, refresh=True, )) as it: for success, info in it: if success: added += 1 else: errors_per_type[ "Error when indexing events batch"] += 1 remaining_tasks = set() now = datetime.utcnow() for task_id in task_ids: # Update 
related tasks. For reasons of performance, we prefer to update # all of them and not only those who's events were successful updated = self._update_task( company_id=company_id, task_id=task_id, now=now, iter_max=task_iteration.get(task_id), last_scalar_events=task_last_scalar_events.get( task_id), last_events=task_last_events.get(task_id), ) if not updated: remaining_tasks.add(task_id) continue if remaining_tasks: TaskBLL.set_last_update(remaining_tasks, company_id, last_update=now) if not added: raise errors.bad_request.EventsNotAdded(**errors_per_type) errors_count = sum(errors_per_type.values()) return added, errors_count, errors_per_type
def flatten_params(params):
    return OrderedDict(('.'.join(k), Variable(v, requires_grad=True))
                       for k, v in nested_dict(params).iteritems_flat()
                       if v is not None)
def search_user(request): if request.method=='POST': userid = request.POST.get('userid'); userrole = request.POST.get('userrole') #return render_to_response('search_result.html', {'userid':userid, 'userrole':userrole}) return render(request,'search_result.html', {'userid':userid, 'userrole':userrole}) elif request.method=='GET': if 'userid' in request.GET and request.GET['userid']: userid= request.GET['userid'] userrole=request.GET['userrole'] lifecycle_userrole = userrole.split('-') lifecycle = lifecycle_userrole[1] usertype = lifecycle_userrole[2] #userrole = str(lifecycle_userrole[1]).title() if lifecycle == "all": user_status = nested_dict() get_usrlist_frm_cpo_dev1 = USER_TABLE.objects.using('eon_rtp3_1_l').filter(username=userid).order_by('userid') for user in get_usrlist_frm_cpo_dev1: user_status['eon-rtp3-1-l']= user.subtypeofuser user_status['eon-rch1-1-l']= user.subtypeofuser get_usrlist_frm_cpo_dev2 = USER_TABLE.objects.using('eon_rtp3_2_l').filter(username=userid).order_by('userid') for user in get_usrlist_frm_cpo_dev2: user_status['eon-rtp3-2-l']= user.subtypeofuser user_status['eon-rtp5-1-l']= user.subtypeofuser cpo_dev_onramp_resource = {1:'cpo-dev-superuser',2:'cpo-dev-sysadmin',3:'cpo-dev-networkadmin',4:'cpo-dev-operator',5:'cpo-dev-provisioner',} for subtypeoduser,onramp_resource in cpo_dev_onramp_resource.iteritems(): #onramp_provisioned_users = get_cpo_provisioned_userlist(onramp_resource) onramp_provisioned_users = get_local_cpo_provisioned_userlist(request,userrole) logger.info("All user found under onramp resource : ") logger.debug(onramp_provisioned_users) #onramp_approved_usrlist=[] if onramp_provisioned_users != None: if userid in onramp_provisioned_users: logger.info("user found") user_status['onramp_dev']=subtypeoduser cpo_prod_onramp_resource = {1:'cpo-prod-superuser',2:'cpo-prod-sysadmin',3:'cpo-prod-networkadmin',4:'cpo-prod-operator',5:'cpo-prod-provisioner',} for subtypeoduser,onramp_resource in cpo_prod_onramp_resource.iteritems(): #onramp_provisioned_users = get_cpo_provisioned_userlist(onramp_resource) onramp_provisioned_users = get_local_cpo_provisioned_userlist(request,userrole) logger.info("All user found under onramp PROD resource : ") logger.debug(onramp_provisioned_users) #onramp_approved_usrlist=[] if onramp_provisioned_users != None: if userid in onramp_provisioned_users: user_status['onramp_prod']=subtypeoduser #Due to django template bug , defaultdict or nesteddict must be converted to python dict before passing to view user_status = dict(user_status) return render_to_response('search_result.html', {'userid':userid, 'userrole':userrole, 'lifecycle':lifecycle, 'user_status': sorted(user_status.items())},) #return render_to_response('search_result.html', {'userid':userid, 'userrole':userrole, 'lifecycle':lifecycle, # 'user_status': user_status},) elif lifecycle == 'dev': #-------------------------------------------------------------------------------------------------------- cpo_dev_onramp_resource = {'cpo-dev-superuser':1,'cpo-dev-sysadmin':2,'cpo-dev-networkadmin':3,'cpo-dev-operator':4,'cpo-dev-provisioner':5,} user_status = {} subtype_of_user= cpo_dev_onramp_resource[userrole] #get_usrlist_frm_cpo_dev1 = USER_TABLE.objects.using('eon_rtp3_1_l').filter(username=userid).filter(subtypeofuser=1).order_by('userid') get_usrlist_frm_cpo_dev1 = USER_TABLE.objects.using('eon_rtp3_1_l').filter(username=userid,subtypeofuser=subtype_of_user).order_by('userid') logger.info("**********Fetched data from dev1") logger.debug(get_usrlist_frm_cpo_dev1) for 
user in get_usrlist_frm_cpo_dev1: user_status['eon-rtp3-1-l']= user.subtypeofuser #get_usrlist_frm_cpo_dev2 = USER_TABLE.objects.using('eon_rtp3_2_l').filter(username=userid).filter(subtypeofuser=1).order_by('userid') get_usrlist_frm_cpo_dev2 = USER_TABLE.objects.using('eon_rtp3_2_l').filter(username=userid,subtypeofuser=subtype_of_user).order_by('userid') for user in get_usrlist_frm_cpo_dev2: user_status['eon-rtp3-2-l']= user.subtypeofuser #cpo_dev_onramp_resource = {1:'cpo-dev-superuser',2:'cpo-dev-sysadmin',3:'cpo-dev-netwrokadmin',4:'cpo-dev-operator',5:'cpo-dev-provisioner',} #onramp_provisioned_users = get_cpo_provisioned_userlist(userrole) onramp_provisioned_users = get_local_cpo_provisioned_userlist(request,userrole) logger.info("All user found under onramp resource : ") logger.debug(onramp_provisioned_users) if onramp_provisioned_users != None: if userid in onramp_provisioned_users: logger.info("user found") user_status['onramp_dev']=subtype_of_user #Due to django template bug , defaultdict or nesteddict must be converted to python dict before passing to view #user_status = dict(user_status) #-------------------------------------------------------------------------------------------------------- return render_to_response('search_result.html',{'userid':userid, 'userrole':userrole,'lifecycle':lifecycle,'user_status':sorted(user_status.items())}) elif lifecycle == 'prod': #-------------------------------------------------------------------------------------------------------- cpo_prod_onramp_resource = {'cpo-prod-superuser':1,'cpo-prod-sysadmin':2,'cpo-prod-networkadmin':3,'cpo-prod-operator':4,'cpo-prod-provisioner':5,} user_status = {} subtype_of_user= cpo_prod_onramp_resource[userrole] #get_usrlist_frm_cpo_dev1 = USER_TABLE.objects.using('eon_rtp3_1_l').filter(username=userid).filter(subtypeofuser=1).order_by('userid') get_usrlist_frm_cpo_prod1 = USER_TABLE.objects.using('eon_rtp3_1_l').filter(username=userid,subtypeofuser=subtype_of_user).order_by('userid') logger.info("**********Fetched data from dev1") logger.debug(get_usrlist_frm_cpo_prod1) for user in get_usrlist_frm_cpo_prod1: user_status['eon-rch1-1-l']= user.subtypeofuser get_usrlist_frm_cpo_prod2 = USER_TABLE.objects.using('eon_rtp3_2_l').filter(username=userid,subtypeofuser=subtype_of_user).order_by('userid') for user in get_usrlist_frm_cpo_prod2: user_status['eon-rtp5-1-l']= user.subtypeofuser #cpo_dev_onramp_resource = {1:'cpo-dev-superuser',2:'cpo-dev-sysadmin',3:'cpo-dev-netwrokadmin',4:'cpo-dev-operator',5:'cpo-dev-provisioner',} #onramp_provisioned_users = get_cpo_provisioned_userlist(userrole) onramp_provisioned_users = get_local_cpo_provisioned_userlist(request,userrole) logger.info("All user found under onramp resource : ") logger.debug(onramp_provisioned_users) if onramp_provisioned_users != None: if userid in onramp_provisioned_users: logger.info("user found") user_status['onramp_prod']=subtype_of_user #Due to django template bug , defaultdict or nesteddict must be converted to python dict before passing to view #user_status = dict(user_status) #-------------------------------------------------------------------------------------------------------- return render_to_response('search_result.html',{'userid':userid, 'userrole':userrole,'lifecycle':lifecycle,'user_status':sorted(user_status.items())}) else: return render_to_response('search_result.html',{'userid':userid, 'userrole':userrole,'lifecycle':lifecycle,'user_status':'Hello'}) else: error_message = 'Please provide valid userid' return 
render_to_response('search_result.html', {'error_msg':error_message})
def flatten_params(params):
    flat_params = OrderedDict()
    for keys, v in nested_dict(params).iteritems_flat():
        if v is not None:
            flat_params['.'.join(keys)] = Variable(v, requires_grad=True)
    return flat_params
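# Usage sketch for flatten_params, assuming the legacy torch.autograd.Variable
# API this helper targets (the function above also needs OrderedDict and
# nested_dict in scope); the parameter layout below is invented.
import torch

params = {
    'conv0': {'weight': torch.randn(16, 3, 3, 3), 'bias': torch.zeros(16)},
    'fc': {'weight': torch.randn(10, 16)},
}
flat = flatten_params(params)
print(sorted(flat.keys()))  # ['conv0.bias', 'conv0.weight', 'fc.weight']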
def plot_dir_loss(d, not_plot_dir_str, only_plot_dir_str, min_loss_col, savefn): fn_ls = os.listdir(d) for j in not_plot_dir_str.split(':'): fn_ls = [i for i in fn_ls if not j in i] if only_plot_dir_str != '.': fn_ls = [i for i in fn_ls if i in only_plot_dir_str.split(':')] fn_ls = [i for i in fn_ls if i != 'readme.txt'] loss_dict = nested_dict() loss_ls = [] for i in fn_ls: log = d + '/' + i + '/log.txt' train_shell = d + '/' + i + '/train.sh' model_parameter_dict = {} with open(train_shell, 'r') as TRAIN: for line in TRAIN: line = line.strip('\n') line = line.replace('RNA-structure-profile-imputation', 'ShapeImputation') if 'CUDA_VISIBLE_DEVICES' in line: continue elif '--' not in line: # SAVEFN.write(line+'\n') pass else: arr = line.strip(' ').split(' ') arr[1] = '' if len(arr) == 2 else arr[1] model_parameter_dict[arr[0]] = arr[1] # print(arr) # print(model_parameter_dict) if '--batch_size' not in model_parameter_dict: model_parameter_dict['--batch_size'] = 100 if '--test_batch_size' not in model_parameter_dict: model_parameter_dict['--test_batch_size'] = 100 if os.path.isfile(log): print('process: {}'.format(i)) log_plot_savefn = log.replace('.txt', '.plot.pdf') loss_df = util.read_log(log, savefn=log_plot_savefn, test_batch_size=int(model_parameter_dict['--test_batch_size'])) if loss_df.shape[0] < 10: continue loss_min = loss_df.loc[loss_df[min_loss_col].idxmin()] loss_min.loc['epoch',] = loss_min.name loss_min.name = i # print(loss_min) loss_ls.append(loss_min) loss_df_all = pd.concat(loss_ls, axis=1) loss_df_all = loss_df_all[sorted(loss_df_all.columns)] print(loss_df_all) savefn_txt = savefn.replace('.pdf', '.csv') loss_df_all_T = loss_df_all.T loss_df_all_T['epoch'].dtype == 'int' loss_df_all_T.to_csv(savefn_txt, header=True, index=True, sep='\t', float_format='%.5f') fig,ax=plt.subplots(figsize=(max(8,0.5*len(fn_ls)),28)) cols = ['validate loss (train_nonull_validate_nonull)', 'validate loss (train_hasnull_validate_hasnull)', 'validate loss (train_hasnull_validate_onlynull)', 'validate loss (train_hasnull_validate_nonull)'] for col in cols: ax.plot(loss_df_all.loc[col,], label=col, marker='.') for i in range(0, loss_df_all.shape[1]): plt.axvline(x=i, ymin=0, ymax=1, ls='--', lw='0.2', color='grey') plt.xticks(range(0, len(loss_df_all.columns)), loss_df_all.columns, rotation=90) plt.legend() plt.tight_layout() plt.savefig(savefn) plt.close()
def dest_reveal_new(G, adversary, delay, amount, pre, next): T = nd.nested_dict() flag1 = True anon_sets = nd.nested_dict() level = 0 index = 0 T[0]["nodes"] = [next] T[0]["delays"] = [delay] print(delay) T[0]["previous"] = [-1] T[0]["visited"] = [[pre, adversary, next]] T[0]["amounts"] = [amount] # pr = pf.edge_prob(G.edges[pre,adversary]["LastFailure"])*pf.edge_prob(G.edges[adversary,next]["LastFailure"]) # T[0]["probs"] = [pr] x = -1 # if T[0]["delays"][0] == 0: # maybe_targets[index]["target"] = next # maybe_targets[index]["path"] = [adversary,next] # maybe_targets[index]["delay"] = delay # maybe_targets[index]["amt"] = amount # maybe_targets[index]["tech"] = 0 # maybe_targets[index]["sources"] = source_reveal(G, [pre, adversary,next], 0, 0, amount, pre, next,adversary) # index += 1 # paths = pf.Dijkstra_all_paths(G,next,amount,pf.lnd_cost_fun) # for u in paths: # if pre in paths[u]: # ind = paths[u].index(pre) # if(paths[u][ind:] == [pre,adversary,next]): # anon_sets[index] = [u,next] # print("match",u,next) paths = nd.nested_dict() num_paths = 0 flag = True while (flag): level += 1 if (level == 3): flag1 = False break t1 = T[level - 1]["nodes"] d1 = T[level - 1]["delays"] p1 = T[level - 1]["previous"] v1 = T[level - 1]["visited"] a1 = T[level - 1]["amounts"] # pr1 = T[level - 1]["probs"] t2 = [] d2 = [] p2 = [] v2 = [[]] a2 = [] # pr2 = [] for i in range(0, len(t1)): u = t1[i] # if v1[i] == []: # print(u) # print("yes",u) for [u, v] in G.out_edges(u): #print(v) # p = p1[i] # flag1 = 0 # level2 = level - 2 # while(level2>=1): # if(T[level2]["nodes"][p] == v): # flag1 = 1 # break # else: # p = T[level2]["previous"][p] # level2 = level2 - 1 # pr = pf.edge_prob(G.edges[u,v]["LastFailure"])*pr1[i] if (v != pre and v != adversary and v != next and v not in v1[i] and (G.edges[u, v]["Balance"] + G.edges[v, u]["Balance"]) >= ((a1[i] - G.edges[u, v]["BaseFee"]) / (1 + G.edges[u, v]["FeeRate"]))): t2.append(v) d2.append(d1[i] - G.edges[u, v]["Delay"]) p2.append(i) v2.append(v1[i] + [v]) a2.append(((a1[i] - G.edges[u, v]["BaseFee"]) / (1 + G.edges[u, v]["FeeRate"]))) # pr2.append(pr) T[level]["nodes"] = t2 #print(level,t2,d2) T[level]["delays"] = d2 T[level]["previous"] = p2 T[level]["visited"] = v2 T[level]["amounts"] = a2 # T[level]["probs"] = pr2 #print(t2,d2,p2) print(level, len(t2)) if (len(t2) == 0): flag = False level = level - 1 while (level >= 0): t = T[level]["nodes"] d = T[level]["delays"] p = T[level]["previous"] a = T[level]["amounts"] v = T[level]["visited"] #print(level) # if(level == 0): # print(t,d) for i in range(0, len(t)): #if(d[i] == 0): path = [] level1 = level path.append(T[level1]["nodes"][i]) loc = T[level1]["previous"][i] while (level1 > 0): level1 = level1 - 1 path.append(T[level1]["nodes"][loc]) loc = T[level1]["previous"][loc] path.reverse() path = [pre, adversary] + path if (len(path) == len(set(path))): #print(path, level) amt = a[i] pot = path[len(path) - 1] sources = deanonymize(G, pot, path, amt, pf.lnd_cost_fun) if sources != None: #print("match",pot) anon_sets[pot] = list(sources) # if paths == [pre, adversary] + path: # maybe_targets[index]["target"] = pot # maybe_targets[index]["path"] = [adversary] + path # maybe_targets[index]["delay"] = delay # maybe_targets[index]["amt"] = amt # maybe_targets[index]["tech"] = 0 # maybe_targets[index]["sources"] = source_reveal(G, [pre, adversary] + path, 0, 0, amt, pre, next, # adversary) # index += 1 # for u in paths: # if pre in paths[u]: # ind = paths[u].index(pre) # if paths[u][ind:] == [pre,adversary] + path: # 
anon_sets[index] = [u,pot] # index+=1 # print("match",u,pot) level = level - 1 return anon_sets, flag1
def deanonymize(G, target, path, amt, cost_function): # if(target == 500): # print("target", 6946,path) pq = PriorityQueue() delays = {} costs = {} paths = nd.nested_dict() paths1 = nd.nested_dict() dists = {} visited = set() previous = {} done = {} prob = {} sources = [] pre = path[0] adv = path[1] nxt = path[2] for node in G.nodes(): previous[node] = -1 delays[node] = -1 costs[node] = max paths[node] = [] dists[node] = max done[node] = 0 paths1[node] = [] prob[node] = 1 dists[target] = 0 paths[target] = [target] costs[target] = amt delays[target] = 0 pq.put((dists[target], target)) flag1 = 0 flag2 = 0 while (0 != pq.qsize()): curr_cost, curr = pq.get() if curr_cost > dists[curr]: continue visited.add(curr) if target == 500 and (curr == pre): print(curr) for [v, curr] in G.in_edges(curr): # if v == pre and curr == adv: # print("yes1", pre,curr,G.edges[v, curr]["Balance"] + G.edges[curr, v]["Balance"],costs[curr]) if (G.edges[v, curr]["Balance"] + G.edges[curr, v]["Balance"] >= costs[curr]) and v not in visited: # if v==pre: # print("yes",pre) if done[v] == 0: paths1[v] = [v] + paths[curr] done[v] = 1 cost = dists[curr] + cost_function(G, costs[curr], curr, v) if cost < dists[v]: paths[v] = [v] + paths[curr] # if v==pre: # print(v,paths[v]) dists[v] = cost delays[v] = delays[curr] + G.edges[v, curr]["Delay"] costs[v] = costs[curr] + G.edges[v, curr][ "BaseFee"] + costs[curr] * G.edges[v, curr]["FeeRate"] # prob[v] = pf.edge_prob(G.edges[v,curr]["LastFailure"])*prob[curr] pq.put((dists[v], v)) if (curr in path[1:]): ind = path.index(curr) if (paths[curr] != path[ind:]): return None if curr == adv: #print("ad", paths[curr]) flag1 = 1 if (curr == pre): # print(pre,paths[pre]) if paths[pre] != path: return [pre] else: sources.append(pre) flag2 = 1 if flag1 == 1 and flag2 == 1: if pre in paths[curr]: for [v, curr] in G.in_edges(curr): if v not in paths[curr]: sources.append(v) sources = set(sources) return sources
def __init__(self, cache_cfg_file):
    self.cache_str = ""
    self.cache_type = ""
    self.cache_cfg_file = cache_cfg_file
    self.section_dic = nested_dict()
def deanonymize_c(G, target, path, amt, fuzz): pq = PriorityQueue() cost_function = pf.c_cost_fun(fuzz) delays = {} costs = {} paths = nd.nested_dict() paths1 = nd.nested_dict() dists = {} visited = set() previous = {} done = {} # prob = {} sources = [] pre = path[0] adv = path[1] nxt = path[2] for node in G.nodes(): previous[node] = -1 delays[node] = -1 costs[node] = max paths[node] = [] dists[node] = max done[node] = 0 paths1[node] = [] # prob[node] = 1 dists[target] = 0 paths[target] = [target] costs[target] = amt delays[target] = 0 pq.put((dists[target], target)) flag1 = 0 flag2 = 0 while (0 != pq.qsize()): curr_cost, curr = pq.get() if curr_cost > dists[curr]: continue visited.add(curr) for [v, curr] in G.in_edges(curr): if (G.edges[v, curr]["Balance"] + G.edges[curr, v]["Balance"] >= costs[curr]) and v not in visited: if done[v] == 0 and G.nodes[v]["Tech"] == 1: paths1[v] = [v] + paths[curr] done[v] = 1 cost = dists[curr] + cost_function(G, costs[curr], curr, v) if cost < dists[v]: paths[v] = [v] + paths[curr] dists[v] = cost delays[v] = delays[curr] + G.edges[v, curr]["Delay"] costs[v] = costs[curr] + G.edges[v, curr][ "BaseFee"] + costs[curr] * G.edges[v, curr]["FeeRate"] # prob[v] = pf.edge_prob(G.edges[v,curr]["LastFailure"])*prob[curr] pq.put((dists[v], v)) # If at any point the sub-path found is not found to be optimal, this is definetely not the destination if using lnd since the sub-path from an intermediary to # the destination has to be the cheapest path from the intermediary to the destination. if (curr in path[1:]): ind = path.index(curr) if (paths[curr] != path[ind:]): return [] if curr == adv: flag1 = 1 if (curr == pre): # If pre is the source, the path from pre need to not match the path found since, the cost from the source to the second node is computed differently. # Moreover, the source would not choose the absolute cheapest path since the first hop may not have sufficient forward balance. # Thus, pre has to be the source if the paths dont match, since the paths would only match if pre is an intermediary. if paths[pre] != path: return [pre] else: # if the paths do match, pre is just one possible source sources.append(pre) flag2 = 1 if flag1 == 1 and flag2 == 1: # since if pre is in the path from curr, the path from pre has to match the path we had found as it is the cheapest path from pre. This measns that curr # is a valid second node. So, all neighbors of curr that have not occured in the path are potential sources. if pre in paths[curr]: for [v, curr] in G.in_edges(curr): if v not in paths[curr] and G.nodes[v]["Tech"] == 1: sources.append(v) sources = list(set(sources)) return sources
def flatten_stats(stats):
    flat_stats = OrderedDict()
    for keys, v in nested_dict(stats).iteritems_flat():
        flat_stats['.'.join(keys)] = v
    return flat_stats
def main(file_name, stop_words, model_output, baseline_output): # Read in the survey CSV df = get_dataframe_from_csv(file_name) # Create training data df2018 = get_training_data(df) # Create classes list classes = ['story', 'ask_hn', 'show_hn', 'poll'] # Dictionary that will hold all necessary data related to word # such as : frequency and conditional probability for each class words_dict = nd.nested_dict() # Training data for classes for class_name in classes: df2018_class_name = df2018[df2018['Post Type'] == class_name] list_of_story_title = df_title_to_list(df2018_class_name) words_dict = title_to_vocab(list_of_story_title, class_name, words_dict) # For experiment 1.3.1 and 1.3.3 # Remove list of predefined words from vocabulary for word in stop_words: words_dict.pop(word, None) # For experiment 1.3.2 if model_output == "wordlength-model.txt": words_dict_iterator = dict( words_dict ) # recreating another copy of the dict for iteration because size changes for word in words_dict_iterator: if len(word) <= 2 or len(word) >= 9: words_dict.pop(word, None) # Fill missing values for specific class in dictionary for each word frequency with 0 # To prevent calculation errors later for class_name in classes: fill_non_existing_values(words_dict, class_name) # Convert dict of frequencies to pandas dataframe to simplify later calculations df_word_frequencies = convert_dict_to_dataframe(words_dict) # generate smoothed conditinal probabilities for each word in class for class_name in classes: words_dict = get_conditional_probability(words_dict, df_word_frequencies, class_name) # generate model output file if len(model_output) > 0: generate_model_file(model_output, words_dict) # PART 2 # Create testing data df2019 = get_testing_data(df) # Find all duplicate rows (titles) # get_duplicated_titles(df2019) # Getting priors of testing set dict_priors = generate_priors(df2018) # Transforming column of pandas dataframe such as key: title, value: post Type # del dict dict_titles_testing = dict(zip(df2019['Title'], df2019['Post Type'])) # Initializing dictionary that will hold score of each title for each class with prediction title_score_dict = nd.nested_dict() # Getting list of titles list_of_title = df_title_to_list(df2019) # Classification using Naive Bays Classification : title_score_dict = classify_using_naive_bays(list_of_title, dict_titles_testing, dict_priors, words_dict) # Generating Baseline File if len(baseline_output) > 0: generate_baseline_file(baseline_output, title_score_dict) # Return prediction and estimation list to calculate performance if len(baseline_output) == 0: result = get_y_true_y_prediction(title_score_dict) return result if baseline_output == "baseline-result.txt": result = get_y_true_y_prediction(title_score_dict) result.append(len(words_dict)) generate_vocab_file(words_dict) generate_remove_word_file() return result
import iothub_client import serial import json import time import datetime import sys from iothub_client import * from nested_dict import nested_dict message_timeout = 10000 receive_context = 0 IoTMessage = '' counter = 0 workingSecond = datetime.datetime.now().second sensorDataDict = nested_dict(2, int) protocol = IoTHubTransportProvider.AMQP # example connection_string = "HostName=myhub.azure-devices.net;DeviceId=mydevice;SharedAccessKey=mykey=" connection_string = "{ Insert string here }" def receive_message_callback(message, counter): global weHavePicture, IoTMessage buffer = message.get_bytearray() size = len(buffer) message = buffer[:size].decode('utf-8') print("Message received: %s" % message) try: IoTMessage = json.loads(message) except:
def rep_compare(rep1_out=None, rep1_validate=None, rep1_predict=None, rep2_out=None, rep2_validate=None, rep2_predict=None, tx_null_pct=0.3, savefn=None): if rep1_out is None: rep1_out = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape_rep1/shape.c200T2M0m0.out' if rep1_validate is None: rep1_validate = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape_rep1/shape.c200T2M0m0.out.windowsHasNull/windowLen100.sliding100.validation.txt' if rep1_predict is None: rep1_predict = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape_rep1/shape.c200T2M0m0.out.windowsHasNull/windowLen100.sliding100.validation.prediction_trainHasNull_lossAll.txt' if rep2_out is None: rep2_out = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape_rep2/shape.c200T2M0m0.out' if rep2_validate is None: rep2_validate = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape_rep2/shape.c200T2M0m0.out.windowsHasNull/windowLen100.sliding100.validation.txt' if rep2_predict is None: rep2_predict = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape_rep2/shape.c200T2M0m0.out.windowsHasNull/windowLen100.sliding100.validation.prediction_trainHasNull_lossAll.txt' if savefn is None: savefn = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out.windowsHasNull/shape_dist/rep.corr.txt' cols = [ 'tx', 'length', 'start', 'end', 'mean_reactivity', 'null_pct', 'seq', 'fragment_shape', 'fragment_shape(true)' ] df_rep1_validate = pd.read_csv(rep1_validate, header=None, sep='\t') df_rep1_validate.columns = cols df_rep1_predict = pd.read_csv(rep1_predict, header=None, sep='\t') df_rep1_validate['fragment_shape(predict)'] = df_rep1_predict[0] df_rep2_validate = pd.read_csv(rep2_validate, header=None, sep='\t') df_rep2_validate.columns = cols df_rep2_predict = pd.read_csv(rep2_predict, header=None, sep='\t') df_rep2_validate['fragment_shape(predict)'] = df_rep2_predict[0] df_rep = df_rep1_validate.merge(df_rep2_validate, how='inner', on=['tx', 'start', 'end']) print("df shape: rep1,rep2,rep", df_rep1_validate.shape, df_rep2_validate.shape, df_rep.shape) out_dict1 = util.read_icshape_out(rep1_out) out_dict2 = util.read_icshape_out(rep2_out) valid_tx = [] for i in df_rep['tx']: rep1_null_pct = out_dict1[i]['reactivity_ls'].count('NULL') / ( float(out_dict1[i]['length']) - 35) rep2_null_pct = out_dict2[i]['reactivity_ls'].count('NULL') / ( float(out_dict2[i]['length']) - 35) if rep1_null_pct > tx_null_pct: continue if rep2_null_pct > tx_null_pct: continue valid_tx.append(i) print("tx valid number", len(set(valid_tx))) df_rep = df_rep[df_rep['tx'].isin(valid_tx)] df_rep.to_csv(savefn, header=True, index=False, sep='\t') tx_shape_dict = nested_dict(2, list) for tx in set(valid_tx): df_tx = df_rep[df_rep['tx'] == tx] for index, i in df_tx.iterrows(): for v in i['fragment_shape(true)_x'].split(','): tx_shape_dict[tx]['rep1_before'].append(float(v)) for v in i['fragment_shape(true)_y'].split(','): tx_shape_dict[tx]['rep2_before'].append(float(v)) for v in i['fragment_shape(predict)_x'].split(','): tx_shape_dict[tx]['rep1_after'].append(float(v)) for v in i['fragment_shape(predict)_y'].split(','): tx_shape_dict[tx]['rep2_after'].append(float(v)) corr_dict = nested_dict(2, int) for tx in tx_shape_dict: v1 = [ i for i, j in zip(tx_shape_dict[tx]['rep1_before'], tx_shape_dict[tx]['rep2_before']) if i >= 0 and j >= 0 ] v2 = [ j for i, j in zip(tx_shape_dict[tx]['rep1_before'], tx_shape_dict[tx]['rep2_before']) if i >= 0 and j >= 0 ] v3 = [ i 
for i, j in zip(tx_shape_dict[tx]['rep1_after'], tx_shape_dict[tx]['rep2_after']) if i >= 0 and j >= 0 ] v4 = [ j for i, j in zip(tx_shape_dict[tx]['rep1_after'], tx_shape_dict[tx]['rep2_after']) if i >= 0 and j >= 0 ] # print(tx,v1,v2,v3,v4) if len(v1) <= 10 or len(v3) <= 10: continue c1, p1 = stats.pearsonr(v1, v2) c2, p2 = stats.pearsonr(v3, v4) corr_dict[tx]['corr_before'] = c1 corr_dict[tx]['corr_before(p)'] = p1 corr_dict[tx]['corr_before(n)'] = len(v1) corr_dict[tx]['corr_after'] = c2 corr_dict[tx]['corr_after(p)'] = p2 corr_dict[tx]['corr_after(n)'] = len(v3) corr_df = pd.DataFrame.from_dict(corr_dict, orient='index') # print(corr_df) corr_df.to_csv(savefn.replace('.pdf', '.txt'), header=True, index=True, sep='\t') corr_df['# imputated nt'] = corr_df['corr_after(n)'] - corr_df[ 'corr_before(n)'] fig, ax = plt.subplots(figsize=(8, 8)) sns.scatterplot(x='corr_before', y='corr_after', data=corr_df, ax=ax, hue='# imputated nt') ax.set_xlim(0.2, 1.05) ax.set_ylim(0.2, 1.05) plt.xticks([0.2, 0.4, 0.6, 0.8, 1.0], [0.2, 0.4, 0.6, 0.8, 1.0]) plt.yticks([0.2, 0.4, 0.6, 0.8, 1.0], [0.2, 0.4, 0.6, 0.8, 1.0]) plt.tight_layout() plt.savefig(savefn) plt.close()
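# A minimal sketch of the per-transcript correlation step above: keep only positions where
# both replicates carry a valid (>= 0) reactivity, then compute the Pearson correlation with
# scipy.stats.pearsonr, as the function does. The values below are illustrative; the real
# code additionally skips transcripts with 10 or fewer valid pairs.
from scipy import stats

rep1 = [0.10, -999, 0.50, 0.70, 0.00, 0.90]   # -999 standing in for a masked/NULL position
rep2 = [0.20, 0.30, -999, 0.60, 0.10, 0.80]
pairs = [(i, j) for i, j in zip(rep1, rep2) if i >= 0 and j >= 0]
v1 = [i for i, j in pairs]
v2 = [j for i, j in pairs]
corr, pval = stats.pearsonr(v1, v2)
print(corr, pval)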
def flatten_stats(stats): return OrderedDict( ('.'.join(k), v) for k, v in nested_dict(stats).iteritems_flat())
def matrix_to_dict(m, area_list, structure, external=None): """ Convert a matrix containing connectivity information of a network defined by structure to a dictionary. Parameters ---------- m : array-like Matrix to be converted. area_list: list List of areas in the network. Defines the order of areas in the matrix to be created. structure : dict Structure of the network. Define the populations for each single area. external: numpy.ndarray or dict If None, do not include connectivity from external sources in the return dictionary. If numpy.ndarray or dict, use the connectivity given to add an entry 'external' for each population. Defaults to None. """ dic = nested_dict() for area, area2 in product(area_list, area_list): mask = create_mask(structure, target_areas=[area], source_areas=[area2], external=False) if external is not None: x = m[mask[:, :-1]] else: x = m[mask] if area == 'TH' and area2 == 'TH': x = x.reshape((6, 6)) x = np.insert(x, 2, np.zeros((2, 6), dtype=float), axis=0) x = np.insert(x, 2, np.zeros((2, 8), dtype=float), axis=1) elif area2 == 'TH': x = x.reshape((8, 6)) x = np.insert(x, 2, np.zeros((2, 8), dtype=float), axis=1) elif area == 'TH': x = x.reshape((6, 8)) x = np.insert(x, 2, np.zeros((2, 8), dtype=float), axis=0) else: x = x.reshape((8, 8)) for i, pop in enumerate(population_list): for j, pop2 in enumerate(population_list): if x[i][j] < 1e-20: x[i][j] = 0. dic[area][pop][area2][pop2] = x[i][j] if external is not None: if isinstance(external, np.ndarray): for area in dic: for pop in population_list: if pop in structure[area]: mask = create_vector_mask(structure, areas=[area], pops=[pop]) dic[area][pop]['external'] = { 'external': external[mask][0] } else: dic[area][pop]['external'] = {'external': 0.} if isinstance(external, dict): for area in dic: for pop in dic[area]: dic[area][pop]['external'] = external[area][pop] return dic.to_dict()
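# A hedged sketch of the return structure of matrix_to_dict: a plain nested dict keyed as
# dic[target_area][target_pop][source_area][source_pop] -> connection value. The area and
# population names below are illustrative and not taken from area_list or population_list.
from nested_dict import nested_dict

dic = nested_dict()
dic['V1']['23E']['V2']['23E'] = 0.042   # all four levels auto-create on assignment
conn = dic.to_dict()                    # convert back to an ordinary dict, as the function does
print(conn['V1']['23E']['V2']['23E'])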
def deanonymize_c(G, target, path, amt, fuzz): pq = PriorityQueue() cost_function = pf.c_cost_fun(fuzz) delays = {} costs = {} paths = nd.nested_dict() paths1 = nd.nested_dict() dists = {} visited = set() previous = {} done = {} # prob = {} sources = [] pre = path[0] adv = path[1] nxt = path[2] for node in G.nodes(): previous[node] = -1 delays[node] = -1 costs[node] = float("inf") paths[node] = [] dists[node] = float("inf") done[node] = 0 paths1[node] = [] # prob[node] = 1 dists[target] = 0 paths[target] = [target] costs[target] = amt delays[target] = 0 pq.put((dists[target], target)) flag1 = 0 flag2 = 0 while (0 != pq.qsize()): curr_cost, curr = pq.get() if curr_cost > dists[curr]: continue visited.add(curr) for [v, curr] in G.in_edges(curr): if (G.edges[v, curr]["Balance"] + G.edges[curr, v]["Balance"] >= costs[curr]) and v not in visited: if done[v] == 0 and G.nodes[v]["Tech"] == 1: paths1[v] = [v] + paths[curr] done[v] = 1 cost = dists[curr] + cost_function(G, costs[curr], curr, v) if cost < dists[v]: paths[v] = [v] + paths[curr] dists[v] = cost delays[v] = delays[curr] + G.edges[v, curr]["Delay"] costs[v] = costs[curr] + G.edges[v, curr]["BaseFee"] + costs[curr] * G.edges[v, curr]["FeeRate"] # prob[v] = pf.edge_prob(G.edges[v,curr]["LastFailure"])*prob[curr] pq.put((dists[v], v)) if (curr in path[1:]): ind = path.index(curr) if (paths[curr] != path[ind:]): return [] if curr == adv: flag1 = 1 # if flag1 == 1: # print("path", paths[adv]) if (curr == pre): if paths[pre] != path: if G.nodes[pre]["Tech"] != 1: return [] return [pre] else: if G.nodes[pre]["Tech"] == 1: sources.append(pre) flag2 = 1 if flag1 == 1 and flag2 == 1: if pre in paths[curr]: for [v, curr] in G.in_edges(curr): if v not in paths[curr] and G.nodes[v]["Tech"] == 1: sources.append(v) sources = list(set(sources)) return sources
def load_degree_data(fn): """ Load connectivity information from json file and store indegrees in dictionary. Parameters ---------- fn : string File name of json file. The file has to contain a dictionary with a subdictionary called 'synapses' containing the synapses between any pair of populations at the top level. Returns ------- indegrees : dict Indegrees on population level. Dictionary levels are sorted as target area --> target population --> source area --> source population. indegrees_areas : dict Indegrees on area level. Dictionary levels are sorted as target area --> source area outdegrees : dict Outdegrees on population level. Dictionary levels are sorted as target area --> target population --> source area --> source population. outdegrees : dict Outdegrees on area level. Dictionary levels are sorted as target area --> source area """ f = open(fn) dat = json.load(f) f.close() syn = dat['synapses'] num = dat['neuron_numbers'] indegrees = nested_dict() outdegrees = nested_dict() for target_area, target_pop, source_area, source_pop in product( complete_area_list, population_list, complete_area_list, population_list): numT = num[target_area][target_pop] if numT > 0.0: indegrees[target_area][target_pop][source_area][source_pop] = syn[ target_area][target_pop][source_area][source_pop] / numT else: # assign 0 to indegrees onto non-existing populations indegrees[target_area][target_pop][source_area][source_pop] = 0.0 if source_area != 'external': numS = num[source_area][source_pop] if numS > 0.0: outdegrees[target_area][target_pop][source_area][ source_pop] = syn[target_area][target_pop][source_area][ source_pop] / numS else: # assign 0 to outdegrees from non-existing populations outdegrees[target_area][target_pop][source_area][ source_pop] = 0.0 for target_area, target_pop, ext_pop in product(complete_area_list, population_list, ['external']): numT = num[target_area][target_pop] if numT > 0.0: indegrees[target_area][target_pop]['external'][ext_pop] = syn[ target_area][target_pop]['external'][ext_pop] / numT else: indegrees[target_area][target_pop]['external'][ext_pop] = 0.0 indegrees_areas = area_level_dict(indegrees, num, degree='indegree') outdegrees_areas = area_level_dict(outdegrees, num, degree='outdegree') return (indegrees.to_dict(), indegrees_areas, outdegrees.to_dict(), outdegrees_areas)
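# A worked example of the degree computation above (numbers are made up): the indegree onto a
# target population is its incoming synapse count divided by the number of neurons in that
# target population; the outdegree divides the same synapse count by the source population size.
synapses_between = 1.2e6       # synapses from (source_area, source_pop) to (target_area, target_pop)
num_target_neurons = 20000.0
num_source_neurons = 15000.0
indegree = synapses_between / num_target_neurons if num_target_neurons > 0 else 0.0
outdegree = synapses_between / num_source_neurons if num_source_neurons > 0 else 0.0
print(indegree, outdegree)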
def coll_adv_attack(self, G, adversary, delay, amount, pre, next, advpath): # tp1_begin = time.time() T = nd.nested_dict() anon_sets = {} flag1 = True level = 0 T[0]["nodes"] = [next] T[0]["delays"] = [delay] T[0]["previous"] = [-1] T[0]["visited"] = [[pre,adversary,next]] T[0]["amounts"] = [amount] flag = True # cache sources for all candidate dovetail nodes (they are identical) sourcesets = {} while(flag): level+=1 if(level == 4): flag1 = False break t1 = T[level - 1]["nodes"] d1 = T[level - 1]["delays"] v1 = T[level - 1]["visited"] a1 = T[level - 1]["amounts"] t2 = [] d2 = [] p2 = [] v2 = [[]] a2 = [] for i in range(0,len(t1)): u = t1[i] for [u,v] in G.out_edges(u): if(v!=pre and v!=adversary and v!=next and v not in v1[i] and (d1[i] - G.edges[u,v]["Delay"])>=0 and (G.edges[u,v]["Balance"]+G.edges[v,u]["Balance"])>=((a1[i] - G.edges[u, v]["BaseFee"]) / (1 + G.edges[u, v]["FeeRate"]))): t2.append(v) d2.append(d1[i] - G.edges[u,v]["Delay"]) p2.append(i) v2.append(v1[i]+[v]) a2.append(((a1[i] - G.edges[u, v]["BaseFee"]) / (1 + G.edges[u, v]["FeeRate"]))) T[level]["nodes"] = t2 T[level]["delays"] = d2 T[level]["previous"] = p2 T[level]["visited"] = v2 T[level]["amounts"] = a2 if(len(t2) == 0): flag = False level = level - 1 while(level>=0): t = T[level]["nodes"] d = T[level]["delays"] p = T[level]["previous"] a = T[level]["amounts"] v = T[level]["visited"] for i in range(0, len(t)): if d[i] == 0: path = [] level1 = level path.append(T[level1]["nodes"][i]) loc = T[level1]["previous"][i] while (level1 > 0): level1 = level1 - 1 path.append(T[level1]["nodes"][loc]) loc = T[level1]["previous"][loc] path.reverse() path = [pre,adversary]+path if (len(path) == len(set(path))): amt = a[i] dl = d[i] pot = path[len(path) - 1] # find earliest possible dovetail node for this path dove, dove_index = self.get_dovetail(G, advpath, path, amt) if dove_index != -1: if dove not in sourcesets: fullpath = advpath + path[1:] ind = len(fullpath) - 1 while ind > dove_index: amt += G.edges[fullpath[ind-1], fullpath[ind]]["BaseFee"] + amt * G.edges[fullpath[ind-1], fullpath[ind]]["FeeRate"] dl += G.edges[fullpath[ind-1], fullpath[ind]]["Delay"] ind -= 1 # tp2_begin = time.time() sources = self.deanonymize(G,dove,fullpath[:dove_index+1],amt,dl) # tp2_end = time.time() # print("Time for candidate {}: {} seconds".format(pot, tp2_end - tp2_begin)) sourcesets[dove] = sources else: # print("Source from cache of dovetail {}.".format(dove)) sources = sourcesets[dove] if sources != None and len(sources) > 0: anon_sets[pot] = list(sources) # else: # print("Dovetail not found for candidate {}".format(pot)) level = level - 1 # tp1_end = time.time() # print("Time for full: {} seconds".format( tp1_end - tp1_begin)) return anon_sets, flag1
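# A small sketch of the amount bookkeeping in the level expansion above: when a path is extended
# one hop forward, the amount the next node receives is obtained by inverting the forwarding-fee
# relation amount_in = amount_out + BaseFee + amount_out * FeeRate. The fee values are illustrative.
def amount_after_hop(amount_in, base_fee, fee_rate):
    return (amount_in - base_fee) / (1 + fee_rate)

amount_out = amount_after_hop(1001.1, 1.0, 0.0001)
# sanity check: re-applying the forwarding fee recovers (approximately) the incoming amount
assert abs(amount_out + 1.0 + amount_out * 0.0001 - 1001.1) < 1e-9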
def add_events(self, company_id, events, worker): actions = [] task_ids = set() task_iteration = defaultdict(lambda: 0) task_last_events = nested_dict( 3, dict) # task_id -> metric_hash -> variant_hash -> MetricEvent for event in events: # remove spaces from event type if "type" not in event: raise errors.BadRequest("Event must have a 'type' field", event=event) event_type = event["type"].replace(" ", "_") if event_type not in EVENT_TYPES: raise errors.BadRequest( "Invalid event type {}".format(event_type), event=event, types=EVENT_TYPES, ) event["type"] = event_type # @timestamp indicates the time the event is written, not when it happened event["@timestamp"] = es_factory.get_es_timestamp_str() # for backward bomba-tavili-tea if "ts" in event: event["timestamp"] = event.pop("ts") # set timestamp and worker if not sent if "timestamp" not in event: event["timestamp"] = es_factory.get_timestamp_millis() if "worker" not in event: event["worker"] = worker # force iter to be a long int iter = event.get("iter") if iter is not None: iter = int(iter) event["iter"] = iter # used to have "values" to indicate array. no need anymore if "values" in event: event["value"] = event["values"] del event["values"] index_name = EventMetrics.get_index_name(company_id, event_type) es_action = { "_op_type": "index", # overwrite if exists with same ID "_index": index_name, "_type": "event", "_source": event, } # for "log" events, don't assing custom _id - whatever is sent, is written (not overwritten) if event_type != "log": es_action["_id"] = self._get_event_id(event) else: es_action["_id"] = dbutils.id() task_id = event.get("task") if task_id is not None: es_action["_routing"] = task_id task_ids.add(task_id) if iter is not None: task_iteration[task_id] = max(iter, task_iteration[task_id]) if event_type == EventType.metrics_scalar.value: self._update_last_metric_event_for_task( task_last_events=task_last_events, task_id=task_id, event=event) else: es_action["_routing"] = task_id actions.append(es_action) if task_ids: # verify task_ids with translate_errors_context(), TimingContext( "mongo", "task_by_ids"): res = Task.objects(id__in=task_ids, company=company_id).only("id") if len(res) < len(task_ids): invalid_task_ids = tuple( set(task_ids) - set(r.id for r in res)) raise errors.bad_request.InvalidTaskId( company=company_id, ids=invalid_task_ids) errors_in_bulk = [] added = 0 chunk_size = 500 with translate_errors_context(), TimingContext("es", "events_add_batch"): # TODO: replace it with helpers.parallel_bulk in the future once the parallel pool leak is fixed with closing( helpers.streaming_bulk( self.es, actions, chunk_size=chunk_size, # thread_count=8, refresh=True, )) as it: for success, info in it: if success: added += chunk_size else: errors_in_bulk.append(info) remaining_tasks = set() now = datetime.utcnow() for task_id in task_ids: # Update related tasks. For reasons of performance, we prefer to update all of them and not only those # who's events were successful updated = self._update_task( company_id=company_id, task_id=task_id, now=now, iter=task_iteration.get(task_id), last_events=task_last_events.get(task_id), ) if not updated: remaining_tasks.add(task_id) continue if remaining_tasks: TaskBLL.set_last_update(remaining_tasks, company_id, last_update=now) # Compensate for always adding chunk_size on success (last chunk is probably smaller) added = min(added, len(actions)) return added, errors_in_bulk
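# A hedged sketch of the task_last_events bookkeeping above: nested_dict(3, dict) gives a
# three-level mapping (task_id -> metric_hash -> variant_hash) whose leaves default to a plain
# dict, so the latest scalar event per variant can be stored without explicit key checks.
# The identifiers and values below are illustrative.
from nested_dict import nested_dict

task_last_events = nested_dict(3, dict)
task_last_events['task-1']['metric-hash']['variant-hash'] = {
    'timestamp': 1584000000000,
    'iter': 42,
    'value': 0.73,
}
print(task_last_events['task-1']['metric-hash']['variant-hash']['iter'])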
def deanonymize(self, G,target,path,amt,dl): pq = PriorityQueue() delays = {} costs = {} paths = nd.nested_dict() paths1 = nd.nested_dict() dists = {} visited = set() previous = {} done = {} prob = {} sources = [] pre = path[0] adv = path[1] # nxt = path[2] for node in G.nodes(): previous[node] = -1 delays[node] = -1 costs[node] = inf paths[node] = [] dists[node] = inf done[node] = 0 paths1[node] = [] prob[node] = 1 dists[target] = 0 paths[target] = [target] costs[target] = amt delays[target] = dl pq.put((dists[target],target)) flag1 = 0 flag2 = 0 while(0!=pq.qsize()): curr_cost,curr = pq.get() if curr_cost > dists[curr]: continue visited.add(curr) for [v,curr] in G.in_edges(curr): if (G.edges[v, curr]["Balance"] + G.edges[curr, v]["Balance"] >= costs[curr]) and v not in visited: if done[v] == 0: paths1[v] = [v]+paths[curr] done[v] = 1 cost = dists[curr] + self.cost_function(G,costs[curr],curr,v) if cost < dists[v]: paths[v] = [v]+paths[curr] dists[v] = cost delays[v] = delays[curr] + G.edges[v,curr]["Delay"] costs[v] = costs[curr] + G.edges[v, curr]["BaseFee"] + costs[curr] * G.edges[v, curr]["FeeRate"] pq.put((dists[v],v)) if(curr in path[1:]): ind = path.index(curr) if(paths[curr]!=path[ind:]): return None if curr == adv: flag1 = 1 if(curr == pre): if paths[pre] != path: return [pre] else: sources.append(pre) flag2 = 1 if flag1 == 1 and flag2 == 1: if pre in paths[curr]: for [v,curr] in G.in_edges(curr): if v not in paths[curr]: sources.append(v) sources = set(sources) return sources
def create_synchrony(self, **keywords): """ Calculate synchrony as the coefficient of variation of the population rate and store in member synchrony. Uses helper function synchrony. If the synchrony has previously been stored with the same parameters, they are loaded from file. Parameters ---------- t_min : float, optional Minimal time in ms of the simulation to take into account for the calculation. Defaults to 500 ms. t_max : float, optional Maximal time in ms of the simulation to take into account for the calculation. Defaults to the simulation time. areas : list, optional Which areas to include in the calculcation. Defaults to all loaded areas. pops : list or {'complete'}, optional Which populations to include in the calculation. If set to 'complete', all populations the respective areas are included. Defaults to 'complete'. resolution : float, optional Resolution of the population rate. Defaults to 1 ms. """ default_dict = { 'areas': self.areas_loaded, 'pops': 'complete', 'resolution': 1.0 } params = ah._create_parameter_dict(default_dict, self.T, **keywords) iterator = ah.model_iter(mode='single', areas=params['areas'], pops=params['pops']) # Check if synchrony values have been stored with the same parameters self.synchrony = ah._check_stored_data( os.path.join(self.output_dir, 'synchrony.json'), copy(iterator), params) if self.synchrony is None: print("Computing synchrony") d = nested_dict() d['Parameters'] = params for area, pop in iterator: if pop in self.network.structure[area]: d[area][pop] = ah.synchrony( self.spike_data[area][pop], self.network.N[area][pop], params['t_min'], params['t_max'], resolution=params['resolution']) else: d[area][pop] = np.nan for area in params['areas']: total_spikes = ah.area_spike_train(self.spike_data[area]) d[area]['total'] = ah.synchrony( total_spikes, self.network.N[area]['total'], params['t_min'], params['t_max'], resolution=params['resolution']) self.synchrony = d.to_dict()
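# A minimal sketch of the synchrony measure referenced above: the coefficient of variation of the
# binned population rate. The helper ah.synchrony is assumed to compute roughly this on real spike
# data; the rate trace below is made up.
import numpy as np

population_rate = np.array([12.0, 15.0, 9.0, 30.0, 11.0, 14.0])  # binned rate in spikes/s
cv = np.std(population_rate) / np.mean(population_rate)
print(cv)   # larger values indicate stronger population-wide fluctuations, i.e. more synchrony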
def flatten_stats(stats): return OrderedDict(('.'.join(k), v) for k, v in nested_dict(stats).iteritems_flat())

def create_pop_rates(self, **keywords): """ Calculate time-averaged population rates and store them in member pop_rates. If the rates had previously been stored with the same parameters, they are loaded from file. Parameters ---------- t_min : float, optional Minimal time in ms of the simulation to take into account for the calculation. Defaults to 500 ms. t_max : float, optional Maximal time in ms of the simulation to take into account for the calculation. Defaults to the simulation time. compute_stat : bool, optional If set to true, the mean and variance of the population rate is calculated. Defaults to False. Caution: Setting to True slows down the computation. areas : list, optional Which areas to include in the calculcation. Defaults to all loaded areas. pops : list or {'complete'}, optional Which populations to include in the calculation. If set to 'complete', all populations the respective areas are included. Defaults to 'complete'. """ default_dict = { 'areas': self.areas_loaded, 'pops': 'complete', 'compute_stat': False } params = ah._create_parameter_dict(default_dict, self.T, **keywords) iterator = ah.model_iter(mode='single', areas=params['areas'], pops=params['pops']) # Check if population rates have been stored with the same parameters fp = os.path.join(self.output_dir, 'pop_rates.json') self.pop_rates = ah._check_stored_data(fp, copy(iterator), params) if self.pop_rates is None: print("Computing population rates") d = nested_dict() d['Parameters'] = params if params['compute_stat']: for area in params['areas']: if params['pops'] == 'complete': pops = self.network.structure[area] else: pops = params['pops'] total_rates = [] for pop in pops: rate = ah.pop_rate(self.spike_data[area][pop], params['t_min'], params['t_max'], self.network.N[area][pop]) d[area][pop] = (rate[0], rate[1]) total_rates += rate[2] d[area]['total'] = (np.mean(total_rates), np.std(total_rates)) else: for area, pop in iterator: if pop in self.network.structure[area]: spikes = self.spike_data[area][pop][:, 1] indices = np.where( np.logical_and(spikes > params['t_min'], spikes < params['t_max'])) d[area][pop] = ( indices[0].size / (self.network.N[area][pop] * (params['t_max'] - params['t_min']) / 1000.0), np.nan) else: d[area][pop] = (0., 0.) for area in params['areas']: total_spikes = ah.area_spike_train(self.spike_data[area]) indices = np.where( np.logical_and(total_spikes[:, 1] > params['t_min'], total_spikes[:, 1] < params['t_max'])) d[area]['total'] = total_spikes[:, 1][indices].size / ( self.network.N[area]['total'] * (params['t_max'] - params['t_min']) / 1000.0) self.pop_rates = d.to_dict()
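# A worked example of the stationary-rate formula used above when compute_stat is False:
# rate = (# spikes of the population in [t_min, t_max]) / (N_neurons * duration_in_seconds).
# The numbers are illustrative.
n_spikes = 150000
n_neurons = 20000
t_min, t_max = 500.0, 10500.0          # ms
rate = n_spikes / (n_neurons * (t_max - t_min) / 1000.0)
print(rate)                            # ~0.75 spikes/s per neuron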
def deanonymize(self, G, target, path, amt, dl): pq = PriorityQueue() delays = {} costs = {} paths = nd.nested_dict() paths1 = nd.nested_dict() dists = {} visited = set() previous = {} done = {} prob = {} sources = [] pre = path[0] adv = path[1] nxt = path[2] for node in G.nodes(): previous[node] = -1 delays[node] = -1 costs[node] = inf paths[node] = [] dists[node] = inf done[node] = 0 paths1[node] = [] prob[node] = 1 dists[target] = 0 paths[target] = [target] costs[target] = amt delays[target] = dl pq.put((dists[target], target)) flag1 = 0 flag2 = 0 while(0 != pq.qsize()): curr_cost, curr = pq.get() if curr_cost > dists[curr]: continue visited.add(curr) for [v, curr] in G.in_edges(curr): if (G.edges[v, curr]["Balance"] + G.edges[curr, v]["Balance"] >= costs[curr]) and v not in visited: if done[v] == 0: paths1[v] = [v]+paths[curr] done[v] = 1 cost = dists[curr] + self.cost_function(G, costs[curr], curr, v) if cost < dists[v]: paths[v] = [v]+paths[curr] dists[v] = cost delays[v] = delays[curr] + G.edges[v, curr]["Delay"] costs[v] = costs[curr] + G.edges[v, curr]["BaseFee"] + \ costs[curr] * G.edges[v, curr]["FeeRate"] pq.put((dists[v], v)) if(curr in path[1:]): ind = path.index(curr) """ if(paths[curr]!=path[ind:]): return None """ # Check if the current optimal path could be trasformed into the suboptimal path considered if random hops were added. if self.is_not_possible_mod(path[ind:], paths[curr]): return None # """ if curr == adv: flag1 = 1 """ if(curr == pre): if paths[pre] != path: return [pre] else: sources.append(pre) flag2 = 1 """ # Due to the fact that suboptimal path are now being used this assumption has been removed to avoid large amounts of false positives. # Also bugs where the sender chooses a suboptimal path because of low forward balance while having a faster path with a large capacity channel are avoided. if (curr == pre): sources.append(pre) flag2 = 1 # """ if flag1 == 1 and flag2 == 1: if pre in paths[curr]: for [v, curr] in G.in_edges(curr): if v not in paths[curr]: sources.append(v) sources = set(sources) return sources
def create_rate_time_series(self, **keywords): """ Calculate time series of population- and area-averaged firing rates. Uses ah.pop_rate_time_series. If the rates have previously been stored with the same parameters, they are loaded from file. Parameters ---------- t_min : float, optional Minimal time in ms of the simulation to take into account for the calculation. Defaults to 500 ms. t_max : float, optional Maximal time in ms of the simulation to take into account for the calculation. Defaults to the simulation time. areas : list, optional Which areas to include in the calculcation. Defaults to all loaded areas. pops : list or {'complete'}, optional Which populations to include in the calculation. If set to 'complete', all populations the respective areas are included. Defaults to 'complete'. kernel : {'gauss_time_window', 'alpha_time_window', 'rect_time_window'}, optional Specifies the kernel to be convolved with the spike histogram. Defaults to 'binned', which corresponds to no convolution. resolution: float, optional Width of the convolution kernel. Specifically it correponds to: - 'binned' : bin width of the histogram - 'gauss_time_window' : sigma - 'alpha_time_window' : time constant of the alpha function - 'rect_time_window' : width of the moving rectangular function """ default_dict = { 'areas': self.areas_loaded, 'pops': 'complete', 'kernel': 'binned', 'resolution': 1.0 } params = ah._create_parameter_dict(default_dict, self.T, **keywords) # Check if firing rates have been stored with the same parameters fp = os.path.join(self.output_dir, 'rate_time_series') iterator_areas = ah.model_iter(mode='single', areas=params['areas'], pops=None) iterator_pops = ah.model_iter(mode='single', areas=params['areas'], pops=params['pops']) self.rate_time_series = ah._check_stored_data(fp, copy(iterator_areas), params) fp = os.path.join(self.output_dir, 'rate_time_series_pops') self.rate_time_series_pops = ah._check_stored_data( fp, copy(iterator_pops), params) if self.rate_time_series is None: print('Computing rate time series') # calculate area-averaged firing rates d = nested_dict() d['Parameters'] = params # population-averaged firing rates d_pops = nested_dict() d_pops['Parameters'] = params for area, pop in iterator_pops: if pop in self.network.structure[area]: time_series = ah.pop_rate_time_series( self.spike_data[area][pop], self.network.N[area][pop], params['t_min'], params['t_max'], params['resolution'], kernel=params['kernel']) else: time_series = np.nan * np.ones(params['t_max'] - params['t_min']) d_pops[area][pop] = time_series total_spikes = ah.area_spike_train(self.spike_data[area]) time_series = ah.pop_rate_time_series( total_spikes, self.network.N[area]['total'], params['t_min'], params['t_max'], params['resolution'], kernel=params['kernel']) d[area] = time_series self.rate_time_series_pops = d_pops.to_dict() self.rate_time_series = d.to_dict()
def flatten(params): return { '.'.join(k): v for k, v in nested_dict(params).items_flat() if v is not None }
# Specify filename file_name = "hns_2018_2019.csv" # initialize dictionary of performance when removing top x%, # Key: words left in vocab Value: precision, recall, accuracy, f-measure performance_in_percent_dict = nd.nested_dict() # initialize dictionary of performance when removing words with x frequency, # Key: words left in vocab Value: precision, recall, accuracy, f-measure performance_dict = nd.nested_dict() # Part 1 & Part 2 stop_words = [] baseline_performance = main(file_name, stop_words, "model-2018.txt", "baseline-result.txt") performance_dict = get_performance(performance_dict, baseline_performance[1], baseline_performance[0], baseline_performance[2]) performance_in_percent_dict = get_performance(performance_in_percent_dict, baseline_performance[1], baseline_performance[0], baseline_performance[2])
def dest_reveal_new(G, adversary, delay, amount, pre, next): T = nd.nested_dict() flag1 = True anon_sets = nd.nested_dict() level = 0 index = 0 # Level 0 only contains the next node T[0]["nodes"] = [next] T[0]["delays"] = [delay] print(delay) T[0]["previous"] = [-1] T[0]["visited"] = [[pre, adversary, next]] T[0]["amounts"] = [amount] x = -1 paths = nd.nested_dict() num_paths = 0 # flag to indicate that going further would result only in invalid nodes as the delay limit is reached for all nodes in the current level flag = True while (flag): level += 1 # Stop when level is greater than 3 - it takes forever otherwise if (level == 4): flag1 = False break t1 = T[level - 1]["nodes"] d1 = T[level - 1]["delays"] p1 = T[level - 1]["previous"] v1 = T[level - 1]["visited"] a1 = T[level - 1]["amounts"] pr1 = T[level - 1]["probs"] t2 = [] d2 = [] p2 = [] v2 = [[]] a2 = [] pr2 = [] for i in range(0, len(t1)): u = t1[i] for [u, v] in G.out_edges(u): # Checks if v is not repeating in the same path, delay limit is not reached after visiting v and the capacity condition is true after deducting fees if (v != pre and v != adversary and v != next and v not in v1[i] and (d1[i] - G.edges[u, v]["Delay"]) >= 0 and (G.edges[u, v]["Balance"] + G.edges[v, u]["Balance"]) >= ((a1[i] - G.edges[u, v]["BaseFee"]) / (1 + G.edges[u, v]["FeeRate"]))): t2.append(v) d2.append(d1[i] - G.edges[u, v]["Delay"]) p2.append(i) v2.append(v1[i] + [v]) a2.append(((a1[i] - G.edges[u, v]["BaseFee"]) / (1 + G.edges[u, v]["FeeRate"]))) T[level]["nodes"] = t2 #print(level,t2,d2) T[level]["delays"] = d2 T[level]["previous"] = p2 T[level]["visited"] = v2 T[level]["amounts"] = a2 #T[level]["probs"] = pr2 #print(t2,d2,p2) print(level, len(t2)) # Stop if the current level has 0 nodes if (len(t2) == 0): flag = False level = level - 1 while (level >= 0): t = T[level]["nodes"] d = T[level]["delays"] p = T[level]["previous"] a = T[level]["amounts"] v = T[level]["visited"] #print(level) for i in range(0, len(t)): # Potential destination if delay is 0 if (d[i] == 0): #construct the path found from the next node to the destination path = [] level1 = level path.append(T[level1]["nodes"][i]) loc = T[level1]["previous"][i] while (level1 > 0): level1 = level1 - 1 path.append(T[level1]["nodes"][loc]) loc = T[level1]["previous"][loc] path.reverse() # Add pre and adversary to the start of the path path = [pre, adversary] + path # Double check that path is loop free if (len(path) == len(set(path))): #print(path, level) amt = a[i] pot = path[len(path) - 1] # For each destination find the sources that would use this subpath using either lnd,c-lightning or eclair sources_lnd = deanonymize_lnd(G, pot, path, amt) if sources_lnd != []: print("match", pot, "lnd") anon_sets[pot]["lnd"] = list(sources_lnd) # Check for more fuzz values only if the anonymity sets do not match for fuzz values -1 and 1 fuzz = -0.8 sources_c = deanonymize_c(G, pot, path, amt, -1) sources_c1 = deanonymize_c(G, pot, path, amt, 1) if (sources_c1 != sources_c): sources_c = sources_c + sources_c1 while fuzz <= 0.8: s = deanonymize_c(G, pot, path, amt, fuzz) if (s != []): sources_c = sources_c + s fuzz += 0.2 sources_c = list(set(sources_c)) if sources_c != []: print("match", pot, "c", fuzz) anon_sets[pot]["c"] = list(set(sources_c)) sources_ecl = deanonymize_ecl(G, pot, path, amt) if sources_ecl != []: print("match", pot, "ecl") anon_sets[pot]["ecl"] = list(sources_ecl) level = level - 1 return anon_sets, flag1
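# A compact sketch of the c-lightning fuzz sweep above: candidate sources are collected for the
# extreme fuzz values -1 and 1 and then for the intermediate values -0.8 ... 0.8 in steps of 0.2,
# and finally de-duplicated. Here candidate_sources stands in for a call to deanonymize_c with a
# fixed (G, pot, path, amt) and a varying fuzz argument.
def sweep_fuzz(candidate_sources):
    sources = candidate_sources(-1) + candidate_sources(1)
    fuzz = -0.8
    while fuzz <= 0.8:
        sources += candidate_sources(fuzz)
        fuzz += 0.2
    return list(set(sources))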
def ic_density(bed=None, cut_num_ls=None, savefn=None, split_overlap_ratio_min=0.5, sample='cell1_cell4'): if bed is None: bed = '/Share/home/zhangqf7/gongjing/zebrafish/result/dynamic_merge_region/005_005_new/abs/%s/window-anno.bed' % ( sample) if cut_num_ls is None: cut_num_ls = [20, 200, 80] if savefn is None: savefn = '/Share/home/zhangqf7/gongjing/zebrafish/result/icshape_signal_mean/sample_%s_dynamic_density.txt' % ( sample) trans_dict = loadTransGtfBed2( '/Share/home/zhangqf7/gongjing/zebrafish/data/reference/transcriptome/danRer10.refSeq.transcriptome.trans.bed2' ) bed_region_dict = nested_dict(1, list) with open(bed, 'r') as BED: for line in BED: line = line.strip() if not line or line.startswith('#'): continue arr = line.split('\t') if [int(arr[1]), int(arr[2])] not in bed_region_dict[arr[0]]: # if [arr[1], arr[2]] not in bed_region_dict[arr[0]]: # [arr[1], arr[2]] is str, while [int(arr[1]), int(arr[2])] is int bed_region_dict[arr[0]].append([int(arr[1]), int(arr[2])]) print bed_region_dict['NM_205538'] anno = bed.replace('.bed', '.element.txt') ANNO = open(anno, 'w') fa_dict = read_fa() seq_savefn = savefn.replace('.txt', '.seq.txt') SEQ = open(seq_savefn, 'w') with open(savefn, 'w') as SAVEFN: for tx, j in bed_region_dict.items(): if not trans_dict.has_key(tx): continue tx_element_count = [0] * sum(cut_num_ls) utr_5_start, utr_5_end, cds_start, cds_end, utr_3_start, utr_3_end = [ int(trans_dict[tx][g]) for g in [ 'utr_5_start', 'utr_5_end', 'cds_start', 'cds_end', 'utr_3_start', 'utr_3_end' ] ] if utr_5_end < cut_num_ls[0]: continue if cds_end - cds_start + 1 < cut_num_ls[1]: continue if utr_3_end - utr_3_start + 1 < cut_num_ls[2]: continue utr_5_split = list_split_equal(xrange(utr_5_start, utr_5_end + 1), cut_num_ls[0]) cds_split = list_split_equal(xrange(cds_start, cds_end + 1), cut_num_ls[1]) utr_3_split = list_split_equal(xrange(utr_3_start, utr_3_end + 1), cut_num_ls[2]) all_split = utr_5_split + cds_split + utr_3_split # 1-based for (j_start, j_end) in j: for n, (split_start, split_end) in enumerate(all_split): if max(j_start, split_start) < min(j_end, split_end): overlap_len = min(j_end, split_end) - max( j_start, split_start) + 1 split_overlap_ratio = overlap_len / float(split_end - split_start + 1) if split_overlap_ratio >= split_overlap_ratio_min: tx_element_count[n] += 1 print >> ANNO, '\t'.join( map(str, [ tx, j_start, j_end, split_start, split_end, n, overlap_len, split_overlap_ratio ])) print >> SAVEFN, '\t'.join(map(str, [tx] + tx_element_count)) tx_a_content_cout = [0] * sum(cut_num_ls) tx_t_content_cout = [0] * sum(cut_num_ls) tx_c_content_cout = [0] * sum(cut_num_ls) tx_g_content_cout = [0] * sum(cut_num_ls) for n, (split_start, split_end) in enumerate(all_split): a = fa_dict[tx][split_start - 1:split_end].upper().count('A') / float( len(fa_dict[tx][split_start - 1:split_end].upper())) t = fa_dict[tx][split_start - 1:split_end].upper().count('T') / float( len(fa_dict[tx][split_start - 1:split_end].upper())) c = fa_dict[tx][split_start - 1:split_end].upper().count('C') / float( len(fa_dict[tx][split_start - 1:split_end].upper())) g = fa_dict[tx][split_start - 1:split_end].upper().count('G') / float( len(fa_dict[tx][split_start - 1:split_end].upper())) tx_a_content_cout[n] = a tx_t_content_cout[n] = t tx_c_content_cout[n] = c tx_g_content_cout[n] = g print >> SEQ, '\t'.join(map(str, [tx, 'A'] + tx_a_content_cout)) print >> SEQ, '\t'.join(map(str, [tx, 'T'] + tx_t_content_cout)) print >> SEQ, '\t'.join(map(str, [tx, 'C'] + tx_c_content_cout)) print >> SEQ, 
'\t'.join(map(str, [tx, 'G'] + tx_g_content_cout)) ANNO.close() SEQ.close() return savefn
def resnet(depth, width, num_classes): assert (depth - 4) % 6 == 0, 'depth should be 6n+4' n = (depth - 4) // 6 widths = torch.Tensor([16, 32, 64]).mul(width).int() def gen_block_params(ni, no): return { 'conv0': conv_params(ni, no, 3), 'conv1': conv_params(no, no, 3), 'bn0': bnparams(ni), 'bn1': bnparams(no), 'convdim': conv_params(ni, no, 1) if ni != no else None, } def gen_group_params(ni, no, count): return {'block%d'%i: gen_block_params(ni if i==0 else no, no) for i in range(count)} def gen_group_stats(ni, no, count): return {'block%d'%i: {'bn0': bnstats(ni if i==0 else no), 'bn1': bnstats(no)} for i in range(count)} params = nested_dict({ 'conv0': conv_params(3,16,3), 'group0': gen_group_params(16, widths[0], n), 'group1': gen_group_params(widths[0], widths[1], n), 'group2': gen_group_params(widths[1], widths[2], n), 'bn': bnparams(widths[2]), 'fc': linear_params(widths[2], num_classes), }) stats = nested_dict({ 'group0': gen_group_stats(16, widths[0], n), 'group1': gen_group_stats(widths[0], widths[1], n), 'group2': gen_group_stats(widths[1], widths[2], n), 'bn': bnstats(widths[2]), }) flat_params = OrderedDict() flat_stats = OrderedDict() for keys,v in params.iteritems_flat(): if v is not None: flat_params['.'.join(keys)] = Variable(v, requires_grad=True) for keys,v in stats.iteritems_flat(): flat_stats['.'.join(keys)] = v def activation(x, params, stats, base, mode): return F.relu(F.batch_norm(x, weight = params[base+'.weight'], bias = params[base+'.bias'], running_mean = stats[base+'.running_mean'], running_var = stats[base+'.running_var'], training = mode, momentum = 0.1, eps = 1e-5)) def block(x, params, stats, base, mode, stride): o1 = activation(x, params, stats, base+'.bn0', mode) y = F.conv2d(o1, params[base+'.conv0'], stride=stride, padding=1) o2 = activation(y, params, stats, base+'.bn1', mode) z = F.conv2d(o2, params[base+'.conv1'], stride=1, padding=1) if base + '.convdim' in params: return z + F.conv2d(o1, params[base+'.convdim'], stride=stride) else: return z + x def group(o, params, stats, base, mode, stride): for i in range(n): o = block(o, params, stats, '%s.block%d'%(base,i), mode, stride if i==0 else 1) return o def f(input, params, stats, mode, prefix=''): x = F.conv2d(input, params[prefix+'conv0'], padding=1) g0 = group(x, params, stats, prefix+'group0', mode, 1) g1 = group(g0, params, stats, prefix+'group1', mode, 2) g2 = group(g1, params, stats, prefix+'group2', mode, 2) o = activation(g2, params, stats, prefix+'bn', mode) o = F.avg_pool2d(o, 8, 1, 0) o = o.view(o.size(0), -1) o = F.linear(o, params[prefix+'fc.weight'], params[prefix+'fc.bias']) return o, [g0, g1, g2] return f, flat_params, flat_stats
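# A hedged usage sketch for the functional wide ResNet above. It assumes the helper constructors
# used inside resnet() (conv_params, bnparams, bnstats, linear_params) are in scope, as in the
# original module; the input is the usual CIFAR-sized 3x32x32 batch.
import torch
from torch.autograd import Variable

f, flat_params, flat_stats = resnet(depth=28, width=2, num_classes=10)
x = Variable(torch.randn(4, 3, 32, 32))            # batch of 4 CIFAR-sized images
logits, (g0, g1, g2) = f(x, flat_params, flat_stats, mode=True)
print(logits.size())                               # expected: (4, 10)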