示例#1
0
文件: spqr_flow.py 项目: jim-bo/SINAH
def load_previous(agp_file, nodes):
	''' loads the gaps between adjacent contigs of a previous AGP.

	Parameters:
		agp_file: handed unchanged to load_agps().
		nodes: handed unchanged to create_lookup().

	Returns:
		dict mapping (idxa, idxb) -> gap, where idxa and idxb are the
		lookup values of two contig rows that follow each other inside
		the same scaffold and gap is scaf_start of the second row minus
		scaf_stop of the first.

	Raises:
		whatever lookup[...] raises for a contig comp_name it does not
		contain (KeyError if the lookup is a plain dict).
	'''

	# create node lookup.
	lookup = create_lookup(nodes)

	# load the agp array.
	agp_edges = load_agps(agp_file)

	# ensure sorted by scaffname and scafidx, so rows collected below are
	# already in scaffold order.
	agp_edges.sort(order=['scaf_name','scaf_idx'])

	# group contig rows by scaffold as (node index, row index) pairs.
	# the node index is resolved here, once per row, so an unknown
	# component name fails immediately even in a single-contig scaffold.
	members = dict()
	for i in range(agp_edges.size):
		row = agp_edges[i]

		# skip non contigs.
		if row['comp_type'] != "W": continue

		members.setdefault(row['scaf_name'], list()).append((lookup[row['comp_name']], i))

	# measure the gap between each pair of neighbouring contigs.
	gaps = dict()
	for placed in members.values():
		for (idxa, rowa), (idxb, rowb) in zip(placed, placed[1:]):
			gaps[(idxa, idxb)] = agp_edges[rowb]['scaf_start'] - agp_edges[rowa]['scaf_stop']

	# only the gaps are returned.
	return gaps
示例#2
0
文件: spqr_flow.py 项目: jim-bo/SINAH
def call_agp_gaps(agp_file, nodes):
	''' calls the gaps between adjacent contigs of an AGP.

	Parameters:
		agp_file: handed unchanged to load_agps().
		nodes: handed unchanged to create_lookup().

	Returns:
		dict mapping (idxa, idxb) -> gap, where idxa and idxb are the
		lookup values of two contig rows that follow each other inside
		the same scaffold and gap is scaf_start of the second row minus
		scaf_stop of the first. Empty when the AGP holds no contig rows.
	'''

	# create node lookup.
	lookup = create_lookup(nodes)

	# load the agp array.
	agp_edges = load_agps(agp_file)

	# ensure sorted by scaffname and scafidx, so rows collected below are
	# already in scaffold order.
	agp_edges.sort(order=['scaf_name','scaf_idx'])

	# build list of contig row offsets per scaffold.
	offsets = dict()
	for i in range(agp_edges.size):

		# skip non contigs.
		if agp_edges[i]['comp_type'] != "W": continue

		# record index.
		offsets.setdefault(agp_edges[i]['scaf_name'], list()).append(i)

	# second pass runs once, after every row has been grouped; gaps is
	# always bound, even when no contig rows were found.
	gaps = dict()
	for rows in offsets.values():

		# walk neighbouring contig rows of this scaffold.
		for rowa, rowb in zip(rows, rows[1:]):

			# get AGP edge.
			ea = agp_edges[rowa]
			eb = agp_edges[rowb]

			# get index.
			idxa = lookup[ea['comp_name']]
			idxb = lookup[eb['comp_name']]

			# get gap.
			gaps[(idxa,idxb)] = eb['scaf_start'] - ea['scaf_stop']

	return gaps
示例#3
0
def make_key(a, b):
    ''' returns the pair as (a, b) when a < b, otherwise as (b, a)'''
    return (a, b) if a < b else (b, a)

########### script ################## 

# read the node, edge and AGP arrays (the original note calls this hdf5 data).
logging.info("loading data arrays")
nodes = load_nodes(input_nodes_file)
edges = load_edges(input_edges_file)
agps = load_agps(input_agp_file)

nlookup = create_lookup(nodes)

# tally how many edges share each order-independent (ctg_a_idx, ctg_b_idx) key.
logging.info("counting bundles")
blookup = {}
for i in range(edges.size):

    # order-independent key for this edge's two endpoints.
    idxa = edges[i]['ctg_a_idx']
    idxb = edges[i]['ctg_b_idx']
    key = make_key(idxa, idxb)

    # first sighting starts from zero.
    blookup[key] = blookup.get(key, 0) + 1