def load_previous(agp_file, nodes):
    ''' Loads the gaps between consecutive contigs from a previous AGP.

    The AGP rows are sorted by scaffold name and scaffold index, the
    contig rows (component type "W") are grouped per scaffold, and for
    every pair of neighbouring contigs in a scaffold the value
    ``scaf_start(second) - scaf_stop(first)`` is recorded.

    Parameters
    ----------
    agp_file : passed unchanged to ``load_agps``.
    nodes : passed unchanged to ``create_lookup``; presumably the node
        array whose names match the AGP component names (TODO confirm).

    Returns
    -------
    dict
        Maps ``(idx_a, idx_b)`` to the gap value, where both indices are
        whatever the lookup returns for the two component names.  If the
        same pair occurs more than once, the last occurrence wins.

    Notes
    -----
    Every contig row is resolved through the lookup, including contigs
    that are alone in their scaffold, so a component name missing from
    the lookup fails here (KeyError if the lookup is a plain dict).
    Only the gap dict is returned; no orientation or "active" set is
    produced.
    '''
    # create node lookup.
    lookup = create_lookup(nodes)

    # load the agp array and ensure it is sorted by scaffname and scafidx,
    # so that neighbouring rows of a scaffold are neighbouring components.
    agp_edges = load_agps(agp_file)
    agp_edges.sort(order=['scaf_name', 'scaf_idx'])

    # group contig rows by scaffold as (node index, row index) pairs.
    # resolving the node index here means each name is looked up once.
    members = dict()
    for i in range(agp_edges.size):
        row = agp_edges[i]

        # skip non contigs.
        if row['comp_type'] != "W":
            continue

        members.setdefault(row['scaf_name'], list()).append(
            (lookup[row['comp_name']], i)
        )

    # record the gap between each pair of neighbouring contigs.
    gaps = dict()
    for contigs in members.values():
        for (idxa, rowa), (idxb, rowb) in zip(contigs, contigs[1:]):
            gaps[(idxa, idxb)] = (
                agp_edges[rowb]['scaf_start'] - agp_edges[rowa]['scaf_stop']
            )

    return gaps
def call_agp_gaps(agp_file, nodes):
    ''' Calls the gaps between consecutive contigs listed in an AGP.

    Rows are ordered by scaffold name and scaffold index; within each
    scaffold only the contig rows (component type "W") are considered,
    and for each neighbouring pair the value
    ``scaf_start(second) - scaf_stop(first)`` is stored.

    Parameters
    ----------
    agp_file : passed unchanged to ``load_agps``.
    nodes : passed unchanged to ``create_lookup``; presumably the node
        array whose names match the AGP component names (TODO confirm).

    Returns
    -------
    dict
        Maps ``(idx_a, idx_b)`` to the gap value, where both indices are
        whatever the lookup returns for the two component names.
        Component names are only resolved for contigs that take part in
        a pair, so a scaffold with a single contig contributes nothing.
    '''
    # name lookup for the nodes.
    name_to_idx = create_lookup(nodes)

    # read the AGP rows and order them by scaffold, then position.
    rows = load_agps(agp_file)
    rows.sort(order=['scaf_name','scaf_idx'])

    # per scaffold, remember which row numbers hold contigs.
    by_scaffold = dict()
    for pos in range(rows.size):
        # anything that is not a contig is ignored.
        if rows[pos]['comp_type'] != "W":
            continue
        by_scaffold.setdefault(rows[pos]['scaf_name'], list()).append(pos)

    # walk neighbouring contigs of every scaffold.
    result = dict()
    for contig_rows in by_scaffold.values():
        for left_pos, right_pos in zip(contig_rows, contig_rows[1:]):
            left = rows[left_pos]
            right = rows[right_pos]

            # resolve both ends before measuring the gap.
            left_idx = name_to_idx[left['comp_name']]
            right_idx = name_to_idx[right['comp_name']]

            result[(left_idx, right_idx)] = right['scaf_start'] - left['scaf_stop']

    return result
def make_key(a, b):
    ''' Returns the pair ordered so that the smaller value comes first.

    When ``a < b`` is false (including when the two are equal) the
    result is ``(b, a)``.
    '''
    return (a, b) if a < b else (b, a)


########### script ##################

# read the node, edge and AGP arrays (loaders are defined elsewhere;
# noted as hdf5 information).
logging.info("loading data arrays")
nodes = load_nodes(input_nodes_file)
edges = load_edges(input_edges_file)
agps = load_agps(input_agp_file)
nlookup = create_lookup(nodes)

# count how many edges connect each unordered pair of contigs.
logging.info("counting bundles")
blookup = dict()
for i in range(edges.size):

    # the two contig indices joined by this edge.
    idxa = edges[i]['ctg_a_idx']
    idxb = edges[i]['ctg_b_idx']

    # the key is direction independent: (a, b) and (b, a) share a count.
    key = make_key(idxa, idxb)

    # first sighting starts from zero.
    blookup[key] = blookup.get(key, 0) + 1