def call_on_real_families(paths, splitword):
    '''Read sgRNA site files, index the sgRNAs by gene and print the algorithm result.

    Every file in ``paths`` is read line by line.  Lines whose first
    character is ``>`` are skipped; each remaining line contributes one
    sgRNA, taken as the line with its last four characters removed.

    The gene name of a file is derived from its path: the text before the
    first ``.txt`` is split on ``splitword`` and the second piece is used.

    The collected sgRNAs are passed to ``bottemsUpAlgorithm.call_it_all`` and
    the value it returns is printed.  This function itself returns ``None``.

    :param paths: iterable of paths of the site files.
    :param splitword: separator that precedes the gene name inside each path.
    :raises IndexError: if ``splitword`` does not occur in the part of a path
        that precedes ``.txt``.
    '''
    sg_genes_dict = {}  # sgRNA -> list of gene names whose file contains it
    sgNames = []
    sgList = []
    for p in paths:
        # The context manager closes the file even if reading fails.
        with open(p) as f:
            # NOTE(review): the fixed 4-character cut presumably removes a
            # short suffix plus the newline -- confirm against the file format.
            res = [line[:-4] for line in f if line[0] != ">"]
        gene_name = p.split(".txt")[0].split(splitword)[1]
        for sg in res:
            # sg_genes_dict gains a key at exactly the moment an sgRNA is
            # first seen, so it doubles as the O(1) "already listed" check
            # that the lists themselves could only answer in O(n).
            if sg in sg_genes_dict:
                sg_genes_dict[sg].append(gene_name)
            else:
                sg_genes_dict[sg] = [gene_name]
                sgNames.append(sg)
                sgList.append(sg)
    print(bottemsUpAlgorithm.call_it_all(sgList, sgNames, sg_genes_dict))
def call_it_all_wighted(genesList, genesNames, input_sg_genes_dict,
                        input_genes_sg_dict, Omega, protodist_outfile,
                        pylip_temps_path):
    '''Weighted variant: build a UPGMA tree, collect candidates, return a weighted set cover.

    Steps, in order:

    1. ``return_UPGMA`` is called with the gene data and the two path
       arguments; it yields a tree and a distance matrix.
    2. ``bottemsUpAlgorithm.fill_leaves_sets`` is applied to the tree (per
       the original comment, the leaves here are genes).
    3. The module-level lookup tables are filled from the two input dicts.
    4. ``bottemsUpAlgorithm.call_it_all`` is run over all keys of
       ``input_sg_genes_dict``.
    5. The candidates are sorted in place by ``len(item[3])``, largest first.
    6. ``find_w_set_cover`` is called with the candidates and the distance
       matrix, and its result is returned.

    :param genesList: forwarded to ``return_UPGMA``.
    :param genesNames: forwarded to ``return_UPGMA``.
    :param input_sg_genes_dict: mapping whose keys are the sgRNAs to evaluate.
    :param input_genes_sg_dict: forwarded to ``fill_genes_sg_dict``.
    :param Omega: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param protodist_outfile: forwarded to ``return_UPGMA``.
    :param pylip_temps_path: forwarded to ``return_UPGMA``.
    :return: whatever ``find_w_set_cover`` returns.
    '''
    tree, dist_matrix = return_UPGMA(genesList, genesNames,
                                     protodist_outfile, pylip_temps_path)
    bottemsUpAlgorithm.fill_leaves_sets(tree)

    fill_sg_genes_dict(input_sg_genes_dict)
    fill_genes_sg_dict(input_genes_sg_dict)

    # Two independent lists with the same content are handed to the algorithm.
    sg_sequences = list(input_sg_genes_dict.keys())
    sg_labels = copy.deepcopy(sg_sequences)

    # NOTE(review): df_targets is neither a parameter nor a local of this
    # function, so it has to resolve to a module-level name -- confirm that
    # one exists, otherwise this call raises NameError.
    candidates = bottemsUpAlgorithm.call_it_all(
        sg_sequences, sg_labels, input_sg_genes_dict, Omega, df_targets)

    def _size_of_fourth_field(candidate):
        return len(candidate[3])

    candidates.sort(key=_size_of_fourth_field, reverse=True)
    return find_w_set_cover(candidates, dist_matrix)
def bottem_up(node, current_sg_genes_dict, current_genes_sg_dict, sgList, sgNames, Omega):
    '''Run the bottom-up algorithm on the subtree of ``node`` and then continue with its parent.

    The lookup tables passed in are extended with the genes found in
    ``node.leaves_DS`` (read from the module-level ``genes_sg_dict`` and
    ``sg_genes_dict``), so on the way up each call sees everything collected
    below it.  Results are appended to the module-level
    ``best_permutations_DS``.

    :param node: tree node; must expose ``colour``, ``leaves_DS``,
        ``set_colour`` and ``parent``.
    :param current_sg_genes_dict: sgRNA -> genes table accumulated so far; a
        falsy value starts a new one.
    :param current_genes_sg_dict: gene -> sgRNAs table accumulated so far; a
        falsy value starts a new one.
    :param sgList: sgRNAs accumulated so far; a falsy value starts a new list.
    :param sgNames: kept in lockstep with ``sgList``.
    :param Omega: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :return: always ``None``.
    '''
    global best_permutations_DS

    # A node coloured 'b' has already been processed.
    if node.colour == 'b':
        return

    current_sg_genes_dict = current_sg_genes_dict or {}
    current_genes_sg_dict = current_genes_sg_dict or {}
    sgList = sgList or []
    sgNames = sgNames or []

    # Restrict the global tables to the genes below this node.
    for leaf in node.leaves_DS:
        gene_sgs = genes_sg_dict[leaf.name]
        current_genes_sg_dict[leaf.name] = gene_sgs
        for sg in gene_sgs:
            # Overwriting unconditionally; per the original comment, checking
            # first would only cost more.
            current_sg_genes_dict[sg] = sg_genes_dict[sg]
            if sg not in sgList:
                sgList.append(sg)
                sgNames.append(sg)

    current_res = None
    if len(current_genes_sg_dict) >= 2:
        # Several genes: ask the bottom-up algorithm for candidates.
        current_res = bottemsUpAlgorithm.call_it_all(
            sgList, sgNames, current_sg_genes_dict, Omega)
        current_best_perm = current_res[0]
    else:
        # Only one gene; the second returned value is not used here.
        current_best_perm, _lowest_of_widest = (
            bottemsUpAlgorithm.find_best_sg_for_single_gene(leaf.name, sgList))

    # NOTE(review): in the single-gene branch current_res is still None, so
    # the function stops here without recording current_best_perm or
    # colouring the node -- confirm this is intended.
    if current_res == None:
        return

    if current_res:
        remove_unrelevant_candidates(current_res)
        best_permutations_DS += current_res
    else:
        best_permutations_DS += [current_best_perm]

    node.set_colour('b')

    # Climb to the parent unless the stopping condition holds.
    if node.parent and not stopping_condition(current_best_perm):
        bottem_up(node.parent, current_sg_genes_dict, current_genes_sg_dict,
                  sgList, sgNames, Omega)
def call_using_CasSites(dirp, Omega=0.11, min_length=20, max_length=20,
                        start_with_G=False, on_redundant=False,
                        redundant_genes=None):
    '''Extract sgRNA sites from every gene file in ``dirp`` and run the bottom-up algorithm.

    Only directory entries whose name contains ``RNAfile.tx`` are read.  The
    gene name is the part of the file name before the first ``.txt``.  The
    first line of each file is skipped (the files are described as FASTA,
    so this is the header line) and the rest, with newline characters
    removed, is passed to ``CasSites.get_sites``.

    :param dirp: directory holding one sequence file per gene.
    :param Omega: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param min_length: forwarded to ``CasSites.get_sites``.
    :param max_length: forwarded to ``CasSites.get_sites``.
    :param start_with_G: forwarded to ``CasSites.get_sites``.
    :param on_redundant: when true, only genes listed in ``redundant_genes``
        are processed.
    :param redundant_genes: collection of gene names used when
        ``on_redundant`` is true; ``None`` is treated as empty.
    :return: whatever ``bottemsUpAlgorithm.call_it_all`` returns.
    '''
    if redundant_genes is None:
        redundant_genes = ()

    sg_genes_dict = {}  # sgRNA -> list of gene names it was found in
    sgNames = []
    sgList = []
    for p in os.listdir(dirp):
        if "RNAfile.tx" not in p:
            continue
        gene_name = p.split(".txt")[0]
        if on_redundant and gene_name not in redundant_genes:
            continue

        # os.path.join replaces the hard-coded backslash so the function also
        # works on non-Windows systems; the context manager closes the file
        # even when reading fails.
        with open(os.path.join(dirp, p)) as f:
            next(f, None)  # skip the first line; tolerate an empty file
            # The previous code called gene.replace('/n', '') and discarded
            # the result, so newlines were never removed.  Strip them for real.
            gene = f.read().replace('\n', '')

        for sg in CasSites.get_sites(gene, min_length, max_length, start_with_G):
            # A key is added to sg_genes_dict exactly when an sgRNA is first
            # seen, so the dict gives an O(1) "already listed" test.
            if sg in sg_genes_dict:
                sg_genes_dict[sg].append(gene_name)
            else:
                sg_genes_dict[sg] = [gene_name]
                sgNames.append(sg)
                sgList.append(sg)
    return bottemsUpAlgorithm.call_it_all(sgList, sgNames, sg_genes_dict, Omega)
def call_it_all(genesList, genesNames, input_sg_genes_dict, input_genes_sg_dict,
                Omega, protodist_outfile, pylip_temps_path, df_targets,
                cfd_dict=None, PS_number=12):
    '''Run the bottom-up algorithm on all sgRNAs and return the candidates, best first.

    The module-level lookup tables are filled from the two input dicts, then
    ``bottemsUpAlgorithm.call_it_all`` is run over every key of
    ``input_sg_genes_dict``.  The returned candidates are sorted in place by
    their ``cut_expectation`` attribute, highest first.

    :param genesList: not used by this function.
    :param genesNames: not used by this function.
    :param input_sg_genes_dict: mapping whose keys are the sgRNAs to evaluate.
    :param input_genes_sg_dict: forwarded to ``fill_genes_sg_dict``.
    :param Omega: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param protodist_outfile: not used by this function.
    :param pylip_temps_path: not used by this function.
    :param df_targets: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param cfd_dict: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param PS_number: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :return: the sorted candidate list.
    '''
    fill_sg_genes_dict(input_sg_genes_dict)
    fill_genes_sg_dict(input_genes_sg_dict)

    # Two independent lists with the same content are handed to the algorithm.
    sg_sequences = list(input_sg_genes_dict.keys())
    sg_labels = copy.deepcopy(sg_sequences)

    ranked = bottemsUpAlgorithm.call_it_all(
        sg_sequences, sg_labels, input_sg_genes_dict, Omega, df_targets,
        cfd_dict, PS_number)

    def _expected_cuts(candidate):
        return candidate.cut_expectation

    ranked.sort(key=_expected_cuts, reverse=True)
    return ranked
def E_top_down(res, node, Omega, sg_genes_dict, df_targets,
               internal_node_candidates=10, cfd_dict=None, PS_number=12):
    '''Walk the tree from ``node`` downwards and collect candidates per subgroup.

    For the current node an sgRNA -> genes table is built from
    ``node.node_targets_DS``, using the module-level ``genes_sg_dict`` to look
    up the sgRNAs of each gene.  If the node has between 2 and 10 targets,
    ``bottemsUpAlgorithm.call_it_all`` is run on that table, its candidates are
    sorted by ``cut_expectation`` (highest first) and a ``Subgroup_res`` with
    the first ``internal_node_candidates`` of them is appended to ``res``.
    The function then recurses into every child in ``node.clades``.

    If the algorithm yields no candidates for a node, the walk stops there
    and its children are not visited.

    :param res: list that receives the ``Subgroup_res`` objects (mutated).
    :param node: tree node; must expose ``node_targets_DS``, ``clades`` and
        ``name``.
    :param Omega: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param sg_genes_dict: only passed on to the recursive calls; the table
        used for a node is rebuilt locally.
    :param df_targets: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param internal_node_candidates: how many top candidates to keep per node.
    :param cfd_dict: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param PS_number: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :return: always ``None``; results are delivered through ``res``.
    '''
    # Build the sgRNA -> genes table restricted to the genes of this node.
    current_sg_genes_dict = {}
    sgList = []
    sgNames = []
    for leaf in node.node_targets_DS:  # each leaf is a gene
        for sg in genes_sg_dict[leaf.name]:
            # A key is added exactly when an sgRNA is first seen, so the dict
            # gives an O(1) "already listed" test for the two lists as well.
            if sg in current_sg_genes_dict:
                current_sg_genes_dict[sg].append(leaf.name)
            else:
                current_sg_genes_dict[sg] = [leaf.name]
                sgList.append(sg)
                sgNames.append(sg)

    # Bounds are fixed here; the original comment suggests the upper bound
    # should become at least 10.
    if 1 < len(node.node_targets_DS) < 11:
        best_permutations_DS = bottemsUpAlgorithm.call_it_all(
            sgList, sgNames, current_sg_genes_dict, Omega, df_targets,
            cfd_dict, PS_number)
        if not best_permutations_DS:
            return
        best_permutations_DS.sort(key=lambda item: item.cut_expectation,
                                  reverse=True)
        current_best_perm = best_permutations_DS[:internal_node_candidates]
        res.append(Subgroup_res(get_genes_list(best_permutations_DS),
                                current_best_perm, node.name))

    if not node.clades:
        return
    # Visit every child.  Indexing clades[0] and clades[1] directly raised
    # IndexError on a single-child node and skipped any child past the second.
    for child in node.clades:
        if child:
            E_top_down(res, child, Omega, sg_genes_dict, df_targets,
                       internal_node_candidates, cfd_dict, PS_number)