def formatAttributeForExport(attribute_db, filename):
    """Flatten per-probeset attribute pairs and export them to ``filename``.

    ``attribute_db`` is keyed by (gene, probeset) tuples and maps to a
    sequence of (attribute, direction) string pairs. Each pair is joined as
    'attribute|direction' and the resulting list is stored under the
    probeset alone; the gene part of the key is not exported. If the same
    probeset occurs under several genes, the entry processed last wins.

    The result is written with IdentifyAltIsoforms.exportSimple(); nothing
    is returned.
    """
    import IdentifyAltIsoforms

    export_db = {}
    for (gene, probeset) in attribute_db:
        export_db[probeset] = [
            attribute + '|' + direction
            for (attribute, direction) in attribute_db[(gene, probeset)]
        ]

    # Single-argument print() produces the same text on Python 2 and 3.
    print('Exporting: %s' % filename)
    IdentifyAltIsoforms.exportSimple(export_db, filename, '')
def reAnalyzeRNAProbesetMatches(align_files, species, array_type, pairwise_probeset_combinations):
    """Import matching and non-matching probesets and export the valid comparisons.

    Only alignment files whose name contains ``array_type`` are read. Every
    line must hold three tab-separated fields: probeset, call and accession.
    Accessions with a call of '1' are stored as matches for the probeset,
    accessions with any other call as non-matches.

    Each (probeset1, probeset2) pair is then classified:

    * both probesets have matches -- exported under 'probeset1|probeset2'
      with the two sorted, '|'-joined match lists, unless the two joined
      lists are identical;
    * otherwise the first probeset of the pair (probeset1 is tried first)
      that has both matches and non-matches is exported under its own ID
      with its joined match list and the joined result of filterNullMatch()
      applied to its non-matches and matches;
    * all remaining pairs are only counted.

    The result is written with IdentifyAltIsoforms.exportSimple() below
    'AltDatabase/<species>/<array_type>/' (inside 'junction/' when the
    module-level ``analysis_type`` equals 'single'). Four summary counts are
    printed; nothing is returned.

    Raises ValueError when a line does not split into exactly three fields.
    """
    # Restrict the input to the files that belong to this array type.
    relevant_files = [name for name in align_files if array_type in name]

    matching = {}
    not_matching = {}
    for filename in relevant_files:
        print('Reading %s' % filename)
        # 'rU' selects universal-newline mode on Python 2.
        # NOTE(review): the 'U' flag is rejected by Python 3.11+; change the
        # mode if this module is ported.
        with open(filepath(filename), 'rU') as align_handle:
            for line in align_handle:
                probeset, call, accession = line.replace('\n', '').split('\t')
                calls = matching if call == '1' else not_matching
                calls.setdefault(probeset, []).append(accession)

    probeset_matching_pairs = {}
    matching_in_both = 0
    match_and_null = 0
    no_matches = 0
    no_nulls = 0
    for (probeset1, probeset2) in pairwise_probeset_combinations:
        if probeset1 in matching and probeset2 in matching:
            # Sort in place so the joined strings do not depend on the order
            # in which the accessions were read.
            matching[probeset1].sort()
            matching[probeset2].sort()
            match1 = '|'.join(matching[probeset1])
            match2 = '|'.join(matching[probeset2])
            if match1 != match2:
                probeset_matching_pairs[probeset1 + '|' + probeset2] = [match1, match2]
            # NOTE(review): nesting was reconstructed from a flattened source;
            # the counter is taken to cover every pair in which both probesets
            # have matches, so that the four counters sum to the pair count.
            matching_in_both += 1
        elif probeset1 in matching and probeset1 in not_matching:
            match = '|'.join(matching[probeset1])
            null_match = '|'.join(
                filterNullMatch(not_matching[probeset1], matching[probeset1]))
            probeset_matching_pairs[probeset1] = [match, null_match]
            match_and_null += 1
        elif probeset2 in matching and probeset2 in not_matching:
            match = '|'.join(matching[probeset2])
            null_match = '|'.join(
                filterNullMatch(not_matching[probeset2], matching[probeset2]))
            probeset_matching_pairs[probeset2] = [match, null_match]
            match_and_null += 1
        elif probeset1 in matching or probeset2 in matching:
            no_nulls += 1
        else:
            no_matches += 1

    # Single-argument print() calls emit the same text on Python 2 and 3.
    print('%s probeset pairs with matching isoforms for both recipricol probesets.' % matching_in_both)
    print('%s probeset pairs with a match for one and null for that one.' % match_and_null)
    print('%s probeset pairs with only one match.' % no_nulls)
    print('%s probeset pairs with no matches.' % no_matches)

    import IdentifyAltIsoforms
    export_file = 'AltDatabase/' + species + '/' + array_type + '/' + species + '_all-transcript-matches.txt'
    if analysis_type == 'single':
        export_file = 'AltDatabase/' + species + '/' + array_type + '/junction/' + species + '_all-transcript-matches.txt'
    IdentifyAltIsoforms.exportSimple(probeset_matching_pairs, export_file, '')
def formatAttributeForExport(attribute_db, filename):
    """Flatten per-probeset attribute pairs and export them to ``filename``.

    ``attribute_db`` is keyed by (gene, probeset) tuples and maps to a
    sequence of (attribute, direction) string pairs. Each pair is joined as
    'attribute|direction' and the resulting list is stored under the
    probeset alone; the gene part of the key is not exported. If the same
    probeset occurs under several genes, the entry processed last wins.

    The result is written with IdentifyAltIsoforms.exportSimple(); nothing
    is returned.
    """
    import IdentifyAltIsoforms

    export_db = {}
    for (gene, probeset) in attribute_db:
        export_db[probeset] = [
            attribute + '|' + direction
            for (attribute, direction) in attribute_db[(gene, probeset)]
        ]

    # Single-argument print() produces the same text on Python 2 and 3.
    print('Exporting: %s' % filename)
    IdentifyAltIsoforms.exportSimple(export_db, filename, '')
def reAnalyzeRNAProbesetMatches(align_files, species, array_type, pairwise_probeset_combinations):
    """Import matching and non-matching probesets and export the valid comparisons.

    Only alignment files whose name contains ``array_type`` are read. Every
    line must hold three tab-separated fields: probeset, call and accession.
    Accessions with a call of '1' are stored as matches for the probeset,
    accessions with any other call as non-matches.

    Each (probeset1, probeset2) pair is then classified:

    * both probesets have matches -- exported under 'probeset1|probeset2'
      with the two sorted, '|'-joined match lists, unless the two joined
      lists are identical;
    * otherwise the first probeset of the pair (probeset1 is tried first)
      that has both matches and non-matches is exported under its own ID
      with its joined match list and the joined result of filterNullMatch()
      applied to its non-matches and matches;
    * all remaining pairs are only counted.

    The result is written with IdentifyAltIsoforms.exportSimple() below
    'AltDatabase/<species>/<array_type>/' (inside 'junction/' when the
    module-level ``analysis_type`` equals 'single'). Four summary counts are
    printed; nothing is returned.

    Raises ValueError when a line does not split into exactly three fields.
    """
    # Restrict the input to the files that belong to this array type.
    relevant_files = [name for name in align_files if array_type in name]

    matching = {}
    not_matching = {}
    for filename in relevant_files:
        print('Reading %s' % filename)
        # 'rU' selects universal-newline mode on Python 2.
        # NOTE(review): the 'U' flag is rejected by Python 3.11+; change the
        # mode if this module is ported.
        with open(filepath(filename), 'rU') as align_handle:
            for line in align_handle:
                probeset, call, accession = line.replace('\n', '').split('\t')
                calls = matching if call == '1' else not_matching
                calls.setdefault(probeset, []).append(accession)

    probeset_matching_pairs = {}
    matching_in_both = 0
    match_and_null = 0
    no_matches = 0
    no_nulls = 0
    for (probeset1, probeset2) in pairwise_probeset_combinations:
        if probeset1 in matching and probeset2 in matching:
            # Sort in place so the joined strings do not depend on the order
            # in which the accessions were read.
            matching[probeset1].sort()
            matching[probeset2].sort()
            match1 = '|'.join(matching[probeset1])
            match2 = '|'.join(matching[probeset2])
            if match1 != match2:
                probeset_matching_pairs[probeset1 + '|' + probeset2] = [match1, match2]
            # NOTE(review): nesting was reconstructed from a flattened source;
            # the counter is taken to cover every pair in which both probesets
            # have matches, so that the four counters sum to the pair count.
            matching_in_both += 1
        elif probeset1 in matching and probeset1 in not_matching:
            match = '|'.join(matching[probeset1])
            null_match = '|'.join(
                filterNullMatch(not_matching[probeset1], matching[probeset1]))
            probeset_matching_pairs[probeset1] = [match, null_match]
            match_and_null += 1
        elif probeset2 in matching and probeset2 in not_matching:
            match = '|'.join(matching[probeset2])
            null_match = '|'.join(
                filterNullMatch(not_matching[probeset2], matching[probeset2]))
            probeset_matching_pairs[probeset2] = [match, null_match]
            match_and_null += 1
        elif probeset1 in matching or probeset2 in matching:
            no_nulls += 1
        else:
            no_matches += 1

    # Single-argument print() calls emit the same text on Python 2 and 3.
    print('%s probeset pairs with matching isoforms for both recipricol probesets.' % matching_in_both)
    print('%s probeset pairs with a match for one and null for that one.' % match_and_null)
    print('%s probeset pairs with only one match.' % no_nulls)
    print('%s probeset pairs with no matches.' % no_matches)

    import IdentifyAltIsoforms
    export_file = 'AltDatabase/' + species + '/' + array_type + '/' + species + '_all-transcript-matches.txt'
    if analysis_type == 'single':
        export_file = 'AltDatabase/' + species + '/' + array_type + '/junction/' + species + '_all-transcript-matches.txt'
    IdentifyAltIsoforms.exportSimple(probeset_matching_pairs, export_file, '')