def test_splice_site_str_to_tuple():
    """A comma-separated splice site string becomes a tuple of its parts."""
    from outrigger.validate.check_splice_sites import \
        splice_site_str_to_tuple

    expected = ('GT/AG', 'AT/AC')
    observed = splice_site_str_to_tuple('GT/AG,AT/AC')

    assert observed == expected
def execute(self):
    """Validate events of every splice type against the allowed splice sites.

    For each ``(splice_name, splice_abbrev)`` in ``common.SPLICE_TYPES``:

    1. Collect one splice-site result per consecutive exon pair of every
       isoform (via ``self.exon_pair_splice_sites``) and concatenate them
       column-wise.
    2. Write that table to ``<index_folder>/<splice_abbrev>/splice_sites.csv``.
    3. Keep only the rows in which *every* column holds one of the allowed
       splice sites parsed from ``self.valid_splice_sites``.
    4. Copy the header line plus every line of the original events CSV
       whose first comma-separated field is a validated row label into
       ``<index_folder>/<splice_abbrev>/validated/<EVENTS_CSV>``.

    Reads ``self.valid_splice_sites``, ``self.index_folder`` and
    ``self.input_index``; returns nothing.
    """
    valid_splice_sites = check_splice_sites.splice_site_str_to_tuple(
        self.valid_splice_sites)
    # Only depends on the parsed splice sites, so build it once up front
    # instead of once per isoform.
    valid_str = ' or '.join(valid_splice_sites)

    for splice_name, splice_abbrev in common.SPLICE_TYPES:
        splice_name_spaces = splice_name.replace('_', ' ').title()
        util.progress('Finding valid splice sites in {} ({}) '
                      'splice type ...'.format(splice_name_spaces,
                                               splice_abbrev.upper()))
        isoform_exons = common.SPLICE_TYPE_ISOFORM_EXONS[splice_abbrev]

        validated_folder = os.path.join(self.index_folder, splice_abbrev,
                                        'validated')
        self.maybe_make_folder(validated_folder)

        splice_sites_seriess = []
        for isoform, exons in isoform_exons.items():
            util.progress('\tFinding valid splice sites for {isoform} of'
                          ' {splice_name} events which match '
                          '{valid_splice_sites}'
                          '...'.format(isoform=isoform,
                                       splice_name=splice_name_spaces,
                                       valid_splice_sites=valid_str))
            # Consecutive exons flank one intron each.
            for exonA, exonB in zip(exons, exons[1:]):
                util.progress('\t\tFinding splice sites for {exonA} and '
                              '{exonB} ...'.format(exonA=exonA, exonB=exonB))
                intron_splice_site = self.exon_pair_splice_sites(
                    exonA, exonB, splice_abbrev)
                splice_sites_seriess.append(intron_splice_site)
                util.done(4)
            util.done(3)

        # One column per (isoform, exon pair); rows are aligned on the index.
        splice_sites = pd.concat(splice_sites_seriess, axis=1)

        # Named ``splice_sites_csv`` so the local does not shadow the stdlib
        # ``csv`` module name; the ``{csv}`` format field is unchanged.
        splice_sites_csv = os.path.join(self.index_folder, splice_abbrev,
                                        'splice_sites.csv')
        util.progress('\tWriting splice sites to {csv} ...'.format(
            csv=splice_sites_csv))
        splice_sites.to_csv(splice_sites_csv)
        util.done(3)

        # ``axis=0`` is the groupby default; the explicit keyword is
        # deprecated in newer pandas releases, so it is omitted here.
        n_total = len(splice_sites.groupby(level=0))
        splice_sites_is_valid = splice_sites.isin(valid_splice_sites)
        # A row is valid only if all of its columns are allowed sites.
        valid_events_rows = splice_sites_is_valid.all(axis=1)
        splice_sites_validated = splice_sites.loc[valid_events_rows]
        n_valid = len(splice_sites_validated.groupby(level=0))

        # No util.done() follows this message, so it ends its own line.
        util.progress("\tValidated {valid}/{total} {splice_name} "
                      "({splice_abbrev}) events. \n"
                      "".format(valid=n_valid, total=n_total,
                                splice_name=splice_name_spaces,
                                splice_abbrev=splice_abbrev.upper()))

        original_events_csv = os.path.join(self.input_index,
                                           splice_abbrev, EVENTS_CSV)
        validated_events_csv = os.path.join(validated_folder, EVENTS_CSV)
        util.progress('\tWriting validated events to {csv} ...'.format(
            csv=validated_events_csv))

        validated_index = splice_sites_validated.index
        # Output is opened first, then input, same order as before.
        with open(validated_events_csv, 'w') as f_validated, \
                open(original_events_csv) as f_original:
            for i, line in enumerate(f_original):
                # Line 0 is the header and is always copied; any other line
                # is kept only when its first field is a validated row label.
                if i == 0 or line.split(',')[0] in validated_index:
                    f_validated.write(line)
        util.done(3)