def collect_star_alignment_results(input, samples):
    """Collect STAR alignment results into one DataFrame row per sample.

    Each entry of ``input`` is read as a ``|``-separated two-column table
    (name, value). Names are normalised with ``trim_header`` and values are
    converted with ``recast``; the resulting mapping becomes a single row
    indexed by the matching sample label.

    Args:
        input: Iterable of paths (or anything ``pd.read_table`` accepts),
            paired positionally with ``samples``.
        samples: Iterable of sample labels used as the row index.

    Returns:
        A DataFrame with one row per (file, sample) pair, or ``None`` when
        there are no pairs to read.
    """
    frames = []
    # NOTE: zip() stops at the shorter of the two iterables, so surplus
    # files or samples are silently ignored.
    for path, sample in zip(input, samples):
        # NOTE(review): rows 7, 22 and 27 are presumably section-header lines
        # without a "|" separator in the STAR log -- confirm against the
        # log format in use.
        raw = pd.read_table(
            path,
            sep="|",
            names=["name", "value"],
            engine="python",
            skiprows=[7, 22, 27],
        )
        row = {
            trim_header(name, underscore=True, percent=True): recast(value)
            for name, value in zip(raw["name"], raw["value"])
        }
        frames.append(pd.DataFrame(row, index=[sample]))

    if not frames:
        return None
    # DataFrame.append was removed in pandas 2.0; a single concat gives the
    # same result and avoids re-copying the accumulated frame per sample.
    return pd.concat(frames)
def test_recast(self):
    """Check the type ``recast`` returns for representative input strings."""
    # (input string, exact type expected from recast) -- checked in order.
    expected_types = (
        ("1234", int),
        ("123.45", float),
        ("123,45", float),
        ("23.45%", float),
        ("23,45%", float),
        ("Mar 23 00:24:12", datetime.datetime),
    )
    for text, expected in expected_types:
        self.assertEqual(type(recast(text)), expected)
def _collect_results(self):
    """Read every input file and store the combined table under ``self['align']``.

    Files in ``self._inputfiles`` are paired positionally with
    ``self._samples``. Each file is read as a ``|``-separated (name, value)
    table; names are normalised with ``trim_header`` and values converted
    with ``recast``, giving one row per sample with the index named
    ``"Sample"``.

    Two derived columns are added before the table is stored:

    * ``mismatch_sum`` -- sum of ``Mismatch_rate_per_base__PCT``,
      ``Deletion_rate_per_base`` and ``Insertion_rate_per_base``.
    * ``PCT_of_reads_unmapped`` -- sum of the three
      ``PCT_of_reads_unmapped:_*`` columns.

    Raises:
        ValueError: If there are no (file, sample) pairs to read.
        KeyError: If a parsed file lacks one of the columns used for the
            derived sums.
    """
    smllogger.info("collecting results")
    frames = []
    for f, s in zip(self._inputfiles, self._samples):
        smllogger.debug("Reading input file {f} for sample {s}".format(f=f, s=s))
        # NOTE(review): rows 7, 22 and 27 are presumably section-header lines
        # without a "|" separator in the STAR log -- confirm against the
        # log format in use.
        raw = pd.read_table(
            f,
            sep="|",
            names=["name", "value"],
            engine="python",
            skiprows=[7, 22, 27],
        )
        row = {
            trim_header(name, underscore=True, percent=True): recast(value)
            for name, value in zip(raw["name"], raw["value"])
        }
        frames.append(pd.DataFrame(data=row, index=pd.Index([s], name="Sample")))

    if not frames:
        # Fail with a clear message instead of an opaque error further down.
        raise ValueError("no input files to collect alignment results from")

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(frames)
    df['mismatch_sum'] = (
        df['Mismatch_rate_per_base__PCT']
        + df['Deletion_rate_per_base']
        + df['Insertion_rate_per_base']
    )
    df['PCT_of_reads_unmapped'] = (
        df['PCT_of_reads_unmapped:_other']
        + df['PCT_of_reads_unmapped:_too_many_mismatches']
        + df['PCT_of_reads_unmapped:_too_short']
    )
    self['align'] = df