示例#1
0
 def preprocessing(fileobj: gzip.GzipFile) -> None:
     """Strip a matrix TSV down to the header-less columns used with ScanPy.

     The tab-separated table is read from *fileobj*, reduced to the columns
     relevant for that file, and written without a header row or index
     column to a path derived from ``fileobj.name`` minus its last extension:

     * ``genes.tsv.gz``: columns ``featurekey`` and ``featurename`` are
       written to ``genes.tsv``.
     * ``cells.tsv.gz``: column ``cellkey`` is written to ``barcodes.tsv``.
       As a side effect ``fileobj.name`` is reassigned to
       ``barcodes.tsv.gz``.

     ``fileobj.name`` is compared verbatim against those two names, so a
     name carrying a directory component is rejected.

     Args:
         fileobj: Open, readable gzip file whose ``name`` attribute is
             exactly ``genes.tsv.gz`` or ``cells.tsv.gz``.

     Raises:
         ValueError: If ``fileobj.name`` is neither of the expected names,
             or if the table lacks one of the required columns.
     """
     table = pd.read_table(fileobj, sep='\t')  # Pandas dataframe

     if fileobj.name == 'genes.tsv.gz':
         required = ['featurekey', 'featurename']
         # A list selection keeps a two-column DataFrame.
         selection = required
     elif fileobj.name == 'cells.tsv.gz':
         # The output file (and the file object) take the barcodes name.
         fileobj.name = 'barcodes.tsv.gz'
         required = ['cellkey']
         # A scalar selection yields a Series, i.e. one value per line.
         selection = 'cellkey'
     else:
         raise ValueError(
             'Expected genes.tsv.gz and cells.tsv.gz in directory.')

     # Validate explicitly instead of with `assert`, which is stripped when
     # Python runs with -O and would leave only an opaque pandas KeyError.
     missing = [column for column in required if column not in table.columns]
     if missing:
         raise ValueError(
             f'Missing expected column(s) in {fileobj.name}: '
             f'{", ".join(missing)}')

     # Drop only the last extension: 'genes.tsv.gz' -> 'genes.tsv'.
     output_path, _ = os.path.splitext(fileobj.name)

     # Write to file without column or row headers.
     table[selection].to_csv(output_path,
                             index=False,
                             header=False,
                             sep='\t')