示例#1
0
def kallisto_table(kallisto_dir, index):
    """
    Convert kallisto output to a count table where the rows are
    equivalence classes and the columns are cells.

    The table is built from ``matrix.tsv``, ``matrix.ec`` and
    ``matrix.cells`` inside ``<kallisto_dir>/quant`` and written to
    ``matrix.csv`` in that same directory. If ``matrix.csv`` is already
    reported as existing by ``file_exists``, it is reused as-is.

    Args:
        kallisto_dir: directory containing the ``quant`` subdirectory.
        index: path to the index file; the FASTA file is looked up at the
            same path with its extension replaced by ``.fa``.

    Returns:
        Path to the ``matrix.csv`` count table.
    """
    quant_dir = os.path.join(kallisto_dir, "quant")
    out_file = os.path.join(quant_dir, "matrix.csv")
    # Reuse a previously generated table instead of recomputing it.
    if file_exists(out_file):
        return out_file
    tsvfile = os.path.join(quant_dir, "matrix.tsv")
    ecfile = os.path.join(quant_dir, "matrix.ec")
    cellsfile = os.path.join(quant_dir, "matrix.cells")
    fastafile = os.path.splitext(index)[0] + ".fa"
    fasta_names = fasta.sequence_names(fastafile)
    ec_names = get_ec_names(ecfile, fasta_names)
    # matrix.tsv has no header row: one (ec, cell, count) triple per line.
    df = pd.read_csv(tsvfile,
                     header=None,
                     names=["ec", "cell", "count"],
                     sep="\t")
    # Replace the ec identifiers from the TSV with their looked-up names.
    df["ec"] = [ec_names[x] for x in df["ec"]]
    # Long format -> wide format: one row per ec, one column per cell.
    df = df.pivot(index='ec', columns='cell', values='count')
    cellnames = get_cell_names(cellsfile)
    colnames = [cellnames[x] for x in df.columns]
    df.columns = colnames
    # Write to a temporary file in the same directory and move it into place
    # so an interrupted write never leaves a partial matrix.csv behind that a
    # later call would mistake for a finished table.
    tmp_file = out_file + ".tmp"
    try:
        df.to_csv(tmp_file)
        os.replace(tmp_file, out_file)
    finally:
        # Only present here if writing or replacing failed.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
    return out_file
示例#2
0
def kallisto_table(kallisto_dir, index):
    """
    Convert kallisto output to a count table where the rows are
    equivalence classes and the columns are cells.

    Reads ``matrix.tsv``, ``matrix.ec`` and ``matrix.cells`` from
    ``<kallisto_dir>/quant`` and writes the pivoted table to ``matrix.csv``
    in the same directory. When ``file_exists`` reports that ``matrix.csv``
    is already there, nothing is recomputed.

    Args:
        kallisto_dir: directory containing the ``quant`` subdirectory.
        index: path to the index file; the matching FASTA file is taken to be
            the same path with its extension swapped for ``.fa``.

    Returns:
        Path to the ``matrix.csv`` count table.
    """
    quant_dir = os.path.join(kallisto_dir, "quant")
    out_file = os.path.join(quant_dir, "matrix.csv")
    # An existing output acts as a cache of a previous run.
    if file_exists(out_file):
        return out_file
    tsvfile = os.path.join(quant_dir, "matrix.tsv")
    ecfile = os.path.join(quant_dir, "matrix.ec")
    cellsfile = os.path.join(quant_dir, "matrix.cells")
    fastafile = os.path.splitext(index)[0] + ".fa"
    fasta_names = fasta.sequence_names(fastafile)
    ec_names = get_ec_names(ecfile, fasta_names)
    # Header-less, tab-separated (ec, cell, count) triples.
    df = pd.read_table(tsvfile, header=None, names=["ec", "cell", "count"])
    # Swap the ec identifiers for their looked-up names before pivoting.
    df["ec"] = [ec_names[x] for x in df["ec"]]
    # Reshape so each ec is a row and each cell is a column.
    df = df.pivot(index='ec', columns='cell', values='count')
    cellnames = get_cell_names(cellsfile)
    colnames = [cellnames[x] for x in df.columns]
    df.columns = colnames
    # The output file doubles as the "already done" marker above, so it must
    # only appear once fully written: write beside it, then replace in one
    # step.
    tmp_file = out_file + ".tmp"
    try:
        df.to_csv(tmp_file)
        os.replace(tmp_file, out_file)
    finally:
        # Left over only when the write or the replace did not complete.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
    return out_file