示例#1
0
    def base_annotation_test(self,
                             temp,
                             length,
                             dtypes,
                             ann_dtypes,
                             head=True,
                             on="cell_id"):
        """
        shared driver for annotate_csv tests: builds the inputs, runs
        csvutils.annotate_csv on them and hands back everything involved
        :param temp: tempdir to test in
        :param length: length of test dfs
        :param dtypes: dtypes of test dfs
        :param ann_dtypes: dtypes of test annotation dict
        :param head: T/F write header; forwarded as write_header to both
        make_ann_test_inputs and annotate_csv
        :param on: col to annotate on
        :return: tuple of (csv, annotation, annotated) where csv and
        annotation come from make_ann_test_inputs and annotated is the
        path "annotated.csv.gz" inside temp
        """
        # Build the un-annotated csv and the annotation data for it.
        test_inputs = self.make_ann_test_inputs(
            temp, length, dtypes, ann_dtypes, write_header=head, on=on
        )
        csv, annotation = test_inputs

        # Destination handed to annotate_csv as its output argument.
        annotated = os.path.join(temp, "annotated.csv.gz")

        csvutils.annotate_csv(
            csv, annotation, annotated, ann_dtypes, write_header=head, on=on
        )

        return csv, annotation, annotated
示例#2
0
def annotate_coverage_metrics(metrics, coverage_yaml, output):
    """
    Annotate metrics with per-cell data loaded from YAML files.

    Each YAML file is parsed into a mapping; the collected
    ``{cell_id: mapping}`` data is passed to ``csvutils.annotate_csv``
    together with ``dtypes()['metrics']``.

    :param metrics: metrics input, forwarded to csvutils.annotate_csv
    :param coverage_yaml: mapping of cell_id -> path of the YAML file
        holding that cell's data
    :param output: output target, forwarded to csvutils.annotate_csv
    :raises AssertionError: if a YAML file carries a 'cell_id' entry that
        differs from the key it is listed under in coverage_yaml
    """
    data = {}

    for cell_id, filename in coverage_yaml.items():
        with open(filename, 'rt') as reader:
            # yaml.load() without an explicit Loader is deprecated since
            # PyYAML 5.1 and a TypeError from 6.0 on; safe_load parses
            # plain YAML without constructing arbitrary Python objects.
            # NOTE(review): if these files ever contain python-specific
            # tags, an explicit Loader must be chosen instead -- confirm
            # against whatever writes them.
            covdata = yaml.safe_load(reader)

        # The file may repeat its own cell id; it has to agree with the key
        # and is dropped so it is not added as an annotation column.
        if 'cell_id' in covdata:
            assert covdata['cell_id'] == cell_id, (
                "cell_id {!r} in {} does not match expected {!r}".format(
                    covdata['cell_id'], filename, cell_id
                )
            )
            del covdata['cell_id']
        data[cell_id] = covdata

    csvutils.annotate_csv(metrics, data, output, dtypes()['metrics'])
示例#3
0
def add_clustering_order(
        reads, metrics, output, chromosomes=None, sample_info=None):
    """
    Annotate metrics with each cell's hierarchical clustering order.

    The order obtained from ``get_hierarchical_clustering_order`` is stored
    under the ``'order'`` key of every cell's entry in ``sample_info`` and
    the resulting mapping is passed to ``csvutils.annotate_csv``.

    :param reads: reads input, forwarded to
        get_hierarchical_clustering_order
    :param metrics: metrics input, forwarded to csvutils.annotate_csv
    :param output: output target, forwarded to csvutils.annotate_csv
    :param chromosomes: forwarded to get_hierarchical_clustering_order
    :param sample_info: optional mapping of cell_id -> dict of annotation
        values; when non-empty it is updated in place with the 'order'
        entries
    """
    # NOTE(review): assumes get_hierarchical_clustering_order returns a flat
    # mapping of cell_id -> order value, as the per-cell assignment below
    # implies -- confirm against its definition.
    clustering_order = get_hierarchical_clustering_order(
        reads, chromosomes=chromosomes
    )

    if not sample_info:
        sample_info = {}

    for cell_id, cell_order in clustering_order.items():
        # Cells missing from sample_info get a fresh entry instead of
        # raising KeyError, so every annotation value is a dict whether or
        # not sample_info was supplied.
        sample_info.setdefault(cell_id, {})['order'] = cell_order

    csvutils.annotate_csv(metrics, sample_info, output)
示例#4
0
def add_clustering_order(
        reads, metrics, output, chromosomes=None, sample_info=None):
    """
    Annotate metrics with each cell's hierarchical clustering order.

    Every cell returned by ``get_hierarchical_clustering_order`` gets an
    ``'order'`` entry in ``sample_info`` (cells not yet present are added),
    and the mapping is passed to ``csvutils.annotate_csv`` together with
    ``dtypes()['metrics']``.

    :param reads: reads input, forwarded to
        get_hierarchical_clustering_order
    :param metrics: metrics input, forwarded to csvutils.annotate_csv
    :param output: output target, forwarded to csvutils.annotate_csv
    :param chromosomes: forwarded to get_hierarchical_clustering_order
    :param sample_info: optional mapping of cell_id -> dict of annotation
        values; when non-empty it is updated in place
    """
    clustering_order = get_hierarchical_clustering_order(
        reads, chromosomes=chromosomes
    )

    # A missing or empty sample_info is replaced by a fresh dict; otherwise
    # the caller's own mapping is extended.
    annotations = sample_info or {}

    for cell_id, cell_order in clustering_order.items():
        annotations.setdefault(cell_id, {})['order'] = cell_order

    csvutils.annotate_csv(metrics, annotations, output, dtypes()['metrics'])
示例#5
0
    def test_annotate_csv_annotation_col_mismatch(self, tmpdir, n_rows):
        """
        test annotate_csv when the annotation data carries one entry
        ("new_cell") more than make_ann_test_inputs produced
        :param tmpdir: temporary directory to write in
        :param n_rows: number of rows in test csvs
        """
        # csv columns A-D are ints, keyed by a string cell_id column
        dtypes = dict.fromkeys("ABCD", "int")
        dtypes["cell_id"] = "str"
        # annotation columns E, R and F are ints
        ann_dtypes = dict.fromkeys("ERF", "int")

        annotated = os.path.join(tmpdir, "annotated.csv.gz")

        test_inputs = self.make_ann_test_inputs(
            tmpdir, n_rows, dtypes, ann_dtypes
        )
        csv, annotation = test_inputs

        # extra annotation entry added on top of the generated ones
        extra_entry = {"E": 1, "R": 43, "F": 2}
        annotation["new_cell"] = extra_entry

        csvutils.annotate_csv(csv, annotation, annotated, ann_dtypes)

        self.validate_annotation_test(csv, annotation, annotated, "cell_id")
示例#6
0
    def test_annotate_csv_annotation_col_dtype_mismatch(self, tmpdir, n_rows):
        """
        test annotate_csv when the annotation data is keyed by consecutive
        integers instead of the generated keys; the annotated output is
        expected to exactly match the input csv
        :param tmpdir: temporary directory to write in
        :param n_rows: number of rows in test csvs
        """
        # csv columns A-D are ints, keyed by a string cell_id column
        dtypes = dict.fromkeys("ABCD", "int")
        dtypes["cell_id"] = "str"
        # annotation columns E, R and F are ints
        ann_dtypes = dict.fromkeys("ERF", "int")

        annotated = os.path.join(tmpdir, "annotated.csv.gz")

        test_inputs = self.make_ann_test_inputs(
            tmpdir, n_rows, dtypes, ann_dtypes
        )
        csv, annotation = test_inputs

        # Re-key every annotation entry by its position (0, 1, 2, ...),
        # keeping the entries themselves untouched.
        annotation = {
            position: annotation[cell_id]
            for position, cell_id in enumerate(annotation)
        }

        csvutils.annotate_csv(csv, annotation, annotated, ann_dtypes)

        unchanged = self.dfs_exact_match(annotated, csv)
        assert unchanged