def base_annotation_test(self, temp, length, dtypes, ann_dtypes, head=True, on="cell_id"):
    """
    Shared driver for annotate_csv tests: build inputs, annotate, return paths/data.

    :param temp: directory handed to make_ann_test_inputs and used for the output file
    :param length: length of test dfs
    :param dtypes: dtypes of test dfs
    :param ann_dtypes: dtypes of test annotation dict
    :param head: T/F, forwarded as ``write_header`` to both the input builder
        and annotate_csv
    :param on: col to annotate on
    :return: tuple of (csv, annotation, annotated) where ``annotated`` is the
        path ``<temp>/annotated.csv.gz`` that annotate_csv was asked to write
    """
    # Both calls below take the same header/key options, so collect them once.
    shared_options = {"write_header": head, "on": on}

    source_csv, ann_data = self.make_ann_test_inputs(
        temp, length, dtypes, ann_dtypes, **shared_options
    )

    output_path = os.path.join(temp, "annotated.csv.gz")
    csvutils.annotate_csv(
        source_csv, ann_data, output_path, ann_dtypes, **shared_options
    )

    return source_csv, ann_data, output_path
def annotate_coverage_metrics(metrics, coverage_yaml, output):
    """
    Annotate a metrics csv with per-cell coverage data loaded from YAML files.

    :param metrics: metrics csv, passed through to csvutils.annotate_csv
    :param coverage_yaml: mapping of cell_id -> path of the YAML file holding
        that cell's coverage data
    :param output: output path, passed through to csvutils.annotate_csv
    :raises AssertionError: if a YAML file contains a 'cell_id' entry that
        does not match the key it was registered under
    """
    data = {}

    for cell_id, filename in coverage_yaml.items():
        with open(filename, 'rt') as reader:
            # safe_load instead of yaml.load: calling yaml.load without a
            # Loader raises TypeError on PyYAML >= 6 and can construct
            # arbitrary objects on older versions.
            # NOTE(review): assumes the coverage files hold plain YAML
            # (no python-specific tags) -- confirm against the writer.
            covdata = yaml.safe_load(reader)

        # The file handle is no longer needed once the document is parsed.
        if 'cell_id' in covdata:
            assert covdata['cell_id'] == cell_id, (
                "cell_id in {} is {!r}, expected {!r}".format(
                    filename, covdata['cell_id'], cell_id
                )
            )
            # cell_id is the annotation key, not an annotation column.
            del covdata['cell_id']

        data[cell_id] = covdata

    csvutils.annotate_csv(metrics, data, output, dtypes()['metrics'])
def add_clustering_order(
        reads, metrics, output, chromosomes=None, sample_info=None):
    """
    Annotate metrics with the hierarchical clustering order of each cell.

    The order is obtained from get_hierarchical_clustering_order and stored
    under the 'order' key of each cell's entry in ``sample_info``; the merged
    mapping is then written via csvutils.annotate_csv.

    :param reads: reads input, passed to get_hierarchical_clustering_order
    :param metrics: metrics csv to annotate
    :param output: output path for the annotated metrics
    :param chromosomes: forwarded to get_hierarchical_clustering_order
    :param sample_info: optional mapping of cell_id -> dict of annotations;
        when a non-empty mapping is given it is updated in place
    """
    clustering_order = get_hierarchical_clustering_order(
        reads, chromosomes=chromosomes
    )

    if not sample_info:
        sample_info = {}

    # Always build the nested {cell_id: {'order': value}} shape, whether or
    # not sample_info was supplied, and tolerate cells that have a clustering
    # order but no pre-existing sample_info entry.
    for cell_id, cell_order in clustering_order.items():
        sample_info.setdefault(cell_id, {})['order'] = cell_order

    # NOTE(review): other annotate_csv call sites in this file pass a dtypes
    # mapping as a fourth argument -- confirm whether this call needs one too.
    csvutils.annotate_csv(metrics, sample_info, output)
def add_clustering_order(
        reads, metrics, output, chromosomes=None, sample_info=None):
    """
    Write metrics to ``output`` annotated with each cell's clustering order.

    :param reads: reads input, passed to get_hierarchical_clustering_order
    :param metrics: metrics csv to annotate
    :param output: output path for the annotated metrics
    :param chromosomes: forwarded to get_hierarchical_clustering_order
    :param sample_info: optional mapping of cell_id -> dict of annotations;
        a non-empty mapping is updated in place with an 'order' entry per cell
    """
    clustering_order = get_hierarchical_clustering_order(
        reads, chromosomes=chromosomes
    )

    # None and empty mappings are both replaced by a fresh dict.
    sample_info = sample_info or {}

    for cell_id, cell_order in clustering_order.items():
        # Create the per-cell entry on first sight, then record its order.
        sample_info.setdefault(cell_id, {})['order'] = cell_order

    metrics_dtypes = dtypes()['metrics']
    csvutils.annotate_csv(metrics, sample_info, output, metrics_dtypes)
def test_annotate_csv_annotation_col_mismatch(self, tmpdir, n_rows):
    """
    annotate a csv with annotation data whose length differs from the csv

    :param tmpdir: temporary directory to write in
    :param n_rows: number of rows in test csvs
    """
    csv_dtypes = dict.fromkeys("ABCD", "int")
    csv_dtypes["cell_id"] = "str"
    ann_dtypes = dict.fromkeys("ERF", "int")

    out_path = os.path.join(tmpdir, "annotated.csv.gz")

    in_csv, ann_data = self.make_ann_test_inputs(
        tmpdir, n_rows, csv_dtypes, ann_dtypes
    )

    # Add one extra annotation entry on top of the generated ones.
    ann_data["new_cell"] = {"E": 1, "R": 43, "F": 2}

    csvutils.annotate_csv(in_csv, ann_data, out_path, ann_dtypes)

    self.validate_annotation_test(in_csv, ann_data, out_path, "cell_id")
def test_annotate_csv_annotation_col_dtype_mismatch(self, tmpdir, n_rows):
    """
    annotate a csv with annotation data re-keyed by integer position

    The generated annotation entries are re-keyed 0..n-1 before annotating,
    and the test asserts that the annotated output exactly matches the
    input csv.

    :param tmpdir: temporary directory to write in
    :param n_rows: number of rows in test csvs
    """
    csv_dtypes = dict.fromkeys("ABCD", "int")
    csv_dtypes["cell_id"] = "str"
    ann_dtypes = dict.fromkeys("ERF", "int")

    out_path = os.path.join(tmpdir, "annotated.csv.gz")

    in_csv, ann_data = self.make_ann_test_inputs(
        tmpdir, n_rows, csv_dtypes, ann_dtypes
    )

    # Swap the original keys for their integer positions, keeping the values
    # in their original iteration order.
    ann_data = dict(enumerate(ann_data.values()))

    csvutils.annotate_csv(in_csv, ann_data, out_path, ann_dtypes)

    assert self.dfs_exact_match(out_path, in_csv)