def test_gds_info(self):
        # Smoke-test GDSInfo: it must construct, expose non-empty
        # keys/items/values, and hold the expected record for the test sample.
        info = GDSInfo()
        self.assertIsNotNone(info)
        for accessor in ("keys", "items", "values"):
            self.assertGreater(len(getattr(info, accessor)()), 0)

        # Expected metadata of the reference sample (self.test_sample).
        record = info[self.test_sample]
        self.assertIsNotNone(record)
        self.assertEqual(record['genes'], 9561)
        # sample_count is converted with int() before comparing, as the
        # stored value is not assumed to be an int already.
        self.assertEqual(int(record['sample_count']), 4)
        self.assertEqual(len(record['subsets']), 2)
# Example #2
# 0
def valid(info, n=40):
    """Return the subset types in which every subset has at least ``n`` samples.

    ``info`` is a mapping with a ``"subsets"`` entry: an iterable of
    mappings, each providing a ``"type"`` and a ``"sample_id"`` collection.
    A subset type is rejected as soon as one of its subsets has fewer than
    ``n`` entries in ``"sample_id"``; a subset with exactly ``n`` samples is
    accepted.

    Returns a ``set`` of the accepted subset types (empty when there are no
    subsets or none qualifies).
    """
    subset_types = set()
    undersized = set()
    for subset in info["subsets"]:
        subset_type = subset["type"]
        subset_types.add(subset_type)
        if len(subset["sample_id"]) < n:
            # One small subset disqualifies the whole subset type.
            undersized.add(subset_type)
    return subset_types - undersized


def report(stypes, info):
    """Pretty-print each GDS identifier and its valid subset types.

    ``stypes`` is an iterable of ``(gds_id, subset_types)`` pairs and
    ``info`` maps a GDS identifier to a record with a ``"subsets"`` list.
    For every pair the identifier is printed on its own line, followed by
    one indented line per subset type listing ``description/sample-count``
    for each subset of that type.
    """
    for gds_id, subset_types in stypes:
        print(gds_id)
        for subset_type in subset_types:
            # The record is fetched per subset type, so an identifier with
            # no subset types is never looked up in ``info``.
            record = info[gds_id]
            header = "  %s:" % subset_type
            summaries = []
            for subset in record["subsets"]:
                if subset["type"] == subset_type:
                    summaries.append(
                        "%s/%d" % (subset["description"], len(subset["sample_id"]))
                    )
            print(header + ", ".join(summaries))


# Script body: collect, for every GDS data set, the subset types that pass
# the ``valid`` filter, print them, then print summary counts.
gdsinfo = GDSInfo()

# ``valid`` is evaluated once per data set by the inner generator; the outer
# comprehension keeps only data sets with at least one valid subset type.
valid_subset_types = [
    (gds_id, subset_types)
    for gds_id, subset_types in (
        (gds_id, valid(gds_record))
        for gds_id, gds_record in sorted(gdsinfo.items())
    )
    if subset_types
]
report(valid_subset_types, gdsinfo)

# Number of data sets kept, and total number of valid subset types in them.
print('datasets = ' + str(len(valid_subset_types)))
print('type subsets = ' + str(sum(len(b) for _, b in valid_subset_types)))