def _parse_sample_sets(filename, sep):
    """Yield every sample set parsed from ``filename``.

    The parser is selected by the file suffix: ``.smx`` and ``.smt`` are
    handed to ``SampleSet.parse_smx`` / ``SampleSet.parse_smt`` together
    with ``sep``; ``.json`` is handed to ``SampleSet.parse_json`` (which
    is not given ``sep``).

    This is a generator, so nothing below runs until the first item is
    requested. At that point a missing file is logged and the process
    exits with status 1; an unrecognized suffix is logged and the
    generator simply finishes without yielding anything.
    """
    if not os.path.exists(filename):
        logging.error("Sample set file '%s' not found" % (filename))
        sys.exit(1)

    suffix = os.path.splitext(filename)[-1]
    if suffix not in ('.smx', '.smt', '.json'):
        logging.error('suffix not recognized (.smx, .smt, or .json)')
        return

    # Pick the parser first, then stream its results from a single loop.
    if suffix == '.json':
        parsed = SampleSet.parse_json(filename)
    elif suffix == '.smt':
        parsed = SampleSet.parse_smt(filename, sep)
    else:
        parsed = SampleSet.parse_smx(filename, sep)

    for sample_set in parsed:
        yield sample_set
def test_sample_set_smx_parser(self):
    """Round-trip randomly generated sample sets through an SMX file.

    The file written here has a header row of set names (first cell
    'Name'), a row of set descriptions (first cell 'Desc'), and then one
    row per sample holding that sample's value in every set, with an
    empty cell where the sample is not in the set's ``value_dict``.

    The file is parsed back with ``SampleSet.parse_smx`` and each parsed
    set is compared with its source set by name, description,
    ``value_dict`` items and the array returned by ``get_array``.
    """
    # Local import so this method does not rely on a module-level import.
    import tempfile

    # generate samples
    samples = ['S%d' % i for i in range(10000)]
    # generate sample sets
    N = 100
    minsize = 1
    maxsize = N
    sample_sets = [generate_random_sample_set(minsize, maxsize, samples)
                   for _ in range(N)]

    names = ['Name'] + [ss.name for ss in sample_sets]
    descs = ['Desc'] + [ss.desc for ss in sample_sets]

    # Use a unique temp file instead of a fixed 'tmp' in the working
    # directory, and remove it in ``finally`` so a failing assertion
    # does not leave the file behind.
    fd, path = tempfile.mkstemp(suffix='.smx')
    try:
        with os.fdopen(fd, 'w') as fileh:
            fileh.write('\t'.join(names) + '\n')
            fileh.write('\t'.join(descs) + '\n')
            for sample in samples:
                fields = [sample]
                for ss in sample_sets:
                    # empty cell when the sample is not part of the set
                    fields.append(ss.value_dict.get(sample, ''))
                fileh.write('\t'.join(map(str, fields)) + '\n')

        # read into sample sets; list() makes len() and indexing valid
        # whether parse_smx returns a list or a generator
        read_sample_sets = list(SampleSet.parse_smx(path))
        self.assertEqual(len(read_sample_sets), N)
        self.assertEqual(len(read_sample_sets), len(sample_sets))
        for ss, rss in zip(sample_sets, read_sample_sets):
            self.assertEqual(rss.name, ss.name)
            self.assertEqual(rss.desc, ss.desc)
            self.assertEqual(set(rss.value_dict.items()),
                             set(ss.value_dict.items()))
            a = ss.get_array(samples)
            b = rss.get_array(samples)
            self.assertTrue(np.array_equal(a, b))
    finally:
        os.remove(path)