def test_speed(self):
    """Time FASTA generation and original-sequence creation at growing sizes.

    For 10, 100, 1000, 10000 and 100000 symbols, two random FASTA files are
    written into a fresh temporary directory and then passed to
    ``ZipDirectory.create_original_sequences``.  The only assertion is that
    the three ``time.clock()`` readings taken around those two steps are
    strictly increasing.
    """
    _LINE_LENGTH = 100
    alphabet = ['a', 'c', 't', 'g', '-']

    for exponent in range(1, 6):
        sym_count = 10 ** exponent
        with TempDirectory() as tmp:
            started = time.clock()
            for file_name in ('a.fa', 'b.fa'):
                fasta_path = os.path.join(tmp.path, file_name)
                # The third positional argument of open() is the buffer size.
                with open(fasta_path, 'w', sym_count) as fasta:
                    fasta.write('> seq\n')
                    for written in xrange(1, sym_count + 1):
                        fasta.write(random.choice(alphabet))
                        # Wrap the sequence after every _LINE_LENGTH symbols.
                        if written % _LINE_LENGTH == 0:
                            fasta.write('\n')
                    fasta.write('\n')
            generated = time.clock()

            ZipDirectory.create_original_sequences(
                tmp.path,
                os.path.join(tmp.path, 'a.fa'),
                os.path.join(tmp.path, 'b.fa'))
            finished = time.clock()

            self.assertTrue(started < generated < finished)
def test_create_cache(self):
    """create_cache(3) turns an uncached 3-state directory into a cached one."""
    state_count = 3
    with TempDirectory() as scratch:
        zip_dir = ZipDirectory(create_ziphmm_directory(scratch.path))

        # Precondition: the fixture is expected to have no 3-state cache yet.
        self.assertFalse(zip_dir.is_cached(state_count))

        zip_dir.create_cache(state_count)
        self.assertTrue(zip_dir.is_cached(state_count))
def _init_alignments(options, fasta_index1, fasta_index2, group_dir):
    """Prepare the ZipHMM directories for one pair of FASTA files.

    Steps, in order:

    1. Assert that ``options.exp_folder`` is an existing directory and that
       ``options.fasta`` is long enough to be indexed by both FASTA indices.
    2. Resolve the root directory via ``_get_ziphmm_root_dir``; if it does
       not exist yet, create it and build the original sequences from the
       two selected FASTA files using ``options.chunk_size``
       (``sys.stdout`` is passed as the last argument -- presumably a
       log/progress stream; confirm against ``ZipDirectory``).
    3. Wrap every entry directly under the root directory in a
       ``ZipDirectory`` and create the ``NUM_STATES`` cache wherever
       ``is_cached(NUM_STATES)`` reports it missing.

    Raises:
        AssertionError: if the experiment folder is missing or too few
            FASTA files were specified.
    """
    assert os.path.isdir(options.exp_folder), \
        'Directory not found: {0}'.format(options.exp_folder)
    for fasta_index in (fasta_index1, fasta_index2):
        assert len(options.fasta) > fasta_index, \
            'Not enough FASTA files specified'

    root_dir = _get_ziphmm_root_dir(options, group_dir)
    if not os.path.isdir(root_dir):
        # Single-argument print(...) emits the same text under Python 2.
        print('# Creating directory: {0}'.format(root_dir))
        os.mkdir(root_dir)
        first_fasta = options.fasta[fasta_index1]
        second_fasta = options.fasta[fasta_index2]
        ZipDirectory.create_original_sequences(
            root_dir, first_fasta, second_fasta,
            options.chunk_size, sys.stdout)

    # Build every wrapper up front, before any cache is created.
    entries = glob.glob(os.path.join(root_dir, '*'))
    ziphmm_dirs = [ZipDirectory(entry) for entry in entries]
    for ziphmm_dir in ziphmm_dirs:
        if ziphmm_dir.is_cached(NUM_STATES):
            continue
        print('# Creating {0}-state alignment in directory: {1}'.format(
            NUM_STATES, ziphmm_dir.path))
        ziphmm_dir.create_cache(NUM_STATES)
def _init_alignments(options, fasta_index1, fasta_index2, group_dir):
    """Make sure the ZipHMM directories for a FASTA pair exist and are cached.

    The experiment folder and the two FASTA indices are checked with
    assertions first.  The root directory returned by
    ``_get_ziphmm_root_dir`` is created when missing, in which case the
    original sequences are generated from ``options.fasta[fasta_index1]``
    and ``options.fasta[fasta_index2]`` with ``options.chunk_size``
    (``sys.stdout`` is forwarded as the final argument; its exact role is
    defined by ``ZipDirectory.create_original_sequences``).  Finally, each
    entry found directly under the root directory is opened as a
    ``ZipDirectory`` and given a ``NUM_STATES`` cache if it lacks one.

    Raises:
        AssertionError: if ``options.exp_folder`` is not a directory or
            ``options.fasta`` is too short for either index.
    """
    missing_fasta_message = 'Not enough FASTA files specified'
    assert os.path.isdir(options.exp_folder), \
        'Directory not found: {0}'.format(options.exp_folder)
    assert len(options.fasta) > fasta_index1, missing_fasta_message
    assert len(options.fasta) > fasta_index2, missing_fasta_message

    root_dir = _get_ziphmm_root_dir(options, group_dir)
    root_exists = os.path.isdir(root_dir)
    if not root_exists:
        # Parenthesised single-argument print behaves the same on Python 2.
        print('# Creating directory: {0}'.format(root_dir))
        os.mkdir(root_dir)
        ZipDirectory.create_original_sequences(
            root_dir,
            options.fasta[fasta_index1],
            options.fasta[fasta_index2],
            options.chunk_size,
            sys.stdout)

    pattern = os.path.join(root_dir, '*')
    # All ZipDirectory objects are constructed before the first cache build.
    ziphmm_dirs = [ZipDirectory(found) for found in glob.glob(pattern)]
    uncached = (d for d in ziphmm_dirs if not d.is_cached(NUM_STATES))
    for ziphmm_dir in uncached:
        print('# Creating {0}-state alignment in directory: {1}'.format(
            NUM_STATES, ziphmm_dir.path))
        ziphmm_dir.create_cache(NUM_STATES)
def test_clear_cache(self):
    """clear_cache() deletes the 'data_structure' file of the directory."""
    with TempDirectory() as scratch:
        directory = create_ziphmm_directory(scratch.path)
        zip_dir = ZipDirectory(directory)
        cache_file = os.path.join(directory, 'data_structure')

        # The fixture is expected to start out with the file in place.
        self.assertTrue(os.path.isfile(cache_file))

        zip_dir.clear_cache()
        self.assertFalse(os.path.isfile(cache_file))
def test_clear_cache(self):
    """The 'data_structure' file exists before clear_cache() and not after."""
    with TempDirectory() as scratch:
        directory = create_ziphmm_directory(scratch.path)
        zip_dir = ZipDirectory(directory)

        def data_structure_present():
            # Re-evaluated on every call so it reflects the current disk state.
            return os.path.isfile(os.path.join(directory, 'data_structure'))

        self.assertTrue(data_structure_present())
        zip_dir.clear_cache()
        self.assertFalse(data_structure_present())
def test_is_cached(self):
    """is_cached follows the cache through creation and clearing.

    Expected sequence: the fixture reports 2 states cached and 3 not;
    after ``create_cache(3)`` both are cached; after ``clear_cache()``
    neither is.
    """
    with TempDirectory() as scratch:
        zip_dir = ZipDirectory(create_ziphmm_directory(scratch.path))

        # Initial state of the fixture.
        self.assertTrue(zip_dir.is_cached(2))
        self.assertFalse(zip_dir.is_cached(3))

        # Adding the 3-state cache must leave the 2-state cache intact.
        zip_dir.create_cache(3)
        for state_count in (2, 3):
            self.assertTrue(zip_dir.is_cached(state_count))

        # Clearing drops every cached state count.
        zip_dir.clear_cache()
        for state_count in (2, 3):
            self.assertFalse(zip_dir.is_cached(state_count))
def test_init_without_data(self):
    """ZipDirectory can be built after 'data_structure' has been removed."""
    with TempDirectory() as scratch:
        directory = create_ziphmm_directory(scratch.path)
        # Delete the file before the object is constructed.
        os.remove(os.path.join(directory, 'data_structure'))

        zip_dir = ZipDirectory(directory)
        self.assertEqual(directory, zip_dir.path)
def test_create_original_sequences(self):
    """Check the files written by ZipDirectory.create_original_sequences.

    The sequences are built from ``inputs/a.fa`` and ``inputs/b.fa`` with
    ``chunk_size=10``; each listed sub-directory of the temporary directory
    must then contain an ``original_sequence`` file with exactly the
    expected text.
    """
    with TempDirectory() as temp:
        ZipDirectory.create_original_sequences(
            temp.path,
            locate("inputs/a.fa"),
            locate("inputs/b.fa"),
            chunk_size=10)

        def test(name, expected):
            """Assert that <temp>/<name>/original_sequence holds `expected`."""
            path = os.path.join(temp.path, name, 'original_sequence')
            self.assertTrue(os.path.isfile(path))
            # Read inside a `with` block so the handle is closed right away
            # instead of being left to the garbage collector.
            with open(path, 'r') as handle:
                actual = handle.read()
            self.assertEqual(expected, actual)

        test('s1.ziphmm0', '0 0 0 0 1 1 1 1 0 0 ')
        test('s1.ziphmm1', '0 0 1 1 1 1 0 0 0 0 ')
        test('s1.ziphmm2', '1 1 2 1 ')
        test('s2.ziphmm0', '0 0 0 0 1 1 0 0 0 0 ')
        test('s2.ziphmm1', '1 1 0 0 0 0 1 1 ')
def test_load(self):
    """load(n) returns an n-state object and leaves an n-state cache behind.

    The 2-state cache is expected to exist from the start; the 3- and
    4-state caches are expected to be absent until ``load`` is called for
    them.
    """
    with TempDirectory() as scratch:
        zip_dir = ZipDirectory(create_ziphmm_directory(scratch.path))

        # Already cached: loading must keep the cache in place.
        self.assertTrue(zip_dir.is_cached(2))
        two_state = zip_dir.load(2)
        self.assertEqual(2, two_state.state_count)
        self.assertTrue(zip_dir.is_cached(2))

        # Not yet cached: loading must create the cache as a side effect.
        for state_count in (3, 4):
            self.assertFalse(zip_dir.is_cached(state_count))
            loaded = zip_dir.load(state_count)
            self.assertEqual(state_count, loaded.state_count)
            self.assertTrue(zip_dir.is_cached(state_count))
def test_create_original_sequences(self):
    """Verify the per-directory output of create_original_sequences.

    ``ZipDirectory.create_original_sequences`` is run on ``inputs/a.fa``
    and ``inputs/b.fa`` with ``chunk_size=10``.  For every directory name
    listed below, the ``original_sequence`` file inside it must exist and
    match the expected text exactly.
    """
    with TempDirectory() as temp:
        ZipDirectory.create_original_sequences(
            temp.path, locate("inputs/a.fa"), locate("inputs/b.fa"),
            chunk_size=10)

        def test(name, expected):
            """Compare <temp>/<name>/original_sequence with `expected`."""
            path = os.path.join(temp.path, name, 'original_sequence')
            self.assertTrue(os.path.isfile(path))
            # The context manager closes the file even if read() raises;
            # a bare open(...).read() would leave the handle open.
            with open(path, 'r') as sequence_file:
                actual = sequence_file.read()
            self.assertEqual(expected, actual)

        test('s1.ziphmm0', '0 0 0 0 1 1 1 1 0 0 ')
        test('s1.ziphmm1', '0 0 1 1 1 1 0 0 0 0 ')
        test('s1.ziphmm2', '1 1 2 1 ')
        test('s2.ziphmm0', '0 0 0 0 1 1 0 0 0 0 ')
        test('s2.ziphmm1', '1 1 0 0 0 0 1 1 ')
def test_has_original_sequence(self):
    """has_original_sequence is False for an empty dir, True for the fixture."""
    with TempDirectory() as scratch:
        # Checked before the fixture is created, on the bare temp directory.
        found_in_empty = ZipDirectory.has_original_sequence(scratch.path)
        self.assertFalse(found_in_empty)

        directory = create_ziphmm_directory(scratch.path)
        found_in_fixture = ZipDirectory.has_original_sequence(directory)
        self.assertTrue(found_in_fixture)
def test_init_with_data(self):
    """A ZipDirectory built on the fixture exposes that directory as .path."""
    with TempDirectory() as scratch:
        directory = create_ziphmm_directory(scratch.path)
        zip_dir = ZipDirectory(directory)
        expected_path, actual_path = directory, zip_dir.path
        self.assertEqual(expected_path, actual_path)