def test_fastani_small_num_pairs(tmpdir): """Test FastANI cache on small number of pairs.""" db_file = tmpdir.mkdir("sub").join("tmp.db").strpath qid = 'GCF_000009045.1' rid = 'GCF_000186085.1' q_gf = './tests/data/GCF_000009045.1.fna.gz' r_gf = './tests/data/GCF_000186085.1.fna.gz' gid_pairs = [(qid, rid), (rid, qid)] genome_files = {qid: q_gf, rid: r_gf} fastani = FastANI(db_file, 1) ani_af = fastani.pairs(gid_pairs, genome_files, report_progress=True, initial_cache_check=True) # exact values will depend on FastANI version, but should be reater than these bound assert ani_af[qid][rid][0] > 99.5 assert ani_af[qid][rid][1] > 0.995 assert ani_af[rid][qid][0] > 99.5 assert ani_af[rid][qid][1] > 0.995 # re-test results now that results have been cached ani_af = fastani.pairs(gid_pairs, genome_files, report_progress=True, initial_cache_check=True) assert ani_af[qid][rid][0] > 99.5 assert ani_af[qid][rid][1] > 0.995 assert ani_af[rid][qid][0] > 99.5 assert ani_af[rid][qid][1] > 0.995
def test_fastani_large_num_pairs(tmpdir): """Test FastANI cache on large number of pairs.""" db_file = tmpdir.mkdir("sub").join("tmp.db").strpath qid = 'GCF_000009045.1' rid = 'GCF_000186085.1' q_gf = './tests/data/GCF_000009045.1.fna.gz' r_gf = './tests/data/GCF_000186085.1.fna.gz' # artificial duplicate pairs to force using multiprocessing gid_pairs = [(qid, rid), (rid, qid), (qid, rid), (rid, qid), (qid, rid), (rid, qid), (qid, rid), (rid, qid)] genome_files = {qid: q_gf, rid: r_gf} fastani = FastANI(db_file, 1) fastani.DB_BATCH_SIZE = 1 # force saving to cache to ensure code is tested ani_af = fastani.pairs(gid_pairs, genome_files, report_progress=True, initial_cache_check=True) # exact values will depend on FastANI version, but should be reater than these bound assert ani_af[qid][rid][0] > 99.5 assert ani_af[qid][rid][1] > 0.995 assert ani_af[rid][qid][0] > 99.5 assert ani_af[rid][qid][1] > 0.995 # re-test results now that results have been cached ani_af = fastani.pairs(gid_pairs, genome_files, report_progress=True, initial_cache_check=True) assert ani_af[qid][rid][0] > 99.5 assert ani_af[qid][rid][1] > 0.995 assert ani_af[rid][qid][0] > 99.5 assert ani_af[rid][qid][1] > 0.995
def test_object_db_creation(tmpdir): """Test creation of FastANI object with SQlite database.""" db_file = tmpdir.mkdir("sub").join("tmp.db").strpath fastani = FastANI(db_file, 1) assert fastani.num_db_rows() is None
def test_fastani_invalid_input(tmpdir): """Test FastANI on invalid input.""" db_file = tmpdir.mkdir("sub").join("tmp.db").strpath qid = 'invalid1' rid = 'invalid2' q_gf = 'does_not_exist.fna' r_gf = 'does_not_exist.fna' fastani = FastANI(db_file, 1) with pytest.raises(FastANIError): fastani.fastani(qid, rid, q_gf, r_gf)
def test_mean_ani(): """Test mean methods for combining reciprocal pairs.""" ani_af = {} ani_af['g1'] = {} ani_af['g1']['g2'] = (90, 0.95) ani_af['g2'] = {} ani_af['g2']['g1'] = (95, 0.90) fastani = FastANI(None, 1) ani, af = fastani.mean_ani(ani_af, 'g1', 'g2') assert ani == 92.5 assert af == 0.925
def test_symmetric_ani(): """Test symmetrical (max) methods for combining reciprocal pairs.""" ani_af = {} ani_af['g1'] = {} ani_af['g1']['g2'] = (90, 0.95) ani_af['g2'] = {} ani_af['g2']['g1'] = (95, 0.90) fastani = FastANI(None, 1) ani, af = fastani.symmetric_ani(ani_af, 'g1', 'g2') assert ani == 95 assert af == 0.95
def test_write_ani_af(tmpdir): """Test writing of results.""" out_file = tmpdir.mkdir("output").join("results.tsv").strpath ani_af = {} ani_af['g1'] = {} ani_af['g1']['g2'] = (90, 0.95) fastani = FastANI(None, 1) fastani.write_ani_af(out_file, ani_af) fin = open(out_file) assert fin.readline().strip() == 'Query\tReference\tANI\tAF' assert fin.readline().strip() == 'g1\tg2\t90\t0.95'
def test_fastani_valid_input(tmpdir): """Test FastANI on valid input.""" db_file = tmpdir.mkdir("sub").join("tmp.db").strpath qid = 'GCF_000009045.1' rid = 'GCF_000186085.1' q_gf = './tests/data/GCF_000009045.1.fna.gz' r_gf = './tests/data/GCF_000186085.1.fna.gz' fastani = FastANI(db_file, 1) ani_af = fastani.fastani(qid, rid, q_gf, r_gf) assert ani_af[0] == 'GCF_000009045.1' assert ani_af[1] == 'GCF_000186085.1' # exact values will depend on FastANI version, but should be reater than these bound assert ani_af[2] > 99.5 assert ani_af[3] > 0.995
def test_write_matrices(tmpdir): """Test ANI and AF matrices.""" out_file = tmpdir.mkdir("output").join("results.tsv").strpath ani_af = {} ani_af['g1'] = {} ani_af['g1']['g1'] = (100, 1.0) ani_af['g1']['g2'] = (90, 0.95) ani_af['g2'] = {} ani_af['g2']['g2'] = (100, 1.0) ani_af['g2']['g1'] = (95, 0.90) fastani = FastANI(None, 1) fastani.write_ani_matrix(out_file, ani_af) # test ANI matrix fin = open(out_file) assert fin.readline().rstrip() == '\tg1\tg2' assert fin.readline().rstrip() == 'g1\t100\t90' assert fin.readline().rstrip() == 'g2\t95\t100' fin.close() # test AF matrix fastani.write_af_matrix(out_file, ani_af) fin = open(out_file) assert fin.readline().rstrip() == '\tg1\tg2' assert fin.readline().rstrip() == 'g1\t1.0\t0.95' assert fin.readline().rstrip() == 'g2\t0.9\t1.0' fin.close()
def fastani(self, args): """Run FastANI and cache results.""" query_files = self._input_files(args.query_genomes, args.file_ext, args.validate_genome_files) ref_files = self._input_files(args.ref_genomes, args.file_ext, args.validate_genome_files) make_sure_path_exists(args.output_dir) self.logger.info( 'Identified {:,} query and {:,} reference genomes.'.format( len(query_files), len(ref_files))) # get genome pairs to be considered gid_pairs, genome_files = self._genome_comparisons( query_files, ref_files, args.ref_to_query) self.logger.info('Calculating ANI between {:,} genome pairs.'.format( len(gid_pairs))) # calculate ANI between genome pairs start = time.time() fastani = FastANI(args.ani_db_file, args.cpus) try: ani_af = fastani.pairs( gid_pairs, genome_files, report_progress=True, initial_cache_check=args.initial_cache_check) results_file = os.path.join(args.output_dir, 'ani_af.tsv') except Exception as e: print(e) raise finally: fastani.write_cache() fastani.write_ani_af(results_file, ani_af) if args.query_genomes == args.ref_genomes: # since pairwise calculations were performed # also write out results as matrices ani_matrix_file = os.path.join(args.output_dir, 'ani_matrix.tsv') fastani.write_ani_matrix(ani_matrix_file, ani_af) af_matrix_file = os.path.join(args.output_dir, 'af_matrix.tsv') fastani.write_ani_matrix(af_matrix_file, ani_af) elapsed_time_str = str( datetime.timedelta(seconds=round(time.time() - start))) self.logger.info( f'Time to calculate ANI values (h:mm:ss): {elapsed_time_str}') self.logger.info('Done.')