Пример #1
0
    def functional_test_2(self):
        '''
        Cluster the test genomes with the gANI algorithm and compare the
        resulting Cdb against the solution work directory.

        The 'comparison_algorithm' column is dropped from both tables before
        they are compared. If find_program does not report a working
        ANIcalculator, a notice is printed and the test returns early.
        '''
        genomes = self.genomes
        wd_loc = self.wd_loc
        s_wd_loc = self.s_wd_loc

        # Make sure gANI is installed; skip instead of failing when it isn't
        loc, works = find_program('ANIcalculator')
        if loc is None or not works:
            print('Cannot locate the program {0}- skipping related tests'
                  .format('ANIcalculator (for gANI)'))
            return

        args = argumentParser.parse_args(
            ['cluster', wd_loc, '--S_algorithm', 'gANI', '-g'] + genomes)
        controller = Controller()
        controller.parseArguments(args)

        # Verify
        Swd = WorkDirectory(s_wd_loc)
        wd = WorkDirectory(wd_loc)

        # Confirm Cdb.csv is correct, ignoring the comparison_algorithm column
        db1 = Swd.get_db('Cdb')
        del db1['comparison_algorithm']
        db2 = wd.get_db('Cdb')
        del db2['comparison_algorithm']
        assert compare_dfs(db1, db2), "{0} is not the same!".format('Cdb')
Пример #2
0
    def functional_test_1(self):
        '''
        Call filter on 'Enterococcus_faecalis_T2.fna'

        Runs the filter operation with the taxonomy_wf checkM method on that
        single genome, then checks the completeness stored in Chdb and in
        genomeInfo.

        NOTE(review): the original author expects this to invoke both
        prodigal and checkM - confirm against the filter implementation.
        '''
        genomes = self.genomes
        wd_loc = self.wd_loc

        # Make sure filter is called on exactly one genome, the intended one
        matches = [
            g for g in genomes if g.endswith('Enterococcus_faecalis_T2.fna')
        ]
        assert len(matches) == 1
        genome = matches[0]

        args = argumentParser.parse_args(
            ['filter', wd_loc, '-g', genome]
            + ['--checkM_method', 'taxonomy_wf'])
        controller = Controller()
        controller.parseArguments(args)

        # The value is read back from a CSV, so compare with a small
        # tolerance rather than exact float equality.
        expected = 98.28
        tolerance = 1e-6

        # Confirm Chdb.csv is correct
        wd = drep.WorkDirectory.WorkDirectory(wd_loc)
        chdb = wd.get_db('Chdb')
        completeness = chdb['Completeness'].tolist()[0]
        assert abs(completeness - expected) < tolerance, completeness

        # Confirm the same completeness is reported by genomeInfo
        Gdb = wd.get_db('genomeInfo')
        completeness = Gdb['completeness'].tolist()[0]
        assert abs(completeness - expected) < tolerance, completeness
Пример #3
0
    def unit_test_1(self):
        '''
        Run choose on a work directory whose Chdb.csv has been deleted.

        Bdb.csv is first rewritten so its 'location' column points at the
        genomes returned by load_test_genomes. After choose finishes, Chdb
        and genomeInformation must match the solution work directory (the
        run is expected to regenerate Chdb via checkM).
        '''
        work_dir = self.working_wd_loc
        bdb_csv = work_dir + '/data_tables/Bdb.csv'

        # Remove the existing checkM table
        os.remove(work_dir + '/data_tables/Chdb.csv')

        # Map genome file names to their local paths and patch Bdb with them
        name_to_path = {}
        for path in load_test_genomes():
            name_to_path[os.path.basename(path)] = path

        bdb = pd.read_csv(bdb_csv)
        bdb['location'] = bdb['genome'].map(name_to_path)
        bdb.to_csv(bdb_csv, index=False)

        # Run choose
        cli = ['choose', work_dir, '--checkM_method', 'taxonomy_wf']
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)

        # Compare the regenerated tables with the solution
        solution = WorkDirectory(self.s_wd_loc)
        result = WorkDirectory(self.working_wd_loc)
        for table in ('Chdb', 'genomeInformation'):
            expected = solution.get_db(table)
            actual = result.get_db(table)
            assert compare_dfs(expected, actual), \
                "{0} is not the same!".format(table)
Пример #4
0
    def unit_test_2(self):
        '''
        Run choose with --noQualityFiltering after deleting Chdb, Sdb and Wdb.

        Sdb, Wdb and genomeInformation are each required to differ from the
        solution work directory, and every score in the new Sdb must lie
        strictly between 0 and 5.

        NOTE(review): the original docstring referred to a --skipCheckM
        argument; the flag actually passed here is --noQualityFiltering.
        '''
        wd_loc = self.working_wd_loc

        # Delete the tables that choose is expected to (re)generate
        for table in ('Chdb', 'Sdb', 'Wdb'):
            os.remove(os.path.join(wd_loc, 'data_tables', table + '.csv'))

        # Run choose without quality filtering
        args = argumentParser.parse_args(
            ['choose', wd_loc, '--noQualityFiltering'])
        controller = Controller()
        controller.parseArguments(args)

        Swd = WorkDirectory(self.s_wd_loc)
        wd = WorkDirectory(self.working_wd_loc)
        for db in ['Sdb', 'Wdb', 'genomeInformation']:
            db1 = Swd.get_db(db)
            db2 = wd.get_db(db)
            assert not compare_dfs(db1, db2), "{0} is the same!".format(db)

        sdb = wd.get_db('Sdb')
        for s in sdb['score'].tolist():
            # Chained comparison instead of bitwise & on two booleans
            assert 0 < s < 5, s
Пример #5
0
    def taxTest1(self):
        '''
        Check the taxonomy call for the 'max' method.

        Bdb (with the 'location' column removed) must equal the solution
        table. A Tdb mismatch is only printed and never fails the test; the
        printed message attributes such differences to the centrifuge index.
        '''
        genomes = self.genomes
        wd_loc = self.wd_loc
        swd_loc = self.s_wd_loc

        # Call the command
        # NOTE(review): the centrifuge index path is specific to one machine
        args = argumentParser.parse_args(
            ['bonus', wd_loc, '-g'] + genomes
            + ['--run_tax', '--cent_index',
               '/home/mattolm/download/centrifuge/indices/b+h+v',
               '--tax_method', 'max'])
        controller = Controller()
        controller.parseArguments(args)

        # Verify
        Swd = WorkDirectory(swd_loc)
        wd = WorkDirectory(wd_loc)

        # Genome locations are not compared
        tdbS = Swd.get_db('Bdb')
        tdb = wd.get_db('Bdb')
        del tdbS['location']
        del tdb['location']
        assert compare_dfs(tdb, tdbS), "{0} is not the same!".format('Bdb')

        tdbS = Swd.get_db('Tdb')
        tdb = wd.get_db('Tdb')

        if not compare_dfs(tdb, tdbS):
            print("{0} is not the same! May be due to centrifuge index issues".
                  format('Tdb'))
            # pd.Panel no longer exists in current pandas, so print both
            # tables directly instead of building a Panel-based diff.
            print('solution:')
            print(tdbS)
            print('result:')
            print(tdb)
Пример #6
0
    def unit_tests_4(self):
        '''
        Run cluster with -pa 0.10 and require Ndb and Cdb to differ from
        the solution work directory.
        '''
        cli = ['cluster', self.working_wd_loc, '-g'] + self.genomes
        cli = cli + ['-pa', '0.10']
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)

        solution = WorkDirectory(self.s_wd_loc)
        produced = WorkDirectory(self.working_wd_loc)

        # NOTE: an equality check on Mdb used to sit here but is disabled.

        # These tables must NOT match the solution
        for table in ('Ndb', 'Cdb'):
            expected = solution.get_db(table)
            actual = produced.get_db(table)
            identical = compare_dfs(expected, actual)
            assert not identical, \
                "{0} is the same! (and shouldn't be)".format(table)
Пример #7
0
    def taxTest2(self):
        '''
        Run bonus taxonomy with the 'percent' method and compare the output
        Bdb / Tdb against the solution's BdbP / TdbP tables.
        '''
        cli = ['bonus', self.wd_loc, '-g'] + self.genomes + [
            '--run_tax',
            '--cent_index', '/home/mattolm/download/centrifuge/indices/b+h+v',
            '--tax_method', 'percent',
        ]
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)

        solution = WorkDirectory(self.s_wd_loc)
        result = WorkDirectory(self.wd_loc)

        # Bdb: genome locations are dropped before comparing
        expected_bdb = solution.get_db('BdbP')
        actual_bdb = result.get_db('Bdb')
        del expected_bdb['location']
        del actual_bdb['location']
        bdb_ok = compare_dfs(actual_bdb, expected_bdb)
        assert bdb_ok, "{0} is not the same!".format('Bdb')

        # Tdb is compared as-is
        expected_tdb = solution.get_db('TdbP')
        actual_tdb = result.get_db('Tdb')
        tdb_ok = compare_dfs(actual_tdb, expected_tdb)
        assert tdb_ok, "{0} is not the same!".format('Tdb')
Пример #8
0
    def plot_6_test_1(self):
        '''
        Run analyze for plot 6 twice: once on the work directory as it is,
        and once more after Widb.csv and the earlier figures were deleted.

        Both runs must leave exactly 'Winning_genomes.pdf', non-empty, in
        the figures folder.
        '''
        expected_figs = ['Winning_genomes.pdf']

        def run_and_check():
            # Produce plot 6, then verify the contents of the figures folder
            args = argumentParser.parse_args(
                ['analyze', self.working_wd_loc, '-pl', '6'])
            Controller().parseArguments(args)

            pattern = os.path.join(self.working_wd_loc, 'figures', '') + '*'
            found = [os.path.basename(path) for path in glob.glob(pattern)]
            assert sorted(found) == sorted(expected_figs)
            for path in glob.glob(pattern):
                assert os.path.getsize(path) > 0
            return pattern

        # First pass: everything is present
        pattern = run_and_check()

        # Second pass: Widb is gone and the old figures are cleared out
        os.remove(os.path.join(self.working_wd_loc, 'data_tables', 'Widb.csv'))
        for stale in glob.glob(pattern):
            os.remove(stale)

        run_and_check()
Пример #9
0
    def functional_test_2(self):
        '''
        Run analyze for all plots after Mdb, Cdb and Bdb have been deleted.

        There are no assertions; the call only has to return without
        raising.
        '''
        workdir = drep.WorkDirectory.WorkDirectory(self.working_wd_loc)

        # Delete the tables the plots would normally be built from
        for table_file in ('Mdb.csv', 'Cdb.csv', 'Bdb.csv'):
            os.remove(os.path.join(workdir.get_dir('data_tables'), table_file))

        cli = ['analyze', self.working_wd_loc, '-pl', 'a']
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)
Пример #10
0
def test_dereplicate_8(self):
    '''
    Compare greedy secondary clustering with the regular run on two genomes
    taken from the large genome set (indices 0 and 20).

    Both runs use fastANI with multiround primary clustering. The two Cdb
    tables must have the same length, the same cluster-size sets, the same
    primary cluster names and the same genomes; the greedy Cdb must carry
    exactly one extra column, 'greedy_representative'.

    The test prints a notice and returns when the large genome set is too
    small to provide both genomes.
    '''
    # Index 20 is used below, so at least 21 genomes are required; an
    # emptiness check alone would let a short set fail with IndexError.
    if len(self.large_genome_set) <= 20:
        print("*** THIS TEST ONLY WORKS ON MO'S DEVELOPMENT MACHINE ***")
        return

    genomes = [self.large_genome_set[0], self.large_genome_set[20]]
    wd_loc = self.wd_loc
    wd_loc2 = self.wd_loc2

    # Get greedy results
    args = argumentParser.parse_args([
        'compare', wd_loc2, '--S_algorithm', 'fastANI',
        '--multiround_primary_clustering', '--primary_chunksize', '50',
        '--greedy_secondary_clustering', '-sa', '0.95', '-pa', '0.99', '-g'
    ] + genomes)
    Controller().parseArguments(args)
    wd = WorkDirectory(wd_loc2)
    CSdb = wd.get_db('Cdb')

    # Run normal
    args = argumentParser.parse_args([
        'compare', wd_loc, '--S_algorithm', 'fastANI',
        '--multiround_primary_clustering', '--primary_chunksize', '50', '-sa',
        '0.95', '-pa', '0.99', '-g'
    ] + genomes)
    Controller().parseArguments(args)

    # Verify they're the same
    wd = WorkDirectory(wd_loc)
    Cdb = wd.get_db('Cdb')

    assert len(CSdb) == len(Cdb)
    for c in ['primary_cluster', 'secondary_cluster']:
        greedy_counts = CSdb[c].value_counts().to_dict()
        normal_counts = Cdb[c].value_counts().to_dict()

        # Cluster sizes must agree for both clustering levels
        assert set(greedy_counts.values()) == set(normal_counts.values()), c

        # Cluster names are only compared for the primary clusters
        if c != 'secondary_cluster':
            assert set(greedy_counts.keys()) == set(normal_counts.keys()), c
    assert set(CSdb['genome'].tolist()) == set(Cdb['genome'].tolist())
    assert set(CSdb.columns) - set(Cdb.columns) == {'greedy_representative'}
Пример #11
0
    def functional_test_1(self):
        '''
        Run analyze for all plots and check that exactly the six expected
        PDF files exist in the figures folder and that none of them is empty.
        '''
        cli = ['analyze', self.working_wd_loc, '-pl', 'a']
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)

        expected_figs = [
            'Cluster_scoring.pdf',
            'Clustering_scatterplots.pdf',
            'Primary_clustering_dendrogram.pdf',
            'Secondary_clustering_dendrograms.pdf',
            'Winning_genomes.pdf',
            'Secondary_clustering_MDS.pdf',
        ]

        pattern = os.path.join(self.working_wd_loc, 'figures', '') + '*'
        produced = []
        for path in glob.glob(pattern):
            produced.append(os.path.basename(path))

        assert sorted(produced) == sorted(expected_figs)
        for path in glob.glob(pattern):
            assert os.path.getsize(path) > 0
Пример #12
0
    def unit_tests_5(self):
        '''
        Run cluster with --S_algorithm gANI and require Cdb and Mdb to match
        the solution work directory.
        '''
        cli = ['cluster', self.working_wd_loc, '-g'] + self.genomes
        cli = cli + ['--S_algorithm', 'gANI']
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)

        solution = WorkDirectory(self.s_wd_loc)
        produced = WorkDirectory(self.working_wd_loc)

        # Both tables have to be identical to the stored solution
        for table in ('Cdb', 'Mdb'):
            expected = solution.get_db(table)
            actual = produced.get_db(table)
            matches = compare_dfs(expected, actual)
            assert matches, "{0} is not the same!".format(table)
Пример #13
0
    def unit_tests_3(self):
        '''
        Run cluster with --SkipMash and require Cdb, Ndb and Mdb to differ
        from the solution work directory.
        '''
        cli = ['cluster', self.working_wd_loc, '-g'] + self.genomes
        cli = cli + ['--SkipMash']
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)

        solution = WorkDirectory(self.s_wd_loc)
        produced = WorkDirectory(self.working_wd_loc)

        # None of these tables may match the stored solution
        for table in ('Cdb', 'Ndb', 'Mdb'):
            expected = solution.get_db(table)
            actual = produced.get_db(table)
            identical = compare_dfs(expected, actual)
            assert not identical, \
                "{0} is the same! (and shouldn't be)".format(table)
Пример #14
0
    def functional_test_2(self):
        '''
        Run compare on the test genomes and check the work directory against
        the solution, skipping the tables listed in the ensure_identicle call.

        sanity_check is run on the solution directory before and after, to
        catch the solution being overwritten by the run.
        '''
        solution_loc = self.s_wd_loc
        sanity_check(WorkDirectory(solution_loc))

        cli = ['compare', self.wd_loc, '-g'] + self.genomes
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)

        # Compare result and solution
        solution = WorkDirectory(solution_loc)
        result = WorkDirectory(self.wd_loc)
        skipped = [
            'Bdb', 'Chdb', 'Sdb', 'Wdb', 'Widb', 'genomeInformation', 'Mdb'
        ]
        ensure_identicle(solution, result, skip=skipped)

        # The solution directory must still be intact
        sanity_check(solution)
Пример #15
0
    def functional_test_1(self):
        '''
        Run dereplicate (checkM method taxonomy_wf) on the test genomes and
        check the work directory against the solution, skipping Bdb and Mdb.

        sanity_check is run on the solution directory before and after, to
        catch the solution being overwritten by the run.
        '''
        solution_loc = self.s_wd_loc
        sanity_check(WorkDirectory(solution_loc))

        cli = ['dereplicate', self.wd_loc, '-g'] + self.genomes
        cli = cli + ['--checkM_method', 'taxonomy_wf']
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)

        # Compare result and solution
        solution = WorkDirectory(solution_loc)
        result = WorkDirectory(self.wd_loc)
        ensure_identicle(solution, result, skip=['Bdb', 'Mdb'])

        # The solution directory must still be intact
        sanity_check(solution)
Пример #16
0
def test_dereplicate_5(self):
    '''
    Compare greedy secondary clustering with the regular run on the first
    ten genomes of the large genome set.

    Both Cdb tables must agree on length, cluster sizes, cluster names and
    genomes; the greedy one must have exactly one extra column,
    'greedy_representative'. Returns early when no large genomes exist.
    '''
    genomes = self.large_genome_set[:10]
    normal_loc = self.wd_loc
    greedy_loc = self.wd_loc2

    if len(genomes) == 0:
        print("*** THIS TEST ONLY WORKS ON MO'S DEVELOPMENT MACHINE ***")
        return

    # Greedy run
    greedy_cli = [
        'compare', greedy_loc, '--S_algorithm', 'fastANI', '--SkipMash',
        '--greedy_secondary_clustering', '-sa', '0.95', '-g'
    ] + genomes
    args = argumentParser.parse_args(greedy_cli)
    Controller().parseArguments(args)
    greedy_cdb = WorkDirectory(greedy_loc).get_db('Cdb')

    # Regular run
    normal_cli = [
        'compare', normal_loc, '--S_algorithm', 'fastANI', '--SkipMash',
        '-sa', '0.95', '-g'
    ] + genomes
    args = argumentParser.parse_args(normal_cli)
    Controller().parseArguments(args)
    normal_cdb = WorkDirectory(normal_loc).get_db('Cdb')

    # The two clusterings have to agree
    assert len(greedy_cdb) == len(normal_cdb)
    for column in ('primary_cluster', 'secondary_cluster'):
        greedy_counts = greedy_cdb[column].value_counts().to_dict()
        normal_counts = normal_cdb[column].value_counts().to_dict()
        assert set(greedy_counts.values()) == set(normal_counts.values()), \
            column
        assert set(greedy_counts.keys()) == set(normal_counts.keys()), column

    assert set(greedy_cdb['genome'].tolist()) == \
        set(normal_cdb['genome'].tolist())
    extra_columns = set(greedy_cdb.columns) - set(normal_cdb.columns)
    assert extra_columns == {'greedy_representative'}
Пример #17
0
def test_dereplicate_4(self):
    '''
    Compare regular primary clustering with multiround (chunked) primary
    clustering on the whole large genome set.

    Both Cdb tables must agree on length, cluster sizes, cluster names and
    genomes; the chunked one must have exactly three extra columns:
    'length', 'subcluster' and 'primary_representitive'. Returns early when
    no large genomes exist.
    '''
    genomes = self.large_genome_set
    chunked_loc = self.wd_loc
    plain_loc = self.wd_loc2

    if len(genomes) == 0:
        print("*** THIS TEST ONLY WORKS ON MO'S DEVELOPMENT MACHINE ***")
        return

    # Regular run
    plain_cli = [
        'compare', plain_loc, '--S_algorithm', 'fastANI', '--SkipSecondary',
        '--primary_chunksize', '50', '-g'
    ] + genomes
    args = argumentParser.parse_args(plain_cli)
    Controller().parseArguments(args)
    plain_cdb = WorkDirectory(plain_loc).get_db('Cdb')

    # Chunked run
    chunked_cli = [
        'compare', chunked_loc, '--S_algorithm', 'fastANI', '--SkipSecondary',
        '--multiround_primary_clustering', '--primary_chunksize', '50', '-g'
    ] + genomes
    args = argumentParser.parse_args(chunked_cli)
    Controller().parseArguments(args)
    chunked_cdb = WorkDirectory(chunked_loc).get_db('Cdb')

    # The two clusterings have to agree
    assert len(plain_cdb) == len(chunked_cdb)
    for column in ('primary_cluster', 'secondary_cluster'):
        plain_counts = plain_cdb[column].value_counts().to_dict()
        chunked_counts = chunked_cdb[column].value_counts().to_dict()
        assert set(plain_counts.values()) == set(chunked_counts.values())
        assert set(plain_counts.keys()) == set(chunked_counts.keys())

    assert set(plain_cdb['genome'].tolist()) == \
        set(chunked_cdb['genome'].tolist())
    extra_columns = set(chunked_cdb.columns) - set(plain_cdb.columns)
    assert extra_columns == {'length', 'subcluster', 'primary_representitive'}
Пример #18
0
def test_dereplicate_6(self):
    '''
    Run compare with fastANI on the zipped genomes and check the ANI values
    written to Ndb: all within [0, 1] and not all identical.
    '''
    test_utils.sanity_check(WorkDirectory(self.s_wd_loc))

    cli = ['compare', self.wd_loc, '--S_algorithm', 'fastANI', '-g']
    cli = cli + self.zipped_genomes
    args = argumentParser.parse_args(cli)
    Controller().parseArguments(args)

    # Inspect the ANI column of the resulting Ndb
    ani_values = WorkDirectory(self.wd_loc).get_db('Ndb')['ani'].tolist()
    highest, lowest = max(ani_values), min(ani_values)
    assert highest <= 1
    assert lowest >= 0
    assert len(set(ani_values)) > 1
Пример #19
0
    def functional_test_1(self):
        '''
        Run cluster with default settings on the test genomes and require
        the resulting Cdb to match the solution work directory.
        '''
        cli = ['cluster', self.wd_loc, '-g'] + self.genomes
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)

        # Load Cdb from the solution and from the fresh run
        expected = WorkDirectory(self.s_wd_loc).get_db('Cdb')
        actual = WorkDirectory(self.wd_loc).get_db('Cdb')

        matches = compare_dfs(expected, actual)
        assert matches, "{0} is not the same!".format('Cdb')
Пример #20
0
def test_taxonomy_4(self):
    '''
    Run the bonus taxonomy step ('percent' method) after deleting the
    existing centrifuge and prodigal data folders from the work directory.

    The resulting Bdb (without 'location') and Tdb must match the solution's
    BdbP and TdbP tables. When find_program does not report a working
    centrifuge, a notice is printed and nothing else is run.
    '''
    _loc, works = drep.d_bonus.find_program('centrifuge')
    if not works:
        print('Centrifuge not installed- skipping tests')
        return

    genomes = self.genomes
    wd_loc = self.wd_loc
    swd_loc = self.s_wd_loc

    # Remove previous data run
    shutil.rmtree(os.path.join(self.wd_loc, 'data', 'centrifuge'))
    shutil.rmtree(os.path.join(self.wd_loc, 'data', 'prodigal'))

    # Call the command
    # NOTE(review): the centrifuge index path is specific to one machine
    args = argumentParser.parse_args(
        ['bonus', wd_loc, '-g'] + genomes
        + ['--run_tax', '--cent_index',
           '/home/mattolm/download/centrifuge/indices/b+h+v',
           '--tax_method', 'percent'])
    controller = Controller()
    controller.parseArguments(args)

    # Verify
    Swd = WorkDirectory(swd_loc)
    wd = WorkDirectory(wd_loc)

    # Genome locations are not compared
    tdbS = Swd.get_db('BdbP')
    tdb = wd.get_db('Bdb')
    del tdbS['location']
    del tdb['location']
    assert test_utils.compare_dfs(
        tdb, tdbS), "{0} is not the same!".format('Bdb')

    tdbS = Swd.get_db('TdbP')
    tdb = wd.get_db('Tdb')
    assert test_utils.compare_dfs(
        tdb, tdbS), "{0} is not the same!".format('Tdb')
Пример #21
0
    def skipsecondary_test(self):
        '''
        Run cluster with --SkipSecondary and require the resulting Ndb to be
        empty.
        '''
        cli = ['cluster', self.wd_loc, '-g'] + self.genomes
        cli = cli + ['--SkipSecondary']
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)

        solution = WorkDirectory(self.s_wd_loc)
        result = WorkDirectory(self.wd_loc)

        # Mdb is still loaded from both directories, but the equality check
        # on it is currently disabled.
        solution.get_db('Mdb')
        result.get_db('Mdb')

        # With secondary clustering skipped, Ndb must have no rows
        ndb = result.get_db('Ndb')
        assert ndb.empty, 'Ndb is not empty'
Пример #22
0
def test_dereplicate_1(self):
    '''
    Run dereplicate with ANImf (checkM method taxonomy_wf, --debug) on the
    test genomes and check the work directory against the solution, skipping
    the tables listed in the ensure_identicle call.

    sanity_check is run on the solution directory before and after, to catch
    the solution being overwritten by the run.
    '''
    solution_loc = self.s_wd_loc
    test_utils.sanity_check(WorkDirectory(solution_loc))

    cli = ['dereplicate', self.wd_loc, '-g'] + self.genomes
    cli = cli + [
        '--checkM_method', 'taxonomy_wf', '--debug', '--S_algorithm', 'ANImf'
    ]
    args = argumentParser.parse_args(cli)
    Controller().parseArguments(args)

    # Compare result and solution
    solution = WorkDirectory(solution_loc)
    result = WorkDirectory(self.wd_loc)
    skipped = ['Bdb', 'Mdb', 'Sdb', 'Wdb', 'genomeInformation', 'Widb']
    test_utils.ensure_identicle(solution, result, skip=skipped)

    # The solution directory must still be intact
    test_utils.sanity_check(solution)
Пример #23
0
def test_dereplicate_3(self):
    '''
    Run compare with the goANI algorithm and require a non-empty Ndb.

    sanity_check is run on the solution directory before and after, to catch
    the solution being overwritten by the run.
    '''
    solution_loc = self.s_wd_loc
    test_utils.sanity_check(WorkDirectory(solution_loc))

    cli = ['compare', self.wd_loc, '--S_algorithm', 'goANI', '-g']
    cli = cli + self.genomes
    args = argumentParser.parse_args(cli)
    Controller().parseArguments(args)

    # The run must have produced at least one comparison row
    solution = WorkDirectory(solution_loc)
    result = WorkDirectory(self.wd_loc)
    comparisons = result.get_db('Ndb')
    assert len(comparisons) > 0

    # The solution directory must still be intact
    test_utils.sanity_check(solution)
Пример #24
0
    def functional_test_3(self):
        '''
        Run cluster with the ANImf algorithm and require the resulting Cdb
        to match the solution once 'comparison_algorithm' is dropped from
        both tables.
        '''
        cli = ['cluster', self.wd_loc, '--S_algorithm', 'ANImf', '-g']
        cli = cli + self.genomes
        args = argumentParser.parse_args(cli)
        Controller().parseArguments(args)

        solution = WorkDirectory(self.s_wd_loc)
        result = WorkDirectory(self.wd_loc)

        # The algorithm column is excluded from the comparison
        expected = solution.get_db('Cdb')
        del expected['comparison_algorithm']
        actual = result.get_db('Cdb')
        del actual['comparison_algorithm']

        matches = compare_dfs(expected, actual)
        assert matches, "{0} is not the same!".format('Cdb')
Пример #25
0
    def unit_tests_1(self):
        '''
        Test a normal run of cluster

        Cdb and Ndb from the run must match the solution work directory.
        On a mismatch, diagnostics are printed before the assertion fails:
        a per-pair ANI comparison when both tables have the 'reference',
        'querry' and 'ani' columns, otherwise the two tables themselves.
        '''
        # normal complete run
        args = argumentParser.parse_args(
            ['cluster', self.working_wd_loc, '-g'] + self.genomes)
        controller = Controller()
        controller.parseArguments(args)

        # Verify
        Swd = WorkDirectory(self.s_wd_loc)
        wd = WorkDirectory(self.working_wd_loc)

        ani_cols = ['reference', 'querry', 'ani']

        # Confirm the following are correct (Mdb is deliberately left out):
        for db in ['Cdb', 'Ndb']:
            db1 = Swd.get_db(db)
            db2 = wd.get_db(db)

            # Compare once, on the full tables; the diagnostics below work on
            # separate frames so they cannot influence the assertion.
            same = compare_dfs(db1, db2)
            if not same:
                print("{0} is not the same!".format(db))

                has_ani = all(
                    c in db1.columns and c in db2.columns for c in ani_cols)
                if has_ani:
                    # rename() returns new frames, so no in-place edits are
                    # made on column slices of the original tables.
                    sol = db1[ani_cols].rename(columns={'ani': 'ani1'})
                    new = db2[ani_cols].rename(columns={'ani': 'ani2'})
                    merged = pd.merge(sol, new, on=['reference', 'querry'])
                    merged = merged.sort_values(['reference', 'querry'])
                    print(merged)
                    print('rows with differing ANI:')
                    print(merged[merged['ani1'] != merged['ani2']])
                else:
                    print('solution:')
                    print(db1)
                    print('result:')
                    print(db2)

            assert same, "{0} is not the same!".format(db)
Пример #26
0
    def unit_tests_1(self):
        '''
        Test a normal run of cluster

        Cdb and Ndb from the run must match the solution work directory; the
        Ndb 'ani' values are reduced to four decimals in both tables first.
        On a mismatch, diagnostics are printed before the assertion fails.

        NOTE(review): the previous version rebound db1/db2 to trimmed, sorted
        frames inside the failure branch, so the final assert checked those
        instead of the full tables. The assert now uses the full-table
        comparison - confirm this matches the intended strictness.
        '''
        # normal complete run
        args = argumentParser.parse_args(
            ['cluster', self.working_wd_loc, '-g'] + self.genomes)
        controller = Controller()
        controller.parseArguments(args)

        # Verify
        Swd = WorkDirectory(self.s_wd_loc)
        wd = WorkDirectory(self.working_wd_loc)

        ani_cols = ['reference', 'querry', 'ani']

        # Confirm the following are correct (Mdb is deliberately left out):
        for db in ['Cdb', 'Ndb']:
            db1 = Swd.get_db(db)
            db2 = wd.get_db(db)

            # get rid of some precision on the ANI
            if db == 'Ndb':
                db1['ani'] = [float("{0:.4f}".format(x)) for x in db1['ani']]
                db2['ani'] = [float("{0:.4f}".format(x)) for x in db2['ani']]

            # Compare once, on the full tables; the diagnostics below work on
            # separate frames so they cannot influence the assertion.
            same = compare_dfs(db1, db2)
            if not same:
                print("{0} is not the same!".format(db))

                has_ani = all(
                    c in db1.columns and c in db2.columns for c in ani_cols)
                if has_ani:
                    sol = db1[ani_cols]
                    new = db2[ani_cols]
                    print('ANI columns alone match: {0}'.format(
                        compare_dfs(sol, new)))

                    sol = sol.sort_values(['reference', 'querry'])
                    new = new.sort_values(['reference', 'querry'])
                    print(sol)
                    print(new)

                    # pd.Panel no longer exists in current pandas; a merge on
                    # the genome pair shows the differing rows instead.
                    merged = pd.merge(sol, new, on=['reference', 'querry'],
                                      suffixes=('_solution', '_result'))
                    print('rows with differing ANI:')
                    print(merged[
                        merged['ani_solution'] != merged['ani_result']])
                else:
                    print('solution:')
                    print(db1)
                    print('result:')
                    print(db2)

            assert same, "{0} is not the same!".format(db)