Exemplo n.º 1
0
 def test_species_with_three_words_name(self):
     """Db names built from a three-word species name are parsed correctly.

     Checks prefix, type, build and species for the first name, then the
     species alone for a second three-word name.
     """
     first = EnsemblDbName("mustela_putorius_furo_core_70_1")
     expected_attrs = (
         ("prefix", "mustela_putorius_furo"),
         ("type", "core"),
         ("build", "1"),
         ("species", "Mustela putorius furo"),
     )
     # attributes are checked in the listed order, one assertion each
     for attr_name, expected in expected_attrs:
         self.assertEqual(getattr(first, attr_name), expected)

     second = EnsemblDbName("canis_lupus_familiaris_core_102_31")
     self.assertEqual(second.species, "Canis lupus familiaris")
Exemplo n.º 2
0
 def test_ensemblgenomes_names(self):
     """Names following the ensemblgenomes scheme expose both release numbers.

     Covers a species core db name and a compara db name.
     """
     core_db = EnsemblDbName("aedes_aegypti_core_5_58_1e")
     core_expected = (
         ("prefix", "aedes_aegypti"),
         ("type", "core"),
         ("release", "5"),
         ("general_release", "58"),
         ("build", "1e"),
     )
     for attr_name, expected in core_expected:
         self.assertEqual(getattr(core_db, attr_name), expected)

     compara_db = EnsemblDbName("ensembl_compara_metazoa_6_59")
     compara_expected = (
         ("release", "6"),
         ("general_release", "59"),
         ("type", "compara"),
     )
     for attr_name, expected in compara_expected:
         self.assertEqual(getattr(compara_db, attr_name), expected)
Exemplo n.º 3
0
def reduce_dirnames(dirnames, species_dbs, verbose=False, debug=False):
    """Pick out the db names that match the requested species and sort them.

    Each entry of ``dirnames`` is stripped; blank entries and entries ending
    in ".gz" are ignored. The last whitespace-separated token of an entry is
    the candidate db name, and it must start with a letter.

    ``species_dbs`` is looked up by species name. A non-empty value restricts
    the accepted db types for that species; an empty value accepts every
    type. Compara dbs are kept when "compara" is itself a key of
    ``species_dbs``.

    The selected names are returned after being passed through ``_sort_dbs``
    (per the original documentation: core dbs first, compara and variation
    last).
    """
    if debug:
        pprint(dirnames)

    report_skips = debug or verbose
    selected = []
    for entry in dirnames:
        entry = entry.strip()
        if not entry or entry.endswith(".gz"):
            continue

        # only the final token of the listing line is the db name
        candidate = entry.split()[-1]
        if not candidate[0].isalpha():
            continue

        try:
            db_name = EnsemblDbName(candidate)
        except (TypeError, RuntimeError):
            # not a species db name
            if debug:
                print(candidate)
            continue

        species = db_name.species
        if species in species_dbs:
            allowed_types = species_dbs[species]
            # membership is tested before emptiness, as in the original
            if db_name.type in allowed_types or not allowed_types:
                selected.append(db_name)
            elif report_skips:
                print("Skipping", db_name)
        elif db_name.type == "compara" and "compara" in species_dbs:
            selected.append(db_name)

    return _sort_dbs(selected)
Exemplo n.º 4
0
 def test_name_without_build(self):
     """Parsing "pongo_pygmaeus_core_49_1" yields the expected prefix, type and build."""
     parsed = EnsemblDbName("pongo_pygmaeus_core_49_1")
     for attr_name, expected in (
         ("prefix", "pongo_pygmaeus"),
         ("type", "core"),
         ("build", "1"),
     ):
         self.assertEqual(getattr(parsed, attr_name), expected)
Exemplo n.º 5
0
 def test_cmp_name(self):
     """Two instances built from the same db name compare equal."""
     db_name = "homo_sapiens_core_46_36h"
     left = EnsemblDbName(db_name)
     right = EnsemblDbName(db_name)
     self.assertEqual(left, right)
Exemplo n.º 6
0
 def test_species_with_three_words_name(self):
     """A db name built from a three-word species name is parsed correctly.

     Checks the prefix, type and build extracted from the name.
     """
     parsed = EnsemblDbName("mustela_putorius_furo_core_70_1")
     for attr_name, expected in (
         ("prefix", "mustela_putorius_furo"),
         ("type", "core"),
         ("build", "1"),
     ):
         self.assertEqual(getattr(parsed, attr_name), expected)