def test_reannotate_genome_official(self):
        """
        Upload the Rhodobacter GenBank genome, re-annotate it with Prokka and
        check that the functions of one known feature were replaced.

        The original author notes that this test takes about 25 minutes to run.
        Both genome objects (before and after annotation) are dumped as JSON
        into the scratch directory so they can be inspected after the run.

        :return: None
        """
        scratch = "/kb/module/work/tmp"
        gbff_name = "rhodobacter_genomic.gbff"
        feature_id = "RSP_1441"

        def functions_of(genome, label):
            # Return the "functions" of the feature under test, failing loudly
            # when the feature is missing so the test cannot pass vacuously.
            for feature in genome["features"]:
                if feature["id"] == feature_id:
                    return feature["functions"]
            self.fail("Feature {} not found in the {} genome".format(feature_id, label))

        gfu = GenomeFileUtil(os.environ["SDK_CALLBACK_URL"])

        # The uploader is handed a path inside the scratch directory, so copy
        # the fixture there first.
        genome_test_file = os.path.join("/kb/module/test/data/", gbff_name)
        genome_test_file_scratch = os.path.join(scratch, gbff_name)
        copyfile(genome_test_file, genome_test_file_scratch)

        genome_ref_original = gfu.genbank_to_genome({"file": {"path": genome_test_file_scratch},
                                                     "workspace_name": self.getWsName(),
                                                     "genome_name": gbff_name,
                                                     "generate_ids_if_needed": 1})["genome_ref"]

        genome_name = "Rhodoannotated_by_prokka"
        print("ABOUT TO ANNOTATE GENOME")
        # The return value is not needed: the annotated genome is fetched below
        # by workspace name and object name.
        self.getImpl().annotate(self.getContext(),
                                {"object_ref": genome_ref_original,
                                 "output_workspace": self.getWsName(),
                                 "output_genome_name": genome_name,
                                 "evalue": None,
                                 "fast": 0,
                                 "gcode": 0,
                                 "genus": "genus",
                                 "kingdom": "Bacteria",
                                 "metagenome": 0,
                                 "mincontiglen": 1,
                                 "norrna": 0,
                                 "notrna": 0,
                                 "rawproduct": 0,
                                 "rfam": 1,
                                 "scientific_name": "RhodoBacter"
                                 })

        genome_ref_new = self.getWsName() + "/" + genome_name

        ws_client = self.getWsClient()
        un_annotated_genome = ws_client.get_objects([{"ref": genome_ref_original}])[0]["data"]
        re_annotated_genome = ws_client.get_objects([{"ref": genome_ref_new}])[0]["data"]

        # Write the dumps before asserting so they exist even when the test fails.
        with open(os.path.join(scratch, "OUTPUT_GENOME_BEFORE.txt"), "w") as outfile:
            json.dump(un_annotated_genome, outfile)
        with open(os.path.join(scratch, "OUTPUT_GENOME_AFTER.txt"), "w") as outfile:
            # Fixed: this file previously received the un-annotated genome again.
            json.dump(re_annotated_genome, outfile)

        self.assertEqual(functions_of(un_annotated_genome, "original"),
                         ["regulatory protein, GntR family"])
        self.assertEqual(functions_of(re_annotated_genome, "re-annotated"),
                         ["N-acetylglucosamine repressor"])
    def getGenomeInfo(self, genome_basename, item_i=0):
        """
        Return workspace object info for a test genome, uploading it on first use.

        Results are cached on the test class in two parallel lists,
        ``genomeInfo_list`` and ``genomeName_list``, indexed by ``item_i``.
        A cached entry is returned only when the name stored in that slot
        equals ``genome_basename``; otherwise the slot is cleared and the
        genome is uploaded again.

        :param genome_basename: base name of the fixture file
            ``data/genomes/<genome_basename>.gbff.gz``; also used as the name
            of the uploaded genome object.
        :param item_i: index of the cache slot to use (default 0).
        :return: the first element of the ``get_object_info_new`` result for
            the uploaded genome.
        """
        test_cls = self.__class__

        # 0) try the class-level cache first
        if hasattr(test_cls, 'genomeInfo_list'):
            try:
                info = test_cls.genomeInfo_list[item_i]
                name = test_cls.genomeName_list[item_i]
            except (AttributeError, IndexError):
                # The slot (or the parallel name list) does not exist yet.
                pass
            else:
                if info is not None:
                    if name == genome_basename:
                        return info
                    # The slot holds a different genome: invalidate it.
                    test_cls.genomeInfo_list[item_i] = None
                    test_cls.genomeName_list[item_i] = None

        # 1) transform genbank to kbase genome object and upload to ws
        shared_dir = "/kb/module/work/tmp"
        genome_data_file = 'data/genomes/' + genome_basename + '.gbff.gz'
        genome_file = os.path.join(shared_dir, os.path.basename(genome_data_file))
        shutil.copy(genome_data_file, genome_file)

        SERVICE_VER = 'release'
        GFU = GenomeFileUtil(os.environ['SDK_CALLBACK_URL'],
                             token=self.getContext()['token'],
                             service_ver=SERVICE_VER)
        print("UPLOADING genome: "+genome_basename+" to WORKSPACE "+self.getWsName()+" ...")
        genome_upload_result = GFU.genbank_to_genome({'file': {'path': genome_file},
                                                      'workspace_name': self.getWsName(),
                                                      'genome_name': genome_basename})
        pprint(genome_upload_result)
        genome_ref = genome_upload_result['genome_ref']
        new_obj_info = self.getWsClient().get_object_info_new({'objects': [{'ref': genome_ref}]})[0]

        # 2) store it, creating and padding both parallel lists so that slot
        #    item_i exists in each of them
        for attr in ('genomeInfo_list', 'genomeName_list'):
            if not hasattr(test_cls, attr):
                setattr(test_cls, attr, [])
            slots = getattr(test_cls, attr)
            while len(slots) <= item_i:
                slots.append(None)

        test_cls.genomeInfo_list[item_i] = new_obj_info
        test_cls.genomeName_list[item_i] = genome_basename
        return new_obj_info
# Example #3
# 0
def load_genbank_file(callback_url, ws_name, local_file, target_name):
    """
    Upload a local Genbank (.gbk/.gbff/etc.) file to a workspace as a Genome
    object and return its reference.

    Per the original author's note, the upload also builds an Assembly that
    holds the genome sequence.

    :param callback_url: URL handed to the GenomeFileUtil client.
    :param ws_name: name of the workspace the genome is saved to.
    :param local_file: path of the Genbank file to upload.
    :param target_name: name given to the new Genome object.
    :return: the ``genome_ref`` entry of the uploader's response, or ``None``
        when the response has no such key.
    """
    upload_params = {
        "file": {"path": local_file},
        "genome_name": target_name,
        "workspace_name": ws_name,
        "source": "Ensembl",
        "type": "User upload",
        "generate_ids_if_needed": 1,
    }
    uploader = GenomeFileUtil(callback_url)
    response = uploader.genbank_to_genome(upload_params)
    # The response is a dict; only the reference string is handed back.
    return response.get('genome_ref')
 def loadGenome(self):
     """
     Return the reference of the shared 'test_genome', uploading it once.

     The reference is cached as ``genome_ref`` on the test class, so only the
     first call copies ``data/minimal.gbff`` into scratch and uploads it.
     """
     owner = self.__class__
     try:
         # Already uploaded: reuse the cached reference.
         return owner.genome_ref
     except AttributeError:
         pass

     scratch_copy = os.path.join(self.scratch, 'minimal.gbff')
     shutil.copy(os.path.join('data', 'minimal.gbff'), scratch_copy)

     uploader = GenomeFileUtil(self.callback_url)
     upload_params = {
         'file': {'path': scratch_copy},
         'workspace_name': self.getWsName(),
         'genome_name': 'test_genome',
         'source': 'Ensembl',
         'generate_ids_if_needed': 1,
         'generate_missing_genes': 1
     }
     uploaded_ref = uploader.genbank_to_genome(upload_params)['genome_ref']
     owner.genome_ref = uploaded_ref
     return uploaded_ref
    def prepare_data(cls):
        """
        Create the workspace fixtures shared by the Fama tests.

        Presumably decorated as a classmethod outside this view -- confirm.
        In order, this method:

        * copies the FASTQ and GenBank fixtures from ``data`` into
          ``<scratch>/fama_test_data``;
        * uploads a paired-end and a single-end reads object;
        * uploads two genomes and saves a GenomeSet containing both;
        * saves row and column AttributeMapping objects, a TraitMatrix that
          references them, and a FunctionalProfile based on that matrix.

        Class attributes set: ``test_directory_path``, ``pe_reads_ref``,
        ``se_reads_ref``, ``genome1_ref``, ``genome2_ref``, ``genomeset_ref``
        and ``func_profile_ref``.
        """
        wd = os.getcwd()
        print('WORKING DIRECTORY', wd)
        ru = ReadsUtils(cls.callback_url)
        workspace_name = cls.ws_info[1]

        def save_object(obj_type, name, data, provenance):
            # Save one object to the test workspace and return a reference
            # string built from fields 6, 0 and 4 of the returned info
            # (presumably workspace id / object id / version -- confirm
            # against the Workspace object_info definition).
            info = cls.wsClient.save_objects({
                'workspace': workspace_name,
                'objects': [{
                    'type': obj_type,
                    'data': data,
                    'name': name,
                    'meta': {},
                    'provenance': provenance
                }]
            })[0]
            return "{}/{}/{}".format(info[6], info[0], info[4])

        # --- local fixture files ---------------------------------------
        test_directory_name = 'fama_test_data'
        cls.test_directory_path = os.path.join(cls.scratch,
                                               test_directory_name)
        print('TEST DIRECTORY', cls.test_directory_path)
        # exist_ok: the scratch directory may survive a previous test run.
        os.makedirs(cls.test_directory_path, exist_ok=True)
        for fixture_name in ('test_fastq_pe1.fq', 'test_fastq_pe2.fq',
                             'MR-1.gbff', 'SB2B.gbff'):
            shutil.copy(os.path.join('data', fixture_name),
                        cls.test_directory_path)

        # --- reads -------------------------------------------------------
        fwd_fastq = os.path.join(cls.test_directory_path, 'test_fastq_pe1.fq')
        rev_fastq = os.path.join(cls.test_directory_path, 'test_fastq_pe2.fq')
        reads_params = {
            'fwd_file': fwd_fastq,
            'rev_file': rev_fastq,
            'sequencing_tech': 'Illumina',
            'wsname': workspace_name,
            'single_genome': 0,
            'name': 'Fama_test_pe_input',
            'interleaved': 0
        }
        cls.pe_reads_ref = ru.upload_reads(reads_params)

        se_reads_params = {
            'fwd_file': fwd_fastq,
            'sequencing_tech': 'Illumina',
            'wsname': workspace_name,
            'single_genome': 0,
            'name': 'Fama_test_se_input'
        }
        cls.se_reads_ref = ru.upload_reads(se_reads_params)

        # --- genomes and genome set --------------------------------------
        gu = GenomeFileUtil(cls.callback_url)

        def upload_genome(file_name, genome_name):
            # Upload one GenBank fixture and return its genome reference.
            return gu.genbank_to_genome({
                'file': {
                    'path': os.path.join(cls.test_directory_path, file_name)
                },
                'genome_name': genome_name,
                'workspace_name': workspace_name
            })['genome_ref']

        cls.genome1_ref = upload_genome('MR-1.gbff',
                                        'Shewanella_oneidensis_MR1')
        cls.genome2_ref = upload_genome('SB2B.gbff',
                                        'Shewanella_amazonensis_SB2B')

        genome_refs = [cls.genome1_ref, cls.genome2_ref]
        test_GenomeSet = {
            'description': 'Test GenomeSet',
            'elements': {ref: {'ref': ref} for ref in genome_refs}
        }
        provenance = [{
            'input_ws_objects': genome_refs,
            'service': 'kb_SetUtilities',
            'method': 'KButil_Build_GenomeSet'
        }]
        cls.genomeset_ref = save_object('KBaseSearch.GenomeSet',
                                        'Test_GenomeSet', test_GenomeSet,
                                        provenance)

        # --- row attribute mapping ---------------------------------------
        # Each instance value lists name, description and category, matching
        # the three "attributes" declared above it.
        attribute_mapping_data = {
            "attributes": [{
                "attribute": "name",
                "source": "Fama"
            }, {
                "attribute": "description",
                "source": "Fama"
            }, {
                "attribute": "category",
                "source": "Fama"
            }],
            "instances": {
                "AmoA_PmoA": [
                    "AmoA_PmoA",
                    "amoA-pmoA; methane/ammonia monooxygenase subunit A [EC:1.14.18.3 1.14.99.39]",
                    "Ammonium oxidation"
                ],
                "AmoB_PmoB": [
                    "AmoB_PmoB",
                    "amoB-pmoB; methane/ammonia monooxygenase subunit B",
                    "Ammonium oxidation"
                ],
                "AmoC_PmoC": [
                    "AmoC_PmoC",
                    "amoC-pmoC; methane/ammonia monooxygenase subunit C",
                    "Ammonium oxidation"
                ],
                "AnfG_VnfG": [
                    "AnfG_VnfG", "Nitrogenase delta subunit [EC:1.18.6.1]",
                    "Nitrogen fixation"
                ],
                "HAO": [
                    "HAO", "hao; hydroxylamine dehydrogenase [EC:1.7.2.6]",
                    "Anaerobic ammonium oxidation"
                ],
                "Hzo": [
                    "Hzo", "Hydrazine dehydrogenase (EC:1.7.2.8)",
                    "Anaerobic ammonium oxidation"
                ],
                "HzsA": [
                    "HzsA", "Hydrazine synthase subunit A (EC:1.7.2.7)",
                    "Anaerobic ammonium oxidation"
                ],
                "HzsB": [
                    "HzsB", "Hydrazine synthase subunit B (EC:1.7.2.7)",
                    "Anaerobic ammonium oxidation"
                ],
                "HzsC": [
                    "HzsC", "Hydrazine synthase subunit C (EC:1.7.2.7)",
                    "Anaerobic ammonium oxidation"
                ],
                "NapA": [
                    "NapA",
                    "Periplasmic nitrate reductase precursor (EC 1.7.99.4)",
                    "Nitrate dissimilatory reduction"
                ],
                "NapB": [
                    "NapB",
                    "Periplasmic nitrate reductase cytochrome c550-type subunit",
                    "Nitrate dissimilatory reduction"
                ],
                "NapC": [
                    "NapC", "Cytochrome c-type protein NapC",
                    "Nitrate dissimilatory reduction"
                ],
                "NapD": [
                    "NapD", "Periplasmic nitrate reductase component NapD",
                    "Nitrate dissimilatory reduction"
                ],
                "NapE": [
                    "NapE", "Periplasmic nitrate reductase component NapE",
                    "Nitrate dissimilatory reduction"
                ],
                "NapF": [
                    "NapF",
                    "Ferredoxin-type protein NapF (periplasmic nitrate reductase)",
                    "Nitrate dissimilatory reduction"
                ],
                "NapG": [
                    "NapG",
                    "Ferredoxin-type protein NapG (periplasmic nitrate reductase)",
                    "Nitrate dissimilatory reduction"
                ],
                "NapH": [
                    "NapH",
                    "Polyferredoxin NapH (periplasmic nitrate reductase)",
                    "Nitrate dissimilatory reduction"
                ],
                "NapK": [
                    "NapK", "Periplasmic nitrate reductase component NapK",
                    "Nitrate dissimilatory reduction"
                ],
                "NapL": [
                    "NapL", "Periplasmic nitrate reductase component NapL",
                    "Nitrate dissimilatory reduction"
                ],
                "NarC": [
                    "NarC",
                    "Respiratory nitrate reductase subunit, conjectural (EC 1.7.99.4)",
                    "Nitrate dissimilatory reduction"
                ],
                "NarG_NxrA": [
                    "NarG_NxrA",
                    "narG, narZ, nxrA; nitrate reductase / nitrite oxidoreductase, alpha subunit [EC:1.7.5.1 1.7.99.-]",
                    "Nitrate dissimilatory reduction"
                ],
                "NarH_NxrB": [
                    "NarH_NxrB",
                    "narH, narY, nxrB; nitrate reductase / nitrite oxidoreductase, beta subunit [EC:1.7.5.1 1.7.99.-]",
                    "Nitrate dissimilatory reduction"
                ],
                "NarI": [
                    "NarI",
                    "Respiratory nitrate reductase gamma chain (EC 1.7.99.4)",
                    "Nitrate dissimilatory reduction"
                ],
                "NarJ": [
                    "NarJ",
                    "narJ, narW; nitrate reductase molybdenum cofactor assembly chaperone NarJ/NarW",
                    "Nitrate dissimilatory reduction"
                ],
                "NasA": [
                    "NasA",
                    "Assimilatory nitrate reductase large subunit (EC:1.7.99.4)",
                    "Nitrate assimilatory reduction"
                ],
                "NasB": [
                    "NasB",
                    "nasB; assimilatory nitrate reductase NADH oxidase subunit [EC:1.7.99.-]",
                    "Nitrate assimilatory reduction"
                ],
                "NasI": [
                    "NasI",
                    "assimilatory nitrate reductase, clostridial, electron transfer subunit [EC:1.7.99.-]",
                    "Nitrite assimilation"
                ],
                "NasJ": [
                    "NasJ",
                    "assimilatory nitrate reductase, clostridial, NADH oxidase subunit [EC:1.7.99.-]",
                    "Nitrite assimilation"
                ],
                "NifB": [
                    "NifB", "nifB; nitrogen fixation protein NifB",
                    "Nitrogen fixation"
                ],
                "NifD_AnfD_VnfD": [
                    "NifD_AnfD_VnfD",
                    "Nitrogenase alpha chain (EC 1.18.6.1) NifD/AnfD/VnfD",
                    "Nitrogen fixation"
                ],
                "NifH_AnfH_VnfH": [
                    "NifH_AnfH_VnfH",
                    "Nitrogenase reductase and maturation protein NifH/AnfH/VnfH",
                    "Nitrogen fixation"
                ],
                "NifK_AnfK_VnfK": [
                    "NifK_AnfK_VnfK",
                    "Nitrogenase beta chain (EC 1.18.6.1) NifK/AnfK/VnfK",
                    "Nitrogen fixation"
                ],
                "NirA": [
                    "NirA", "nirA; ferredoxin-nitrite reductase [EC:1.7.7.1]",
                    "Nitrite assimilation"
                ],
                "NirB": [
                    "NirB",
                    "nirB; nitrite reductase (NADH) large subunit [EC:1.7.1.15]",
                    "Nitrite assimilation"
                ],
                "NirB3": [
                    "NirB3", "Cytochrome c-552 precursor NirB",
                    "Denitrification"
                ],
                "NirC": ["NirC", "nirC; cytochrome c55X", "Denitrification"],
                "NirD": [
                    "NirD",
                    "nirD; nitrite reductase (NADH) small subunit [EC:1.7.1.15]",
                    "Nitrite assimilation"
                ],
                "NirK": [
                    "NirK",
                    "nirK; nitrite reductase (NO-forming) [EC:1.7.2.1]",
                    "Denitrification"
                ],
                "NirM": ["NirM", "Cytochrome c551 NirM", "Denitrification"],
                "NirN": [
                    "NirN",
                    "Nitrite reductase associated c-type cytochorome NirN",
                    "Denitrification"
                ],
                "NirS": [
                    "NirS",
                    "nirS; Cytochrome cd1 nitrite reductase (NO-forming) / hydroxylamine reductase [EC:1.7.2.1 1.7.99.1]",
                    "Denitrification"
                ],
                "NirT":
                ["NirT", "Cytochrome c-type protein NirT", "Denitrification"],
                "NirU": [
                    "NirU",
                    "assimilatory nitrite reductase, putative NADH oxidase subunit [EC:1.7.99.-]",
                    "Nitrite assimilation"
                ],
                "NosZ": [
                    "NosZ", "nosZ; nitrous-oxide reductase [EC:1.7.2.4]",
                    "Denitrification"
                ],
                "NrfA": [
                    "NrfA",
                    "nrfA; nitrite reductase (cytochrome c-552) [EC:1.7.2.2]",
                    "Ammonification"
                ],
                "NrfB": [
                    "NrfB", "nrfB; cytochrome c-type protein NrfB",
                    "Ammonification"
                ],
                "NrfC": ["NrfC", "nrfC; protein NrfC", "Ammonification"],
                "NrfD": ["NrfD", "nrfD; protein NrfD", "Ammonification"],
                "NrfH": [
                    "NrfH",
                    "nrfH; cytochrome c nitrite reductase small subunit",
                    "Ammonification"
                ],
                "UreA":
                ["UreA", "ureA; urease subunit gamma [EC:3.5.1.5]", "Urease"],
                "UreB":
                ["UreB", "ureB; urease subunit beta [EC:3.5.1.5]", "Urease"],
                "UreC":
                ["UreC", "ureC; urease subunit alpha [EC:3.5.1.5]", "Urease"],
                "cNor-C": [
                    "cNor-C", "Nitric-oxide reductase subunit C (EC 1.7.99.7)",
                    "Denitrification"
                ],
                "cNorB_qNor": [
                    "cNorB_qNor", "Nitric-oxide reductase (EC 1.7.99.7)",
                    "Denitrification"
                ]
            },
            "ontology_mapping_method":
            "User curation"
        }
        row_attribute_mapping_ref = save_object(
            'KBaseExperiments.AttributeMapping', 'Test_row_AttributeMapping',
            attribute_mapping_data, [{}])

        # --- column attribute mapping ------------------------------------
        col_ids = ["Fama_test_dummy_id1", "Fama_test_dummy_id2"]
        attribute_mapping_data = {
            "attributes": [{
                "attribute": "sample_id",
                "source": "KBase"
            }],
            "instances": {
                "Fama_test_dummy_id1": ["Fama_test_dummy_id1"],
                "Fama_test_dummy_id2": ["Fama_test_dummy_id2"]
            },
            "ontology_mapping_method": "User curation"
        }
        col_attribute_mapping_ref = save_object(
            'KBaseExperiments.AttributeMapping', 'Test_col_AttributeMapping',
            attribute_mapping_data, [{}])

        # --- trait matrix and functional profile -------------------------
        # Both matrices use the same row ids; each gets its own copy of the
        # list so the two payloads do not alias each other.
        row_ids = [
            "AmoA_PmoA", "AmoB_PmoB", "AmoC_PmoC", "HAO", "HzsA",
            "NapA", "NapB", "NapC", "NapD", "NapF", "NapG", "NapH",
            "NapL", "NarC", "NarG_NxrA", "NarH_NxrB", "NarI", "NarJ",
            "NasA", "NasB", "NasI", "NasJ", "NifB", "NifD_AnfD_VnfD",
            "NifH_AnfH_VnfH", "NifK_AnfK_VnfK", "NirA", "NirB", "NirC",
            "NirD", "NirK", "NirM", "NirN", "NirS", "NirT", "NirU",
            "NosZ", "NrfA", "NrfB", "NrfC", "NrfD", "NrfH", "UreA",
            "UreB", "UreC", "cNor-C", "cNorB_qNor"
        ]
        trait_matrix_data = {
            "col_attributemapping_ref": col_attribute_mapping_ref,
            "data": {
                "col_ids": list(col_ids),
                "row_ids": list(row_ids),
                "values": [[29.0, 1862.0], [9.0, 1502.0], [20.0, 1775.0],
                           [1.0, 6.0], [0.0, 1.0], [10.0, 335.0], [0.0, 11.0],
                           [1.0, 16.0], [0.0, 10.0], [1.0, 27.0], [4.0, 47.0],
                           [0.0, 26.0], [0.0, 1.0], [5.0, 424.0],
                           [86.0, 8420.0],
                           [46.0, 5446.0], [7.0, 128.0], [3.0, 45.0],
                           [157.0, 5582.0], [12.0, 168.0], [0.0, 2.0],
                           [0.0, 2.0], [1.0, 0.0], [0.0, 3.0], [1.0, 0.0],
                           [2.0, 3.0], [129.0, 9531.0], [73.0, 994.0],
                           [0.0, 2.0], [19.0, 1622.0], [103.0, 10225.0],
                           [0.0, 2.0], [0.0, 9.0], [0.0, 16.0], [0.0, 3.0],
                           [8.0, 22.0], [4.0, 149.0], [7.0, 76.0], [0.0, 1.0],
                           [0.0, 83.0], [0.0, 18.0], [0.0, 34.0],
                           [80.0, 2687.0], [83.0, 2689.0], [246.0, 10558.0],
                           [4.0, 159.0], [19.0, 523.0]]
            },
            "row_attributemapping_ref": row_attribute_mapping_ref,
            "scale": "raw"
        }
        trait_matrix_ref = save_object('KBaseMatrices.TraitMatrix',
                                       'Test_TraitMatrix', trait_matrix_data,
                                       [{}])

        func_profile_data = {
            "base_object_ref": trait_matrix_ref,
            "data": {
                "col_ids": list(col_ids),
                "row_ids": list(row_ids),
                "values": [[0.5877623222166435, 0.6194552807793001],
                           [0.18772393783341546, 0.5158019621326551],
                           [0.42093693109943287, 0.6389877038101813],
                           [0.01052927042506581, 9.44285005365384E-4],
                           [0.0, 1.163272453732729E-4],
                           [0.07533916331087316, 0.03985967408079665],
                           [0.0, 0.004004703811676381],
                           [0.023525707440798897, 0.005726992499611547],
                           [0.0, 0.005001872240847319],
                           [0.03374102192943467, 0.012378733183834723],
                           [0.09317287278448243, 0.01636684276395418],
                           [0.0, 0.007156293503795473],
                           [0.0, 2.558288744954014E-4],
                           [0.09551482894790636, 0.11359136011797127],
                           [0.4838004401128097, 0.7117012180568053],
                           [0.606624219369047, 1.1214863307558443],
                           [0.14386930938552234, 0.039556844643411124],
                           [0.06703428893835078, 0.013033272059964287],
                           [1.2377859668457405, 0.6541740711814014],
                           [0.1554388223417535, 0.025585775284706288],
                           [0.0, 4.157415511565216E-4],
                           [0.0, 8.79384674326921E-4],
                           [0.011357985203073509, 0.0],
                           [0.0, 5.408864203344862E-4],
                           [0.017785876274710746, 0.0],
                           [0.023945607737196006, 5.321452308325661E-4],
                           [1.3691533051493565, 1.5159715196738617],
                           [0.5575869192146357, 0.11053408671640833],
                           [0.0, 0.001003764424584737],
                           [0.6020807778521705, 0.7245394400294567],
                           [1.2760844699978113, 2.028265195299676],
                           [0.0, 8.562698274159332E-4],
                           [0.0, 0.0016013558654137605],
                           [0.0, 0.002541269397417341],
                           [0.0, 0.001148325643963228],
                           [0.08602893305399109, 0.0036767738796032027],
                           [0.035240090231966045, 0.020453010032479176],
                           [0.08339398252207368, 0.013841118202571863],
                           [0.0, 3.631862195001992E-4],
                           [0.0, 0.02686595241534706],
                           [0.0, 0.004760523591057187],
                           [0.0, 0.013633043040963725],
                           [2.4590146922690415, 1.1212330338379586],
                           [2.1843782229213273, 1.0658240082728703],
                           [2.5770680795310428, 1.662618906136703],
                           [0.08799907015919287, 0.050780650063447744],
                           [0.17980007974121962, 0.06499031332293298]]
            },
            "profile_category": "community",
            "profile_type": "sequence reads"
        }
        cls.func_profile_ref = save_object('KBaseProfile.FunctionalProfile',
                                           'Test_FunctionalProfile',
                                           func_profile_data, [{}])
# Example #6
# 0
class ImportGenbankUtil:
    def __init__(self, config):
        """
        Set up the per-instance scratch directory and the service clients.

        Reads 'SDK_CALLBACK_URL', 'KB_AUTH_TOKEN' and 'scratch' from config.
        A uniquely named 'import_GenBank_<uuid4>' directory is created under
        the configured scratch location for this instance.
        """
        callback_url = config['SDK_CALLBACK_URL']
        self.callback_url = callback_url
        self.token = config['KB_AUTH_TOKEN']

        # Unique sub-directory so separate instances do not share files.
        scratch_root = config['scratch']
        run_dir_name = 'import_GenBank_' + str(uuid.uuid4())
        self.scratch = os.path.join(scratch_root, run_dir_name)
        handler_utils._mkdir_p(self.scratch)

        self.dfu = DataFileUtil(callback_url)
        self.gfu = GenomeFileUtil(callback_url, service_ver='beta')
        self.uploader_utils = UploaderUtil(config)

    def import_genbank_from_staging(self, params):
        '''
          import_genbank_from_staging: wrapper method for GenomeFileUtil.genbank_to_genome

          required params:
          staging_file_subdir_path - subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
          genome_name - becomes the name of the object
          workspace_name - the name of the workspace it gets saved to.
          source - Source of the file typically something like RefSeq or Ensembl

          optional params:
          release - Release or version number of the data
              per example Ensembl has numbered releases of all their data: Release 31
          generate_ids_if_needed - If field used for feature id is not there,
              generate ids (default behavior is raising an exception)
          genetic_code - Genetic code of organism. Overwrites determined GC from
              taxon object
          type - Reference, Representative or User upload

          return:
          genome_ref: return object reference
        '''

        logging.info(
            '--->\nrunning ImportGenbankUtil.import_genbank_from_staging\n' +
            f'params:\n{json.dumps(params, indent=1)}')

        self.validate_import_genbank_from_staging_params(params)

        download_staging_file_params = {
            'staging_file_subdir_path': params.get('staging_file_subdir_path')
        }
        scratch_file_path = self.dfu.download_staging_file(
            download_staging_file_params).get('copy_file_path')
        file = {'path': scratch_file_path}
        import_genbank_params = params
        import_genbank_params['file'] = file
        del import_genbank_params['staging_file_subdir_path']

        returnVal = self.gfu.genbank_to_genome(import_genbank_params)
        """
        Update the workspace object related meta-data for staged file
        """
        #self.uploader_utils.update_staging_service(
        #    download_staging_file_params.get('staging_file_subdir_path'),
        #    returnVal['genome_ref'])
        return returnVal

    def validate_import_genbank_from_staging_params(self, params):
        """
        validate_import_genbank_from_staging_params:
                    validates params passed to import_genbank_from_staging method
        """
        # check for required parameters
        for p in [
                'staging_file_subdir_path', 'genome_name', 'workspace_name',
                'source'
        ]:
            if p not in params:
                raise ValueError('"' + p +
                                 '" parameter is required, but missing')

    def generate_html_report(self, genome_ref, params):
        """
        generate_html_report: generate html summary report

        Fetches the genome object behind genome_ref, fills the
        'report_template_genome.html' template with an overview table plus
        feature-count and contig-length data, writes the result to
        'report.html' in a fresh sub-directory of self.scratch and uploads
        that directory as a zip via DataFileUtil.file_to_shock.

        :param genome_ref: reference of the genome object to summarise
        :param params: accepted for interface compatibility; not read here
        :return: list with one dict (keys 'shock_id', 'name', 'label',
                 'description') describing the uploaded report
        """
        # Function-scope import: the module's import block is not part of
        # this method.
        import html

        logging.info('start generating html report')
        genome_obj = self.dfu.get_objects({'object_refs': [genome_ref]})
        tmp_dir = os.path.join(self.scratch, str(uuid.uuid4()))
        handler_utils._mkdir_p(tmp_dir)
        result_file_path = os.path.join(tmp_dir, 'report.html')

        genome_record = genome_obj.get('data')[0]
        genome_data = genome_record.get('data')
        genome_info = genome_record.get('info')
        genome_name = str(genome_info[1])
        # Element 10 of the info record is read as a dict of metadata values.
        genome_meta = genome_info[10]
        feature_counts = genome_data.get('feature_counts', {})

        genome_overview_data = collections.OrderedDict()
        genome_overview_data['Name'] = '{} ({})'.format(
            genome_name, genome_ref)
        genome_overview_data['Date Uploaded'] = time.strftime("%c")
        genome_overview_data['Source'] = genome_meta.get('Source')
        genome_overview_data['Number of Contigs'] = genome_meta.get(
            'Number contigs')
        genome_overview_data['Size'] = genome_meta.get('Size')
        genome_overview_data['GC Content'] = genome_meta.get('GC content')
        genome_overview_data['Warnings'] = "\n".join(
            genome_data.get('warnings', []))
        # Feature counts follow the fixed rows, ordered by feature name.
        genome_overview_data.update(sorted(feature_counts.items()))

        # Names and warnings originate from uploaded data, so escape them
        # before placing them into markup.
        overview_rows = [
            '<tr><td><b>{}</b></td><td>{}</td></tr>\n'.format(
                html.escape(str(key)), html.escape(str(val)))
            for key, val in genome_overview_data.items()
        ]
        overview_content = (
            '<br/><table>\n' + ''.join(overview_rows) + '</table>')

        # The 'gene' count is left out of the feature data set.
        feature_content = str(
            [[str(k), v] for k, v in feature_counts.items() if k != 'gene'])
        contig_content = str(
            [[str(c), l]
             for c, l in zip(genome_data.get('contig_ids', []),
                             genome_data.get('contig_lengths', []))])

        # Read the template before creating the output file so that a missing
        # template does not leave an empty report.html behind.
        template_path = os.path.join(
            os.path.dirname(__file__), 'report_template',
            'report_template_genome.html')
        with open(template_path, 'r') as report_template_file:
            report_template = report_template_file.read()

        report_template = report_template.replace(
            '<p>Overview_Content</p>', overview_content)
        report_template = report_template.replace(
            '*FEATURE_DATA*', feature_content)
        report_template = report_template.replace(
            '*CONTIG_DATA*', contig_content)

        with open(result_file_path, 'w') as result_file:
            result_file.write(report_template)

        report_shock_id = self.dfu.file_to_shock({
            'file_path': tmp_dir,
            'pack': 'zip'
        })['shock_id']

        report_file_name = os.path.basename(result_file_path)
        return [{
            'shock_id': report_shock_id,
            'name': report_file_name,
            'label': report_file_name,
            'description': 'HTML summary report for imported Genome'
        }]

    def generate_report(self, genome_ref, params):
        """
        Create the extended KBase report for an imported genome.

        :param genome_ref:  Return Val from GenomeFileUtil for Uploaded genome;
                            listed as the created object and summarised in the
                            HTML report.
        :param params:      dict; 'workspace_name' is read from it and the
                            whole dict is forwarded to generate_html_report.
        :return: dict with 'report_name' and 'report_ref' taken from the
                 create_extended_report result
        """
        # Random suffix for the report object name.
        report_suffix = str(uuid.uuid4())

        created_objects = [dict(ref=genome_ref, description='Imported Genome')]
        html_links = self.generate_html_report(genome_ref, params)

        report_params = dict(
            message='',
            workspace_name=params.get('workspace_name'),
            objects_created=created_objects,
            html_links=html_links,
            direct_html_link_index=0,
            html_window_height=300,
            report_object_name='kb_genome_upload_report_' + report_suffix,
        )

        report_client = KBaseReport(self.callback_url, token=self.token)
        report_info = report_client.create_extended_report(report_params)

        return {
            'report_name': report_info['name'],
            'report_ref': report_info['ref'],
        }