示例#1
0
    def setUpClass(cls):
        cls.token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenericsAPI'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(cls.token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            cls.token,
            'user_id':
            user_id,
            'provenance': [{
                'service': 'GenericsAPI',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = GenericsAPI(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
        cls.shockURL = cls.cfg['shock-url']
        cls.dfu = DataFileUtil(cls.callback_url)
        cls.sample_uploader = sample_uploader(cls.callback_url,
                                              service_ver="dev")
        cls.sample_url = cls.cfg.get('kbase-endpoint') + '/sampleservice'
        cls.sample_ser = SampleService(cls.sample_url)
        cls.hs = HandleService(url=cls.cfg['handle-service-url'],
                               token=cls.token)

        suffix = int(time.time() * 1000)
        cls.wsName = "test_GenericsAPI_" + str(suffix)
        ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
        cls.wsId = ret[0]

        small_file = os.path.join(cls.scratch, 'test.txt')
        with open(small_file, "w") as f:
            f.write("empty content")
        cls.test_shock = cls.dfu.file_to_shock({
            'file_path': small_file,
            'make_handle': True
        })
        cls.handles_to_delete = []
        cls.nodes_to_delete = []
        cls.handles_to_delete.append(cls.test_shock['handle']['hid'])
        cls.nodes_to_delete.append(cls.test_shock['shock_id'])

        cls.prepare_data()
 def setUpClass(cls):
     token = environ.get('KB_AUTH_TOKEN', None)
     config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('kb_Bowtie2'):
         cls.cfg[nameval[0]] = nameval[1]
     # Getting username from Auth profile for token
     authServiceUrl = cls.cfg['auth-service-url']
     auth_client = _KBaseAuth(authServiceUrl)
     user_id = auth_client.get_user(token)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({
         'token':
         token,
         'user_id':
         user_id,
         'provenance': [{
             'service': 'kb_Bowtie2',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated':
         1
     })
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = workspaceService(cls.wsURL)
     cls.serviceImpl = kb_Bowtie2(cls.cfg)
     cls.scratch = cls.cfg['scratch']
     cls.callback_url = os.environ['SDK_CALLBACK_URL']
示例#3
0
    def setUpClass(cls):
        cls.token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('MiscIndexer'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        # authServiceUrl = cls.cfg['auth-service-url']
        # auth_client = _KBaseAuth(authServiceUrl)
        # user_id = auth_client.get_user(cls.token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.scratch = cls.cfg['scratch']
        cls.cfg['token'] = cls.token
        cls.upa = '1/2/3'
        cls.test_dir = os.path.dirname(os.path.abspath(__file__))
        cls.mock_dir = os.path.join(cls.test_dir, 'mock_data')

        cls.assemblyobj = cls.read_mock('assembly_object.json')
        cls.narobj = cls.read_mock('narrative_object.json')
        cls.pairedend = cls.read_mock('pairedend_object.json')
        cls.singleend = cls.read_mock('singleend_object.json')
        cls.ontology = cls.read_mock('ontology_object.json')
        cls.pangenome = cls.read_mock('pangenome_object.json')
        cls.rnaseqsampleset = cls.read_mock('rnaseqsampleset_object.json')
        cls.taxon = cls.read_mock('taxon_object.json')
        cls.tree = cls.read_mock('tree_object.json')
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            token,
            'provenance': [{
                'service': 'GenomeFileUtil',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenomeFileUtil'):
            cls.cfg[nameval[0]] = nameval[1]
        cls.wsURL = cls.cfg['workspace-url']
        cls.ws = workspaceService(cls.wsURL, token=token)
        cls.gaa = GenomeAnnotationAPI(os.environ['SDK_CALLBACK_URL'])
        cls.serviceImpl = GenomeFileUtil(cls.cfg)

        # create one WS for all tests
        suffix = int(time.time() * 1000)
        wsName = "test_GenomeAnnotationAPI_" + str(suffix)
        ret = cls.ws.create_workspace({'workspace': wsName})
        cls.wsName = wsName
    def setUpClass(cls):
        cls.token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('kb_GenomeIndexer'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        # authServiceUrl = cls.cfg['auth-service-url']
        # auth_client = _KBaseAuth(authServiceUrl)
        # user_id = auth_client.get_user(cls.token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.scratch = cls.cfg['scratch']
        cls.cfg['token'] = cls.token
        cls.upa = '1/2/3'
        cls.upa2 = '15792/2/12'
        cls.test_dir = os.path.dirname(os.path.abspath(__file__))
        cls.mock_dir = os.path.join(cls.test_dir, 'mock_data')

        cls.wsinfo = cls.read_mock('get_workspace_info.json')
        cls.genobj = cls.read_mock('genome_object.json')
 def setUpClass(cls):
     token = environ.get('KB_AUTH_TOKEN', None)
     config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('kb_orthofinder'):
         cls.cfg[nameval[0]] = nameval[1]
     # Getting username from Auth profile for token
     authServiceUrl = cls.cfg['auth-service-url']
     auth_client = _KBaseAuth(authServiceUrl)
     user_id = auth_client.get_user(token)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({'token': token,
                     'user_id': user_id,
                     'provenance': [
                         {'service': 'kb_orthofinder',
                          'method': 'annotate_plant_transcripts',
                          'method_params': []
                          }],
                     'authenticated': 1})
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = workspaceService(cls.wsURL)
     cls.serviceImpl = kb_orthofinder(cls.cfg)
     cls.scratch = cls.cfg['scratch']
     cls.test_data = cls.cfg['test_data']
     cls.callback_url = os.environ['SDK_CALLBACK_URL']
     cls.gfu = GenomeFileUtil(cls.callback_url)
     cls.dfu = DataFileUtil(cls.callback_url)
     cls.genome = "Test_Genome"
     cls.prepare_data()
示例#7
0
 def setUpClass(cls):
     token = environ.get('KB_AUTH_TOKEN', None)
     config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('AssemblyUtil'):
         cls.cfg[nameval[0]] = nameval[1]
     authServiceUrl = cls.cfg.get(
         'auth-service-url',
         'https://kbase.us/services/authorization/Sessions/Login')
     auth_client = _KBaseAuth(authServiceUrl)
     user_id = auth_client.get_user(token)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({
         'token':
         token,
         'user_id':
         user_id,
         'provenance': [{
             'service': 'AssemblyUtil',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated':
         1
     })
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = workspaceService(cls.wsURL, token=token)
     cls.serviceImpl = AssemblyUtil(cls.cfg)
示例#8
0
 def setUpClass(cls):
     token = os.environ.get('KB_AUTH_TOKEN', None)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({'token': token,
                     'provenance': [
                         {'service': 'GenomeFileUtil',
                          'method': 'please_never_use_it_in_production',
                          'method_params': []
                          }],
                     'authenticated': 1})
     config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('GenomeFileUtil'):
         cls.cfg[nameval[0]] = nameval[1]
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = workspaceService(cls.wsURL, token=token)
     cls.serviceImpl = GenomeFileUtil(cls.cfg)
     suffix = int(time.time() * 1000)
     cls.wsName = "test_GenomeFileUtil_" + str(suffix)
     ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
     cls.genome_ref = 'KBaseExampleData/Escherichia_coli_K-12_MG1655'
 def setUpClass(cls):
     cls.token = environ.get('KB_AUTH_TOKEN', None)
     config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)  # type: ignore
     for nameval in config.items('GenomeFileUtil'):
         cls.cfg[nameval[0]] = nameval[1]
     authServiceUrl = cls.cfg.get(
         'auth-service-url',
         "https://kbase.us/services/authorization/Sessions/Login")
     auth_client = _KBaseAuth(authServiceUrl)
     cls.user_id = auth_client.get_user(cls.token)
     cls.ctx = MethodContext(None)
     cls.ctx.update({
         'token':
         cls.token,
         'user_id':
         cls.user_id,
         'provenance': [{
             'service': 'GenomeFileUtil',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated':
         1
     })
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = workspaceService(cls.wsURL, token=cls.token)
     cls.serviceImpl = GenomeFileUtil(cls.cfg)
     cls.dfu = DataFileUtil(os.environ['SDK_CALLBACK_URL'], token=cls.token)
     cls.scratch = cls.cfg['scratch']
     cls.shockURL = cls.cfg['shock-url']
     cls.gfu_cfg = SDKConfig(cls.cfg)
     cls.prepare_data()
示例#10
0
    def setUpClass(cls):
        token = os.environ.get('KB_AUTH_TOKEN', None)
        config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenericsAPI'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'GenericsAPI',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = GenericsAPI(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
        cls.dfu = DataFileUtil(cls.callback_url)
        cls.pca_util = PCAUtil(cls.cfg)

        suffix = int(time.time() * 1000)
        cls.wsName = "test_pca_util_" + str(suffix)
        ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
        cls.wsId = ret[0]
示例#11
0
    def setUpClass(cls):
        cls.token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenericsIndexer'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        # authServiceUrl = cls.cfg['auth-service-url']
        # auth_client = _KBaseAuth(authServiceUrl)
        # user_id = auth_client.get_user(cls.token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.scratch = cls.cfg['scratch']
        cls.cfg['token'] = cls.token
        cls.upa = '1/2/3'
        cls.test_dir = os.path.dirname(os.path.abspath(__file__))
        cls.mock_dir = os.path.join(cls.test_dir, 'data')

        cls.amplicon_matrix = cls.read_mock('AmpliconMatrix.json')
        cls.attribute_mapping = cls.read_mock('AttributeMapping.json')
        cls.parsed_attribute_mapping = cls.read_mock(
            'ParsedAttributeMapping.json')
示例#12
0
 def setUpClass(cls):
     token = os.environ.get('KB_AUTH_TOKEN', None)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx['token'] = token
     cls.ctx.update({
         'token':
         token,
         'provenance': [{
             'service': 'GenomeFileUtil',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated':
         1
     })
     config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('GenomeFileUtil'):
         cls.cfg[nameval[0]] = nameval[1]
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = workspaceService(cls.wsURL, token=token)
     cls.serviceImpl = GenomeFileUtil(cls.cfg)
示例#13
0
 def setUpClass(cls):
     token = environ.get('KB_AUTH_TOKEN', None)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({'token': token,
                     'provenance': [
                         {'service': 'hipmer',
                          'method': 'please_never_use_it_in_production',
                          'method_params': []
                          }],
                     'authenticated': 1})
     config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('hipmer'):
         cls.cfg[nameval[0]] = nameval[1]
     cls.wsURL = cls.cfg['workspace-url']
     cls.ws = workspaceService(cls.wsURL, token=token)
     cls.serviceImpl = hipmer(cls.cfg)
     cls.shockURL = cls.cfg['shock-url']
     cls.handleURL = cls.cfg['handle-service-url']
     cls.scratch = cls.cfg['scratch']
     print("shock %s" % (cls.shockURL))
    def setUpClass(cls):
        print('setting up class')
        token = environ.get('KB_AUTH_TOKEN', None)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            token,
            'provenance': [{
                'service': 'GenomeFileUtil',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenomeFileUtil'):
            cls.cfg[nameval[0]] = nameval[1]
        cls.wsURL = cls.cfg['workspace-url']

        cls.ws = workspaceService(cls.wsURL, token=token)
        cls.impl = GenomeFileUtil(cls.cfg)

        cls.MINIMAL_TEST_FILE = os.path.join(cls.cfg['scratch'],
                                             'minimal.gbff')
        shutil.copy('data/minimal.gbff', cls.MINIMAL_TEST_FILE)
 def setUpClass(cls):
     config_file = environ.get("KB_DEPLOYMENT_CONFIG", None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items("ProkkaAnnotation"):
         cls.cfg[nameval[0]] = nameval[1]
     # Token validation
     token = environ.get("KB_AUTH_TOKEN", None)
     authServiceUrl = cls.cfg.get("auth-service-url",
                                  "https://kbase.us/services/authorization/Sessions/Login")
     auth_client = _KBaseAuth(authServiceUrl)
     user_id = auth_client.get_user(token)
     # WARNING: don"t call any logging methods on the context object,
     # it"ll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({"token": token,
                     "user_id": user_id,
                     "provenance": [
                         {"service": "ProkkaAnnotation",
                          "method": "please_never_use_it_in_production",
                          "method_params": []
                          }],
                     "authenticated": 1})
     cls.wsURL = cls.cfg["workspace-url"]
     cls.wsClient = workspaceService(cls.wsURL, token=token)
     cls.serviceImpl = ProkkaAnnotation(cls.cfg)
    def setUpClass(cls):
        cls.maxDiff = 70000
        cls.token = os.environ.get('KB_AUTH_TOKEN', None)
        config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenericsAPI'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(cls.token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': cls.token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'GenericsAPI',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = GenericsAPI(cls.cfg)
        cls.serviceUtils = AttributesUtil(cls.cfg)
        cls.shockURL = cls.cfg['shock-url']
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
        cls.dfu = DataFileUtil(cls.callback_url)
        cls.hs = HandleService(url=cls.cfg['handle-service-url'],
                               token=cls.token)

        suffix = int(time.time() * 1000)
        cls.wsName = "test_CompoundSetUtils_" + str(suffix)
        ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
        cls.wsId = ret[0]
        cls.attribute_mapping = json.load(open('data/AM1.json'))
        info = cls.dfu.save_objects({
            "id": cls.wsId,
            "objects": [{
                "type": "KBaseExperiments.AttributeMapping",
                "data": cls.attribute_mapping,
                "name": "test_cond_set"
            }]
        })[0]
        cls.attribute_mapping_ref = "%s/%s/%s" % (info[6], info[0], info[4])
        cls.attribute_mapping_2 = json.load(open('data/AM2.json'))

        small_file = os.path.join(cls.scratch, 'test.txt')
        with open(small_file, "w") as f:
            f.write("empty content")
        cls.test_shock = cls.dfu.file_to_shock({'file_path': small_file, 'make_handle': True})
        cls.handles_to_delete = []
        cls.nodes_to_delete = []
        cls.handles_to_delete.append(cls.test_shock['handle']['hid'])
        cls.nodes_to_delete.append(cls.test_shock['shock_id'])
示例#17
0
    def setUpClass(cls):
        token = os.environ.get('KB_AUTH_TOKEN', None)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'provenance': [
                            {'service': 'GenomeFileUtil',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenomeFileUtil'):
            cls.cfg[nameval[0]] = nameval[1]
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL, token=token)
        cls.serviceImpl = GenomeFileUtil(cls.cfg)
        gff_path = "data/fasta_gff/RefSeq/Bacterial_Data/NC_021490.gff.gz"
        fasta_path = "data/fasta_gff/RefSeq/Bacterial_Data/NC_021490.fasta.gz"
        ws_obj_name = 'fungal_model'
        suffix = int(time.time() * 1000)
        cls.wsName = "test_GenomeFileUtil_" + str(suffix)
        ret = cls.wsClient.create_workspace({'workspace': cls.wsName})

        print('Uploading GFF file')
        result = cls.serviceImpl.fasta_gff_to_genome(
            cls.ctx,
            {
                'workspace_name': cls.wsName,
                'genome_name': 'MyGenome',
                'fasta_file': {'path': fasta_path},
                'gff_file': {'path': gff_path},
                'source': 'GFF',
                'type': 'Reference'
            })[0]
        data_file_cli = DataFileUtil(os.environ['SDK_CALLBACK_URL'])
        cls.genome_orig = data_file_cli.get_objects(
            {'object_refs': [result['genome_ref']]})['data'][0]['data']

        print('testing GFF download by building the file')
        down_result = cls.serviceImpl.genome_to_gff(
            cls.ctx, {'genome_ref': result['genome_ref']})[0]

        print('Reuploading GFF file')
        new_result = cls.serviceImpl.fasta_gff_to_genome(
            cls.ctx,
            {
                'workspace_name': cls.wsName,
                'genome_name': 'MyGenome',
                'fasta_file': {'path': fasta_path},
                'gff_file': {'path': down_result['file_path']},
                'source': 'GFF',
                'type': 'Reference'
            })[0]
        cls.genome_new = data_file_cli.get_objects({'object_refs': [new_result['genome_ref']]})['data'][0]['data']
 def __init__(self, config):
     #BEGIN_CONSTRUCTOR
     self.config = config
     self.config['SDK_CALLBACK_URL'] = os.environ['SDK_CALLBACK_URL']
     self.config['KB_AUTH_TOKEN'] = os.environ['KB_AUTH_TOKEN']
     self.ws_client = workspaceService(config["workspace-url"])
     #END_CONSTRUCTOR
     pass
 def __init__(self, config):
     self.callback_url = config['SDK_CALLBACK_URL']
     self.scratch = config['scratch']
     self.shock_url = config['shock-url']
     self.dfu = DataFileUtil(self.callback_url)
     self.au = AssemblyUtil(self.callback_url)
     self.setapi = SetAPI(self.callback_url)
     self.wss = workspaceService(config['workspace-url'])
示例#20
0
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('SetAPI'):
            cls.cfg[nameval[0]] = nameval[1]
        authServiceUrl = cls.cfg.get(
            'auth-service-url',
            "https://kbase.us/services/authorization/Sessions/Login")
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            token,
            'user_id':
            user_id,
            'provenance': [{
                'service': 'SetAPI',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL, token=token)
        cls.serviceImpl = SetAPI(cls.cfg)

        # setup data at the class level for now (so that the code is run
        # once for all tests, not before each test case.  Not sure how to
        # do that outside this function..)
        suffix = int(time.time() * 1000)
        wsName = "test_SetAPI_" + str(suffix)
        ret = cls.wsClient.create_workspace({'workspace': wsName})
        cls.wsName = wsName

        foft = FakeObjectsForTests(os.environ['SDK_CALLBACK_URL'])
        [info1, info2, info3] = foft.create_fake_reads({
            'ws_name':
            wsName,
            'obj_names': ['reads1', 'reads2', 'reads3']
        })
        cls.read1ref = str(info1[6]) + '/' + str(info1[0]) + '/' + str(
            info1[4])
        cls.read2ref = str(info2[6]) + '/' + str(info2[0]) + '/' + str(
            info2[4])
        cls.read3ref = str(info3[6]) + '/' + str(info3[0]) + '/' + str(
            info3[4])

        cls.fake_sampleset_ref = make_fake_sampleset(
            "test_sampleset", [cls.read1ref, cls.read2ref, cls.read3ref],
            ['wt', 'cond1', 'cond2'], cls.wsName, cls.wsClient)
示例#21
0
 def __init__(self, config):
     self.ws_url = config["workspace-url"]
     self.callback_url = config['SDK_CALLBACK_URL']
     self.token = config['KB_AUTH_TOKEN']
     self.scratch = config['scratch']
     self.serviceWizardURL = config['srv-wiz-url']
     self.wsClient = workspaceService(self.ws_url, token=self.token)
     self.dfu = DataFileUtil(self.callback_url)
     self.generics_service = GenericsService(self.serviceWizardURL)
示例#22
0
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('SetAPI'):
            cls.cfg[nameval[0]] = nameval[1]
        authServiceUrl = cls.cfg.get(
            "auth-service-url",
            "https://kbase.us/services/authorization/Sessions/Login")
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            token,
            'user_id':
            user_id,
            'provenance': [{
                'service': 'SetAPI',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL, token=token)
        cls.serviceImpl = SetAPI(cls.cfg)

        # setup data at the class level for now (so that the code is run
        # once for all tests, not before each test case.  Not sure how to
        # do that outside this function..)
        suffix = int(time.time() * 1000)
        wsName = "test_SetAPI_" + str(suffix)
        cls.wsClient.create_workspace({'workspace': wsName})
        cls.wsName = wsName

        foft = FakeObjectsForTests(os.environ['SDK_CALLBACK_URL'])

        # Make fake genomes
        [fake_genome, fake_genome2] = foft.create_fake_genomes({
            "ws_name":
            wsName,
            "obj_names": ["fake_genome", "fake_genome2"]
        })
        cls.genome_refs = [info_to_ref(fake_genome), info_to_ref(fake_genome2)]

        # Make some fake feature sets
        cls.featureset_refs = [
            make_fake_feature_set("feature_set_{}".format(i),
                                  cls.genome_refs[0], wsName, cls.wsClient)
            for i in range(3)
        ]
    def setUpClass(cls):
        token = os.environ.get('KB_AUTH_TOKEN', None)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'provenance': [
                            {'service': 'GenomeFileUtil',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenomeFileUtil'):
            cls.cfg[nameval[0]] = nameval[1]
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL, token=token)
        cls.serviceImpl = GenomeFileUtil(cls.cfg)
        # gbk_path = "data/Cyanidioschyzon/Cyanidioschyzon_merolae.ASM9120v1.30.gbff"
        gbk_path = "data/Cyanidioschyzon/Cyanidioschyzon_merolae_one_locus.gbff"
        ws_obj_name = 'Cyanidioschyzon_merolae_duplicate_test_orig'
        suffix = int(time.time() * 1000)
        cls.wsName = "test_GenomeFileUtil_" + str(suffix)
        cls.wsClient.create_workspace({'workspace': cls.wsName})
        result = cls.serviceImpl.genbank_to_genome(cls.ctx, {
            'file': {'path': gbk_path},
            'workspace_name': cls.wsName,
            'genome_name': ws_obj_name,
            'generate_ids_if_needed': 1,
            'taxon_id': '511145',
            'source': "Ensembl user"
        })[0]

        data_file_cli = DataFileUtil(
            os.environ['SDK_CALLBACK_URL'],
            token=cls.ctx['token'],
            service_ver='dev'
        )
        cls.genome_orig = data_file_cli.get_objects({'object_refs': [result['genome_ref']]})['data'][0]['data']
        print('testing Genbank download by building the file')
        cls.serviceImpl.export_genome_as_genbank(cls.ctx, {
            'input_ref': result['genome_ref']})
        new_gbk_path = "/kb/module/work/tmp/Cyanidioschyzon_merolae_duplicate_test_orig/KBase_derived_Cyanidioschyzon_merolae_duplicate_test_orig.gbff"
        new_ws_obj_name = 'Cyanidioschyzon_merolae_duplicate_test_new'
        new_result = cls.serviceImpl.genbank_to_genome(cls.ctx, {
            'file': {'path': new_gbk_path},
            'workspace_name': cls.wsName,
            'genome_name': new_ws_obj_name,
            'generate_ids_if_needed': 1,
            'taxon_id': '511145',
            'source': "Ensembl user"
        })[0]
        cls.genome_new = data_file_cli.get_objects({'object_refs': [new_result['genome_ref']]})['data'][0]['data']
 def setUpClass(cls):
     token = os.environ.get('KB_AUTH_TOKEN', None)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({
         'token':
         token,
         'provenance': [{
             'service': 'GenomeFileUtil',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated':
         1
     })
     config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('GenomeFileUtil'):
         cls.cfg[nameval[0]] = nameval[1]
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = workspaceService(cls.wsURL, token=token)
     cls.serviceImpl = GenomeFileUtil(cls.cfg)
     gbk_path = "data/Arabidopsis_gbff/A_thaliana_Ensembl_TAIR10_38_chr4_minus_xref.gbff"
     ws_obj_name = 'Yeast_chromosome1'
     suffix = int(time.time() * 1000)
     cls.wsName = "test_GenomeFileUtil_" + str(suffix)
     ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
     result = cls.serviceImpl.genbank_to_genome(
         cls.ctx, {
             'file': {
                 'path': gbk_path
             },
             'workspace_name': cls.wsName,
             'genome_name': ws_obj_name,
             'generate_ids_if_needed': 1,
             'source': "Ensembl"
         })[0]
     #        print("HERE IS THE RESULT:")
     data_file_cli = DataFileUtil(os.environ['SDK_CALLBACK_URL'],
                                  token=cls.ctx['token'],
                                  service_ver='dev')
     cls.genome = data_file_cli.get_objects(
         {'object_refs': [result['genome_ref']]})['data'][0]['data']
     json.dump(
         cls.genome,
         open(cls.cfg['scratch'] + "/relationship_test_genome.json", 'w'))
     cls.gene_ids = set((x['id'] for x in cls.genome['features']))
     cls.nc_feat_ids = set(
         (x['id'] for x in cls.genome['non_coding_features']))
     cls.mrna_ids = set((x['id'] for x in cls.genome['mrnas']))
     cls.cds_ids = set((x['id'] for x in cls.genome['cdss']))
示例#25
0
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('kb_Msuite'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'kb_Msuite',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = kb_Msuite(cls.cfg)
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
        cls.scratch = cls.cfg['scratch']
        cls.suffix = int(time.time() * 1000)
        #cls.scratch = cls.cfg['scratch']+'_'+str(suffix)
        #cls.cfg['scratch'] = cls.scratch
        #if not os.path.exists(cls.scratch):
        #    os.mkdir(cls.scratch)
        cls.checkm_runner = CheckMUtil(cls.cfg, cls.ctx)

        cls.wsName = "test_kb_Msuite_" + str(cls.suffix)
        cls.ws_info = cls.wsClient.create_workspace({'workspace': cls.wsName})
        cls.au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
        cls.setAPI = SetAPI(url=cls.cfg['srv-wiz-url'], token=cls.ctx['token'])
        cls.gfu = GenomeFileUtil(os.environ['SDK_CALLBACK_URL'], service_ver='dev')
        cls.mu = MetagenomeUtils(os.environ['SDK_CALLBACK_URL'])

        # stage an input and output directory
        """
        cls.input_dir = os.path.join(cls.scratch, 'input_1')
        cls.output_dir = os.path.join(cls.scratch, 'output_1')
        cls.all_seq_fasta = os.path.join(cls.scratch, 'all_seq.fna')
        shutil.copytree(os.path.join('data', 'example_out', 'input'), cls.input_dir)
        shutil.copytree(os.path.join('data', 'example_out', 'output'), cls.output_dir)
        shutil.copy(os.path.join('data', 'example_out', 'all_seq.fna'), cls.all_seq_fasta)
        """

        # prepare WS data
        cls.prepare_data()
示例#26
0
 def __init__(self, config):
     #BEGIN_CONSTRUCTOR
     self.shared_folder = config['scratch']
     logging.basicConfig(format='%(created)s %(levelname)s: %(message)s',
                         level=logging.INFO)
     self.config = config
     self.config['SDK_CALLBACK_URL'] = os.environ['SDK_CALLBACK_URL']
     self.config['KB_AUTH_TOKEN'] = os.environ['KB_AUTH_TOKEN']
     self.ws_client = workspaceService(config["workspace-url"])
     #END_CONSTRUCTOR
     pass
示例#27
0
 def setUpClass(cls):
     token = os.environ.get('KB_AUTH_TOKEN', None)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({
         'token':
         token,
         'provenance': [{
             'service': 'GenomeFileUtil',
             'method': 'please_never_use_it_in_production',
             'method_params': []
         }],
         'authenticated':
         1
     })
     config_file = os.environ['KB_DEPLOYMENT_CONFIG']
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('GenomeFileUtil'):
         cls.cfg[nameval[0]] = nameval[1]
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = workspaceService(cls.wsURL, token=token)
     cls.serviceImpl = GenomeFileUtil(cls.cfg)
     gff_path = "data/e_coli/NC_000913.3.gff3"
     fna_path = "data/e_coli/NC_000913.3.fasta"
     # fna_path = "data/e_coli/GCF_000005845.2_ASM584v2.fasta"
     ws_obj_name = 'ecoli_contigs'
     suffix = int(time.time() * 1000)
     cls.wsName = "test_GenomeFileUtil_" + str(suffix)
     cls.wsClient.create_workspace({'workspace': cls.wsName})
     result = cls.serviceImpl.fasta_gff_to_genome(
         cls.ctx, {
             'gff_file': {
                 'path': gff_path
             },
             'fasta_file': {
                 'path': fna_path
             },
             'taxon_id': 511145,
             'workspace_name': cls.wsName,
             'genome_name': ws_obj_name,
             'generate_missing_genes': 1,
             'generate_ids_if_needed': 1
         })[0]
     data_file_cli = DataFileUtil(os.environ['SDK_CALLBACK_URL'],
                                  token=cls.ctx['token'],
                                  service_ver='dev')
     dfu_result = data_file_cli.get_objects(
         {'object_refs': [result['genome_ref']]})
     cls.genome = dfu_result['data'][0]['data']
    def setUpClass(cls):
        cls.maxDiff = 70000
        token = os.environ.get('KB_AUTH_TOKEN', None)
        config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenericsAPI'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'GenericsAPI',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        cls.wsURL = cls.cfg['workspace-url']
        
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = GenericsAPI(cls.cfg)
        cls.serviceUtils = AttributesUtil(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
        cls.dfu = DataFileUtil(cls.callback_url)

        suffix = int(time.time() * 1000)
        #cls.wsName = "test_CompoundSetUtils_" + str(suffix)
        cls.wsName = "man4ish_gupta:narrative_1568644342277"
        ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
        #exit(cls.wsId)
        #cls.wsId = ret[0]
        cls.wsid = 44071
        cls.attribute_mapping = json.load(open('data/AM1.json'))
        info = cls.dfu.save_objects({
            "id": cls.wsId,
            "objects": [{
                "type": "KBaseExperiments.AttributeMapping",
                "data": cls.attribute_mapping,
                "name": "test_cond_set"
            }]
        })[0]
        cls.attribute_mapping_ref = "%s/%s/%s" % (info[6], info[0], info[4])
        cls.attribute_mapping_2 = json.load(open('data/AM2.json'))
示例#29
0
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'provenance': [
                            {'service': 'GenomeFileUtil',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenomeFileUtil'):
            cls.cfg[nameval[0]] = nameval[1]
        cls.wsURL = cls.cfg['workspace-url']
        cls.ws = workspaceService(cls.wsURL, token=token)
        cls.serviceImpl = GenomeFileUtil(cls.cfg)
        gi_config = SDKConfig(cls.cfg)
        cls.genome_interface = GenomeInterface(gi_config)
        # create one WS for all tests
        suffix = int(time.time() * 1000)
        wsName = "test_GenomeAnnotationAPI_" + str(suffix)
        cls.ws.create_workspace({'workspace': wsName})
        cls.wsName = wsName

        # save new genome
        assembly_file_path = os.path.join(cls.cfg['scratch'],
                                          'Rhodo_SPAdes_assembly.fa')
        shutil.copy('data/Rhodo_SPAdes_assembly.fa', assembly_file_path)
        au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
        cls.assembly_ref = au.save_assembly_from_fasta({
            'workspace_name': cls.wsName,
            'assembly_name': 'ecoli.assembly',
            'file': {'path': assembly_file_path}
        })

        rhodobacter_contigs = json.load(open('data/rhodobacter_contigs.json'))
        save_info = {
            'workspace': cls.wsName,
            'objects': [{
                'type': 'KBaseGenomes.ContigSet',
                'data': rhodobacter_contigs,
                'name': 'rhodobacter_contigs'
            }]
        }
        cls.contigset_ref = cls.ws.save_objects(save_info)
示例#30
0
    def __init__(self, config):
        self.scratch = config["scratch"]
        self.ctx = config['ctx']
        self.callback_url = config["SDK_CALLBACK_URL"]

        self.ws_client = workspaceService(config["workspace-url"])
        self.kbr = KBaseReport(self.callback_url)
        self.genome_api = GenomeAnnotationAPI(self.callback_url)
        """
        self.gfu = GenomeFileUtil(self.callback_url)
        self.au = AssemblyUtil(self.callback_url)
        """
        self.dfu = DataFileUtil(self.callback_url)
        self.output_workspace = None
 def setUpClass(cls):
     token = environ.get('KB_AUTH_TOKEN', None)
     cls.ctx = {'token': token, 'provenance': [{'service': 'kb_gblocks',
         'method': 'please_never_use_it_in_production', 'method_params': []}],
         'authenticated': 1}
     config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     for nameval in config.items('kb_gblocks'):
         cls.cfg[nameval[0]] = nameval[1]
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = workspaceService(cls.wsURL, token=token)
     cls.serviceImpl = kb_gblocks(cls.cfg)
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('FeatureSetUtils'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'FeatureSetUtils',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = FeatureSetUtils(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']

        suffix = int(time.time() * 1000)
        cls.wsName = "test_kb_featureset_util_" + str(suffix)
        cls.wsClient.create_workspace({'workspace': cls.wsName})

        cls.gfu = GenomeFileUtil(cls.callback_url)
        cls.dfu = DataFileUtil(cls.callback_url)
        cls.prepare_data()
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'provenance': [
                            {'service': 'AssemblyAPI',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('AssemblyAPI'):
            cls.cfg[nameval[0]] = nameval[1]
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL, token=token)
        cls.serviceImpl = AssemblyAPI(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']

        suffix = int(time.time() * 1000)
        cls.wsName = "test_kb_maxbin_" + str(suffix)
        cls.ws_info = cls.wsClient.create_workspace({'workspace': cls.wsName})

        cls.obj_ref = "7989/489/2"
        cls.contigs = ['NZ_ALQT01000016']

        # create an example Assembly
        cls.au = AssemblyUtil(cls.callback_url)
        assembly_filename = 'test.fa'
        cls.assembly_fasta_file_path = os.path.join(cls.scratch, assembly_filename)
        shutil.copy(os.path.join("data", assembly_filename), cls.assembly_fasta_file_path)

        assembly_params = {
            'file': {'path': cls.assembly_fasta_file_path},
            'workspace_name': cls.wsName,
            'assembly_name': 'MyAssembly'
        }
        cls.assembly_ref_1 = cls.au.save_assembly_from_fasta(assembly_params)
        print('Assembly1:' + cls.assembly_ref_1)

        # create a test legacy contigset
        with open('data/contigset1.json') as file:
            contigset_data = json.load(file)
        saveData = {
            'type': 'KBaseGenomes.ContigSet',
            'data': contigset_data,
            'name': 'contigset'
        }
        info = cls.wsClient.save_objects(
            {'workspace': cls.wsName, 'objects': [saveData]})[0]
        cls.contig_set_ref = f'{info[6]}/{info[0]}/{info[4]}'
        print('ContigSet1:' + cls.contig_set_ref)

        # create a test legacy contigset
        with open('data/contigset2.json') as file:
            contigset_data = json.load(file)
        saveData = {
            'type': 'KBaseGenomes.ContigSet',
            'data': contigset_data,
            'name': 'contigset'
        }
        info = cls.wsClient.save_objects(
            {'workspace': cls.wsName, 'objects': [saveData]})[0]
        cls.contig_set_ref_2 = f'{info[6]}/{info[0]}/{info[4]}'
        print('ContigSet2:' + cls.contig_set_ref_2)
示例#34
0
    def upload_SingleEndLibrary_to_shock_and_ws (self,
                                                 ctx,
                                                 console,  # DEBUG
                                                 workspace_name,
                                                 obj_name,
                                                 file_path,
                                                 provenance,
                                                 sequencing_tech):

        self.log(console,'UPLOADING FILE '+file_path+' TO '+workspace_name+'/'+obj_name)

        # 1) upload files to shock
        token = ctx['token']
        forward_shock_file = self.upload_file_to_shock(
            console,  # DEBUG
            shock_service_url = self.shockURL,
            filePath = file_path,
            token = token
            )
        #pprint(forward_shock_file)
        self.log(console,'SHOCK UPLOAD DONE')

        # 2) create handle
        self.log(console,'GETTING HANDLE')
        hs = HandleService(url=self.handleURL, token=token)
        forward_handle = hs.persist_handle({
                                        'id' : forward_shock_file['id'], 
                                        'type' : 'shock',
                                        'url' : self.shockURL,
                                        'file_name': forward_shock_file['file']['name'],
                                        'remote_md5': forward_shock_file['file']['checksum']['md5']})

        
        # 3) save to WS
        self.log(console,'SAVING TO WORKSPACE')
        single_end_library = {
            'lib': {
                'file': {
                    'hid':forward_handle,
                    'file_name': forward_shock_file['file']['name'],
                    'id': forward_shock_file['id'],
                    'url': self.shockURL,
                    'type':'shock',
                    'remote_md5':forward_shock_file['file']['checksum']['md5']
                },
                'encoding':'UTF8',
                'type':'fasta',
                'size':forward_shock_file['file']['size']
            },
            'sequencing_tech':sequencing_tech
        }
        self.log(console,'GETTING WORKSPACE SERVICE OBJECT')
        ws = workspaceService(self.workspaceURL, token=ctx['token'])
        self.log(console,'SAVE OPERATION...')
        new_obj_info = ws.save_objects({
                        'workspace':workspace_name,
                        'objects':[
                            {
                                'type':'KBaseFile.SingleEndLibrary',
                                'data':single_end_library,
                                'name':obj_name,
                                'meta':{},
                                'provenance':provenance
                            }]
                        })[0]
        self.log(console,'SAVED TO WORKSPACE')

        return new_obj_info[0]
示例#35
0
    def run_Gblocks(self, ctx, params):
        """
        Method for trimming MSAs of either DNA or PROTEIN sequences
        **
        **        input_type: MSA
        **        output_type: MSA
        :param params: instance of type "Gblocks_Params" (Gblocks Input
           Params) -> structure: parameter "workspace_name" of type
           "workspace_name" (** The workspace object refs are of form: ** ** 
           objects = ws.get_objects([{'ref':
           params['workspace_id']+'/'+params['obj_name']}]) ** ** "ref" means
           the entire name combining the workspace id and the object name **
           "id" is a numerical identifier of the workspace or object, and
           should just be used for workspace ** "name" is a string identifier
           of a workspace or object.  This is received from Narrative.),
           parameter "desc" of String, parameter "input_ref" of type
           "data_obj_ref", parameter "output_name" of type "data_obj_name",
           parameter "trim_level" of Long, parameter "min_seqs_for_conserved"
           of Long, parameter "min_seqs_for_flank" of Long, parameter
           "max_pos_contig_nonconserved" of Long, parameter "min_block_len"
           of Long, parameter "remove_mask_positions_flag" of Long
        :returns: instance of type "Gblocks_Output" (Gblocks Output) ->
           structure: parameter "report_name" of type "data_obj_name",
           parameter "report_ref" of type "data_obj_ref"
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN run_Gblocks
        console = []
        invalid_msgs = []
        self.log(console,'Running run_Gblocks with params=')
        self.log(console, "\n"+pformat(params))
        report = ''
#        report = 'Running run_Gblocks with params='
#        report += "\n"+pformat(params)


        #### do some basic checks
        #
        if 'workspace_name' not in params:
            raise ValueError('workspace_name parameter is required')
        if 'input_ref' not in params:
            raise ValueError('input_ref parameter is required')
        if 'output_name' not in params:
            raise ValueError('output_name parameter is required')


        #### Get the input_ref MSA object
        ##
        try:
            ws = workspaceService(self.workspaceURL, token=ctx['token'])
            objects = ws.get_objects([{'ref': params['input_ref']}])
            data = objects[0]['data']
            info = objects[0]['info']
            input_name = info[1]
            input_type_name = info[2].split('.')[1].split('-')[0]

        except Exception as e:
            raise ValueError('Unable to fetch input_ref object from workspace: ' + str(e))
            #to get the full stack trace: traceback.format_exc()

        if input_type_name == 'MSA':
            MSA_in = data
            row_order = []
            default_row_labels = dict()
            if 'row_order' in MSA_in.keys():
                row_order = MSA_in['row_order']
            else:
                row_order = sorted(MSA_in['alignment'].keys())

            if 'default_row_labels' in MSA_in.keys():
                default_row_labels = MSA_in['default_row_labels']
            else:
                for row_id in row_order:
                    default_row_labels[row_id] = row_id
            if len(row_order) < 2:
                self.log(invalid_msgs,"must have multiple records in MSA: "+params['input_ref'])

            # export features to FASTA file
            input_MSA_file_path = os.path.join(self.scratch, input_name+".fasta")
            self.log(console, 'writing fasta file: '+input_MSA_file_path)
            records = []
            for row_id in row_order:
                #self.log(console,"row_id: '"+row_id+"'")  # DEBUG
                #self.log(console,"alignment: '"+MSA_in['alignment'][row_id]+"'")  # DEBUG
            # using SeqIO makes multiline sequences.  (Gblocks doesn't care, but FastTree doesn't like multiline, and I don't care enough to change code)
                #record = SeqRecord(Seq(MSA_in['alignment'][row_id]), id=row_id, description=default_row_labels[row_id])
                #records.append(record)
            #SeqIO.write(records, input_MSA_file_path, "fasta")
                records.extend(['>'+row_id,
                                MSA_in['alignment'][row_id]
                               ])
            with open(input_MSA_file_path,'w',0) as input_MSA_file_handle:
                input_MSA_file_handle.write("\n".join(records)+"\n")


            # Determine whether nuc or protein sequences
            #
            NUC_MSA_pattern = re.compile("^[\.\-_ACGTUXNRYSWKMBDHVacgtuxnryswkmbdhv \t\n]+$")
            all_seqs_nuc = True
            for row_id in row_order:
                #self.log(console, row_id+": '"+MSA_in['alignment'][row_id]+"'")
                if NUC_MSA_pattern.match(MSA_in['alignment'][row_id]) == None:
                    all_seqs_nuc = False
                    break

        # Missing proper input_type
        #
        else:
            raise ValueError('Cannot yet handle input_ref type of: '+type_name)


        # DEBUG: check the MSA file contents
#        with open(input_MSA_file_path, 'r', 0) as input_MSA_file_handle:
#            for line in input_MSA_file_handle:
#                #self.log(console,"MSA_LINE: '"+line+"'")  # too big for console
#                self.log(invalid_msgs,"MSA_LINE: '"+line+"'")


        # validate input data
        #
        N_seqs = 0
        L_first_seq = 0
        with open(input_MSA_file_path, 'r', 0) as input_MSA_file_handle:
            for line in input_MSA_file_handle:
                if line.startswith('>'):
                    N_seqs += 1
                    continue
                if L_first_seq == 0:
                    for c in line:
                        if c != '-' and c != ' ' and c != "\n":
                            L_first_seq += 1
        # min_seqs_for_conserved
        if 'min_seqs_for_conserved' in params and params['min_seqs_for_conserved'] != None and int(params['min_seqs_for_conserved']) != 0:
            if int(params['min_seqs_for_conserved']) < int(0.5*N_seqs)+1:
                self.log(invalid_msgs,"Min Seqs for Conserved Pos ("+str(params['min_seqs_for_conserved'])+") must be >= N/2+1 (N="+str(N_seqs)+", N/2+1="+str(int(0.5*N_seqs)+1)+")\n")
            if int(params['min_seqs_for_conserved']) > int(params['min_seqs_for_flank']):
                self.log(invalid_msgs,"Min Seqs for Conserved Pos ("+str(params['min_seqs_for_conserved'])+") must be <= Min Seqs for Flank Pos ("+str(params['min_seqs_for_flank'])+")\n")

        # min_seqs_for_flank
        if 'min_seqs_for_flank' in params and params['min_seqs_for_flank'] != None and int(params['min_seqs_for_flank']) != 0:
            if int(params['min_seqs_for_flank']) > N_seqs:
                self.log(invalid_msgs,"Min Seqs for Flank Pos ("+str(params['min_seqs_for_flank'])+") must be <= N (N="+str(N_seqs)+")\n")

        # max_pos_contig_nonconserved
        if 'max_pos_contig_nonconserved' in params and params['max_pos_contig_nonconserved'] != None and int(params['max_pos_contig_nonconserved']) != 0:
            if int(params['max_pos_contig_nonconserved']) < 0:
                self.log(invalid_msgs,"Max Num Non-Conserved Pos ("+str(params['max_pos_contig_nonconserved'])+") must be >= 0"+"\n")
            if int(params['max_pos_contig_nonconserved']) > L_first_seq or int(params['max_pos_contig_nonconserved']) >= 32000:
                self.log(invalid_msgs,"Max Num Non-Conserved Pos ("+str(params['max_pos_contig_nonconserved'])+") must be <= L first seq ("+str(L_first_seq)+") and < 32000\n")

        # min_block_len
        if 'min_block_len' in params and params['min_block_len'] != None and int(params['min_block_len']) != 0:
            if int(params['min_block_len']) < 2:
                self.log(invalid_msgs,"Min Block Len ("+str(params['min_block_len'])+") must be >= 2"+"\n")
            if int(params['min_block_len']) > L_first_seq or int(params['min_block_len']) >= 32000:
                self.log(invalid_msgs,"Min Block Len ("+str(params['min_block_len'])+") must be <= L first seq ("+str(L_first_seq)+") and < 32000\n")

        # trim_level
        if 'trim_level' in params and params['trim_level'] != None and int(params['trim_level']) != 0:
            if int(params['trim_level']) < 0 or int(params['trim_level']) > 2:
                self.log(invalid_msgs,"Trim Level ("+str(params['trim_level'])+") must be >= 0 and <= 2"+"\n")


        if len(invalid_msgs) > 0:

            # load the method provenance from the context object
            self.log(console,"SETTING PROVENANCE")  # DEBUG
            provenance = [{}]
            if 'provenance' in ctx:
                provenance = ctx['provenance']
            # add additional info to provenance here, in this case the input data object reference
            provenance[0]['input_ws_objects'] = []
            provenance[0]['input_ws_objects'].append(params['input_ref'])
            provenance[0]['service'] = 'kb_gblocks'
            provenance[0]['method'] = 'run_Gblocks'

            # report
            report += "FAILURE\n\n"+"\n".join(invalid_msgs)+"\n"
            reportObj = {
                'objects_created':[],
                'text_message':report
                }

            reportName = 'gblocks_report_'+str(uuid.uuid4())
            report_obj_info = ws.save_objects({
#                'id':info[6],
                'workspace':params['workspace_name'],
                'objects':[
                    {
                        'type':'KBaseReport.Report',
                        'data':reportObj,
                        'name':reportName,
                        'meta':{},
                        'hidden':1,
                        'provenance':provenance
                    }
                ]
            })[0]


            self.log(console,"BUILDING RETURN OBJECT")
            returnVal = { 'report_name': reportName,
                          'report_ref': str(report_obj_info[6]) + '/' + str(report_obj_info[0]) + '/' + str(report_obj_info[4])
#                          'output_ref': None
                          }
            self.log(console,"run_Gblocks DONE")
            return [returnVal]


        ### Construct the command
        #
        #  e.g.
        #  for "0.5" gaps: cat "o\n<MSA_file>\nb\n5\ng\nm\nq\n" | Gblocks
        #  for "all" gaps: cat "o\n<MSA_file>\nb\n5\n5\ng\nm\nq\n" | Gblocks
        #
        gblocks_cmd = [self.GBLOCKS_bin]

        # check for necessary files
        if not os.path.isfile(self.GBLOCKS_bin):
            raise ValueError("no such file '"+self.GBLOCKS_bin+"'")
        if not os.path.isfile(input_MSA_file_path):
            raise ValueError("no such file '"+input_MSA_file_path+"'")
        if not os.path.getsize(input_MSA_file_path) > 0:
            raise ValueError("empty file '"+input_MSA_file_path+"'")

        # DEBUG
#        with open(input_MSA_file_path,'r',0) as input_MSA_file_handle:
#            for line in input_MSA_file_handle:
#                #self.log(console,"MSA LINE: '"+line+"'")  # too big for console
#                self.log(invalid_msgs,"MSA LINE: '"+line+"'")


        # set the output path
        timestamp = int((datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()*1000)
        output_dir = os.path.join(self.scratch,'output.'+str(timestamp))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Gblocks names output blocks MSA by appending "-gb" to input file
        #output_GBLOCKS_file_path = os.path.join(output_dir, input_name+'-gb')
        output_GBLOCKS_file_path = input_MSA_file_path+'-gb'
        output_aln_file_path = output_GBLOCKS_file_path

        # Gblocks is interactive and only accepts args from pipe input
        #if 'arg' in params and params['arg'] != None and params['arg'] != 0:
        #    fasttree_cmd.append('-arg')
        #    fasttree_cmd.append(val)


        # Run GBLOCKS, capture output as it happens
        #
        self.log(console, 'RUNNING GBLOCKS:')
        self.log(console, '    '+' '.join(gblocks_cmd))
#        report += "\n"+'running GBLOCKS:'+"\n"
#        report += '    '+' '.join(gblocks_cmd)+"\n"

        # FastTree requires shell=True in order to see input data
        env = os.environ.copy()
        #joined_fasttree_cmd = ' '.join(fasttree_cmd)  # redirect out doesn't work with subprocess unless you join command first
        #p = subprocess.Popen([joined_fasttree_cmd], \
        p = subprocess.Popen(gblocks_cmd, \
                             cwd = self.scratch, \
                             stdin = subprocess.PIPE, \
                             stdout = subprocess.PIPE, \
                             stderr = subprocess.PIPE, \
                             shell = True, \
                             env = env)
#                             executable = '/bin/bash' )

        
        # write commands to process
        #
        #  for "0.5" gaps: cat "o\n<MSA_file>\nb\n5\ng\nm\nq\n" | Gblocks
        #  for "all" gaps: cat "o\n<MSA_file>\nb\n5\n5\ng\nm\nq\n" | Gblocks

        p.stdin.write("o"+"\n")  # open MSA file
        p.stdin.write(input_MSA_file_path+"\n")

        if 'trim_level' in params and params['trim_level'] != None and int(params['trim_level']) != 0:
            p.stdin.write("b"+"\n")
            if int(params['trim_level']) >= 1:
                self.log (console,"changing trim level")
                p.stdin.write("5"+"\n")  # set to "half"
                if int(params['trim_level']) == 2:
                    self.log (console,"changing trim level")
                    p.stdin.write("5"+"\n")  # set to "all"
                elif int(params['trim_level']) > 2:
                    raise ValueError ("trim_level ("+str(params['trim_level'])+") was not between 0-2")
                p.stdin.write("m"+"\n")

        # flank must precede conserved because it acts us upper bound for acceptable conserved values
        if 'min_seqs_for_flank' in params and params['min_seqs_for_flank'] != None and int(params['min_seqs_for_flank']) != 0:
            self.log (console,"changing min_seqs_for_flank")
            p.stdin.write("b"+"\n")
            p.stdin.write("2"+"\n")
            p.stdin.write(str(params['min_seqs_for_flank'])+"\n")
            p.stdin.write("m"+"\n")

        if 'min_seqs_for_conserved' in params and params['min_seqs_for_conserved'] != None and int(params['min_seqs_for_conserved']) != 0:
            self.log (console,"changing min_seqs_for_conserved")
            p.stdin.write("b"+"\n")
            p.stdin.write("1"+"\n")
            p.stdin.write(str(params['min_seqs_for_conserved'])+"\n")
            p.stdin.write("m"+"\n")

        if 'max_pos_contig_nonconserved' in params and params['max_pos_contig_nonconserved'] != None and int(params['max_pos_contig_nonconserved']) > -1:
            self.log (console,"changing max_pos_contig_nonconserved")
            p.stdin.write("b"+"\n")
            p.stdin.write("3"+"\n")
            p.stdin.write(str(params['max_pos_contig_nonconserved'])+"\n")
            p.stdin.write("m"+"\n")

        if 'min_block_len' in params and params['min_block_len'] != None and params['min_block_len'] != 0:
            self.log (console,"changing min_block_len")
            p.stdin.write("b"+"\n")
            p.stdin.write("4"+"\n")
            p.stdin.write(str(params['min_block_len'])+"\n")
            p.stdin.write("m"+"\n")
        
        p.stdin.write("g"+"\n")  # get blocks
        p.stdin.write("q"+"\n")  # quit
        p.stdin.close()
        p.wait()


        # Read output
        #
        while True:
            line = p.stdout.readline()
            #line = p.stderr.readline()
            if not line: break
            self.log(console, line.replace('\n', ''))

        p.stdout.close()
        #p.stderr.close()
        p.wait()
        self.log(console, 'return code: ' + str(p.returncode))
#        if p.returncode != 0:
        if p.returncode != 1:
            raise ValueError('Error running GBLOCKS, return code: '+str(p.returncode) + 
                '\n\n'+ '\n'.join(console))

        # Check that GBLOCKS produced output
        #
        if not os.path.isfile(output_GBLOCKS_file_path):
            raise ValueError("failed to create GBLOCKS output: "+output_GBLOCKS_file_path)
        elif not os.path.getsize(output_GBLOCKS_file_path) > 0:
            raise ValueError("created empty file for GBLOCKS output: "+output_GBLOCKS_file_path)


        # load the method provenance from the context object
        #
        self.log(console,"SETTING PROVENANCE")  # DEBUG
        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']
        # add additional info to provenance here, in this case the input data object reference
        provenance[0]['input_ws_objects'] = []
        provenance[0]['input_ws_objects'].append(params['input_ref'])
        provenance[0]['service'] = 'kb_gblocks'
        provenance[0]['method'] = 'run_Gblocks'


        # reformat output to single-line FASTA MSA and check that output not empty (often happens when param combinations don't produce viable blocks
        #
        output_fasta_buf = []
        id_order = []
        this_id = None
        ids = dict()
        alignment = dict()
        L_alignment = 0;
        L_alignment_set = False
        with open(output_GBLOCKS_file_path,'r',0) as output_GBLOCKS_file_handle:
            for line in output_GBLOCKS_file_handle:
                line = line.rstrip()
                if line.startswith('>'):
                    this_id = line[1:]
                    output_fasta_buf.append ('>'+re.sub('\s','_',default_row_labels[this_id]))
                    id_order.append(this_id)
                    alignment[this_id] = ''
                    if L_alignment != 0 and not L_alignment_set:
                         L_alignment_set = True
                    continue
                output_fasta_buf.append (line)
                for c in line:
                    if c != ' ' and c != "\n":
                        alignment[this_id] += c
                        if not L_alignment_set:
                            L_alignment += 1
        if L_alignment == 0:
            self.log(invalid_msgs,"params produced no blocks.  Consider changing to less stringent values")
        else:
            if 'remove_mask_positions_flag' in params and params['remove_mask_positions_flag'] != None and params['remove_mask_positions_flag'] != '' and params['remove_mask_positions_flag'] == 1:
                self.log (console,"removing mask positions")
                mask = []
                new_alignment = dict()
                for i in range(0,L_alignment):
                    mask[i] = '+'
                    if alignment[id_order[0]][i] == '-' \
                        or alignment[id_order[0]][i] == 'X' \
                        or alignment[id_order[0]][i] == 'x':
                        mask[i] = '-'
                for row_id in id_order:
                    new_alignment[row_id] = ''
                    for i,c in enumerate(alignment[row_id]):
                         if mask[i] == '+':
                            new_alignment[row_id] += c
                alignment = new_alignment

            L_alignment = len(alignment[id_order[0]])

            # write fasta with tidied ids
            output_MSA_file_path = os.path.join(output_dir, params['output_name']+'.fasta');
            with open(output_MSA_file_path,'w',0) as output_MSA_file_handle:
                output_MSA_file_handle.write("\n".join(output_fasta_buf)+"\n")


        # Upload results
        #
        if len(invalid_msgs) == 0:
            self.log(console,"UPLOADING RESULTS")  # DEBUG

# Didn't write file
#            with open(output_MSA_file_path,'r',0) as output_MSA_file_handle:
#                output_MSA_buf = output_MSA_file_handle.read()
#            output_MSA_buf = output_MSA_buf.rstrip()
#            self.log(console,"\nMSA:\n"+output_MSA_buf+"\n")
        
            # Build output_MSA structure
            #   first extract old info from MSA (labels, ws_refs, etc.)
            #
            MSA_out = dict()
            for key in MSA_in.keys():
                 MSA_out[key] = MSA_in[key]

            # then replace with new info
            #
            MSA_out['alignment'] = alignment
            MSA_out['name'] = params['output_name']
            MSA_out['alignment_length'] = alignment_length = L_alignment
            MSA_name = params['output_name']
            MSA_description = ''
            if 'desc' in params and params['desc'] != None and params['desc'] != '':
                MSA_out['desc'] = MSA_description = params['desc']

            # Store MSA_out
            #
            new_obj_info = ws.save_objects({
                            'workspace': params['workspace_name'],
                            'objects':[{
                                    'type': 'KBaseTrees.MSA',
                                    'data': MSA_out,
                                    'name': params['output_name'],
                                    'meta': {},
                                    'provenance': provenance
                                }]
                        })[0]


            # create CLW formatted output file
            max_row_width = 60
            id_aln_gap_width = 1
            gap_chars = ''
            for sp_i in range(id_aln_gap_width):
                gap_chars += ' '
            # DNA
            if all_seqs_nuc:
                strong_groups = { 'AG': True,
                                  'CTU': True
                                  }
                weak_groups = None
            # PROTEINS
            else:
                strong_groups = { 'AST':  True,
                                  'EKNQ': True,
                                  'HKNQ': True,
                                  'DENQ': True,
                                  'HKQR': True,
                                  'ILMV': True,
                                  'FILM': True,
                                  'HY':   True,
                                  'FWY':  True
                                  }
                weak_groups = { 'ACS':    True,
                                'ATV':    True,
                                'AGS':    True,
                                'KNST':   True,
                                'APST':   True,
                                'DGNS':   True,
                                'DEKNQS': True,
                                'DEHKNQ': True,
                                'EHKNQR': True,
                                'FILMV':  True,
                                'FHY':    True
                                }
                
            clw_buf = []
            clw_buf.append ('CLUSTALW format of GBLOCKS trimmed MSA '+MSA_name+': '+MSA_description)
            clw_buf.append ('')

            long_id_len = 0
            aln_pos_by_id = dict()
            for row_id in row_order:
                aln_pos_by_id[row_id] = 0
                row_id_disp = default_row_labels[row_id]
                if long_id_len < len(row_id_disp):
                    long_id_len = len(row_id_disp)

            full_row_cnt = alignment_length // max_row_width
            if alignment_length % max_row_width == 0:
                full_row_cnt -= 1
            for chunk_i in range (full_row_cnt + 1):
                for row_id in row_order:
                    row_id_disp = re.sub('\s','_',default_row_labels[row_id])
                    for sp_i in range (long_id_len-len(row_id_disp)):
                        row_id_disp += ' '

                    aln_chunk_upper_bound = (chunk_i+1)*max_row_width
                    if aln_chunk_upper_bound > alignment_length:
                        aln_chunk_upper_bound = alignment_length
                    aln_chunk = alignment[row_id][chunk_i*max_row_width:aln_chunk_upper_bound]
                    for c in aln_chunk:
                        if c != '-':
                            aln_pos_by_id[row_id] += 1

                    clw_buf.append (row_id_disp+gap_chars+aln_chunk+' '+str(aln_pos_by_id[row_id]))

                # conservation line
                cons_line = ''
                for pos_i in range(chunk_i*max_row_width, aln_chunk_upper_bound):
                    col_chars = dict()
                    seq_cnt = 0
                    for row_id in row_order:
                        char = alignment[row_id][pos_i]
                        if char != '-':
                            seq_cnt += 1
                            col_chars[char] = True
                    if seq_cnt <= 1:
                        cons_char = ' '
                    elif len(col_chars.keys()) == 1:
                        cons_char = '*'
                    else:
                        strong = False
                        for strong_group in strong_groups.keys():
                            this_strong_group = True
                            for seen_char in col_chars.keys():
                                if seen_char not in strong_group:
                                    this_strong_group = False
                                    break
                            if this_strong_group:
                                strong = True
                                break
                        if not strong:
                            weak = False
                            if weak_groups != None:
                                for weak_group in weak_groups.keys():
                                    this_weak_group = True
                                    for seen_char in col_chars.keys():
                                        if seen_char not in weak_group:
                                            this_strong_group = False
                                            break
                                    if this_weak_group:
                                        weak = True
                        if strong:
                            cons_char = ':'
                        elif weak:
                            cons_char = '.'
                        else:
                            cons_char = ' '
                    cons_line += cons_char

                lead_space = ''
                for sp_i in range(long_id_len):
                    lead_space += ' '
                lead_space += gap_chars

                clw_buf.append(lead_space+cons_line)
                clw_buf.append('')

            # write clw to file
            clw_buf_str = "\n".join(clw_buf)+"\n"
            output_clw_file_path = os.path.join(output_dir, input_name+'-MSA.clw');
            with open (output_clw_file_path, "w", 0) as output_clw_file_handle:
                output_clw_file_handle.write(clw_buf_str)
            output_clw_file_handle.close()


            # upload GBLOCKS FASTA output to SHOCK for file_links
            dfu = DFUClient(self.callbackURL)
            try:
                output_upload_ret = dfu.file_to_shock({'file_path': output_aln_file_path,
# DEBUG
#                                                      'make_handle': 0,
#                                                      'pack': 'zip'})
                                                       'make_handle': 0})
            except:
                raise ValueError ('error loading aln_out file to shock')

            # upload GBLOCKS CLW output to SHOCK for file_links
            try:
                output_clw_upload_ret = dfu.file_to_shock({'file_path': output_clw_file_path,
# DEBUG
#                                                      'make_handle': 0,
#                                                      'pack': 'zip'})
                                                           'make_handle': 0})
            except:
                raise ValueError ('error loading clw_out file to shock')


            # make HTML reports
            #
            # HERE


            # build output report object
            #
            self.log(console,"BUILDING REPORT")  # DEBUG

            reportName = 'gblocks_report_'+str(uuid.uuid4())
            reportObj = {
                'objects_created':[{'ref':params['workspace_name']+'/'+params['output_name'],
                                    'description':'GBLOCKS MSA'}],
                #'message': '',
                'message': clw_buf_str,
                'direct_html': '',
                #'direct_html_link_index': 0,
                'file_links': [],
                'html_links': [],
                'workspace_name': params['workspace_name'],
                'report_object_name': reportName
                }
            reportObj['file_links'] = [{'shock_id': output_upload_ret['shock_id'],
                                        'name': params['output_name']+'-GBLOCKS.FASTA',
                                        'label': 'GBLOCKS-trimmed MSA FASTA'
                                        },
                                       {'shock_id': output_clw_upload_ret['shock_id'],
                                        'name': params['output_name']+'-GBLOCKS.CLW',
                                        'label': 'GBLOCKS-trimmed MSA CLUSTALW'
                                        }]

            # save report object
            #
            SERVICE_VER = 'release'
            reportClient = KBaseReport(self.callbackURL, token=ctx['token'], service_ver=SERVICE_VER)
            #report_info = report.create({'report':reportObj, 'workspace_name':params['workspace_name']})
            report_info = reportClient.create_extended_report(reportObj)                                       

        else:  # len(invalid_msgs) > 0
            reportName = 'gblocks_report_'+str(uuid.uuid4())
            report += "FAILURE:\n\n"+"\n".join(invalid_msgs)+"\n"
            reportObj = {
                'objects_created':[],
                'text_message':report
                }

            ws = workspaceService(self.workspaceURL, token=ctx['token'])
            report_obj_info = ws.save_objects({
                    #'id':info[6],
                    'workspace':params['workspace_name'],
                    'objects':[
                        {
                            'type':'KBaseReport.Report',
                            'data':reportObj,
                            'name':reportName,
                            'meta':{},
                            'hidden':1,
                            'provenance':provenance
                            }
                        ]
                    })[0]

            report_info = dict()
            report_info['name'] = report_obj_info[1]
            report_info['ref'] = str(report_obj_info[6])+'/'+str(report_obj_info[0])+'/'+str(report_obj_info[4])


        # done
        returnVal = { 'report_name': report_info['name'],
                      'report_ref': report_info['ref']
                      }

        self.log(console,"run_Gblocks DONE")
        #END run_Gblocks

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method run_Gblocks return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]