Пример #1
0
    def test_index(self):
        '''
        Index a production record for bank ``test`` and search it back.

        Turns indexing on through ``BmajIndex.do_index``, adds a production
        document that declares one ``fasta`` and one ``blast`` format entry,
        then runs a ``match`` query on the bank name and expects two results.
        '''
        BmajIndex.do_index = True
        prod = {
            "data_dir": "/tmp/test/data",
            "formats": {
                "fasta": [{
                    "files": ["fasta/chr1.fa", "fasta/chr2.fa"],
                    "types": ["nucleic"],
                    "tags": {
                        "organism": "hg19"
                    }
                }],
                "blast": [{
                    "files": ["blast/chr1/chr1db"],
                    "types": ["nucleic"],
                    "tags": {
                        "chr": "chr1",
                        "organism": "hg19"
                    }
                }]
            },
            "freeze": False,
            "session": 1416229253.930908,
            "prod_dir": "alu-2003-11-26",
            "release": "2003-11-26",
            "types": ["nucleic"]
        }

        # NOTE(review): the third positional argument is presumably a
        # flush/refresh flag so the document is searchable right away --
        # confirm against BmajIndex.add.
        BmajIndex.add('test', prod, True)

        query = {'query': {'match': {'bank': 'test'}}}
        res = BmajIndex.search(query)
        # Two hits expected (presumably one per format entry -- confirm).
        # assertEqual reports the actual count when the check fails.
        self.assertEqual(len(res), 2)
Пример #2
0
    def test_index(self):
        '''
        Index a production record for bank ``test`` and search it back.

        Turns indexing on through ``BmajIndex.do_index``, adds a production
        document that declares one ``fasta`` and one ``blast`` format entry,
        then runs a ``match`` query on the bank name and expects two results.
        '''
        BmajIndex.do_index = True
        prod = {
            "data_dir": "/tmp/test/data",
            "formats": {
                "fasta": [
                    {
                        "files": [
                            "fasta/chr1.fa",
                            "fasta/chr2.fa"
                        ],
                        "types": [
                            "nucleic"
                        ],
                        "tags": {
                            "organism": "hg19"
                        }
                    }
                ],
                "blast": [
                    {
                        "files": [
                            "blast/chr1/chr1db"
                        ],
                        "types": [
                            "nucleic"
                        ],
                        "tags": {
                            "chr": "chr1",
                            "organism": "hg19"
                        }
                    }
                ]
            },
            "freeze": False,
            "session": 1416229253.930908,
            "prod_dir": "alu-2003-11-26",
            "release": "2003-11-26",
            "types": [
                "nucleic"
            ]
        }

        # NOTE(review): the third positional argument is presumably a
        # flush/refresh flag so the document is searchable right away --
        # confirm against BmajIndex.add.
        BmajIndex.add('test', prod, True)

        query = {
            'query': {
                'match': {'bank': 'test'}
            }
        }
        res = BmajIndex.search(query)
        # Two hits expected (presumably one per format entry -- confirm).
        # assertEqual reports the actual count when the check fails.
        self.assertEqual(len(res), 2)
Пример #3
0
    def save_session(self):
        '''
        Save session in database

        Steps, in order:

        * stamp the session with the current time and the configured log file;
        * when ``self.use_last_session`` is set, pull the stored session with
          the same id from the bank's ``sessions`` array;
        * dump the session's ``download_files`` and ``files`` lists as JSON
          into ``files_<id>`` and ``local_files_<id>`` under the ``cache.dir``
          directory, then reset both session entries to ``[]``;
        * push the session onto the bank's ``sessions`` array, refresh the
          bank ``properties`` and, for ``update``/``remove`` actions, record
          the session id in ``last_update_session``/``last_remove_session``;
        * index the session with ``BmajIndex.add``;
        * for an ``update`` action that has not reached
          ``Workflow.FLOW_OVER`` and has a release, record the session id
          under ``pending.<release>``;
        * for an ``update`` action that reached ``Workflow.FLOW_OVER`` with a
          truthy ``update`` session value, replace the production entry of
          that release and unset its pending marker.

        ``self.bank`` is reloaded from the database before returning.
        '''
        self.session._session['last_update_time'] = time.time()
        self.session._session['log_file'] = self.config.log_file
        if self.use_last_session:
            # Remove last session
            self.banks.update({'name': self.name},
                              {'$pull': {'sessions': {'id': self.session._session['id']}}})

        # Bank field that records the id of the last session of this kind.
        # It stays None for any other action, so the update below simply
        # skips that field instead of failing on an unbound name.
        session_action = self.session.get('action')
        action = None
        if session_action == 'update':
            action = 'last_update_session'
        elif session_action == 'remove':
            action = 'last_remove_session'

        cache_dir = self.config.get('cache.dir')
        download_files = self.session.get('download_files')
        if download_files is not None:
            downloaded_path = os.path.join(cache_dir, 'files_' + str(self.session.get('id')))
            # Context manager closes the handle even if serialisation fails.
            with open(downloaded_path, 'w') as f_downloaded_files:
                f_downloaded_files.write(json.dumps(download_files))
            self.session.set('download_files', [])

        local_files = self.session.get('files')
        if local_files is not None:
            local_path = os.path.join(cache_dir, 'local_files_' + str(self.session.get('id')))
            with open(local_path, 'w') as f_local_files:
                # This cache file holds the local file list, so it is
                # serialised from local_files.
                f_local_files.write(json.dumps(local_files))
            self.session.set('files', [])

        # Insert session
        set_fields = {}
        if action is not None:
            set_fields[action] = self.session._session['id']
        set_fields['properties'] = self.get_properties()
        self.banks.update({'name': self.name}, {
            '$set': set_fields,
            '$push': {'sessions': self.session._session}
        })
        BmajIndex.add(self.name, self.session._session)

        if session_action == 'update' and not self.session.get_status(
                Workflow.FLOW_OVER) and self.session.get('release'):
            # Workflow still running: remember which session is building
            # this release.
            self.banks.update({'name': self.name},
                              {'$set': {'pending.' + self.session.get('release'): self.session._session['id']}})

        if session_action == 'update' and self.session.get_status(
                Workflow.FLOW_OVER) and self.session.get('update'):
            # We expect that a production release has reached the FLOW_OVER status.
            # If no update is needed (same release etc...), the *update* session of the session is set to False
            logging.debug('Bank:Save:' + self.name)
            if len(self.bank['production']) > 0:
                # Remove from database any production entry of this release;
                # the local copy is refreshed by the reload at the end.
                self.banks.update({'name': self.name},
                                  {'$pull': {'production': {'release': self.session._session['release']}}})

            # Types: configured ones first, then those declared by the files.
            release_types = []
            if self.config.get('db.type'):
                release_types = self.config.get('db.type').split(',')

            # Formats: those present in the session, then configured ones.
            release_formats = list(self.session._session['formats'].keys())
            if self.config.get('db.formats'):
                for config_format in self.config.get('db.formats').split(','):
                    if config_format not in release_formats:
                        release_formats.append(config_format)

            for release_format in self.session._session['formats']:
                for release_files in self.session._session['formats'][release_format]:
                    if release_files['types']:
                        for rtype in release_files['types']:
                            if rtype not in release_types:
                                release_types.append(rtype)

            # An explicit prod_dir stored in the session wins over the
            # computed release directory.
            prod_dir = self.session.get_release_directory()
            if self.session.get('prod_dir'):
                prod_dir = self.session.get('prod_dir')

            production = {'release': self.session.get('release'),
                          'remoterelease': self.session.get('remoterelease'),
                          'session': self.session._session['id'],
                          'formats': release_formats,
                          'types': release_types,
                          'size': self.session.get('fullsize'),
                          'data_dir': self.session._session['data_dir'],
                          'dir_version': self.session._session['dir_version'],
                          'prod_dir': prod_dir,
                          'freeze': False}
            self.bank['production'].append(production)

            self.banks.update({'name': self.name},
                              {'$push': {'production': production},
                               '$unset': {'pending.' + self.session.get('release'): ''}
                               })

        # Reload the bank document so the local copy matches the database.
        self.bank = self.banks.find_one({'name': self.name})
Пример #4
0
    def save_session(self):
        '''
        Save session in database

        Steps, in order:

        * stamp the session with the current time and the configured log file;
        * when ``self.use_last_session`` is set, pull the stored session with
          the same id from the bank's ``sessions`` array;
        * dump the session's ``download_files`` and ``files`` lists as JSON
          into ``files_<id>`` and ``local_files_<id>`` under the ``cache.dir``
          directory, then reset both session entries to ``[]``;
        * push the session onto the bank's ``sessions`` array, refresh the
          bank ``properties`` and, for ``update``/``remove`` actions, record
          the session id in ``last_update_session``/``last_remove_session``;
        * index the session with ``BmajIndex.add``;
        * for an ``update`` action that has not reached
          ``Workflow.FLOW_OVER`` and has a release, record the session id
          under ``pending.<release>``;
        * for an ``update`` action that reached ``Workflow.FLOW_OVER`` with a
          truthy ``update`` session value, replace the production entry of
          that release and unset its pending marker.

        ``self.bank`` is reloaded from the database before returning.
        '''
        self.session._session['last_update_time'] = time.time()
        self.session._session['log_file'] = self.config.log_file
        if self.use_last_session:
            # Remove last session
            self.banks.update(
                {'name': self.name},
                {'$pull': {
                    'sessions': {
                        'id': self.session._session['id']
                    }
                }})

        # Bank field that records the id of the last session of this kind.
        # It stays None for any other action, so the update below simply
        # skips that field instead of failing on an unbound name.
        session_action = self.session.get('action')
        action = None
        if session_action == 'update':
            action = 'last_update_session'
        elif session_action == 'remove':
            action = 'last_remove_session'

        cache_dir = self.config.get('cache.dir')
        download_files = self.session.get('download_files')
        if download_files is not None:
            downloaded_path = os.path.join(
                cache_dir, 'files_' + str(self.session.get('id')))
            # Context manager closes the handle even if serialisation fails.
            with open(downloaded_path, 'w') as f_downloaded_files:
                f_downloaded_files.write(json.dumps(download_files))
            self.session.set('download_files', [])

        local_files = self.session.get('files')
        if local_files is not None:
            local_path = os.path.join(
                cache_dir, 'local_files_' + str(self.session.get('id')))
            with open(local_path, 'w') as f_local_files:
                # This cache file holds the local file list, so it is
                # serialised from local_files.
                f_local_files.write(json.dumps(local_files))
            self.session.set('files', [])

        # Insert session
        set_fields = {}
        if action is not None:
            set_fields[action] = self.session._session['id']
        set_fields['properties'] = self.get_properties()
        self.banks.update({'name': self.name}, {
            '$set': set_fields,
            '$push': {
                'sessions': self.session._session
            }
        })
        BmajIndex.add(self.name, self.session._session)

        if session_action == 'update' and not self.session.get_status(
                Workflow.FLOW_OVER) and self.session.get('release'):
            # Workflow still running: remember which session is building
            # this release.
            self.banks.update({'name': self.name}, {
                '$set': {
                    'pending.' + self.session.get('release'):
                    self.session._session['id']
                }
            })

        if session_action == 'update' and self.session.get_status(
                Workflow.FLOW_OVER) and self.session.get('update'):
            # We expect that a production release has reached the FLOW_OVER status.
            # If no update is needed (same release etc...), the *update* session of the session is set to False
            logging.debug('Bank:Save:' + self.name)
            if len(self.bank['production']) > 0:
                # Remove from database any production entry of this release;
                # the local copy is refreshed by the reload at the end.
                self.banks.update({'name': self.name}, {
                    '$pull': {
                        'production': {
                            'release': self.session._session['release']
                        }
                    }
                })

            # Types: configured ones first, then those declared by the files.
            release_types = []
            if self.config.get('db.type'):
                release_types = self.config.get('db.type').split(',')

            # Formats: those present in the session, then configured ones.
            release_formats = list(self.session._session['formats'].keys())
            if self.config.get('db.formats'):
                for config_format in self.config.get('db.formats').split(','):
                    if config_format not in release_formats:
                        release_formats.append(config_format)

            for release_format in self.session._session['formats']:
                for release_files in self.session._session['formats'][
                        release_format]:
                    if release_files['types']:
                        for rtype in release_files['types']:
                            if rtype not in release_types:
                                release_types.append(rtype)

            # An explicit prod_dir stored in the session wins over the
            # computed release directory.
            prod_dir = self.session.get_release_directory()
            if self.session.get('prod_dir'):
                prod_dir = self.session.get('prod_dir')

            production = {
                'release': self.session.get('release'),
                'remoterelease': self.session.get('remoterelease'),
                'session': self.session._session['id'],
                'formats': release_formats,
                'types': release_types,
                'size': self.session.get('fullsize'),
                'data_dir': self.session._session['data_dir'],
                'dir_version': self.session._session['dir_version'],
                'prod_dir': prod_dir,
                'freeze': False
            }
            self.bank['production'].append(production)

            self.banks.update({'name': self.name}, {
                '$push': {
                    'production': production
                },
                '$unset': {
                    'pending.' + self.session.get('release'): ''
                }
            })

        # Reload the bank document so the local copy matches the database.
        self.bank = self.banks.find_one({'name': self.name})