def setUp(self):
    self.utils = UtilsForTest()
    self.curdir = os.path.dirname(os.path.realpath(__file__)) + '/'
    self.examples = os.path.join(self.curdir, 'bank') + '/'
    BiomajConfig.load_config(self.utils.global_properties,
                             allow_user_config=False)
def tearDown(self):
    # Delete lock files
    for bank_name in self.BANKS:
        config = BiomajConfig(bank_name)
        data_dir = config.get('data.dir')
        lock_file = os.path.join(data_dir, 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
    self.utils.clean()
def test_check_method(self):
    """Check that .name, .exe and .args are properly validated when checking bank configuration"""
    BiomajConfig.load_config(self.utils.global_properties)
    for conf in ['noname', 'noexe', 'noargs',
                 'prenoname', 'prenoexe', 'prenoargs',
                 'rmnoname', 'rmnoexe', 'rmnoargs']:
        config = BiomajConfig(conf)
        self.assertFalse(config.check())
def setUp(self):
    self.utils = UtilsForTest()
    BiomajConfig.load_config(self.utils.global_properties,
                             allow_user_config=False)
    self.config = BiomajConfig('testhttp')
    self.http_parse = HTTPParse(
        self.config.get('http.parse.dir.line'),
        self.config.get('http.parse.file.line'),
        int(self.config.get('http.group.dir.name')),
        int(self.config.get('http.group.dir.date')),
        int(self.config.get('http.group.file.name')),
        int(self.config.get('http.group.file.date')),
        self.config.get('http.group.file.date_format', None),
        int(self.config.get('http.group.file.size'))
    )
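# A standalone sketch of the same constructor driven with literal values, for
# experimenting with parser regexes outside of a properties file. The regexes,
# capture-group indexes and date format below are illustrative assumptions,
# not BioMAJ defaults:
http_parse = HTTPParse(
    r'<a href="(?P<dir>[^"]+)/">',  # dir line regex (assumed)
    r'<a href="(?P<file>[^"]+)">',  # file line regex (assumed)
    1, 2,                           # dir name/date capture groups (assumed)
    1, 2,                           # file name/date capture groups (assumed)
    '%d-%b-%Y %H:%M',               # file date format (assumed)
    3                               # file size capture group (assumed)
)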
def setUp(self):
    self.utils = UtilsForTest()
    BiomajConfig.load_config(self.utils.global_properties,
                             allow_user_config=False)
    # Clean banks used in tests
    for bank_name in self.BANKS:
        # Delete all releases
        b = Bank(bank_name)
        b.banks.remove({})
        # Delete lock files
        config = BiomajConfig(bank_name)
        data_dir = config.get('data.dir')
        lock_file = os.path.join(data_dir, 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
def biomaj_daemon():
    '''
    Execute a command request (bank update, removal, ...)
    '''
    apikey = request.headers.get('Authorization')
    token = None
    if apikey:
        bearer = apikey.split()
        if bearer[0] == 'APIKEY':
            token = bearer[1]
    try:
        params = request.get_json()
        options = params['options']
        options_object = Options(options)
        options_object.token = token
        options_object.user = None
        options_object.redis_host = config['redis']['host']
        options_object.redis_port = config['redis']['port']
        options_object.redis_db = config['redis']['db']
        options_object.redis_prefix = config['redis']['prefix']

        user = None
        if token:
            proxy = Utils.get_service_endpoint(config, 'user')
            r = requests.get(proxy + '/api/user/info/apikey/' + token)
            if not r.status_code == 200:
                abort(404, {'message': 'Invalid API Key or connection issue'})
            user = r.json()['user']
            if user:
                options_object.user = user['id']

        if options_object.maintenance in ['on', 'off']:
            if not options_object.user or 'admin' not in config['biomaj'] or options_object.user not in config['biomaj']['admin']:
                abort(401, {'message': 'This action requires authentication with api key'})

        if options_object.bank:
            bmaj_options = BmajOptions(options_object)
            BiomajConfig(options_object.bank, bmaj_options)

            if not options_object.search and not options_object.show and not options_object.check and not options_object.status:
                if not user:
                    abort(401, {'message': 'This action requires authentication with api key'})

        (res, msg) = biomaj_client_action(options_object, config)
    except Exception as e:
        logging.exception(e)
        return jsonify({'status': False, 'msg': str(e)})
    return jsonify({'status': res, 'msg': msg})
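# Example request handled by this endpoint (a sketch; the endpoint path comes
# from the CLI client below, while the option keys inside "options" mirror the
# CLI flags and are assumptions here):
#
#   POST /api/daemon
#   Authorization: APIKEY <token>
#   {"options": {"bank": "alu", "update": true}}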
def setUp(self):
    self.utils = UtilsForTest()
    self.curdir = os.path.dirname(os.path.realpath(__file__))
    BiomajConfig.load_config(self.utils.global_properties,
                             allow_user_config=False)
    config = {
        'mongo': {
            'url': BiomajConfig.global_config.get('GENERAL', 'db.url'),
            'db': BiomajConfig.global_config.get('GENERAL', 'db.name')
        },
        'ldap': {
            'host': BiomajConfig.global_config.get('GENERAL', 'ldap.host'),
            'port': int(BiomajConfig.global_config.get('GENERAL', 'ldap.port')),
            'dn': BiomajConfig.global_config.get('GENERAL', 'ldap.dn')
        }
    }
    BmajUser.set_config(config)
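# global.properties keys consumed above (key names taken from the code; the
# values are placeholders):
#
#   [GENERAL]
#   db.url=mongodb://localhost:27017
#   db.name=biomaj_test
#   ldap.host=localhost
#   ldap.port=389
#   ldap.dn=dc=nodomain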
def set_version(version=None):
    """
    Set BioMAJ current installed version in db_schema collection if version is None

    :param version: db_schema collection version to set
    :type version: str
    """
    installed_version = version
    if installed_version is None:
        installed_version = pkg_resources.get_distribution("biomaj").version
    if BiomajConfig.global_config is None:
        try:
            BiomajConfig.load_config()
        except Exception as err:
            print("* SchemaVersion: Can't find config file: " + str(err))
            return None
    if MongoConnector.db is None:
        MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
                       BiomajConfig.global_config.get('GENERAL', 'db.name'))
    schema = MongoConnector.db_schema
    schema.update_one({'id': 1}, {'$set': {'version': installed_version}})
    print("Schema version set to %s" % str(installed_version))
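# Usage sketch for the helper above, run as a one-off maintenance step:
if __name__ == '__main__':
    set_version()           # record the installed biomaj package version
    # set_version('3.1.0')  # or pin an explicit schema version string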
def test_properties_override(self):
    BiomajConfig.load_config(self.utils.global_properties,
                             allow_user_config=False)
    config = BiomajConfig('local')
    ldap_host = config.get('ldap.host')
    self.assertTrue(ldap_host == 'localhost')
    os.environ['BIOMAJ_LDAP_HOST'] = 'someserver'
    ldap_host = config.get('ldap.host')
    self.assertTrue(ldap_host == 'someserver')
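# The override convention exercised above, generalised (an assumption drawn
# from the ldap.host case, not a documented contract): property key 'x.y' is
# shadowed by environment variable BIOMAJ_X_Y.
os.environ['BIOMAJ_DB_URL'] = 'mongodb://dbhost:27017'  # would shadow 'db.url'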
def biomaj_daemon():
    '''
    Execute a command request (bank update, removal, ...)
    '''
    apikey = request.headers.get('Authorization')
    token = None
    if apikey:
        bearer = apikey.split()
        if bearer[0] == 'APIKEY':
            token = bearer[1]
    try:
        params = request.get_json()
        options = params['options']
        options_object = Options(options)
        options_object.token = token
        options_object.user = None
        options_object.redis_host = config['redis']['host']
        options_object.redis_port = config['redis']['port']
        options_object.redis_db = config['redis']['db']
        options_object.redis_prefix = config['redis']['prefix']

        user = None
        if token:
            r = requests.get(config['web']['local_endpoint'] + '/api/user/info/apikey/' + token)
            if not r.status_code == 200:
                abort(404, {'message': 'Invalid API Key or connection issue'})
            user = r.json()['user']
            if user:
                options_object.user = user['id']

        if options_object.bank:
            bmaj_options = BmajOptions(options_object)
            BiomajConfig(options_object.bank, bmaj_options)

            if not options_object.search and not options_object.show and not options_object.check and not options_object.status:
                if not user:
                    abort(403, {'message': 'This action requires authentication with api key'})

        (res, msg) = biomaj_client_action(options_object)
    except Exception as e:
        logging.exception(e)
        return jsonify({'status': False, 'msg': str(e)})
    return jsonify({'status': res, 'msg': msg})
def __init__(self, config_file):
    self.logger = logging
    self.curBank = None
    self.session = None
    self.executed_callback = None
    with open(config_file, 'r') as ymlfile:
        self.config = yaml.load(ymlfile)
    Utils.service_config_override(self.config)
    Zipkin.set_config(self.config)
    BiomajConfig.load_config(self.config['biomaj']['config'])

    for svc in Utils.services:
        service = svc.lower()
        if self.config['web'].get('local_endpoint_' + service, None):
            BiomajConfig.global_config.set('GENERAL', 'micro.biomaj.service.' + service, '1')
            BiomajConfig.global_config.set('GENERAL', 'micro.biomaj.proxy.' + service,
                                           self.config['web']['local_endpoint_' + service])
    if self.config['web'].get('local_endpoint', None):
        BiomajConfig.global_config.set('GENERAL', 'micro.biomaj.proxy',
                                       self.config['web']['local_endpoint'])

    if self.config.get('rabbitmq', None):
        if self.config['rabbitmq'].get('host', None):
            BiomajConfig.global_config.set('GENERAL', 'micro.biomaj.rabbit_mq',
                                           self.config['rabbitmq']['host'])
        if self.config['rabbitmq'].get('port', None):
            BiomajConfig.global_config.set('GENERAL', 'micro.biomaj.rabbit_mq_port',
                                           str(self.config['rabbitmq']['port']))
        if self.config['rabbitmq'].get('user', None):
            BiomajConfig.global_config.set('GENERAL', 'micro.biomaj.rabbit_mq_user',
                                           self.config['rabbitmq']['user'])
        if self.config['rabbitmq'].get('password', None):
            BiomajConfig.global_config.set('GENERAL', 'micro.biomaj.rabbit_mq_password',
                                           self.config['rabbitmq']['password'])
        if self.config['rabbitmq'].get('virtual_host', None):
            BiomajConfig.global_config.set('GENERAL', 'micro.biomaj.rabbit_mq_virtual_host',
                                           self.config['rabbitmq']['virtual_host'])

    if 'log_config' in self.config:
        for handler in list(self.config['log_config']['handlers'].keys()):
            self.config['log_config']['handlers'][handler] = dict(self.config['log_config']['handlers'][handler])
        logging.config.dictConfig(self.config['log_config'])
        self.logger = logging.getLogger('biomaj')

    self.redis_client = redis.StrictRedis(
        host=self.config['redis']['host'],
        port=self.config['redis']['port'],
        db=self.config['redis']['db'],
        decode_responses=True
    )

    self.logger.info('Daemon service started')
    signal.signal(signal.SIGTERM, self.catch)
    signal.siginterrupt(signal.SIGTERM, False)
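# Minimal config.yml sketch covering the keys read above (key names come from
# the code; the values are placeholders, not shipped defaults):
#
#   biomaj:
#       config: /etc/biomaj/global.properties
#   redis:
#       host: localhost
#       port: 6379
#       db: 0
#   web:
#       local_endpoint: 'http://127.0.0.1:5000'
#   rabbitmq:
#       host: localhost
#       port: 5672
#   log_config: null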
import os

import yaml

from biomaj_core.config import BiomajConfig
from biomaj_core.utils import Utils
from biomaj.workflow import Workflow
from biomaj.workflow import UpdateWorkflow
from biomaj.workflow import RemoveWorkflow

config_file = 'config.yml'
if 'BIOMAJ_CONFIG' in os.environ:
    config_file = os.environ['BIOMAJ_CONFIG']

config = None
with open(config_file, 'r') as ymlfile:
    config = yaml.load(ymlfile)
    Utils.service_config_override(config)

BiomajConfig.load_config(config['biomaj']['config'])

data_dir = BiomajConfig.global_config.get('GENERAL', 'data.dir')
if not os.path.exists(data_dir):
    os.makedirs(data_dir)
log_dir = BiomajConfig.global_config.get('GENERAL', 'log.dir')
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
process_dir = BiomajConfig.global_config.get('GENERAL', 'process.dir')
if not os.path.exists(process_dir):
    os.makedirs(process_dir)
cache_dir = BiomajConfig.global_config.get('GENERAL', 'cache.dir')
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)
lock_dir = BiomajConfig.global_config.get('GENERAL', 'lock.dir')
if not os.path.exists(lock_dir):
    os.makedirs(lock_dir)
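# Equivalent, less repetitive form of the directory bootstrap above (a sketch;
# the key list simply mirrors the properties read in this module):
for key in ('data.dir', 'log.dir', 'process.dir', 'cache.dir', 'lock.dir'):
    path = BiomajConfig.global_config.get('GENERAL', key)
    if not os.path.exists(path):
        os.makedirs(path)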
class TestBiomajFunctional(unittest.TestCase):

    def setUp(self):
        self.utils = UtilsForTest()
        curdir = os.path.dirname(os.path.realpath(__file__))
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)
        # Delete all banks
        b = Bank('local')
        b.banks.remove({})

        self.config = BiomajConfig('local')
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def tearDown(self):
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
        self.utils.clean()

    def test_extract_release_from_file_name(self):
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('release.file', r'test_(\d+)\.txt')
        b.session.config.set('release.regexp', '')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertTrue(b.session.get('release') == '100')

    def test_remoterelease_check(self):
        b = Bank('local')
        b.load_session(ReleaseCheckWorkflow.FLOW)
        b.session.config.set('release.file', r'test_(\d+)\.txt')
        b.session.config.set('release.regexp', '')
        workflow = ReleaseCheckWorkflow(b)
        res = workflow.start()
        remoterelease = b.session.get('remoterelease')
        self.assertTrue(remoterelease == '100')

    def test_extract_release_from_file_content(self):
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('release.file', r'test_100\.txt')
        b.session.config.set('release.regexp', r'Release\s*(\d+)')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertTrue(b.session.get('release') == '103')

    def test_publish(self):
        """
        Update a bank, then publish it
        """
        b = Bank('local')
        b.update()
        current_link = os.path.join(b.config.get('data.dir'),
                                    b.config.get('dir.version'),
                                    'current')
        self.assertFalse(os.path.exists(current_link))
        self.assertTrue(b.bank['current'] is None)
        b.publish()
        self.assertTrue(os.path.exists(current_link))
        self.assertTrue(b.bank['current'] == b.session._session['id'])

    # Should test this on local downloader, changing 1 file to force update,
    # else we would get same bank and there would be no update
    def test_no_update(self):
        """
        Try updating twice; the second time, the bank should not be updated
        """
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.update()
        self.assertFalse(b.session.get('update'))
        self.assertFalse(b.session.get_status(Workflow.FLOW_POSTPROCESS))

    @attr('remotelist')
    def test_download_from_list(self):
        """
        Use remote.list to define a list of files to download
        """
        b = Bank('local')
        fd, file_path = tempfile.mkstemp()
        try:
            b.config.set('remote.list', file_path)
            with os.fdopen(fd, 'w') as tmp:
                tmp.write('[{"name": "test_100.txt", "root": "' + b.config.get('remote.dir') + '"}]')
            b.update()
            self.assertTrue(b.session.get('update'))
        finally:
            #os.remove(file_path)
            print(file_path)

    @attr('release')
    def test_release_control(self):
        """
        Try updating twice; the second time, modify one file (same date), bank should update
        """
        b = Bank('local')
        b.update()
        b.session.config.set('keep.old.version', '3')
        self.assertTrue(b.session.get('update'))
        remote_file = b.session.config.get('remote.dir') + 'test2.fasta'
        os.utime(remote_file, None)
        # Update test2.fasta and set release.control
        b.session.config.set('release.control', 'true')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.update()
        self.assertFalse(b.session.get('update'))
        b.session.config.set('copy.skip', '1')
        b.session.config.set('remote.files', '^test2.fasta')
        b.update()
        self.assertTrue(b.session.get('update'))

    def test_update_hardlinks(self):
        """
        Update a bank twice with hard links. Files copied from previous release must be links.
        """
        b = Bank('local')
        b.config.set('keep.old.version', '3')
        b.config.set('use_hardlinks', '1')
        # First update
        b.update()
        self.assertTrue(b.session.get('update'))
        old_release = b.session.get_full_release_directory()
        # Update test.fasta to force update (note that this file is modified in the
        # source tree)
        remote_file = b.session.config.get('remote.dir') + 'test.fasta.gz'
        stat = os.stat(remote_file)
        one_day = 3600 * 24
        os.utime(remote_file, (stat.st_atime + one_day, stat.st_atime + one_day))
        # Second update
        b.update()
        self.assertTrue(b.session.get('update'))
        new_release = b.session.get_full_release_directory()
        # Test that test2.fasta in both releases is the same file (we can't use
        # test.fasta because it is uncompressed and then not the same file)
        file_old_release = os.path.join(old_release, 'flat', 'test2.fasta')
        file_new_release = os.path.join(new_release, 'flat', 'test2.fasta')
        try:
            self.assertTrue(os.path.samefile(file_old_release, file_new_release))
        except AssertionError:
            msg = "In %s: copy worked but hardlinks were not used." % self.id()
            logging.info(msg)
        # Restore date (otherwise repeated tests fail)
        os.utime(remote_file, (stat.st_atime, stat.st_atime))

    def test_fromscratch_update(self):
        """
        Try updating twice; the second time, the bank should be updated (forced with fromscratch)
        """
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        sess = b.session.get('release')
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get('update'))
        self.assertEqual(b.session.get('release'), sess + '__1')

    def test_fromscratch_update_with_release(self):
        """
        Try updating twice; the second time, the bank should be updated (forced with fromscratch)

        Use case with release defined in release file
        """
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('release.file', r'test_(\d+)\.txt')
        b.session.config.set('release.regexp', '')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertTrue(b.session.get('release') == '100')
        os.makedirs(b.session.get_full_release_directory())
        w = UpdateWorkflow(b)
        # Reset release
        b.session.set('release', None)
        w.options.fromscratch = True
        w.wf_release()
        self.assertTrue(b.session.get('release') == '100__1')

    def test_mix_stop_from_task(self):
        """
        Get a first release, then fromscratch --stop-after, then restart from-task
        """
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        res = b2.update()
        self.assertTrue(b2.session.get('release') == rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        self.assertTrue(b3.session.get('release') == rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task2(self):
        """
        Get a first release, then fromscratch --stop-after, then restart from-task
        """
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        res = b2.update()
        self.assertTrue(b2.session.get('release') == rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        b2.options.from_task = 'download'
        self.assertTrue(b3.session.get('release') == rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task3(self):
        """
        Get a first release, then fromscratch --stop-after, then restart from-task
        """
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        res = b2.update()
        self.assertTrue(b2.session.get('release') == rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        b2.options.from_task = 'postprocess'
        self.assertTrue(b3.session.get('release') == rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task4(self):
        """
        Get a first release, then fromscratch --stop-before, then restart from-task
        """
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_before = 'download'
        b2.options.fromscratch = True
        res = b2.update()
        b3 = Bank('local')
        b3.options.from_task = 'postprocess'
        res = b3.update()
        self.assertFalse(res)

    def test_delete_old_dirs(self):
        """
        Try updating 3 times, oldest dir should be removed
        """
        b = Bank('local')
        b.removeAll(True)
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get('update'))
        self.assertTrue(len(b.bank['production']) == 2)
        b.update()
        self.assertTrue(b.session.get('update'))
        # one new dir, but older ones must be deleted
        self.assertTrue(len(b.bank['production']) == 2)

    def test_delete_old_dirs_with_freeze(self):
        """
        Try updating 3 times, oldest dir should be removed but not frozen releases
        """
        b = Bank('local')
        b.removeAll(True)
        b = Bank('local')
        b.update()
        b.freeze(b.session.get('release'))
        self.assertTrue(b.session.get('update'))
        b.options.fromscratch = True
        b.update()
        b.freeze(b.session.get('release'))
        self.assertTrue(b.session.get('update'))
        self.assertTrue(len(b.bank['production']) == 2)
        b.update()
        self.assertTrue(b.session.get('update'))
        # one new dir, but older ones must be deleted
        self.assertTrue(len(b.bank['production']) == 3)

    def test_removeAll(self):
        b = Bank('local')
        b.update()
        b.removeAll()
        self.assertFalse(os.path.exists(b.get_data_dir()))
        bdb = b.banks.find_one({'name': b.name})
        self.assertTrue(bdb is None)

    def test_remove(self):
        """
        test removal of a production dir
        """
        b = Bank('local')
        b.update()
        self.assertTrue(os.path.exists(b.session.get_full_release_directory()))
        self.assertTrue(len(b.bank['production']) == 1)
        b.remove(b.session.get('release'))
        self.assertFalse(os.path.exists(b.session.get_full_release_directory()))
        b = Bank('local')
        self.assertTrue(len(b.bank['production']) == 0)

    def test_update_stop_after(self):
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertTrue(b.session.get_status('download'))
        self.assertFalse(b.session.get_status('postprocess'))

    def test_update_stop_before(self):
        b = Bank('local')
        b.options.stop_before = 'postprocess'
        b.update()
        self.assertTrue(b.session.get_status('download'))
        self.assertFalse(b.session.get_status('postprocess'))

    def test_reupdate_from_task(self):
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        b2.update()
        self.assertTrue(b2.session.get_status('postprocess'))
        self.assertEqual(b.session.get_full_release_directory(),
                         b2.session.get_full_release_directory())

    def test_reupdate_from_task_error(self):
        b = Bank('local')
        b.options.stop_after = 'check'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        res = b2.update()
        self.assertFalse(res)

    def test_reupdate_from_task_wrong_release(self):
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = 'wrongrelease'
        res = b2.update()
        self.assertFalse(res)

    @attr('process')
    def test_postprocesses_restart_from_proc(self):
        b = Bank('localprocess')
        b.update()
        proc1file = os.path.join(b.session.get_full_release_directory(), 'proc1.txt')
        proc2file = os.path.join(b.session.get_full_release_directory(), 'proc2.txt')
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, reexecute all processes
        b2 = Bank('localprocess')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        b2.update()
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, but at process PROC2 and following
        b3 = Bank('localprocess')
        b3.options.from_task = 'postprocess'
        b3.options.process = 'PROC2'
        b3.options.release = b.session.get('release')
        b3.update()
        #self.assertFalse(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))

    @attr('process')
    def test_postprocess_wrong_process_name(self):
        """If a wrong process name is given, update returns False and prints an error message"""
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        b2.options.process = 'fake'
        self.assertFalse(b2.update())
        self.assertFalse(b2.session.get_status('postprocess'))
        self.assertEqual(b.session.get_full_release_directory(),
                         b2.session.get_full_release_directory())

    def test_computed(self):
        b = Bank('computed')
        res = b.update(True)
        self.assertTrue(res)
        self.assertTrue(os.path.exists(b.session.get_full_release_directory() + '/sub1/flat/test_100.txt'))
        self.assertTrue(b.session.get('update'))
        # Check that, with depends not updated, the bank is not updated itself
        nextb = Bank('computed')
        res = nextb.update(True)
        self.assertFalse(nextb.session.get('update'))

    @attr('nofile')
    def test_computed_nofile(self):
        b = Bank('computed2')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('protocol', 'none')
        b.session.config.set('sub1.files.move', 'flat/test_.*')
        res = b.update(True)
        self.assertTrue(res)
        self.assertTrue(os.path.exists(b.session.get_full_release_directory() + '/sub1/flat/test_100.txt'))

    def test_computed_ref_release(self):
        b = Bank('computed2')
        res = b.update(True)
        b2 = Bank('sub1')
        b2release = b2.bank['production'][len(b2.bank['production']) - 1]['release']
        brelease = b.bank['production'][len(b.bank['production']) - 1]['release']
        self.assertTrue(res)
        self.assertTrue(brelease == b2release)

    # Renamed from test_computed_ref_release: a second method with the same
    # name would silently shadow the test above.
    @attr('computed')
    def test_computed_ref_release_no_update(self):
        b = Bank('computed2')
        res = b.update(True)
        self.assertTrue(b.session.get('update'))
        b2 = Bank('computed2')
        res = b2.update(True)
        self.assertFalse(b2.session.get('update'))

    def test_computederror(self):
        b = Bank('computederror')
        res = b.update(True)
        self.assertFalse(res)
        self.assertTrue(b.session._session['depends']['sub2'])
        self.assertFalse(b.session._session['depends']['error'])

    @attr('directrelease')
    def test_directhttp_release(self):
        b = Bank('directhttp')
        res = b.update()
        self.assertTrue(b.session.get('update'))
        self.assertTrue(os.path.exists(b.session.get_full_release_directory() + '/flat/debian/README.html'))
        # print str(b.session.get('release'))
        # print str(b.session.get('remoterelease'))

    @attr('network')
    def test_multi(self):
        b = Bank('multi')
        res = b.update()
        with open(os.path.join(b.session.get_full_release_directory(), 'flat/test1.json'), 'r') as content_file:
            content = content_file.read()
            my_json = json.loads(content)
            self.assertTrue(my_json['args']['key1'] == 'value1')
        with open(os.path.join(b.session.get_full_release_directory(), 'flat/test2.json'), 'r') as content_file:
            content = content_file.read()
            my_json = json.loads(content)
            self.assertTrue(my_json['form']['key1'] == 'value1')

    def test_freeze(self):
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b.freeze(rel)
        prod = b.get_production(rel)
        self.assertTrue(prod['freeze'] == True)
        res = b.remove(rel)
        self.assertTrue(res == False)
        b.unfreeze(rel)
        prod = b.get_production(rel)
        self.assertTrue(prod['freeze'] == False)
        res = b.remove(rel)
        self.assertTrue(res == True)

    def test_stats(self):
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        stats = Bank.get_banks_disk_usage()
        self.assertTrue(stats[0]['size'] > 0)
        for release in stats[0]['releases']:
            if release['name'] == rel:
                self.assertTrue(release['size'] > 0)

    @attr('process')
    def test_processes_meta_data(self):
        b = Bank('localprocess')
        b.update()
        formats = b.session.get('formats')
        self.assertTrue(len(formats['blast']) == 2)
        self.assertTrue(len(formats['test'][0]['files']) == 3)

    @attr('process')
    def test_search(self):
        b = Bank('localprocess')
        b.update()
        search_res = Bank.search(['blast'], [])
        self.assertTrue(len(search_res) == 1)
        search_res = Bank.search([], ['nucleic'])
        self.assertTrue(len(search_res) == 1)
        search_res = Bank.search(['blast'], ['nucleic'])
        self.assertTrue(len(search_res) == 1)
        search_res = Bank.search(['blast'], ['proteic'])
        self.assertTrue(len(search_res) == 0)

    def test_owner(self):
        """
        test ACL with owner
        """
        b = Bank('local')
        res = b.update()
        self.assertTrue(res)
        b.set_owner('sample')
        b2 = Bank('local')
        try:
            res = b2.update()
            self.fail('not owner, should not be allowed')
        except Exception as e:
            pass
def main():
    parser = argparse.ArgumentParser(add_help=False)
    Utils.set_args(parser)
    options = Options()
    parser.parse_args(namespace=options)
    options.no_log = False
    if options.help:
        print('''
    --config: global.properties file path (local install only)

    --proxy: BioMAJ daemon url (http://x.y.z)

    --trace: Trace workflow in Zipkin server

    --api-key: User API key to authenticate against proxy

    --whatsup: Get info on what biomaj is doing

    --last-log: Get log file of last session
        [MANDATORY]
        --proxy http://x.y.z
        [OPTIONAL]
        --tail X: number of lines to tail from log file

    --about-me: Get my info
        [MANDATORY]
        --proxy http://x.y.z
        --user-login XX
        --user-password XX

    --update-status: get status of an update
        [MANDATORY]
        --bank xx: name of the bank to check
        --proxy http://x.y.z

    --update-cancel: cancel current update
        [MANDATORY]
        --bank xx: name of the bank to cancel
        --proxy http://x.y.z

    --status: list of banks with published release
        [OPTIONAL]
        --bank xx: get status details of bank

    --status-ko: list of banks in error status (last run)

    --log DEBUG|INFO|WARN|ERR
        [OPTIONAL]: set log level in logs for this run, default is set in global.properties file

    --check: Check bank property file
        [MANDATORY]
        --bank xx: name of the bank to check (will check xx.properties)

    --owner yy: Change owner of the bank (user id)
        [MANDATORY]
        --bank xx: name of the bank

    --visibility public|private: change visibility public/private of a bank
        [MANDATORY]
        --bank xx: name of the bank

    --change-dbname yy: Change name of the bank to this new name
        [MANDATORY]
        --bank xx: current name of the bank

    --move-production-directories yy: Change bank production directories location to this new path, path must exist
        [MANDATORY]
        --bank xx: current name of the bank

    --update: Update bank
        [MANDATORY]
        --bank xx: name of the bank(s) to update, comma separated
        [OPTIONAL]
        --publish: after update set as *current* version
        --from-scratch: force a new update cycle, even if release is identical, release will be incremented like (myrel_1)
        --stop-before xx: stop update cycle before the start of step xx
        --stop-after xx: stop update cycle after step xx has completed
        --from-task xx --release yy: Force a re-update cycle for bank release *yy* or from current cycle (in production directories), skipping steps up to *xx*
        --process xx: linked to from-task, optionally specify a block, meta or process name to start from
        --release xx: release to update

    --publish: Publish bank as current release to use
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to publish

    --unpublish: Unpublish bank (remove current)
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove-all: Remove all bank releases and database records
        [MANDATORY]
        --bank xx: name of the bank to update
        [OPTIONAL]
        --force: remove frozen releases

    --remove-pending: Remove pending releases
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove: Remove bank release (files and database release)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

        Release must not be the *current* version. If this is the case, publish a new release before.

    --freeze: Freeze bank release (cannot be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to freeze

    --unfreeze: Unfreeze bank release (can be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to unfreeze

    --search: basic search in bank production releases, return list of banks
        --formats xx,yy: list of comma separated formats
        AND/OR
        --types xx,yy: list of comma separated types
        --query "LUCENE query syntax": search in index (if activated)

    --show: Show bank files per format
        [MANDATORY]
        --bank xx: name of the bank to show
        [OPTIONAL]
        --release xx: release of the bank to show

    --maintenance on/off/status: (un)set biomaj in maintenance mode to prevent updates/removal
        ''')
        return

    proxy = options.proxy
    if 'BIOMAJ_PROXY' in os.environ:
        proxy = os.environ['BIOMAJ_PROXY']
        options.proxy = proxy
    if 'BIOMAJ_APIKEY' in os.environ:
        apikey = os.environ['BIOMAJ_APIKEY']
        options.apikey = apikey

    if not proxy:
        try:
            from biomaj_daemon.daemon.utils import biomaj_client_action
        except Exception as e:
            print('Failed to import biomaj libraries. Either you forgot the --proxy option, or you use a local biomaj install and did not install it (biomaj-daemon package)')

    try:
        if not proxy:
            from biomaj_daemon.daemon.utils import biomaj_client_action
            options.user = getpass.getuser()
            BiomajConfig.load_config(options.config)
            (status, msg) = biomaj_client_action(options)
        else:
            headers = {}
            if options.apikey:
                headers = {'Authorization': 'APIKEY ' + options.apikey}

            if options.lastlog:
                if not options.bank:
                    print("--bank is missing\n")
                    sys.exit(1)
                if options.tail:
                    r = requests.get(proxy + '/api/daemon/bank/' + options.bank + '/log/' + options.tail, headers=headers)
                    print(r.text)
                else:
                    r = requests.get(proxy + '/api/daemon/bank/' + options.bank + '/log', headers=headers)
                    print(r.text)
                sys.exit(0)

            r = requests.post(proxy + '/api/daemon', headers=headers, json={'options': options.__dict__})
            if not r.status_code == 200:
                print('Failed to contact BioMAJ daemon')
                sys.exit(1)
            result = r.json()
            status = result['status']
            msg = result['msg']

        if not status:
            print('An error occurred:\n')
            print(str(msg))
        else:
            if msg:
                print(str(msg))
            else:
                print('Done.')
    except Exception as e:
        logging.exception(e)
        print('Error:' + str(e))
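# Example invocations of this entry point (a sketch; 'biomaj-cli.py' is the
# assumed script name and the proxy URL is a placeholder):
#
#   biomaj-cli.py --proxy http://biomaj.example.org --status
#   biomaj-cli.py --proxy http://biomaj.example.org --api-key XXXX --update --bank alu
#   BIOMAJ_PROXY=http://biomaj.example.org biomaj-cli.py --whatsup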
'''
Import biomaj banks statistics in InfluxDB if never done before.
'''
from influxdb import InfluxDBClient
from biomaj.bank import Bank
from biomaj_core.config import BiomajConfig
import sys

if len(sys.argv) != 2:
    print('Usage: influxdb_import.py path_to_global.properties')
    sys.exit(1)

BiomajConfig.load_config(config_file=sys.argv[1])

influxdb = None
try:
    host = BiomajConfig.global_config.get('GENERAL', 'influxdb.host')
    user = BiomajConfig.global_config.get('GENERAL', 'influxdb.user')
    password = BiomajConfig.global_config.get('GENERAL', 'influxdb.password')
    port = BiomajConfig.global_config.get('GENERAL', 'influxdb.port')
    database = BiomajConfig.global_config.get('GENERAL', 'influxdb.db')
    influxdb = InfluxDBClient(host=host, database=database, port=port,
                              username=user, password=password)
except Exception as e:
    print('Failed to connect to influxdb, check configuration in global.properties: ' + str(e))
    sys.exit(1)

res = influxdb.query('select last("value") from "biomaj.banks.quantity"')
if res:
    print('Found data in influxdb, update info....')

banks = Bank.list()
def __init__(self):
    config_file = 'config.yml'
    if 'BIOMAJ_CONFIG' in os.environ:
        config_file = os.environ['BIOMAJ_CONFIG']

    self.cfg = None
    with open(config_file, 'r') as ymlfile:
        self.cfg = yaml.load(ymlfile, Loader=Loader)
        Utils.service_config_override(self.cfg)

    # There is an issue with tcp checks, see https://github.com/cablehead/python-consul/issues/136
    if self.cfg['consul']['host']:
        consul_agent = consul.Consul(host=self.cfg['consul']['host'])
        consul_agent.agent.service.register(
            'biomaj-ftp',
            service_id=self.cfg['consul']['id'],
            address=self.cfg['consul']['id'],
            port=self.cfg['ftp']['port'],
            tags=['biomaj'])
        check = consul.Check.tcp(host=self.cfg['consul']['id'],
                                 port=self.cfg['ftp']['port'],
                                 interval=20)
        consul_agent.agent.check.register(
            self.cfg['consul']['id'] + '_check',
            check=check,
            service_id=self.cfg['consul']['id'])

    if self.cfg['log_config'] is not None:
        for handler in list(self.cfg['log_config']['handlers'].keys()):
            self.cfg['log_config']['handlers'][handler] = dict(self.cfg['log_config']['handlers'][handler])
        logging.config.dictConfig(self.cfg['log_config'])
    self.logger = logging.getLogger('biomaj')

    BiomajConfig.load_config(self.cfg['biomaj']['config'])
    BmajUser.set_config(self.cfg)

    authorizer = BiomajAuthorizer()
    authorizer.set_config(self.cfg)
    authorizer.set_logger(self.logger)

    self.handler = FTPHandler
    self.handler.authorizer = authorizer

    if 'passive_ports_start' in self.cfg['ftp'] and 'passive_ports_end' in self.cfg['ftp'] and self.cfg['ftp']['passive_ports_start'] and self.cfg['ftp']['passive_ports_end']:
        self.handler.passive_ports = range(self.cfg['ftp']['passive_ports_start'],
                                           self.cfg['ftp']['passive_ports_end'])
        self.logger.info('Use passive ports range %d:%d' % (
            self.cfg['ftp']['passive_ports_start'],
            self.cfg['ftp']['passive_ports_end']))
    else:
        self.handler.passive_ports = range(60000, 65535)
        self.logger.info('Use passive ports range %d:%d' % (60000, 65535))

    masquerade_address = os.environ.get('MASQUERADE_ADDRESS', None)
    if masquerade_address:
        self.handler.masquerade_address = os.environ['MASQUERADE_ADDRESS']
    elif 'masquerade_address' in self.cfg['ftp'] and self.cfg['ftp']['masquerade_address'] is not None:
        self.handler.masquerade_address = self.cfg['ftp']['masquerade_address']
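# Sketch of serving the configured handler with pyftpdlib. `BiomajFTP` is an
# assumed name for the class owning the __init__ above, and the bind address
# is a placeholder:
from pyftpdlib.servers import FTPServer

ftp = BiomajFTP()
server = FTPServer(('0.0.0.0', ftp.cfg['ftp']['port']), ftp.handler)
server.serve_forever()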
class TestBiomajSetup(unittest.TestCase):

    def setUp(self):
        self.utils = UtilsForTest()
        curdir = os.path.dirname(os.path.realpath(__file__))
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)
        # Delete all banks
        b = Bank('alu')
        b.banks.remove({})

        self.config = BiomajConfig('alu')
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'alu.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def tearDown(self):
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'alu.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
        self.utils.clean()

    def test_new_bank(self):
        """
        Checks bank init
        """
        b = Bank('alu')

    def test_new_session(self):
        """
        Checks an empty session is created
        """
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        for key in b.session._session['status'].keys():
            self.assertFalse(b.session.get_status(key))

    def test_session_reload_notover(self):
        """
        Checks a pending (not over) session is reused if present
        """
        b = Bank('alu')
        for i in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        self.assertTrue(b.session.get_status(Workflow.FLOW_INIT))

    def test_clean_old_sessions(self):
        """
        Checks old sessions are cleaned up after an update
        """
        b = Bank('local')
        for i in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()
        b2 = Bank('local')
        b2.update()
        b2.clean_old_sessions()
        self.assertTrue(len(b2.bank['sessions']) == 1)

    def test_session_reload_over(self):
        """
        Checks a finished (over) session is not reused
        """
        b = Bank('alu')
        for i in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            s._session['status'][Workflow.FLOW_OVER] = True
            b.session = s
            b.save_session()
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        self.assertFalse(b.session.get_status(Workflow.FLOW_INIT))

    def test_bank_list(self):
        b1 = Bank('alu')
        b2 = Bank('local')
        banks = Bank.list()
        self.assertTrue(len(banks) == 2)

    @attr('test')
    @attr('network')
    def test_get_release(self):
        """
        Get release
        """
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        res = b.update()
        self.assertTrue(b.session.get('update'))
        self.assertTrue(res)
        self.assertTrue(b.session._session['release'] is not None)

    @attr('network')
    def test_remove_session(self):
        b = Bank('alu')
        for i in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()
        self.assertTrue(len(b.bank['sessions']) == 4)
        b.remove_session(b.session.get('id'))
        self.assertTrue(len(b.bank['sessions']) == 3)

    @attr('process')
    def test_postprocesses_setup(self):
        b = Bank('localprocess')
        pfactory = PostProcessFactory(b)
        pfactory.run(True)
        self.assertTrue(len(pfactory.threads_tasks[0]) == 2)
        self.assertTrue(len(pfactory.threads_tasks[1]) == 1)

    @attr('process')
    def test_postprocesses_exec_again(self):
        """
        Execute once, set a status to false, check that False processes are executed
        """
        b = Bank('localprocess')
        pfactory = PostProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.blocks['BLOCK1']['META0']['PROC0'])
        self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC1'])
        self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC2'])
        blocks = copy.deepcopy(pfactory.blocks)
        blocks['BLOCK2']['META1']['PROC2'] = False
        pfactory2 = PostProcessFactory(b, blocks)
        pfactory2.run()
        self.assertTrue(pfactory2.blocks['BLOCK2']['META1']['PROC2'])

    @attr('process')
    def test_preprocesses(self):
        b = Bank('localprocess')
        pfactory = PreProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status['META0']['PROC0'])

    @attr('process')
    def test_removeprocesses(self):
        b = Bank('localprocess')
        pfactory = RemoveProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status['META0']['PROC0'])

    def test_dependencies_list(self):
        b = Bank('computed')
        deps = b.get_dependencies()
        self.assertTrue(len(deps) == 2)
def test_use_hardlinks_config(self):
    """
    Test that hardlinks are disabled by default and can be overridden.
    """
    BiomajConfig.load_config(self.utils.global_properties,
                             allow_user_config=False)
    # Must be disabled in local.properties
    config = BiomajConfig('local')
    self.assertFalse(config.get_bool("use_hardlinks"))
    # Must be enabled for hardlinks.properties (override)
    config = BiomajConfig('hardlinks')
    self.assertTrue(config.get_bool("use_hardlinks"))
    # Reload file with use_hardlinks=1
    BiomajConfig.load_config(self.utils.global_properties_hl,
                             allow_user_config=False)
    config = BiomajConfig('local')
    self.assertTrue(config.get_bool("use_hardlinks"))
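# Sketch of the property files this test assumes (contents are illustrative):
#
#   # local.properties: use_hardlinks left unset, so it defaults to false
#
#   # hardlinks.properties
#   use_hardlinks=1
#
#   # global properties variant loaded as global_properties_hl
#   use_hardlinks=1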
def migrate_pendings():
    """
    Migrate database (schema 3.0.18): check the actual BioMAJ version and, if older
    than 3.0.17, do the 'pending' key migration
    """
    if BiomajConfig.global_config is None:
        try:
            BiomajConfig.load_config()
        except Exception as err:
            print("* SchemaVersion: Can't find config file: " + str(err))
            return None
    if MongoConnector.db is None:
        MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
                       BiomajConfig.global_config.get('GENERAL', 'db.name'))

    schema = MongoConnector.db_schema
    banks = MongoConnector.banks
    users = MongoConnector.users
    schema_version = SchemaVersion.get_dbschema_version(schema)
    moderate = int(schema_version.split('.')[1])
    minor = int(schema_version.split('.')[2])

    if moderate == 0 and minor <= 17:
        print("Migrate from release: %s" % schema_version)
        # Update pending releases
        bank_list = banks.find()
        updated = 0
        for bank in bank_list:
            if 'pending' in bank:
                # Check we have an old pending type
                if type(bank['pending']) == dict:
                    updated += 1
                    pendings = []
                    for release in sorted(bank['pending'], key=lambda r: bank['pending'][r]):
                        pendings.append({'release': str(release), 'id': bank['pending'][str(release)]})
                    if len(pendings) > 0:
                        banks.update({'name': bank['name']}, {'$set': {'pending': pendings}})
                else:
                    # We remove old type for 'pending'
                    banks.update({'name': bank['name']}, {'$unset': {'pending': ""}})
        print("Migration: %d bank(s) updated" % updated)

    if moderate < 1:
        updated = 0
        user_list = users.find()
        for user in user_list:
            if 'apikey' not in user:
                updated += 1
                api_key = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(10))
                users.update({'_id': user['_id']}, {'$set': {'apikey': api_key}})
        print("Migration: %d user(s) updated" % updated)

        # production size
        bank_list = banks.find()
        updated = 0
        for bank in bank_list:
            for prod in bank['production']:
                '''
                Example production document:
                { "_id" : ObjectId("54edb10856e8bb11340b5f51"),
                  "production" : [
                      { "freeze" : false, "remoterelease" : "2003-11-26",
                        "session" : 1427809848.560108, "data_dir" : "/db",
                        "formats" : [ ], "release" : "2003-11-26",
                        "dir_version" : "ncbi/blast/alu",
                        "prod_dir" : "alu-2003-11-26", "types" : [ ],
                        "size" : 319432 } ] }
                '''
                if 'size' not in prod or prod['size'] == 0:
                    logging.info('Calculate size for bank %s' % (bank['name']))
                    if 'data_dir' not in prod or not prod['data_dir'] or 'prod_dir' not in prod or not prod['prod_dir'] or 'dir_version' not in prod or not prod['dir_version']:
                        logging.warn('no production directory information for %s, skipping...' % (bank['name']))
                        continue
                    prod_dir = os.path.join(prod['data_dir'], prod['dir_version'], prod['prod_dir'])
                    if not os.path.exists(prod_dir):
                        logging.warn('production directory %s does not exist for %s, skipping...' % (prod_dir, bank['name']))
                        continue
                    dir_size = Utils.get_folder_size(prod_dir)
                    banks.update({'name': bank['name'], 'production.release': prod['release']},
                                 {'$set': {'production.$.size': dir_size}})
                    updated += 1
        print("Migration: %d bank production info updated" % updated)
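# One-off invocation sketch for the migration above, typically run once after
# upgrading BioMAJ:
if __name__ == '__main__':
    migrate_pendings()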
def load_config(request):
    if BiomajConfig.global_config is None:
        settings = request.registry.settings
        global_properties = settings['global_properties']
        BiomajConfig.load_config(global_properties)
class TestElastic(unittest.TestCase):
    """
    Test indexing and search
    """

    def setUp(self):
        BmajIndex.es = None
        self.utils = UtilsForTest()
        curdir = os.path.dirname(os.path.realpath(__file__))
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
        if not BmajIndex.do_index:
            self.skipTest("Skipping indexing tests due to elasticsearch not available")
        # Delete all banks
        b = Bank('local')
        b.banks.remove({})
        BmajIndex.delete_all_bank('local')
        self.config = BiomajConfig('local')
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def tearDown(self):
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
        self.utils.clean()
        BmajIndex.delete_all_bank('test')

    def test_index(self):
        BmajIndex.do_index = True
        prod = {
            "data_dir": "/tmp/test/data",
            "formats": {
                "fasta": [{
                    "files": ["fasta/chr1.fa", "fasta/chr2.fa"],
                    "types": ["nucleic"],
                    "tags": {"organism": "hg19"}
                }],
                "blast": [{
                    "files": ["blast/chr1/chr1db"],
                    "types": ["nucleic"],
                    "tags": {"chr": "chr1", "organism": "hg19"}
                }]
            },
            "freeze": False,
            "session": 1416229253.930908,
            "prod_dir": "alu-2003-11-26",
            "release": "2003-11-26",
            "types": ["nucleic"]
        }
        BmajIndex.add('test', prod, True)
        query = {'query': {'match': {'bank': 'test'}}}
        res = BmajIndex.search(query)
        self.assertEqual(len(res), 2)

    def test_remove_all(self):
        self.test_index()
        query = {'query': {'match': {'bank': 'test'}}}
        BmajIndex.delete_all_bank('test')
        res = BmajIndex.search(query)
        self.assertEqual(len(res), 0)
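# Note on the expected hit count in test_index: the prod document holds one
# 'fasta' and one 'blast' format entry and a match on bank 'test' returns 2
# results, which suggests BmajIndex.add() indexes one document per format
# entry. A minimal search sketch (query shape copied from the test):
#   res = BmajIndex.search({'query': {'match': {'bank': 'test'}}})
#   assert len(res) == 2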
def add_property(bank=None, prop=None, value=None, cfg=None):
    """
    Update properties field for banks.

    :param bank: Bank name to update, default all
    :type bank: str
    :param prop: New property to add
    :type prop: str
    :param value: Property value; if cfg is set, the value is taken from the bank configuration key cfg
    :type value: str
    :param cfg: Bank configuration key the value is taken from
    :type cfg: str
    :raise Exception: If no configuration file is found
    :returns: True/False
    :rtype: bool
    """
    if BiomajConfig.global_config is None:
        try:
            BiomajConfig.load_config()
        except Exception as err:
            print("* SchemaVersion: Can't find config file: " + str(err))
            return False
    if prop is None:
        print("Property key is required", file=sys.stderr)
        return False
    if MongoConnector.db is None:
        MongoConnector(BiomajConfig.global_config.get('GENERAL', 'db.url'),
                       BiomajConfig.global_config.get('GENERAL', 'db.name'))

    schema = MongoConnector.db_schema
    banks = MongoConnector.banks
    schema_version = SchemaVersion.get_dbschema_version(schema)
    moderate = int(schema_version.split('.')[1])
    minor = int(schema_version.split('.')[2])

    if moderate <= 1 and minor <= 0:
        bank_list = []
        if bank is None:
            bank_list = banks.find()
        else:
            bank_list = [banks.find_one({'name': bank})]
        updated = 0
        for bank in bank_list:
            if bank is None:
                # find_one() returns None when the requested bank does not exist
                continue
            if 'properties' in bank:
                b = Bank(bank['name'], no_log=True)
                new_prop = 'properties.' + prop
                new_value = value
                if new_value is None:
                    if cfg is not None:
                        new_value = b.config.get(cfg)
                    else:
                        print("[%s] With value set to None, you must set cfg to get "
                              "the corresponding value" % str(bank['name']), file=sys.stderr)
                        continue
                banks.update({'name': bank['name']}, {'$set': {new_prop: new_value}})
                updated += 1
            else:
                logging.warning("Bank %s does not have a 'properties' field!" % str(bank['name']))
        print("Add property: %d bank(s) updated" % updated)
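# Usage sketch (bank name and keys are hypothetical; assumes MongoDB is
# reachable and the global configuration is loadable):
#   add_property(bank='alu', prop='type', cfg='db.type')
#       -> copies the bank's 'db.type' config value into a new
#          'properties.type' field on the 'alu' document
#   add_property(prop='source', value='ncbi')
#       -> sets 'properties.source' to the literal 'ncbi' on all banks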
def main(global_config, **settings):
    """
    This function returns a Pyramid WSGI application.
    """
    config_file = 'config.yml'
    if 'BIOMAJ_CONFIG' in os.environ:
        config_file = os.environ['BIOMAJ_CONFIG']
    config = None
    with open(config_file, 'r') as ymlfile:
        # safe_load avoids arbitrary object construction from the YAML file
        config = yaml.safe_load(ymlfile)
    Utils.service_config_override(config)

    BiomajConfig.load_config(config['biomaj']['config'])
    settings['watcher_config'] = config
    settings['global_properties'] = config['biomaj']['config']

    if config['consul']['host']:
        consul_agent = consul.Consul(host=config['consul']['host'])
        consul_agent.agent.service.register(
            'biomaj-watcher-static',
            service_id=config['consul']['id'],
            address=config['web']['hostname'],
            port=config['web']['port'],
            tags=[
                'biomaj', 'watcher', 'static',
                'traefik.backend=biomaj-watcher',
                'traefik.frontend.rule=PathPrefix:/app',
                'traefik.enable=true'
            ]
        )
        consul_agent.agent.service.register(
            'biomaj-watcher-api',
            service_id=config['consul']['id'] + '_api',
            address=config['web']['hostname'],
            port=config['web']['port'],
            tags=[
                'biomaj', 'watcher', 'api',
                'traefik.backend=biomaj-watcher',
                'traefik.frontend.rule=PathPrefix:/api/watcher',
                'traefik.enable=true'
            ]
        )
        check = consul.Check.http(
            url='http://' + config['web']['hostname'] + ':' + str(config['web']['port']) + '/api/watcher',
            interval=20)
        consul_agent.agent.check.register(
            config['consul']['id'] + '_check',
            check=check,
            service_id=config['consul']['id'])

    config = Configurator(settings=settings)
    config.include('pyramid_chameleon')
    config.add_subscriber(before_render, BeforeRender)

    authentication_policy = AuthTktAuthenticationPolicy('seekrit', callback=None, hashalg='sha512')
    authorization_policy = ACLAuthorizationPolicy()
    config.set_authentication_policy(authentication_policy)
    config.set_authorization_policy(authorization_policy)

    config.add_static_view('static', 'static', cache_max_age=3600)
    config.add_static_view('app', 'biomajwatcher:webapp/app')
    config.add_route('home', '/')
    config.add_route('ping', '/api/watcher')
    config.add_route('user', '/api/watcher/user')
    config.add_route('user_banks', '/api/watcher/user/{id}/banks')
    config.add_route('api_user', '/user')
    config.add_route('api_user_banks', '/user/{id}/banks')
    config.add_route('bank', '/bank')
    config.add_route('bankdetails', '/bank/{id}')
    config.add_route('banklocked', '/bank/{id}/locked')
    config.add_route('bankstatus', '/bank/{id}/status')
    config.add_route('bankconfig', '/bank/{id}/config')
    config.add_route('bankreleaseremove', '/bank/{id}/{release}')
    config.add_route('sessionlog', '/bank/{id}/log/{session}')
    config.add_route('api_bank', '/api/watcher/bank')
    config.add_route('api_bankdetails', '/api/watcher/bank/{id}')
    config.add_route('api_bankconfig', '/api/watcher/bank/{id}/config')
    config.add_route('api_banklocked', '/api/watcher/bank/{id}/locked')
    config.add_route('api_bankstatus', '/api/watcher/bank/{id}/status')
    config.add_route('api_sessionlog', '/api/watcher/bank/{id}/log/{session}')
    config.add_route('schedulebank', '/schedule')
    config.add_route('updateschedulebank', '/schedule/{name}')
    config.add_route('api_schedulebank', '/api/watcher/schedule')
    config.add_route('api_updateschedulebank', '/api/watcher/schedule/{name}')
    config.add_route('search', '/search')
    config.add_route('search_format', '/search/format/{format}')
    config.add_route('search_format_type', '/search/format/{format}/type/{type}')
    config.add_route('search_type', '/search/type/{type}')
    config.add_route('api_search', '/api/watcher/search')
    config.add_route('api_search_format', '/api/watcher/search/format/{format}')
    config.add_route('api_search_format_type', '/api/watcher/search/format/{format}/type/{type}')
    config.add_route('api_search_type', '/api/watcher/search/type/{type}')
    config.add_route('stat', '/stat')
    config.add_route('api_stat', '/api/watcher/stat')
    config.add_route('is_auth', '/auth')
    config.add_route('auth', '/auth/{id}')
    config.add_route('logout', '/logout')
    config.add_route('api_is_auth', '/api/watcher/auth')
    config.add_route('api_auth', '/api/watcher/auth/{id}')
    config.add_route('api_logout', '/api/watcher/logout')
    config.add_route('old_api', 'BmajWatcher/GET')
    config.scan()

    # Automatically serialize bson ObjectId and datetime to Mongo extended JSON
    json_renderer = JSON()

    def pymongo_adapter(obj, request):
        return json_util.default(obj)

    json_renderer.add_adapter(ObjectId, pymongo_adapter)
    json_renderer.add_adapter(datetime.datetime, pymongo_adapter)
    config.add_renderer('json', json_renderer)

    return config.make_wsgi_app()
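# Minimal shape of the YAML file this entry point reads (a sketch; the keys
# are the ones accessed above, the values are hypothetical):
#   biomaj:
#     config: /etc/biomaj/global.properties
#   consul:
#     host: null          # a falsy host skips the consul registration block
#     id: biomaj-watcher-1
#   web:
#     hostname: localhost
#     port: 5000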
class TestBiomajSetup(unittest.TestCase):

    def setUp(self):
        self.utils = UtilsForTest()
        curdir = os.path.dirname(os.path.realpath(__file__))
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
        # Delete all banks
        b = Bank('alu')
        b.banks.remove({})
        self.config = BiomajConfig('alu')
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'alu.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def tearDown(self):
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'alu.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
        self.utils.clean()

    def test_new_bank(self):
        """
        Checks bank init
        """
        b = Bank('alu')

    def test_new_session(self):
        """
        Checks an empty session is created
        """
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        for key in b.session._session['status'].keys():
            self.assertFalse(b.session.get_status(key))

    def test_session_reload_notover(self):
        """
        Checks a session is reused if it is not over
        """
        b = Bank('alu')
        for i in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        self.assertTrue(b.session.get_status(Workflow.FLOW_INIT))

    def test_clean_old_sessions(self):
        """
        Checks old sessions are removed
        """
        b = Bank('local')
        for i in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()
        b2 = Bank('local')
        b2.update()
        b2.clean_old_sessions()
        self.assertEqual(len(b2.bank['sessions']), 1)

    def test_session_reload_over(self):
        """
        Checks a new session is created if the previous one is over
        """
        b = Bank('alu')
        for i in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            s._session['status'][Workflow.FLOW_OVER] = True
            b.session = s
            b.save_session()
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        self.assertFalse(b.session.get_status(Workflow.FLOW_INIT))

    def test_bank_list(self):
        b1 = Bank('alu')
        b2 = Bank('local')
        banks = Bank.list()
        self.assertEqual(len(banks), 2)

    @attr('test')
    @attr('network')
    def test_get_release(self):
        """
        Get release
        """
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        res = b.update()
        self.assertTrue(b.session.get('update'))
        self.assertTrue(res)
        self.assertIsNotNone(b.session._session['release'])

    def test_remove_session(self):
        b = Bank('alu')
        for i in range(1, 5):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()
        self.assertEqual(len(b.bank['sessions']), 4)
        b.remove_session(b.session.get('id'))
        self.assertEqual(len(b.bank['sessions']), 3)

    @attr('process')
    def test_postprocesses_setup(self):
        b = Bank('localprocess')
        pfactory = PostProcessFactory(b)
        pfactory.run(True)
        self.assertEqual(len(pfactory.threads_tasks[0]), 2)
        self.assertEqual(len(pfactory.threads_tasks[1]), 1)

    @attr('process')
    def test_postprocesses_exec_again(self):
        """
        Execute once, set a status to False, check that processes marked False
        are executed again
        """
        b = Bank('localprocess')
        pfactory = PostProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.blocks['BLOCK1']['META0']['PROC0'])
        self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC1'])
        self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC2'])
        blocks = copy.deepcopy(pfactory.blocks)
        blocks['BLOCK2']['META1']['PROC2'] = False
        pfactory2 = PostProcessFactory(b, blocks)
        pfactory2.run()
        self.assertTrue(pfactory2.blocks['BLOCK2']['META1']['PROC2'])

    @attr('process')
    def test_preprocesses(self):
        b = Bank('localprocess')
        pfactory = PreProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status['META0']['PROC0'])

    @attr('process')
    def test_removeprocesses(self):
        b = Bank('localprocess')
        pfactory = RemoveProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status['META0']['PROC0'])

    def test_dependencies_list(self):
        b = Bank('computed')
        deps = b.get_dependencies()
        self.assertEqual(len(deps), 2)
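# Shape of the post-process status blocks exercised above (block/meta/process
# names come from the 'localprocess' bank configuration used by the tests;
# a sketch):
#   blocks = {
#       'BLOCK1': {'META0': {'PROC0': True}},
#       'BLOCK2': {'META1': {'PROC1': True, 'PROC2': False}},
#   }
# Passing such a dict to PostProcessFactory(b, blocks) re-runs only the
# processes still marked False, as test_postprocesses_exec_again checks.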
class TestBiomajHTTPDownload(unittest.TestCase):
    """
    Test HTTP downloader
    """

    def setUp(self):
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
        self.config = BiomajConfig('testhttp')
        self.http_parse = HTTPParse(
            self.config.get('http.parse.dir.line'),
            self.config.get('http.parse.file.line'),
            int(self.config.get('http.group.dir.name')),
            int(self.config.get('http.group.dir.date')),
            int(self.config.get('http.group.file.name')),
            int(self.config.get('http.group.file.date')),
            self.config.get('http.group.file.date_format', None),
            int(self.config.get('http.group.file.size')))

    def tearDown(self):
        self.utils.clean()

    def test_http_list(self):
        httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
        (file_list, dir_list) = httpd.list()
        httpd.close()
        self.assertEqual(len(file_list), 1)

    def test_http_list_dateregexp(self):
        self.http_parse.file_date_format = "%%Y-%%m-%%d %%H:%%M"
        httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
        (file_list, dir_list) = httpd.list()
        httpd.close()
        self.assertEqual(len(file_list), 1)

    def test_http_download_no_size(self):
        # Group index -1: no size capture group in the file line regexp
        self.http_parse = HTTPParse(
            self.config.get('http.parse.dir.line'),
            self.config.get('http.parse.file.line'),
            int(self.config.get('http.group.dir.name')),
            int(self.config.get('http.group.dir.date')),
            int(self.config.get('http.group.file.name')),
            int(self.config.get('http.group.file.date')),
            self.config.get('http.group.file.date_format', None),
            -1)
        self.http_parse.file_date_format = "%%Y-%%m-%%d %%H:%%M"
        httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
        (file_list, dir_list) = httpd.list()
        httpd.match([r'^README$'], file_list, dir_list)
        httpd.download(self.utils.data_dir)
        httpd.close()
        self.assertEqual(len(httpd.files_to_download), 1)

    def test_http_download_no_date(self):
        # Group index -1: no date capture group in the file line regexp
        self.http_parse = HTTPParse(
            self.config.get('http.parse.dir.line'),
            self.config.get('http.parse.file.line'),
            int(self.config.get('http.group.dir.name')),
            int(self.config.get('http.group.dir.date')),
            int(self.config.get('http.group.file.name')),
            -1,
            self.config.get('http.group.file.date_format', None),
            int(self.config.get('http.group.file.size')))
        httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
        (file_list, dir_list) = httpd.list()
        httpd.match([r'^README$'], file_list, dir_list)
        httpd.download(self.utils.data_dir)
        httpd.close()
        self.assertEqual(len(httpd.files_to_download), 1)

    def test_http_download(self):
        self.http_parse.file_date_format = "%%Y-%%m-%%d %%H:%%M"
        httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/dists/', self.http_parse)
        (file_list, dir_list) = httpd.list()
        httpd.match([r'^README$'], file_list, dir_list)
        httpd.download(self.utils.data_dir)
        httpd.close()
        self.assertEqual(len(httpd.files_to_download), 1)

    def test_http_download_in_subdir(self):
        self.http_parse.file_date_format = "%%Y-%%m-%%d %%H:%%M"
        httpd = CurlDownload('http', 'ftp2.fr.debian.org', '/debian/', self.http_parse)
        (file_list, dir_list) = httpd.list()
        httpd.match([r'^dists/README$'], file_list, dir_list)
        httpd.download(self.utils.data_dir)
        httpd.close()
        self.assertEqual(len(httpd.files_to_download), 1)
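# HTTPParse arguments as wired in setUp above (a sketch; the regexps shown are
# hypothetical, modeled on an Apache-style autoindex listing):
#   dir_line  regexp: <a href="([\w\-\.]+)/">.*(\d{4}-\d{2}-\d{2} \d{2}:\d{2})
#   file_line regexp: <a href="([\w\-\.]+)">.*(\d{4}-\d{2}-\d{2} \d{2}:\d{2})\s+(\d+)
# The http.group.* integers select which capture group holds each field, with
# -1 meaning the field is absent (as the no_size/no_date tests exercise), and
# the %%-escaped file_date_format appears to be unescaped to a plain strptime
# pattern ("%Y-%m-%d %H:%M") before dates are parsed.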