def test_download_staging_post_zip(self):
    """
    Check that a tmp.zip file on the staging post is processed correctly.
    """
    self.assertTrue(file_manager.manager.TEST_SFTP)
    with open(join(self.staging_post, "tmp.zip"), "w") as file_out:
        file_out.write("Dummy staging post test file")

    FileManager.dir_b = HarnessTree.get("temp_dissRequest_B")
    FileManager.dir_c = HarnessTree.get("temp_dissRequest_C")

    diss_instructions = dict()
    all_files_fetched = []
    process_ok, instructions, files_fetched = FileManager.process_instruction_file(
        self.json_file)
    if process_ok:
        req_id = instructions["req_id"]
        hostname = instructions["hostname"]
        diss_instructions[req_id + hostname] = instructions
        all_files_fetched += [item for item in files_fetched
                              if item not in all_files_fetched]

    # process the tmp.zip file by renaming it correctly
    FileManager.package_data(all_files_fetched, diss_instructions)

    dir_c_list = os.listdir(FileManager.dir_c)
    self.assertTrue(len(dir_c_list) > 0)
    if len(dir_c_list) > 0:
        file_packaged = dir_c_list[0]
        self.assertTrue(
            match(r'fr-meteo-harnaisdiss,00000,,\d+.tar.gz', file_packaged)
            is not None)
def test_sending(self):
    """
    Test the FTP transfer of 5 small files.
    """
    # prepare settings
    SettingsManager.load_settings()
    SettingsManager.update(dict(harnaisLogdir=self.tmpdir,
                                harnaisDir=self.tmpdir,
                                harnaisAckDir=self.tmpdir,
                                dissHost="0.0." + "0.0",
                                dissFtpUser="******",
                                dissFtpPasswd="12345",
                                bandwidth=10,
                                dissFtpDir=self.difmet_deposit,
                                dissFtpMode=None,
                                dissFtpPort=2121,
                                sendFTPlimitConn=5,
                                sendFTPIdle=10),
                           testing=True)
    os.environ[ENV.settings] = join(self.tmpdir, "settings_testing.yaml")
    with open(os.environ[ENV.settings], "w") as file_:
        yaml.dump(SettingsManager._parameters, file_)
    setup_logging()
    SettingsManager.load_settings()

    dir_C = HarnessTree.get("temp_dissRequest_C")
    dir_D = HarnessTree.get("temp_dissRequest_D")

    # create five dummy sparse files of 1000 bytes each to send
    for i in range(5):
        filename = "package_file_%i.tar.gz" % i
        with open(join(dir_C, filename), "wb") as file_out:
            size = 1000
            file_out.seek(size - 1)
            file_out.write(b"\0")

    DifmetSender.process(max_loops=3)

    list_dwld = ['package_file_0.tar.gz',
                 'package_file_2.tar.gz',
                 'package_file_4.tar.gz',
                 'package_file_1.tar.gz',
                 'package_file_3.tar.gz']

    # every file found in the deposit must be one of the expected packages
    expected_result = True
    for file_ in os.listdir(self.difmet_deposit):
        expected_result = expected_result and (file_ in list_dwld)
    self.assertTrue(expected_result)
def process(cls, max_loops=0):
    cls.nb_workers = SettingsManager.get("sendFTPlimitConn")
    # in debug mode, it is possible to set a different pool method
    pool_method = cls.get_pool_method()
    cls.pool = pool_method(processes=cls.nb_workers)
    counter = 0
    cls.setup_process()
    while cls._running:
        counter += 1
        cls.signal_loop(counter)
        cls.load_settings()
        cls.update_workers()
        # idle time
        idle_time = SettingsManager.get("sendFTPIdle")
        sleep(idle_time)
        # get settings
        cls.dir_c = dir_c = HarnessTree.get("temp_dissRequest_C")
        cls.dir_d = dir_d = HarnessTree.get("temp_dissRequest_D")
        # move back any remaining file from D to C
        cls.move_back_files()
        # get files in C
        max_files = cls.nb_workers
        list_files_c = cls.get_file_list(dir_c, max_files)
        files_to_ftp = cls.move_files(list_files_c, dir_d)
        for file_ in files_to_ftp:
            file_expired = cls.check_file_age(file_)
            if file_expired:
                # TODO we need to find a way to update the info to the database
                # would require looking at the compressed file though
                Tools.remove_file(file_, "difmet archive", LOGGER)
                continue
            size = os.stat(file_).st_size
            timeout = cls.compute_timeout(size, file_)
            # start the upload
            # rename the file to prevent any other operation on it
            cls.lock_file(file_)
            res = cls.pool.apply_async(cls.abortable_ftp,
                                       (cls.upload_file, file_, dir_c, dir_d),
                                       dict(timeout=timeout))
        # for testing and debugging purpose only
        cls.check_end_loop(counter, max_loops)
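# A minimal sketch, for illustration only, of how a transfer timeout such as the
# one returned by compute_timeout() could be derived from the file size and the
# configured bandwidth. The bandwidth unit (MB/s), the safety margin and the hard
# floor are assumptions and do not necessarily match the real implementation.
def _example_compute_timeout(size_bytes, bandwidth_mb_per_s, min_timeout=10):
    # time the transfer would take at the nominal bandwidth, in seconds
    nominal = size_bytes / float(bandwidth_mb_per_s * 1024 * 1024)
    # keep a safety margin and never drop below the floor
    return max(min_timeout, 2 * nominal)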
def fetch(self, file_uri):
    fetch_ok = False
    dir_path = os.path.join(self.staging_path, file_uri)
    destination_dir = HarnessTree.get("temp_dissRequest_B")
    # move the file if hostname is localhost, sftp it otherwise
    files_fetched = []
    if self.hostname == "localhost" and \
       os.path.isdir(dir_path) and \
       not TEST_SFTP:
        for item in os.listdir(dir_path):
            file_path = os.path.join(dir_path, item)
            # folders are ignored
            if os.path.isdir(file_path):
                continue
            destination_path = self.check_zip(item, destination_dir)
            # if the file has already been fetched by a previous instruction
            # file, we don't do it again
            if not os.path.isfile(destination_path):
                LOGGER.debug("Copying file from %s to %s.",
                             file_path, destination_path)
                shutil.copy(file_path, destination_path)
                self.update_filename(os.path.basename(destination_path))
                files_fetched.append(destination_path)
        fetch_ok = True
    # case where hostname is localhost but staging post is not a directory
    elif self.hostname == "localhost" and \
         not os.path.isdir(dir_path) and \
         not TEST_SFTP:
        msg = ("Staging post path %s is not a directory. "
               "Dissemination failed" % dir_path)
        LOGGER.error(msg)
        Database.update_field_by_query("requestStatus", REQ_STATUS.failed,
                                       **dict(fullrequestId=self.req_id))
        Database.update_field_by_query("message", msg,
                                       **dict(fullrequestId=self.req_id))
        fetch_ok = False
    # SFTP case
    else:
        fetch_ok, files_fetched = self.sftp_dir(dir_path, destination_dir)

    return fetch_ok, files_fetched
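# A minimal sketch, for illustration only, of what an SFTP directory fetch like
# sftp_dir() could look like using paramiko. The host, port, credentials and the
# helper name are assumptions; the real sftp_dir implementation is not shown here.
import os
import stat
import paramiko

def _example_sftp_dir(host, port, user, password, remote_dir, destination_dir):
    files_fetched = []
    transport = paramiko.Transport((host, port))
    try:
        transport.connect(username=user, password=password)
        sftp = paramiko.SFTPClient.from_transport(transport)
        for item in sftp.listdir_attr(remote_dir):
            # folders are ignored, as in the localhost branch above
            if stat.S_ISDIR(item.st_mode):
                continue
            remote_path = remote_dir + "/" + item.filename
            local_path = os.path.join(destination_dir, item.filename)
            # files already fetched by a previous instruction file are not
            # downloaded again
            if not os.path.isfile(local_path):
                sftp.get(remote_path, local_path)
            files_fetched.append(local_path)
        return True, files_fetched
    finally:
        transport.close()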
def test_json_file(self):
    """
    This test checks that the json instruction file is correctly created
    with the necessary information.
    """
    test_diffusion = self.factory.MailDiffusion(address="*****@*****.**",
                                                headerLine="dummyHeaderLine",
                                                subject="dummySubject",
                                                dispatchMode="TO",
                                                attachmentMode="AS_ATTACHMENT")
    info = self.factory.DisseminationInfo(priority=5,
                                          SLA=6,
                                          dataPolicy="dummyDataPolicy",
                                          diffusion=test_diffusion)
    self.client.service.disseminate(requestId="123456",
                                    fileURI=self.staging_post,
                                    disseminationInfo=info)

    res_dir = HarnessTree.get("temp_dissRequest_A")
    json_file = join(res_dir, os.listdir(res_dir)[0])
    with open(json_file, "r") as file_:
        info_file = json.load(file_)

    dict_ref = {'hostname': self.hostname,
                'uri': self.staging_post,
                'req_id': '123456',
                'diffpriority': 81,
                'date': info_file["date"],
                'diffusion': {'fileName': None,
                              'attachmentMode': 'AS_ATTACHMENT',
                              'dispatchMode': 'TO',
                              'DiffusionType': 'EMAIL',
                              'subject': 'dummySubject',
                              'headerLine': 'dummyHeaderLine',
                              'address': '*****@*****.**'}}

    test = info_file == dict_ref
    # if the full comparison fails, retry it without the hostname field
    if not test:
        info_file.pop("hostname")
        dict_ref.pop("hostname")
        test = info_file == dict_ref
    self.assertTrue(test)
def test_download_staging_post(self):
    """
    Check that files on the staging post end up in cache/B_fromstaging.
    """
    self.assertTrue(file_manager.manager.TEST_SFTP)
    files_list = []
    for i in range(4):
        filename = "A_SNFR30LFPW270700_C_LFPW_20180927070000_%i.txt" % i
        files_list.append(filename)
        with open(join(self.staging_post, filename), "w") as file_out:
            file_out.write("Dummy staging post test file")

    FileManager.process_instruction_file(self.json_file)

    dir_b = HarnessTree.get("temp_dissRequest_B")
    for filename in files_list:
        self.assertTrue(os.path.isfile(join(dir_b, filename)))
def test_download_large_file(self):
    """
    Check that downloading a big file from the staging post is not an issue.
    """
    self.assertTrue(file_manager.manager.TEST_SFTP)
    SettingsManager.reset()
    SettingsManager.load_settings()
    SettingsManager.update(dict(harnaisLogdir=self.tmpdir,
                                harnaisDir=self.tmpdir,
                                harnaisAckDir=self.tmpdir,
                                openwisStagingPath=gettempdir(),
                                openwisHost="localhost",
                                openwisSftpUser="******",
                                openwisSftpPassword="******",
                                openwisSftpPort=3373,
                                bandwidth=5),
                           testing=True)
    with open(os.environ[ENV.settings], "w") as file_:
        yaml.dump(SettingsManager._parameters, file_)

    files_list = []
    for i in range(1):
        filename = "A_largefile_C_LFPW_20180927070000_%i.txt" % i
        files_list.append(filename)
        with open(join(self.staging_post, filename), "wb") as file_out:
            # create a 200 MiB sparse file
            size = 200 * (1 << 20)
            file_out.seek(size - 1)
            file_out.write(b"\0")

    FileManager.process_instruction_file(self.json_file)

    dir_b = HarnessTree.get("temp_dissRequest_B")
    for filename in files_list:
        self.assertTrue(os.path.isfile(join(dir_b, filename)))
def create_request_file(self):
    # compute priority
    priority = self.compute_priority(self.diss_info.priority,
                                     self.diss_info.SLA)
    out_dir = HarnessTree.get("temp_dissRequest_A")

    rec_dict = dict(hostname=self.hostname)
    rec_dict["priority"] = priority
    rec_dict["date_reception"] = strftime("%Y%m%d%H%M%S", self.date_reception)
    rec_dict["requestid"] = self.req_id
    request_file = ("{priority}_{date_reception}_{requestid}_{hostname}.json"
                    .format(**rec_dict))
    # full path to request file
    self.request_file = request_file = os.path.join(out_dir, request_file)

    # initialize the dictionary that will be dumped into the json file
    request_dump = dict(date=rec_dict["date_reception"],
                        hostname=self.hostname,
                        diffpriority=priority,
                        uri=self.uri,
                        req_id=self.req_id)

    # update the dictionary with the rest of the request information
    request_diff = self.compile_request()
    request_dump.update(request_diff)

    LOGGER.debug("Attempting to write {file}".format(file=request_file))
    with open(request_file, "w") as file_:
        json.dump(request_dump, file_, indent=4)
    LOGGER.info("Successfully wrote instruction file %s", request_file)
def setUp(self):
    # configure temporary directories
    file_manager.manager.TEST_SFTP = True
    self.tmpdir = mkdtemp(prefix='harnais_')
    os.environ["TMPDIR"] = self.tmpdir
    self.staging_post = join(self.tmpdir, "staging_post")
    os.mkdir(self.staging_post)

    # prepare settings
    SettingsManager.load_settings()
    SettingsManager.update(dict(harnaisLogdir=self.tmpdir,
                                harnaisDir=self.tmpdir,
                                harnaisAckDir=self.tmpdir,
                                openwisStagingPath=gettempdir(),
                                openwisHost="localhost",
                                openwisSftpUser="******",
                                openwisSftpPassword="******",
                                openwisSftpPort=3373),
                           testing=True)
    os.environ[ENV.settings] = join(self.tmpdir, "settings_testing.yaml")
    with open(os.environ[ENV.settings], "w") as file_:
        yaml.dump(SettingsManager._parameters, file_)
    setup_logging()

    # start the sftp server
    SFTPserver.create_server(self.staging_post)

    # create the json instruction file to process
    self.dir_a = HarnessTree.get("temp_dissRequest_A")
    self.json_file = json_file = join(self.dir_a, "test_instruction_file.json")
    instr = {'hostname': socket.gethostname(),
             'uri': self.staging_post,
             'req_id': '123456',
             'diffpriority': 81,
             'date': datetime.now().strftime("%Y%m%d%H%M%S"),
             'diffusion': {'fileName': None,
                           'attachmentMode': 'AS_ATTACHMENT',
                           'dispatchMode': 'TO',
                           'DiffusionType': 'EMAIL',
                           'subject': 'dummySubject',
                           'headerLine': 'dummyHeaderLine',
                           'address': '*****@*****.**'}}
    # put it in cache/A_dissreq
    with open(json_file, "w") as file_:
        json.dump(instr, file_)

    # create the corresponding record in the database
    ext_id = Tools.generate_random_string()
    diffusion = Diffusion(diff_externalid=ext_id,
                          fullrequestId="123456" + socket.gethostname(),
                          requestStatus=REQ_STATUS.ongoing,
                          Date=datetime.now(),
                          rxnotif=True,
                          message="Created record in SQL database")
    with Database.get_app().app_context():
        database = Database.get_database()
        database.session.add(diffusion)
        database.session.commit()
def process(cls, max_loops=0):
    counter = 0
    instr_to_process = False
    # initialization
    cls.setup_process()
    loop_time = 0
    while cls._running:
        counter += 1
        cls.signal_loop(counter)
        cls.load_settings()
        cls.dir_a = dir_a = HarnessTree.get("temp_dissRequest_A")
        cls.dir_b = dir_b = HarnessTree.get("temp_dissRequest_B")
        cls.dir_c = HarnessTree.get("temp_dissRequest_C")
        start_time = time()
        # idle time
        idle_time = SettingsManager.get("processFileIdle")
        # if a loop lasted longer than the idle time, the idle time is bypassed
        if not loop_time > idle_time:
            sleep(idle_time)
        # get the maxDirectiveFile first files
        max_direc_files = SettingsManager.get("processFileDPmax")
        list_files_a = cls.get_file_list(dir_a, maxfiles=max_direc_files)
        instruction_files = cls.move_files(list_files_a, dir_b)
        if instruction_files == []:
            if instr_to_process:
                LOGGER.debug("No instruction file to process, moving on.")
                instr_to_process = False
            loop_time = time() - start_time
            cls.check_end_loop(counter, max_loops)
            continue
        else:
            LOGGER.debug("Fetched %i instruction files from %s",
                         len(instruction_files), dir_a)
            instr_to_process = True

        # process instruction files
        diss_instructions = dict()
        all_files_fetched = []
        for file_to_process in instruction_files:
            # empty the list, one item at a time
            process_ok, instructions, files_fetched = cls.process_instruction_file(
                file_to_process)
            # if the fetching went ok, we store the rest of the instructions
            # contained in the instruction file and update the list of files
            # fetched from the staging post
            if process_ok:
                req_id = instructions["req_id"]
                hostname = instructions["hostname"]
                diss_instructions[req_id + hostname] = instructions
                all_files_fetched += [item for item in files_fetched
                                      if item not in all_files_fetched]

        # the files downloaded are packaged according to the
        # instructions stored in diss_instructions
        cls.package_data(all_files_fetched, diss_instructions)
        # remove the instruction files processed
        cls.clear_instruction_files(instruction_files)
        # as files should have been packaged and instruction files removed,
        # any file remaining is an orphan
        cls.clear_orphan_files(dir_b)
        # for testing and debugging purpose only
        cls.check_end_loop(counter, max_loops)
        loop_time = time() - start_time