def _scp_upload(self, options, tarball):
    """Create the destination directory on the remote host and scp the tarball to it.

    Both steps shell out (``ssh ... mkdir -p`` followed by ``scp``) with
    ``StrictHostKeyChecking=no``.

    Args:
        options: mapping providing 'user', 'host', 'port' and 'dest_dir'
        tarball: path of the local tarball to upload

    Returns:
        dict with "destination" (``user@host:dest_dir``) and "tarball"
        (the tarball's base name)
    """
    # TODO: check if host is in fact this server; if so, simply move the
    #   tarball and unpack it rather than scp'ing it
    # Note: calling ssh/scp directly is only one of several options
    #   (paramiko, fabric, etc.).  For examples, see
    #   http://stackoverflow.com/questions/68335/how-do-i-copy-a-file-to-a-remote-server-in-python-using-scp-or-ssh
    ssh_port = str(options['port'])
    user_at_host = "{}@{}".format(options['user'], options['host'])
    remote_dir = options['dest_dir']
    scp_target = "{}:{}".format(user_at_host, remote_dir)
    no_host_key_check = ['-o', 'StrictHostKeyChecking=no']

    # Step 1: make sure the remote directory exists
    logging.info("Creating remote destination {}".format(remote_dir))
    mkdir_cmd = ['ssh'] + no_host_key_check + [
        user_at_host, '-p', ssh_port, 'mkdir', '-p', remote_dir]
    io.SubprocessExecutor().execute(mkdir_cmd)

    # Step 2: copy the tarball over (note scp's upper-case -P for the port)
    logging.info("Uploading {} via scp".format(tarball))
    scp_cmd = ['scp'] + no_host_key_check + [
        '-P', ssh_port, tarball, scp_target]
    io.SubprocessExecutor().execute(scp_cmd)

    return {
        "destination": scp_target,
        "tarball": os.path.basename(tarball)
    }
def test_invalid_executable_post_logging(self, monkeypatch):
    """A nonexistent executable raises and logs one error, for every call form.

    With ``realtime_logging=False``, each of the three supported ways of
    passing the command (one list, separate positional args, one string)
    must raise BlueSkySubprocessError and record the same single message.
    """
    self.monkeypatch_logging(monkeypatch)

    expected_error = "[Errno 2] No such file or directory: 'lsdflsdf'"
    # 40 is the numeric value of logging.ERROR
    expected_msgs = [
        ((40, '%s: %s', 'lsdflsdf',
          "No such file or directory: 'lsdflsdf'"), {})
    ]
    call_forms = (
        (['lsdflsdf', 'sdf'],),  # single list argument
        ('lsdflsdf', 'sdf'),     # separate positional arguments
        ('lsdflsdf sdf',),       # single command string
    )

    for idx, positional in enumerate(call_forms):
        if idx:
            self.msgs = []  # reset what the previous call form recorded
        with raises(BlueSkySubprocessError) as e_info:
            io.SubprocessExecutor().execute(
                *positional, realtime_logging=False)
        assert e_info.value.args[0] == expected_error
        assert self.msgs == expected_msgs
def test_invalid_command_realtime_logging(self, monkeypatch):
    """A failing ``ls`` raises and is logged at ERROR with realtime logging.

    The same expectation is checked for each supported way of passing the
    command: one list, separate positional args, and one string.
    """
    self.monkeypatch_logging(monkeypatch)

    ls_error = "ls: cannot access 'dfjre': No such file or directory"
    call_forms = (
        (['ls', 'dfjre'],),  # single list argument
        ('ls', 'dfjre'),     # separate positional arguments
        ('ls dfjre',),       # single command string
    )

    for idx, positional in enumerate(call_forms):
        if idx:
            self.msgs = []  # reset what the previous call form recorded
        with raises(BlueSkySubprocessError) as e_info:
            io.SubprocessExecutor().execute(
                *positional, realtime_logging=True)
        assert e_info.value.args[0] == ls_error
        assert self.msgs == [
            ((logging.ERROR, '%s: %s', 'ls', ls_error), {})
        ]
def test_success_realtime_logging(self, monkeypatch):
    """A successful ``echo hello`` logs its output once, for every call form.

    The command is passed as one list, as separate positional args, and as
    one string; each must record the same single message.
    """
    self.monkeypatch_logging(monkeypatch)

    # 10 is the numeric value of logging.DEBUG
    expected_msgs = [((10, '%s: %s', 'echo', 'hello'), {})]
    call_forms = (
        (['echo', 'hello'],),  # single list argument
        ('echo', 'hello'),     # separate positional arguments
        ('echo hello',),       # single command string
    )

    for idx, positional in enumerate(call_forms):
        if idx:
            self.msgs = []  # reset what the previous call form recorded
        io.SubprocessExecutor().execute(*positional, realtime_logging=True)
        assert self.msgs == expected_msgs
def _scp_unpack(self, options, tarball):
    """Extract the tarball inside the destination directory on the remote host.

    Runs ``cd <dest_dir> && tar xzf <tarball file name>`` on the remote
    host over ssh.

    Args:
        options: mapping providing 'user', 'host', 'port' and 'dest_dir'
        tarball: local path of the tarball; only its base name is used
            remotely
    """
    # Local import: shlex is needed only to quote the remote file name.
    import shlex

    remote_server = "{}@{}".format(options['user'], options['host'])
    port = str(options['port'])
    tarball_filename = os.path.basename(tarball)

    logging.info("Extracting {} on {} in {}".format(
        tarball, remote_server, options['dest_dir']))

    # The remote shell interprets this string, so quote the file name so
    # that unusual characters in it cannot be re-interpreted.
    # NOTE(review): dest_dir is deliberately left unquoted, as before, so
    # that anything the remote shell expands in it (e.g. a leading '~')
    # keeps working -- quote it too if that is not relied upon.
    remote_cmd = "cd {} && tar xzf {}".format(
        options['dest_dir'], shlex.quote(tarball_filename))

    # Pass an argument list (as _scp_upload does) rather than a single
    # shell-quoted string, so the local command does not depend on how a
    # command string gets split into arguments.
    io.SubprocessExecutor().execute(
        ['ssh', remote_server, '-p', port, remote_cmd])
def _local_cp(self, tarball, dest_dir):
    """Copy the tarball into a local directory and extract it there.

    Args:
        tarball: path of the tarball to copy and extract
        dest_dir: destination directory; created (with parents) if missing

    Raises:
        FileExistsError: if ``dest_dir`` exists but is not a directory
    """
    # exist_ok=True replaces the former isdir()-then-makedirs() pair, which
    # could fail if the directory appeared between the check and the create.
    os.makedirs(dest_dir, exist_ok=True)

    # TODO: do we need to copy tarball too?
    shutil.copy(tarball, dest_dir)

    # TODO: use tarfile module
    io.SubprocessExecutor().execute(
        ['tar', 'xzf', tarball, '-C', dest_dir])
def run(self):
    """Convert the GeoJSON file to KML by invoking ``ogr2ogr``.

    Returns:
        dict of the form ``{"output": {"kml_file_name": ..., "directory": ...}}``
        holding the base name and the directory of the KML file
    """
    kml_path = self._kml_file_name
    command = ('ogr2ogr', '-f', 'kml', kml_path, self._geojson_file_name)
    io.SubprocessExecutor().execute(*command)

    # os.path.split yields exactly (dirname, basename)
    directory, file_name = os.path.split(kml_path)
    return {
        "output": {
            'kml_file_name': file_name,
            'directory': directory,
        }
    }
def test_invalid_command_post_logging(self, monkeypatch):
    """A failing ``ls`` raises and is logged at DEBUG with post-run logging.

    The same expectation is checked for each supported way of passing the
    command: one list, separate positional args, and one string.
    """
    self.monkeypatch_logging(monkeypatch)

    ls_error = "ls: cannot access 'dfjre': No such file or directory"
    call_forms = (
        (['ls', 'dfjre'],),  # single list argument
        ('ls', 'dfjre'),     # separate positional arguments
        ('ls dfjre',),       # single command string
    )

    for idx, positional in enumerate(call_forms):
        if idx:
            self.msgs = []  # reset what the previous call form recorded
        with raises(BlueSkySubprocessError) as e_info:
            io.SubprocessExecutor().execute(
                *positional, realtime_logging=False)
        assert e_info.value.args[0] == ls_error
        # The level is DEBUG because the log-after-execution code doesn't
        # (yet) distinguish between stdout and stderr when the command
        # returns an error (as opposed to trying to run an invalid
        # executable)
        assert self.msgs == [
            ((logging.DEBUG, '%s: %s', 'ls', ls_error), {})
        ]
def _run_process(self, fires, working_dir, tranche_num=None):
    """Run a single HYSPLIT process for ``fires`` inside ``working_dir``.

    Prepares the EMISS.CFG, CONTROL and SETUP.CFG inputs through the
    ``_write_*`` helpers, runs HYSPLIT (through the MPI launcher when the
    "MPI" config option is set, serially otherwise), optionally converts
    the concentration output to NetCDF, and archives input and output
    files.  Input, MESSAGE and (optionally) pardump files are archived in
    a ``finally`` clause, i.e. even when the run fails.

    Args:
        fires: fires to model in this process
        working_dir: directory in which the process's files are written
            and in which the binaries are executed
        tranche_num: tranche index, or None; when given it is appended
            (zero-padded to two digits) to the parinit/pardump names and
            passed on to the archive and setup helpers

    Returns:
        None

    Raises:
        Exception: a particle init file is missing and the
            "STOP_IF_NO_PARINIT" config option is set
        AssertionError: HYSPLIT did not produce its concentration file,
            or the NetCDF conversion did not produce its output file
    """
    hysplit_utils.ensure_tranch_has_dummy_fire(fires, self._model_start,
        self._num_hours, self._grid_params)

    logging.info("Running one HYSPLIT49 Dispersion model process")
    # TODO: set all but fires, working_dir, and tranche_num as instance
    # properties in self.run so that they don't have to be passed into
    # each call to _run_process.
    # The only things that change from call to call are working_dir,
    # fires, and tranche_num
    self._create_sym_links_for_process(working_dir)

    # Paths of the files this process reads and writes
    emissions_file = os.path.join(working_dir, "EMISS.CFG")
    control_file = os.path.join(working_dir, "CONTROL")
    setup_file = os.path.join(working_dir, "SETUP.CFG")
    message_files = [os.path.join(working_dir, "MESSAGE")]
    output_conc_file = os.path.join(working_dir, "hysplit.con")
    output_file = os.path.join(working_dir, self._output_file_name)

    # NINIT: sets how particle init file is to be used
    #  0 = no particle initialization file read (default)
    #  1 = read parinit file only once at initialization time
    #  2 = check each hour, if there is a match then read those values in
    #  3 = like '2' but replace emissions instead of adding to existing
    #      particles
    ninit_val = int(self.config("NINIT") or 0)

    # need an input file if ninit_val > 0
    if ninit_val > 0:
        # name of pardump input file, parinit (check for strftime strings)
        parinit = self.config("PARINIT")
        if "%" in parinit:
            parinit = self._model_start.strftime(parinit)
        if tranche_num is not None:
            parinit = parinit + "-" + str(tranche_num).zfill(2)
        parinitFiles = [ parinit ]

        # if an MPI run need to create the full list of expected files
        # based on the number of CPUs
        # NOTE(review): NCPUS is used here before the `NCPUS < 1` sanity
        # check further down is applied -- confirm that is intended
        if self.config("MPI"):
            NCPUS = self.config("NCPUS")
            parinitFiles = ["%s.%3.3i" % ( parinit, (i+1)) for i in range(NCPUS)]

        # loop over parinitFiles check if exists.
        # for MPI runs check that all files exist...if any in the list
        # don't exist raise exception if STOP_IF_NO_PARINIT is True
        # if STOP_IF_NO_PARINIT is False and all/some files don't exist,
        # set ninit_val to 0 and issue warning.
        for f in parinitFiles:
            if not os.path.exists(f):
                if self.config("STOP_IF_NO_PARINIT"):
                    msg = "Matching particle init file, %s, not found. Stop." % f
                    raise Exception(msg)

                msg = "No matching particle initialization file, %s, found; Using no particle initialization" % f
                # NOTE(review): the same message is logged twice, at
                # WARNING and at DEBUG
                logging.warning(msg)
                logging.debug(msg)
                ninit_val = 0
                self._has_parinit.append(False)
            else:
                logging.info("Using particle initialization file %s" % f)
                self._has_parinit.append(True)

    # Prepare for run ... get pardump name just in case needed
    pardump = self.config("PARDUMP")
    if "%" in pardump:
        pardump = self._model_start.strftime(pardump)
    if tranche_num is not None:
        pardump = pardump + '-' + str(tranche_num).zfill(2)
    pardumpFiles = [ pardump ]

    # If MPI run
    if self.config("MPI"):
        NCPUS = self.config("NCPUS")
        logging.info("Running MPI HYSPLIT with %s processors." % NCPUS)
        if NCPUS < 1:
            logging.warning("Invalid NCPUS specified...resetting NCPUS to 1 for this run.")
            NCPUS = 1

        # one MESSAGE file per CPU
        # NOTE(review): unlike the serial default above, these names are
        # not joined with working_dir -- confirm _archive_file resolves them
        message_files = ["MESSAGE.%3.3i" % (i+1) for i in range(NCPUS)]

        # name of the pardump files (one for each CPU)
        if self.config("MAKE_INIT_FILE"):
            pardumpFiles = ["%s.%3.3i" % ( pardump, (i+1)) for i in range(NCPUS)]

        # what command do we use to issue an mpi version of hysplit

        # TODO: either update the following checks for self.BINARIES['MPI'] and
        # self.BINARIES['HYSPLIT_MPI'] to try running with -v or -h option or
        # something similar, or remove them
        # if not os.path.isfile(self.BINARIES['MPI']):
        #     msg = "Failed to find %s. Check self.BINARIES['MPI'] setting and/or your MPICH2 installation." % mpiexec
        #     raise AssertionError(msg)
        # if not os.path.isfile(self.BINARIES['HYSPLIT_MPI']):
        #     msg = "HYSPLIT MPI executable %s not found." % self.BINARIES['HYSPLIT_MPI']
        #     raise AssertionError(msg)

    # Else single cpu run
    else:
        NCPUS = 1

    # Write the three HYSPLIT input files
    self._write_emissions(fires, emissions_file)
    self._write_control_file(fires, control_file, output_conc_file)
    self._write_setup_file(fires, emissions_file, setup_file, ninit_val, NCPUS, tranche_num)

    try:
        # Run HYSPLIT
        if self.config("MPI"):
            args = [self.BINARIES['MPI']]
            if self.BINARIES['MPI'] == 'mpiexec':
                # In case docker is being used, use '--allow-run-as-root'
                # with `mpiexec` binary. (mpiexec.hydra doesn't need
                # or even support it.)
                args.append("--allow-run-as-root")
            args.extend(["-n", str(NCPUS), self.BINARIES['HYSPLIT_MPI']])
            io.SubprocessExecutor().execute(*args, cwd=working_dir)
        else:  # standard serial run
            io.SubprocessExecutor().execute(self.BINARIES['HYSPLIT'], cwd=working_dir)

        # A missing concentration file is treated as a failed run
        if not os.path.exists(output_conc_file):
            msg = "HYSPLIT failed, check MESSAGE file for details"
            raise AssertionError(msg)
        self._archive_file(output_conc_file, tranche_num=tranche_num)

        if self.config('CONVERT_HYSPLIT2NETCDF'):
            logging.info("Converting HYSPLIT output to NetCDF format: %s -> %s" % (output_conc_file, output_file))
            io.SubprocessExecutor().execute(self.BINARIES['HYSPLIT2NETCDF'],
                "-I" + output_conc_file,
                "-O" + os.path.basename(output_file),
                "-X1000000.0", # Scale factor to convert from grams to micrograms
                "-D1", # Debug flag
                "-L-1", # Lx is x layers. x=-1 for all layers...breaks KML output for multiple layers
                cwd=working_dir
            )

            # A missing NetCDF file is treated as a failed conversion
            if not os.path.exists(output_file):
                msg = "Unable to convert HYSPLIT concentration file to NetCDF format"
                raise AssertionError(msg)
            self._archive_file(output_file, tranche_num=tranche_num)

    finally:
        # Runs whether or not the above succeeded
        # Archive input files
        self._archive_file(emissions_file, tranche_num=tranche_num)
        self._archive_file(control_file, tranche_num=tranche_num)
        self._archive_file(setup_file, tranche_num=tranche_num)

        # Archive data files
        for f in message_files:
            self._archive_file(f, tranche_num=tranche_num)
        if self.config("MAKE_INIT_FILE") and self.config('archive_pardump_files'):
            for f in pardumpFiles:
                self._archive_file(f, tranche_num=tranche_num)
def _run_parallel(self, working_dir):
    """Run HYSPLIT once per fire tranche, each in its own thread, then merge.

    Each tranche is run via ``_run_process`` in a numbered sub-directory of
    ``working_dir``.  All threads are joined before any failure is
    reported; if several threads failed, the exception recorded by the
    last failing thread (in start order) is raised.  The per-tranche
    output files are then combined with the NCEA and NCKS binaries into a
    single output file in ``working_dir``, which is archived.

    Args:
        working_dir: parent directory for the per-tranche directories and
            the merged output file
    """
    runner = self

    class TrancheThread(threading.Thread):
        """Runs ``_run_process`` for one tranche, recording any exception."""

        def __init__(self, fires, config, working_dir, tranche_num):
            super().__init__()
            self.fires = fires
            self.config = config
            self.working_dir = working_dir
            if not os.path.exists(working_dir):
                os.makedirs(working_dir)
            self.tranche_num = tranche_num
            self.exc = None

        def run(self):
            # Config has to be set to what was loaded in the main thread;
            # otherwise this thread would just be using defaults
            Config().set(self.config)
            try:
                runner._run_process(
                    self.fires, self.working_dir, self.tranche_num)
            except Exception as e:
                self.exc = e

    fire_tranches = hysplit_utils.create_fire_tranches(
        self._fire_sets, self._num_processes)
    main_thread_config = Config().get()

    workers = []
    for tranche_num, tranche_fires in enumerate(fire_tranches):
        # Note: no need to set _context.basedir; it will be set to workdir
        logging.info("Starting thread to run HYSPLIT on %d fires."
            % len(tranche_fires))
        tranche_dir = os.path.join(working_dir, str(tranche_num))
        worker = TrancheThread(
            tranche_fires, main_thread_config, tranche_dir, tranche_num)
        worker.start()
        workers.append(worker)

    # Join every thread first, then raise one of the exceptions, if any
    # TODO: just raise exception here, possibly before all threads have
    #   been joined?
    for worker in workers:
        worker.join()
    failures = [worker.exc for worker in workers if worker.exc]
    if failures:
        raise failures[-1]

    # 'ttl' is sum of values; see
    # http://nco.sourceforge.net/nco.html#Operation-Types
    # Sum together all the PM2.5 fields, then append the TFLAG field from
    # one of the individual runs (they're all the same).  Run 0 is used
    # since it should always be present regardless of how many processes
    # were used.  This prevents ncea from adding all the TFLAGs together
    # and mucking up the date.
    output_file = os.path.join(working_dir, self._output_file_name)

    tranche_outputs = [
        "%d/%s" % (i, self._output_file_name)
        for i in range(self._num_processes)
    ]
    io.SubprocessExecutor().execute(
        self.BINARIES['NCEA'], "-O", "-v", "PM25", "-y", "ttl",
        *tranche_outputs, output_file, cwd=working_dir)

    io.SubprocessExecutor().execute(
        self.BINARIES['NCKS'], "-A", "-v", "TFLAG",
        "0/%s" % (self._output_file_name), output_file, cwd=working_dir)

    self._archive_file(output_file)
def _run(self, wdir):
    """Run VSMOKE and VSMOKEGIS for every fire and package the results.

    For each fire that has both timeprofiled emissions and consumption,
    VSMOKEGIS is run once per hour (building one KML file per hour) and
    VSMOKE is run once.  The KML files that exist on disk, plus the legend
    image, are then zipped into the KMZ file.

    Args:
        wdir: working directory in which the binaries are executed and the
            intermediate files are written

    Returns:
        dict of the form ``{"output": {"kmz_filename": ...}}``; the
        "output" dict also gets "json_file_name" when
        ``_create_geo_json`` returns a truthy name
    """
    self._set_input_file_vars(wdir)
    self._set_kml_vars(wdir)

    # For each fire run VSMOKE and VSMOKEGIS
    for fire in self._fires:
        # TODO: check to make sure start+num_hours is within fire's
        #   activity windows
        in_var = INPUTVariables(fire)

        # TODO: remove following line and just use fire.utc_offset
        #   instead of 'timezone' in all subsequent code
        timezone = fire.utc_offset

        # Skip fires lacking emissions or consumption data
        if not fire.timeprofiled_emissions or not fire.consumption:
            continue

        logging.debug("%d hour run time for fireID %s",
            self._num_hours, fire["id"])

        # Run VSMOKE GIS for each hour
        for hr in range(self._num_hours):
            local_dt = self._compute_local_dt(fire, hr)
            self._write_iso_input(fire, local_dt, in_var)

            io.SubprocessExecutor().execute(
                self.BINARIES['VSMOKEGIS'], cwd=wdir)

            # TODO: replace 'hr' with 'local_dt'
            suffix = "{}_hour{}".format(fire.id, str(hr + 1))
            self._archive_file("vsmkgs.iso", src_dir=wdir, suffix=suffix)
            self._archive_file("vsmkgs.opt", src_dir=wdir, suffix=suffix)
            self._archive_file("vsmkgs.ipt", src_dir=wdir, suffix=suffix)

            iso_file = os.path.join(wdir, "vsmkgs.iso")

            # Make KML file
            kml_name = in_var.fireID + "_" + str(hr + 1) + ".kml"
            kml_path = os.path.join(wdir, kml_name)
            self._build_kml(kml_path, in_var, iso_file)
            self._kmz_files.append(kml_path)
            self._my_kmz.add_kml(kml_name, fire, hr)

            pm25 = fire.timeprofiled_emissions[local_dt]['PM2.5']
            self._add_geo_json(in_var, iso_file, fire['id'],
                timezone, hr, pm25)

        # Write input files
        self._write_input(fire, in_var)
        # Run VSMOKE for fire
        io.SubprocessExecutor().execute(self.BINARIES['VSMOKE'], cwd=wdir)

        # Rename input and output files and archive
        self._archive_file("VSMOKE.IPT", src_dir=wdir, suffix=fire.id)
        self._archive_file("VSMOKE.OUT", src_dir=wdir, suffix=fire.id)

    # Make KMZ file
    self._my_kmz.write()
    # The context manager closes the archive even if one of the writes
    # raises (the former explicit close() was skipped in that case).
    with zipfile.ZipFile(self._kmz_filename, 'w', zipfile.ZIP_DEFLATED) as z:
        for kml in self._kmz_files:
            if os.path.exists(kml):
                z.write(kml, os.path.basename(kml))
            else:
                # A missing KML file is reported but does not abort the run
                logging.error(
                    'Failure while trying to write KMZ file -- KML file does not exist'
                )
                logging.debug('File "%s" does not exist', kml)
        z.write(self._legend_image, os.path.basename(self._legend_image))

    r = {"output": {"kmz_filename": self._kmz_filename}}

    json_file_name = self._create_geo_json(wdir)
    if json_file_name:
        r['output']['json_file_name'] = json_file_name
    # TODO: anything else to include in response
    return r
def _run_hysplit(self, wdir):
    """Execute the configured HYSPLIT binary with ``wdir`` as working directory.

    Args:
        wdir: directory passed to the executor as ``cwd``
    """
    executor = io.SubprocessExecutor()
    executor.execute(self._config['binary'], cwd=wdir)
def test_invalid_args(self):
    """A non-string, non-list command (here an int) raises ValueError."""
    expected_error = "Invalid args for SubprocessExecutor.execute: (123,)"
    executor = io.SubprocessExecutor()

    with raises(ValueError) as e_info:
        executor.execute(123)

    assert e_info.value.args[0] == expected_error