def execute_job():
    """Run the prediction job loop forever.

    Builds the configured predictor, then repeatedly scans ``config.dir_in``
    for input files, runs prediction on each one, and moves the file to the
    success or error directory depending on the outcome. Sleeps
    ``config.interval`` seconds between iterations.

    Raises:
        ValueError: if ``config.prediction`` names an unknown predictor.
    """
    config = JobExecutorConfig()
    make_needed_dirs(config)
    configure_logging(config)
    if config.prediction == 'BERT':
        match_predictor = BertMatchPredictor()
    elif config.prediction == 'KEYED_VECTORS':
        match_predictor = KeyedVectorsFormatPredictor()
    else:
        # ValueError is more precise than a bare Exception and remains
        # backward compatible with callers catching Exception.
        raise ValueError("Wrong prediction mode")
    while True:
        logging.info("job iteration started")
        dir_in = config.dir_in
        # Only plain files are processed; sub-directories are skipped.
        files_names = [
            f for f in gfile.ListDirectory(dir_in)
            if not gfile.IsDirectory(join(dir_in, f))
        ]
        for file_name in files_names:
            logging.info(file_name)
            file_path = join(dir_in, file_name)
            try:
                match_predictor.predict(dir_in, file_name, config.dir_result)
                gfile.Rename(file_path, join(config.dir_success, file_name))
            except Exception:
                # Deliberate best-effort: any failure quarantines the input
                # file so the loop can keep processing the remaining files.
                # logging.exception records the full traceback.
                logging.exception("prediction failed for %s", file_name)
                gfile.Rename(file_path, join(config.dir_error, file_name))
        logging.info("job iteration finished")
        time.sleep(config.interval)
def download(uri, dst_dir):
    """Download the given URI.

    The file is first written to a ``<name>.incomplete`` path and only
    renamed to its final name once fully written, so a partial download
    never masquerades as a complete file.

    Args:
        uri: URI to copy (or download) from.
        dst_dir: path to the directory that will be used.

    Returns:
        The path to the downloaded file.
    """
    # Use urlopen as a context manager so the connection is always closed,
    # even if reading or writing the response fails.
    with urllib.request.urlopen(uri) as response:
        # Derive the local filename from the (possibly redirected) final URL.
        filename = response.geturl().split('/')[-1]
        incomplete_path = os.path.join(dst_dir, '{}.incomplete'.format(filename))
        dst_path = os.path.join(dst_dir, filename)
        # TODO(epot): Could add a shared tqdm instance across parallel download
        # to display a single shared progression bar.
        # TODO(b/119663674): Add Google Drive support (cf Ryan code)
        with gfile.Open(incomplete_path, 'wb') as f:
            f.write(response.read())
    gfile.Rename(incomplete_path, dst_path)
    return dst_path
def use_incomplete_dir(trial):
    """Wrap the trial in a temporary .incomplete path while it is processed."""
    # Remember the real destination, then point the trial at a staging dir.
    final_path = trial.output_path
    # Should add random string to avoid collision with local download manager ?
    staging_path = trial.output_path + '.incomplete'
    trial.output_path = staging_path
    yield
    # The caller may have appended to the path, but must not have replaced it.
    if not trial.output_path.startswith(staging_path):
        raise ValueError(
            'The output path for {} has been modified to {} and do not match '
            'the original {}'.format(trial.id, trial.output_path, staging_path))
    gfile.Rename(staging_path, final_path)
    # Re-apply whatever suffix was appended during processing.
    appended_suffix = util.lchop(trial.output_path, staging_path)
    trial.output_path = final_path + appended_suffix
def atomic_file(path):
    """Atomically saves data to a target path.

    Any existing data at the target path will be overwritten.

    Args:
        path: target path at which to save file

    Yields:
        file-like object
    """
    with tempfile.NamedTemporaryFile() as tmp:
        yield tmp
        tmp.flush()  # Necessary when the destination is on CNS.
        # Stage next to the destination, then rename so the final write is atomic.
        staging_path = '%s.tmp' % path
        gfile.Copy(tmp.name, staging_path, overwrite=True)
        gfile.Rename(staging_path, path, overwrite=True)