def srm_download_to_file(url, file_):
    '''
    Download the file in `url` storing it in the `file_` file-like object.
    '''
    logger = logging.getLogger('dumper.__init__')
    ctx = gfal2.creat_context()  # pylint: disable=no-member
    infile = ctx.open(url, 'r')
    try:
        # CHUNK_SIZE is a module-level constant.
        chunk = infile.read(CHUNK_SIZE)
    except GError as e:
        # Python-2-style GError indexing: e[1] holds the numeric error code.
        if e[1] == 70:
            # Some GridFTP endpoints fail the implicit stat-on-open; retry
            # the open with that behaviour disabled.
            logger.debug(
                'GError(70) raised, using GRIDFTP PLUGIN:STAT_ON_OPEN=False workarround to download %s',
                url)
            ctx.set_opt_boolean('GRIDFTP PLUGIN', 'STAT_ON_OPEN', False)
            infile = ctx.open(url, 'r')
            chunk = infile.read(CHUNK_SIZE)
        else:
            raise
    # Stream chunk by chunk until read() returns a falsy (empty) value.
    while chunk:
        file_.write(chunk)
        chunk = infile.read(CHUNK_SIZE)
def pdm_gfal_rename(data, verbosity=logging.INFO, timeout=None):
    """
    Rename file or directory.

    :param data: json-loaded dict with data {"source": url}
    :param verbosity: mapped from "options":{"verbosity":logging level}
    :param timeout: global gfal2 timeout for all operations
    :return: dict of a form {'Code': return code, 'Reason': reason, 'id': jobid})
    """
    _logger.setLevel(verbosity)
    rename_list = data.get('files', [])
    if not rename_list:
        # Nothing to do: report the condition and bail out early.
        _logger.warning("No files to rename")
        dump_and_flush({"Reason": "No files to rename passed in",
                        "Code": 1, 'id': ''})
        return
    ctx = gfal2.creat_context()
    if timeout is not None:
        ctx.set_opt_integer("CORE", "NAMESPACE_TIMEOUT", timeout)
    # One JSON status line is emitted per rename attempt.
    for jobid, src, dst in rename_list:
        try:
            code = ctx.rename(str(src), str(dst))
            dump_and_flush({'Code': code, 'Reason': 'OK', 'id': jobid})
        except gfal2.GError as gerror:
            dump_and_flush({'Code': 1, 'Reason': str(gerror), 'id': jobid},
                           _logger, str(gerror), logging.ERROR)
    return
def download(input, output):
    """Copy `input` to `output` with gfal2, retrying up to 50 times.

    :param input: source URL
    :param output: destination URL
    """
    # Instantiate gfal2
    ctx = gfal2.creat_context()
    # Set transfer parameters
    params = ctx.transfer_parameters()
    params.overwrite = True
    params.timeout = 300
    dlCounter = 0
    isOK = False
    print('Try to Download ' + input)
    while not isOK and dlCounter < 50:
        try:
            ctx.filecopy(params, input, output)
            isOK = True
        except Exception as e:
            # `except Exception, e` and bare `print` were Python-2-only syntax.
            print("Download failed : %s" % str(e))
            isOK = False
            time.sleep(20)  # back off before the next attempt
            dlCounter += 1
def pdm_gfal_chmod(data, permissions, verbosity=logging.INFO, timeout=None):
    """
    Change directory/file permissions

    :param data: json-loaded dict with data {"source": url}
    :param permissions: permissions mapped from {"options":{"permissions":int}}
    :param verbosity: mapped from {"options":{"verbosity":logging level}}
    :param timeout: global gfal2 timeout for all operations
    :return: dict of a form {'Code': return code, 'Reason': reason, 'id': jobid})
    """
    _logger.setLevel(verbosity)
    chmod_list = data.get('files', [])
    if not chmod_list:
        # Nothing to do: report the condition and bail out early.
        _logger.warning("No files to set permissions passed in")
        dump_and_flush({"Reason": "No files to set permissions passed in",
                        "Code": 1, 'id': ''})
        return
    ctx = gfal2.creat_context()
    if timeout is not None:
        ctx.set_opt_integer("CORE", "NAMESPACE_TIMEOUT", timeout)
    # One JSON status line is emitted per chmod attempt.
    for jobid, target in chmod_list:
        try:
            code = ctx.chmod(str(target), permissions)
            dump_and_flush({'Code': code, 'Reason': 'OK', 'id': jobid})
        except gfal2.GError as gerror:
            dump_and_flush({'Code': 1, 'Reason': str(gerror), 'id': jobid},
                           _logger, str(gerror), logging.ERROR)
    return
def parse_dir(remote_path, how_old=7):
    "Parse a single directory"
    # Aggregates sizes/counts of "recent" files (newer than `how_old` days)
    # and of "old" files (between how_old and 2*how_old days old).
    context = gfal2.creat_context()
    to_return = {}
    to_return['total_size'] = 0
    to_return['total_files'] = 0
    to_return['old_total_size'] = 0
    to_return['old_total_files'] = 0
    to_return['files'] = []
    files = []
    st = context.stat(str(remote_path))
    if not stat.S_ISDIR(st.st_mode):
        raise Exception("Remote file %s is not a directory" % remote_path)
    directory = context.opendir(str(remote_path))
    st = None
    while True:
        # readpp() yields (dirent, stat) pairs; a None/empty d_name marks EOF.
        (dirent, st) = directory.readpp()
        if dirent is None or dirent.d_name is None or len(dirent.d_name) == 0:
            break
        mtime = datetime.datetime.fromtimestamp(int(st.st_mtime))
        if mtime > (datetime.datetime.now() - datetime.timedelta(days=how_old)):
            to_return['total_size'] += st.st_size
            to_return['total_files'] += 1
            # NOTE(review): only "recent" files land in the returned listing.
            files.append({'filename': dirent.d_name, 'size': st.st_size})
        if mtime < (datetime.datetime.now() - datetime.timedelta(days=how_old)
                    ) and mtime > (datetime.datetime.now() - datetime.timedelta(days=how_old * 2)):
            to_return['old_total_size'] += st.st_size
            to_return['old_total_files'] += 1
    to_return['files'] = files
    return to_return
def parse_dir(remote_path, how_old=7):
    "Parse a single directory"
    # Duplicate of the other parse_dir in this file; aggregates sizes/counts
    # of "recent" files (newer than `how_old` days) and "old" files (between
    # how_old and 2*how_old days old).
    context = gfal2.creat_context()
    to_return = {}
    to_return['total_size'] = 0
    to_return['total_files'] = 0
    to_return['old_total_size'] = 0
    to_return['old_total_files'] = 0
    to_return['files'] = []
    files = []
    st = context.stat(str(remote_path))
    if not stat.S_ISDIR(st.st_mode):
        raise Exception("Remote file %s is not a directory" % remote_path)
    directory = context.opendir(str(remote_path))
    st = None
    while True:
        # readpp() yields (dirent, stat) pairs; a None/empty d_name marks EOF.
        (dirent, st) = directory.readpp()
        if dirent is None or dirent.d_name is None or len(dirent.d_name) == 0:
            break
        mtime = datetime.datetime.fromtimestamp(int(st.st_mtime))
        if mtime > (datetime.datetime.now() - datetime.timedelta(days=how_old)):
            to_return['total_size'] += st.st_size
            to_return['total_files'] += 1
            # NOTE(review): only "recent" files land in the returned listing.
            files.append({'filename': dirent.d_name, 'size': st.st_size})
        if mtime < (datetime.datetime.now() - datetime.timedelta(days=how_old)) and mtime > (datetime.datetime.now() - datetime.timedelta(days=how_old*2)):
            to_return['old_total_size'] += st.st_size
            to_return['old_total_files'] += 1
    to_return['files'] = files
    return to_return
def _downloadJsonFile(self, occupancyLFN, filePath):
    """Download the json file at the location

    :param occupancyLFN: lfn for the file
    :param filePath: destination path for the file
    """
    # Try each storage plugin of the SE in turn; the first successful copy wins.
    for storage in self.se.storages:
        try:
            ctx = gfal2.creat_context()
            params = ctx.transfer_parameters()
            params.overwrite = True
            # Translate the LFN into a protocol-specific URL for this storage.
            res = storage.updateURL(occupancyLFN)
            if not res["OK"]:
                continue
            occupancyURL = res["Value"]
            ctx.filecopy(params, occupancyURL, "file://" + filePath)
            return
        except gfal2.GError as e:
            # NOTE(review): e.message is a Python-2-era attribute — confirm the
            # gfal2 binding still provides it on the target interpreter.
            detailMsg = "Failed to copy file %s to destination url %s: [%d] %s" % (
                occupancyURL,
                filePath,
                e.code,
                e.message,
            )
            self.log.debug("Exception while copying", detailMsg)
            continue
def execute(self, func):
    """Run `func` via the executor in a worker thread, honouring --cert and
    --timeout, and translating Ctrl-C into a gfal2 cancel.

    :param func: callable handed to self.executor
    :return: self.return_code on success, errno.ETIMEDOUT on timeout,
             errno.EINTR when interrupted
    """
    def cancel():
        self.context.cancel()

    # Set X509_ environment if --cert is used
    if self.params.cert:
        if not self.params.key:
            # a single PEM file can hold both the certificate and the key
            self.params.key = self.params.cert
        os.environ['X509_USER_CERT'] = self.params.cert
        os.environ['X509_USER_KEY'] = self.params.key
        if 'X509_USER_PROXY' in os.environ:
            # an explicit certificate overrides any proxy in the environment
            del os.environ['X509_USER_PROXY']

    #Set verbose
    self.__set_log_level(self.params.verbose)

    self.context = gfal2.creat_context()
    apply_option(self.context, self.params)
    self.context.set_user_agent("gfal2-util", VERSION)

    t_main = Thread(target=self.executor, args=[func])
    t_main.daemon = True
    try:
        #run in another thread to be able to catch signals while C functions don't return
        # See rule #3 in http://docs.python.org/2/library/signal.html
        t_main.start()

        if self.params.timeout > 0:
            # Increment the timeout a bit so plugins have a chance to timeout themselves
            t_main.join(self.params.timeout + 30)
        else:
            #if join(None) is used, it doesn't catch signals
            while t_main.isAlive():
                t_main.join(3600)

        #self._enable_output()
        if t_main.isAlive():
            # worker still running after join() returned => we timed out
            if self.progress_bar is not None:
                self.progress_bar.stop(False)
            sys.stderr.write('Command timed out after %d seconds!\n' % self.params.timeout)
            return errno.ETIMEDOUT

        return self.return_code
    except KeyboardInterrupt:
        sys.stderr.write("Caught keyboard interrupt. Canceling...")
        #ignore any other interrupt signal
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        #cancel in another thread to avoid blocking us
        t_cancel = Thread(target=cancel)
        t_cancel.daemon = True  # in no case hog the entire program
        t_cancel.start()
        t_cancel.join(4)
        if t_cancel.isAlive():
            sys.stderr.write("failed to cancel after waiting some time\n")
        return errno.EINTR
def connect(self):
    """
    Establishes the actual connection to the referred RSE.
    If we decide to use gfal, init should be done here.

    :raises RSEAccessDenied
    """
    self.logger(logging.DEBUG, 'connecting to storage')
    gfal2.set_verbose(gfal2.verbose_level.verbose)
    self.__ctx = gfal2.creat_context()  # pylint: disable=no-member
    # Allowed TURL protocols for SRM; GSI auth for xrootd; no path rewriting.
    self.__ctx.set_opt_string_list(
        "SRM PLUGIN", "TURL_PROTOCOLS",
        ["gsiftp", "rfio", "gsidcap", "dcap", "kdcap"])
    self.__ctx.set_opt_string("XROOTD PLUGIN", "XRD.WANTPROT", "gsi,unix")
    self.__ctx.set_opt_boolean("XROOTD PLUGIN", "NORMALIZE_PATH", False)
    auth_configured = False
    if self.auth_token:
        # An explicit bearer token takes precedence over any X509 material.
        self.__ctx.set_opt_string("BEARER", "TOKEN", self.auth_token)
        auth_configured = True
    # Configure gfal authentication to use the rucio client proxy if and only if gfal didn't initialize its credentials already
    # (https://gitlab.cern.ch/dmc/gfal2/-/blob/48cfe3476392c884b53d00799198b1238603a406/src/core/common/gfal_common.c#L79)
    if not auth_configured:
        try:
            self.__ctx.get_opt_string("X509", "CERT")
            self.__ctx.get_opt_string("X509", "KEY")
            auth_configured = True
        except gfal2.GError:  # pylint: disable=no-member
            pass
    if not auth_configured:
        try:
            self.__ctx.get_opt_string("BEARER", "TOKEN")
            auth_configured = True
        except gfal2.GError:  # pylint: disable=no-member
            pass
    if not auth_configured:
        # Fall back to the proxy configured in the rucio client settings.
        proxy = config.config_get('client', 'client_x509_proxy', default=None, raise_exception=False)
        if proxy:
            self.logger(
                logging.INFO, 'Configuring authentication to use {}'.format(proxy))
            self.__ctx.set_opt_string("X509", "CERT", proxy)
            self.__ctx.set_opt_string("X509", "KEY", proxy)
    if TIMEOUT:
        # TIMEOUT is a module-level setting; non-integer values are logged
        # and otherwise ignored.
        try:
            timeout = int(TIMEOUT)
            self.__ctx.set_opt_integer("HTTP PLUGIN", "OPERATION_TIMEOUT", timeout)
            self.__ctx.set_opt_integer("SRM PLUGIN", "OPERATION_TIMEOUT", timeout)
            self.__ctx.set_opt_integer("GRIDFTP PLUGIN", "OPERATION_TIMEOUT", timeout)
        except ValueError:
            self.logger(logging.ERROR, 'wrong timeout value %s', TIMEOUT)
def gfal_mkdir_main():
    """Entry point for gfal-mkdir: create the given URI with mode 0o755.

    :return: 0 on success (gfal2 raises on failure)
    """
    params = create_gfal_mkdir_parser().parse_args(sys.argv[1:])
    set_verbose_mode(params)
    c = gfal2.creat_context()
    applys_option(c, params)
    # 0o755 replaces the Python-2-only octal literal 0755 (a SyntaxError on
    # Python 3); it is valid on both interpreters.
    c.mkdir(params.file_uri[0], 0o755)
    return 0
def gfal_copy_main():
    """Entry point for gfal-copy: copy SRC to DST with the parsed options.

    :return: 0 on success (gfal2 raises on failure)
    """
    params = create_gfal_cp_parser().parse_args(sys.argv[1:])
    set_verbose_mode(params)
    ctx = gfal2.creat_context()
    applys_option(ctx, params)
    # transfer-specific options (overwrite, checksum, ...) live on a
    # separate parameters object
    transfer = ctx.transfer_parameters()
    apply_transfer_options(transfer, params)
    ctx.filecopy(transfer, params.SRC[0], params.DST[0])
    return 0
def __init__(self, long=False, recursive=False, max_levels=2, context=None):
    """Configure the lister.

    :param long: produce a long (detailed) listing
    :param recursive: descend into subdirectories
    :param max_levels: maximum recursion depth
    :param context: optional pre-built gfal2 context; created lazily when None

    BUG fixed: the original default `context=gfal2.creat_context()` was
    evaluated once at import time, so every instance silently shared a single
    context (mutable-default pitfall). A None sentinel keeps the interface
    backward-compatible while creating a fresh context per instance.
    """
    self.context = context if context is not None else gfal2.creat_context()
    self.long = long
    self.recursive = recursive
    self.max_levels = max_levels
def gfal_sum_main():
    """Entry point for gfal-sum: print "<file>\t<checksum>" for the given URI.

    :return: 0 on success (gfal2 raises on failure)
    """
    params = create_gfal_sum_parser().parse_args(sys.argv[1:])
    set_verbose_mode(params)
    c = gfal2.creat_context()
    applys_option(c, params)
    my_file = params.FILE[0]
    r = c.checksum(my_file, params.CHECKSUM_TYPE[0])
    # print() with a single pre-formatted argument works identically on
    # Python 2 and 3; the bare `print "..."` statement was Python-2-only.
    print("%s\t%s" % (my_file, r))
    return 0
def copy(src_url, dst_url, overwrite=False):
    """Copy src_url to dst_url with a progress meter.

    :param src_url: source URL
    :param dst_url: destination URL
    :param overwrite: overwrite an existing destination when True
    """
    ctx = gfal2.creat_context()
    params = ctx.transfer_parameters()
    #params.event_callback = event_callback
    params.monitor_callback = ProgressMeter().__call__
    params.overwrite = overwrite
    # one week; PaloAlto limits connection data rates to ~600Mbps, so use
    # 10 parallel streams
    params.timeout = 3600 * 24 * 7
    params.nbstreams = 10
    ctx.filecopy(params, src_url, dst_url)
def gfal_rm_main():
    """Entry point for gfal-rm: delete the given URI.

    :return: 0 on success
    """
    params = create_gfal_rm_parser().parse_args(sys.argv[1:])
    set_verbose_mode(params)
    out = sys.stdout
    ctx = gfal2.creat_context()
    applys_option(ctx, params)
    # delete_item handles recursive deletion and reporting via `out`
    delete_item(ctx, params.file_uri[0], params, out)
    return 0
def __call__(self, *args, **kwargs):
    """ Pretty print of gfal2-util version """
    # Header line followed by one indented line per loaded gfal2 plugin.
    lines = ["gfal2-util version %s (gfal2 %s)" % (VERSION, gfal2.get_version())]
    for plugin in sorted(gfal2.creat_context().get_plugin_names()):
        lines.append('\t' + plugin)
    print('\n'.join(lines))
    sys.exit(0)
def test_creat_and_copy(self):
    """A copied context must keep its settings independent of the source.

    BUG fixed: the original method name contained a space
    ("test_creat_and copy"), which is a SyntaxError, and the first assertion
    referenced an undefined name `t` instead of the context `c`.
    """
    gfal2.set_verbose(gfal2.verbose_level.debug)
    c = gfal2.creat_context()
    c.timeout = 200
    self.assertTrue(c.timeout == 200)
    d = c.copy()
    # the copy inherits the timeout ...
    self.assertTrue(d.timeout == 200)
    c.timeout = 20
    del c
    # ... and is unaffected by later changes to / deletion of the original
    self.assertTrue(d.timeout == 200)
def __init__(self, abort_on_error=False, recursive=False, only_files=False, chmod=False):
    """Store the behaviour flags and create the gfal2 context used later.

    :param abort_on_error: stop on the first failure instead of continuing
    :param recursive: descend into subdirectories
    :param only_files: restrict the operation to plain files
    :param chmod: also change permissions (semantics depend on the caller)
    """
    self.abort_on_error = abort_on_error
    self.recursive = recursive
    self.only_files = only_files
    self.chmod = chmod
    self.context = gfal2.creat_context()
def gfal_cat_main():
    """Entry point for gfal-cat: stream the remote file contents to stdout.

    :return: 0 on success
    """
    params = create_gfal_cat_parser().parse_args(sys.argv[1:])
    set_verbose_mode(params)
    out = sys.stdout

    def std_writer(content):
        # forward each chunk straight to stdout
        out.write(content)

    ctx = gfal2.creat_context()
    applys_option(ctx, params)
    internal_readder(ctx, params.file_uri[0], std_writer)
    return 0
def createGfal2Context(logLevel="normal", emulate=False):
    """
    Create a gfal2 context object

    :param logLevel: string with the gfal2 log level
    :param emulate: boolean to be used by unit tests
    :return: the gfal2 context object
    """
    if emulate:
        # unit tests run without a real gfal2 backend
        return None
    context = gfal2.creat_context()
    # map the textual level ("normal", "debug", ...) onto the enum
    gfal2.set_verbose(gfal2.verbose_level.names[logLevel])
    return context
def connect(self):
    """
    Establishes the actual connection to the referred RSE. If we decide
    to use gfal, init should be done here.

    :raises RSEAccessDenied
    """
    ctx = gfal2.creat_context()  # pylint: disable=no-member
    # Allowed TURL protocols for SRM; GSI auth for xrootd; no path rewriting.
    ctx.set_opt_string_list("SRM PLUGIN", "TURL_PROTOCOLS",
                            ["gsiftp", "rfio", "gsidcap", "dcap", "kdcap"])
    ctx.set_opt_string("XROOTD PLUGIN", "XRD.WANTPROT", "gsi,unix")
    ctx.set_opt_boolean("XROOTD PLUGIN", "NORMALIZE_PATH", False)
    self.__ctx = ctx
def list_files(self, akt_grid_path=None):
    """Append the full URL of every .root file in `akt_grid_path` to self.files.

    :param akt_grid_path: directory to list; defaults to self.gridpath
    """
    # Also possible to add a subfolder here
    ctxt = gfal2.creat_context()
    if not akt_grid_path:
        akt_grid_path = self.gridpath
    try:
        listdir = ctxt.listdir(akt_grid_path)
    except Exception:
        # Fixed: bare `except:` narrowed to Exception, the Python-2-only
        # print statement converted, and the message typos corrected.
        print("could not execute \ngfal-ls " + akt_grid_path +
              "\nEnsure that this folder exists")
        return
    for f in listdir:
        if f.endswith('.root'):
            self.files.append(akt_grid_path.rstrip('/') + '/' + f)
def ls(url, dt_filter=None):
    """Return [(name, stat), ...] for the entries of `url`.

    '.' and '..' are skipped; when `dt_filter` is given only entries whose
    d_type matches it are kept.
    """
    ctx = gfal2.creat_context()
    handle = ctx.opendir(url)
    entries = []
    while True:
        dirent, st = handle.readpp()
        if dirent is None:
            # end of directory stream
            break
        name = dirent.d_name
        if name in ('.', '..'):
            continue
        if dt_filter is not None and dirent.d_type != dt_filter:
            continue
        entries.append((name, st))
    return entries
def list_files(self, akt_grid_path = None):
    """Append the full URL of every .root file in `akt_grid_path` to self.files.

    :param akt_grid_path: directory to list; defaults to self.gridpath
    """
    # Also possible to add a subfolder here
    ctxt = gfal2.creat_context()
    if not akt_grid_path:
        akt_grid_path = self.gridpath
    try:
        listdir = ctxt.listdir(akt_grid_path)
    except Exception:
        # Fixed: bare `except:` narrowed to Exception, the Python-2-only
        # print statement converted, and the message typos corrected.
        print("could not execute \ngfal-ls " + akt_grid_path +
              "\nEnsure that this folder exists")
        return
    for f in listdir:
        if f.endswith('.root'):
            self.files.append(akt_grid_path.rstrip('/') + '/' + f)
def test_creat_and_delete_params(self):
    """Transfer parameters must be settable and readable back.

    BUG fixed: the original asserted on `self.check_checksum`, an attribute
    that does not exist on the TestCase — the flag lives on the transfer
    parameters object `t`.
    """
    gfal2.set_verbose(gfal2.verbose_level.debug)
    context = gfal2.creat_context()
    t = context.transfer_params()
    self.assertTrue(t.timeout > 0)
    t.timeout = 10
    self.assertTrue(t.timeout == 10)
    self.assertTrue(t.check_checksum == False)
    t.check_checksum = True
    self.assertTrue(t.check_checksum == True)
    self.assertTrue(t.src_spacetoken == "")
    t.src_spacetoken = "TOKENDTEAM"
    self.assertTrue(t.src_spacetoken == "TOKENDTEAM")
def context(self):
    """Yield a per-process gfal2 context with self.gfal_options applied.

    Contexts are cached per PID so forked workers never share one.
    """
    pid = os.getpid()
    if pid not in self._contexts:
        self._contexts[pid] = gfal2.creat_context()
        # apply options: keys are the set_opt_* suffix ("string", "boolean",
        # ...), values are lists of argument tuples for that setter
        for _type, args_list in self.gfal_options.items():
            for args in args_list:
                getattr(self._contexts[pid], "set_opt_" + _type)(*args)
    try:
        yield self._contexts[pid]
    finally:
        if self.reset_context and pid in self._contexts:
            # drop the cached context; gc frees potentially open connections
            del self._contexts[pid]
            gc.collect()
def connect(self):
    """
    Establishes the actual connection to the referred RSE. If we decide
    to use gfal, init should be done here.

    :raises RSEAccessDenied
    """
    self.__ctx = gfal2.creat_context()
    # self.__ctx.set_opt_string("X509", "CERT", proxy)
    # self.__ctx.set_opt_string("X509", "KEY", proxy)
    # Restrict the TURL protocols the SRM plugin may negotiate.
    self.__ctx.set_opt_string_list(
        "SRM PLUGIN", "TURL_PROTOCOLS",
        ["gsiftp", "rfio", "gsidcap", "dcap", "kdcap"])
def gfal_ls_main():
    """Entry point for gfal-ls: print one directory entry per line.

    :return: 0 on success
    """
    params = create_gfal_ls_parser().parse_args(sys.argv[1:])
    set_verbose_mode(params)
    ctx = gfal2.creat_context()
    applys_option(ctx, params)
    # print c.get_opt_string("LFC PLUGIN","LFC_HOST")
    entries = ctx.listdir(params.file_uri[0])
    if params.all is False:
        # drop dotfiles unless --all was requested
        entries = filter_hiden(entries)
    out = sys.stdout
    for entry in entries:
        out.write("%s\n" % entry)
    return 0
def gfal_save_main():
    """Entry point for gfal-save: copy stdin, line by line, into the URI.

    :return: 0 on success
    """
    params = create_gfal_save_parser().parse_args(sys.argv[1:])
    set_verbose_mode(params)

    def std_readder():
        # one line of stdin per call; empty string signals EOF
        return sys.stdin.readline()

    ctx = gfal2.creat_context()
    applys_option(ctx, params)
    dest = ctx.open(params.file_uri[0], "w")

    def gfal_writer(content):
        dest.write(content)

    loop_save(std_readder, gfal_writer)
    return 0
def get_space_usage(self):
    """
    Get RSE space usage information.

    :returns: a list with dict containing 'totalsize' and 'unusedsize'

    :raises ServiceUnavailable: if some generic error occured in the library.
    """
    rse_name = self.rse['rse']
    # Scratch file where the downloaded space-usage JSON lands.
    dest = '/tmp/rucio-gsiftp-site-size_' + rse_name
    space_usage_url = ''
    # url of space usage json, woud be nicer to have it in rse_settings
    agis = requests.get(
        'http://atlas-agis-api.cern.ch/request/ddmendpoint/query/list/?json'
    ).json()
    agis_token = ''
    # Find this RSE's AGIS entry to get its token and space-usage URL.
    for res in agis:
        if rse_name == res['name']:
            agis_token = res['token']
            space_usage_url = res['space_usage_url']
    import gfal2  # pylint: disable=import-error
    gfal2.set_verbose(gfal2.verbose_level.normal)  # pylint: disable=no-member
    try:
        if os.path.exists(dest):
            # remove any stale copy before downloading a fresh one
            os.remove(dest)
        ctx = gfal2.creat_context()  # pylint: disable=no-member
        ctx.set_opt_string_list(
            "SRM PLUGIN", "TURL_PROTOCOLS",
            ["gsiftp", "rfio", "gsidcap", "dcap", "kdcap"])
        params = ctx.transfer_parameters()
        params.timeout = 3600
        ret = ctx.filecopy(params, str(space_usage_url), str('file://' + dest))
        if ret == 0:
            data_file = open(dest)
            data = json.load(data_file)
            data_file.close()
            if agis_token not in list(data.keys()):
                print('ERROR: space usage json has different token as key')
            else:
                totalsize = int(data[agis_token]['total_space'])
                used = int(data[agis_token]['used_space'])
                unusedsize = totalsize - used
                return totalsize, unusedsize
    except Exception as error:
        print(error)
        raise exception.ServiceUnavailable(error)
def main(args):
    """Copy args.source (a file, or all files of a directory) into args.dest.

    :param args: parsed options with .source, .dest (must end in '/'), .output
    :raises Exception: when args.dest does not end with '/'
    """
    if not args.dest.endswith("/"):
        raise Exception("Destination Path must be a directory " +
                        "ending with a '/'")
    gfal = gfal2.creat_context()
    if is_dir(gfal, args.source):
        files = list_files(gfal, args.source)
        print(files)
    else:
        files = [args.source]
    # 0o755 replaces the Python-2-only octal literal 0755 (a SyntaxError on
    # Python 3); it is valid on both interpreters.
    gfal.mkdir_rec(args.dest, 0o755)
    transfer(gfal, files, args.dest, args.output)
def context(self):
    """Yield a per-process gfal2 context with self.gfal_options applied."""
    # context objects are stored per pid, so create one if it does not exist yet
    pid = os.getpid()
    if pid not in self._contexts:
        self._contexts[pid] = ctx = gfal2.creat_context()
        # gfal_options maps a set_opt_* suffix ("string", "boolean", ...) to
        # a list of argument tuples for that setter
        for _type, args_list in six.iteritems(self.gfal_options):
            for args in args_list:
                getattr(ctx, "set_opt_" + _type)(*args)
    # yield and optionally close it which frees potentially open connections
    try:
        yield self._contexts[pid]
    finally:
        if self.atomic_contexts and pid in self._contexts:
            del self._contexts[pid]
            gc.collect()
def connect(self):
    """
    Establishes the actual connection to the referred RSE. If we decide
    to use gfal, init should be done here.

    :raises RSEAccessDenied
    """
    self.logger(logging.DEBUG, 'connecting to storage')
    gfal2.set_verbose(gfal2.verbose_level.verbose)
    ctx = gfal2.creat_context()  # pylint: disable=no-member
    # Allowed TURL protocols for SRM; GSI auth for xrootd; no path rewriting.
    ctx.set_opt_string_list("SRM PLUGIN", "TURL_PROTOCOLS",
                            ["gsiftp", "rfio", "gsidcap", "dcap", "kdcap"])
    ctx.set_opt_string("XROOTD PLUGIN", "XRD.WANTPROT", "gsi,unix")
    ctx.set_opt_boolean("XROOTD PLUGIN", "NORMALIZE_PATH", False)
    if self.auth_token:
        # pass the bearer token through to gfal2
        ctx.set_opt_string("BEARER", "TOKEN", self.auth_token)
    self.__ctx = ctx
def getFilesFromPath(self, paths, srmprefix):
    """Return "<path>/<file>" for every .root file found under each path.

    Uses the gfal2 python bindings on EL7 hosts and falls back to the
    `gfal-ls` CLI via a shell pipeline elsewhere (Python-2-only module:
    note the print statements and string.split below).

    :param paths: list of remote directory paths
    :param srmprefix: SRM URL prefix prepended to each path for listing
    """
    if 'el7' in os.uname()[2]:
        try:
            # gfal-ls from command line (i.e. subprocess) doesn't work in CC7
            # fortunatley the python binding does, but it's not included in the CMSSW python libraries
            import gfal2
            useGfal2Py = True
        except ImportError:
            # retry after adding the system site-packages to sys.path
            if '/usr/lib64/python2.7/site-packages' not in sys.path:
                sys.path.append('/usr/lib64/python2.7/site-packages')
            try:
                import gfal2
            except ImportError:
                useGfal2Py = False
            else:
                useGfal2Py = True
    else:
        useGfal2Py = False
    if 'X509_CERT_DIR' not in os.environ and os.path.isdir(
            '/etc/grid-security/certificates'):
        # point the grid libraries at the system CA store
        os.environ['X509_CERT_DIR'] = '/etc/grid-security/certificates'
    FileList = []
    for path in paths:
        if useGfal2Py:
            ctx = gfal2.creat_context()
            dircont = ctx.listdir(srmprefix + path)
            files = [f for f in dircont if f.endswith('.root')]
        else:
            command = 'gfal-ls ' + srmprefix + path + " | grep root"
            proc = subprocess.Popen(command,
                                    stderr=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    shell=True)
            out, err = proc.communicate()
            if not proc.returncode == 0:
                # abort the whole program on a listing failure
                print out
                print err
                exit()
            files = string.split(out)
        for file in files:
            FileList.append(path + "/" + file)
    return FileList
def connect(self):
    """
    Establishes the actual connection to the referred RSE. If we decide
    to use gfal, init should be done here.

    :raises RSEAccessDenied
    """
    self.__ctx = gfal2.creat_context()
    self.__ctx.set_opt_string_list("SRM PLUGIN", "TURL_PROTOCOLS",
                                   ["gsiftp", "rfio", "gsidcap", "dcap", "kdcap"])
    endpoint_basepath = self.path2pfn(self.attributes['prefix'])
    try:
        # a non-zero result means the base path is not reachable
        reachable = self.__gfal2_exist(endpoint_basepath)
        if reachable != 0:
            raise exception.RSEAccessDenied()
    except Exception as e:
        raise exception.RSEAccessDenied(e)
def main():
    """Definition of all arguments, help function, etc. Entry point to program."""
    parser = argparse.ArgumentParser(
        description=
        "List file size and chechksum. Needs a valid grid UI and proxy.",
        epilog=
        "Example: ./extract_file_data.py -d srm://gfe02.grid.hep.ph.ic.ac.uk/pnfs/hep.ph.ic.ac.uk/data/gridpp/gridpp/user/daniela.bauer -o myfiles.txt"
    )
    parser.add_argument(
        "-d", "--directory",
        help="full path (including storage element name) to a directory")
    parser.add_argument(
        "-f", "--filename",
        help="full path (including storage element name) to a file")
    req_grp = parser.add_argument_group(title='required arguments')
    req_grp.add_argument('-o', "--output", required=True,
                         help="output file name")
    args = parser.parse_args()
    # 1 is the program itself, how could I forget
    # NOTE(review): this insists on exactly 4 argv tokens, so it rejects
    # legitimate spellings such as "--output=x" — consider checking the
    # parsed args instead.
    if len(sys.argv) != 5:
        print(
            "Please specify [either a directory or a file] and the output file for the results."
        )
        sys.exit(0)
    file_descriptor = open(args.output, "w")
    ctxt = gfal2.creat_context()
    if args.directory:
        # print(args.directory)
        list_dir(ctxt, args.directory, file_descriptor)
    elif args.filename:
        single_file(ctxt, args.filename, file_descriptor)
    else:
        print("Something went wrong.")
    file_descriptor.close()
def add_input_files(self, dataset_nick, path_spec):
    """Register input files matching `path_spec` under `dataset_nick`.

    `path_spec` is either a local glob pattern or '<protocol>://<path>';
    only the 'srm' protocol is resolved remotely (via gfal2).
    """
    protocol, path = "", path_spec
    if '://' in path_spec:
        protocol, path = path_spec.split('://', 1)
    matched = []
    if protocol == '':
        # plain local path: expand the glob pattern
        matched = glob.glob(path)
    elif protocol == 'srm':
        import gfal2
        grid_ctx = gfal2.creat_context()
        folder = os.path.dirname(path)
        pattern = os.path.basename(path)
        for name in grid_ctx.listdir(folder):
            if fnmatch.fnmatch(name, pattern):
                matched.append("{}://{}/{}".format(protocol, folder, name))
    return self._input_files.setdefault(dataset_nick, []).extend(matched)
def copy_file(self, source, destination):
    """Copy `source` to `destination` with gfal2, retrying on failure.

    NOTE(review): the loop has no upper bound on attempts — confirm an
    unbounded retry is intended.
    """
    attempts = 1
    while True:
        ctx = gfal2.creat_context()
        params = ctx.transfer_parameters()
        params.overwrite = True
        params.create_parent = True
        params.timeout = 300
        try:
            print(source)
            print(" attempt = " + str(attempts))
            ctx.filecopy(params, source, destination)
            del ctx
            break
        except Exception as e:
            # `except Exception, e` and the bare prints were Python-2-only
            # syntax; behaviour is otherwise unchanged.
            print(" !!!!!!!!!!!!---------!!!!!!!!!!!!")
            print("Copy failed: %s" % str(e))
            del ctx
            attempts += 1
def pdm_gfal_ls(root, depth=-1, verbosity=logging.INFO, timeout=None): """ Get a directory listing of a given depth. Depth = -1 list the filesystem for all levels. timeout is a global timeout for all gfal operations. """ # _logger.addHandler(logging.StreamHandler()) _logger.setLevel(verbosity) _logger.info("gfal listing root: %s at max depth: %d", root, depth) max_depth = max(-1, depth) ctx = gfal2.creat_context() if timeout is not None: ctx.set_opt_integer("CORE", "NAMESPACE_TIMEOUT", timeout) result = OrderedDict() # determine if the path point to a file, no recursion if True try: stat_tup = ctx.stat(root) except Exception as gfal_exc: _logger.error("Error when obtaining ctx.stat(%s) \n %s", root, gfal_exc) dump_and_flush({'Reason': str(gfal_exc), 'Code': 1, 'id': ID}) sys.exit(1) stat_dict = { k: getattr(stat_tup, k) for k, _ in inspect.getmembers(stat_tup.__class__, lambda x: isinstance(x, property)) } if stat.S_ISDIR(stat_dict['st_mode']): pdm_gfal_long_list_dir(ctx, root, result, max_depth) else: _logger.debug("Top path points to a file ...") pdm_gfal_list_file(stat_dict, root, result) if verbosity == logging.DEBUG: pp.pprint(result, stream=sys.stderr) return result
def pdm_gfal_rm(rmdict, verbosity=logging.INFO, timeout=None):
    """
    Remove files and directories. Print json string immediately after
    a file is removed.

    :param rmdict: json-loaded dict with data {"source": url}
    :param verbosity: mapped from "options":{"verbosity":logging level}
    :param timeout: global gfal2 timeout for all operations
    """
    # _logger.addHandler(logging.StreamHandler())
    _logger.setLevel(verbosity)
    ctx = gfal2.creat_context()
    if timeout is not None:
        ctx.set_opt_integer("CORE", "NAMESPACE_TIMEOUT", timeout)

    def _remove_all(operation, pairs):
        # Apply `operation` to every (jobid, url) pair, emitting one JSON
        # status line per entry.
        for jobid, target in pairs:
            try:
                code = operation(str(target))
                dump_and_flush({'Code': code, 'Reason': 'OK', 'id': jobid})
            except gfal2.GError as gerror:
                dump_and_flush({'Code': 1, 'Reason': str(gerror),
                                'id': jobid}, _logger, str(gerror))

    # files first (list of dublets: (jobid, filename)), then directories
    _remove_all(ctx.unlink, rmdict.get('files', []))
    _remove_all(ctx.rmdir, rmdict.get('dirs', []))
    return
def check_status(surl, verbose=True):
    """
    Obtain the status of a file from the given SURL.

    Args:
        surl (str): the SURL pointing to the file.
        verbose (bool): print the status to the terminal.
    Returns:
        surl, status (tuple): the SURL of a file and its status as stored
        in the 'user.status' attribute.
    Usage:
        >>> from state import check_status
        >>> filename="srm://srm.grid.sara.nl:8443/pnfs/path-to-your-file"
        >>> check_status(filename)
    """
    ctx = gfal2.creat_context()
    status = ctx.getxattr(surl, 'user.status')
    if verbose:
        # green for staged/online replicas, red otherwise
        online = status in ('ONLINE_AND_NEARLINE', 'ONLINE')
        color = "\033[32m" if online else "\033[31m"
        print('{:s} {:s}{:s}\033[0m'.format(surl, color, status))
    return (surl, status)
def pdm_gfal_mkdir(data, permissions=0o755, verbosity=logging.INFO, timeout=None):
    """
    Create a new directory.

    :param data: json-loaded dict with data {"dirs": [jobid, url]}
    :param permissions: directory permissions mapped from {"options":{"permissions":int}}
    :param verbosity: mapped from {"options":{"verbosity":logging level}}
    :param timeout: global gfal2 timeout for all operations
    :return: dict of a form {'Code': return code, 'Reason': reason, 'id': jobid})
    """
    _logger.setLevel(verbosity)
    mkdir_list = data.get('dirs', [])
    if not mkdir_list:
        # Nothing to create: report the condition and bail out early.
        _logger.warning("No directory to create passed in")
        dump_and_flush({"Reason": "No directory to create passed in",
                        "Code": 1, 'id': ''})
        return
    ctx = gfal2.creat_context()
    if timeout is not None:
        ctx.set_opt_integer("CORE", "NAMESPACE_TIMEOUT", timeout)
    # One JSON status line is emitted per mkdir attempt.
    for jobid, target in mkdir_list:
        try:
            code = ctx.mkdir(str(target), permissions)
            dump_and_flush({'Code': code, 'Reason': 'OK', 'id': jobid})
        except gfal2.GError as gerror:
            dump_and_flush({'Code': 1, 'Reason': str(gerror), 'id': jobid},
                           _logger, str(gerror), logging.ERROR)
    return
def initialize( self ):
    """ Setting up the crawler with a gfal2 context, a file catalog handle and an empty file dict

    :param self: self reference
    :return: S_OK() on success, S_ERROR when the storage elements cannot
             be instantiated
    """
    self.log = gLogger.getSubLogger( "readFederation", True )
    self.gfal2 = gfal2.creat_context()
    # Root of the HTTP federation that will be crawled.
    self.rootURL = 'http://federation.desy.de/fed/lhcb/LHCb/Collision10'
    self.dedicatedSE = []  #['CNAF_M-DST','IN2P3_M-DST','CERN-USER']
    self.fileList = []
    self.history = []
    res = self.__instantiateSEs()
    if not res['OK']:
        return S_ERROR('Failed to instantiate Storage Elements.')
    self.SEDict = res['Value']
    # Bookkeeping of per-file and per-host outcomes.
    self.successfulFiles = {}
    self.failedFiles = {}
    self.failedHostKey = {}
    self.failedDirectories = []
    self.scannedFiles = 0
    self.scannedDirectories = 0
    #if a gfal2 operation fails for other reasons than NOEXIST we try again in 4 seconds
    self.sleepTime = 0
    #maximum number of tries that gfal2 takes to get information form a server
    self.max_tries = 10
    self.recursionLevel = 0
    # check if there is a checkpoint file, if yes read it as last history
    self.log.debug("readFederation.initialize: Loading checkpoint if available.")
    if os.path.isfile('checkpoint.txt'):
        self.log.debug("readFederation.initialize: Loading checkpoint.")
        with open('checkpoint.txt') as f:
            self.history = f.read().splitlines()
    return S_OK()
def generate_torrent(src, piece_length, tracker):
    """ Returns a torrent file generated from the HTTP source, with the given piece length
    """
    url_src = urlparse(src)
    filename = os.path.basename(url_src.path)
    ctx = gfal2.creat_context()
    log.debug("Stating %s" % src)
    # NOTE(review): this local `stat` shadows any imported `stat` module
    # within this function.
    stat = ctx.stat(src)
    log.info("Size %d" % stat.st_size)
    hash_chain = StringIO()
    # Poor man's approach
    # We could use threads to make this more efficient, but anyway, this is just
    # a prototype
    log.debug("Opening file")
    fd = ctx.open(src, 'r')
    buffer = fd.read(piece_length)
    total_read = 0
    while buffer:
        chunk_read = len(buffer)
        total_read += chunk_read
        log.debug("Read %d (%d)", total_read, chunk_read)
        # Concatenated SHA1 digests of each piece, as the torrent format requires.
        hash_chain.write(hashlib.sha1(buffer).digest())
        buffer = fd.read(piece_length)
    torrent = {
        'announce': tracker,
        # web-seed URL so clients can fall back to plain HTTP
        'url-list': src,
        'info': {
            'name': filename,
            'length': stat.st_size,
            'piece length': piece_length,
            'pieces': hash_chain.getvalue(),
        }
    }
    return bencode.bencode(torrent)
def check_status(surl_link, verbose=True):
    """
    Obtain the status of a file from the given surl.

    Args:
        :param surl: the SURL pointing to the file.
        :type surl: str
        :parame verbose: print the status to the terminal.
        :type verbose: bool
    Returns:
        :(filename, status): a tuple containing the file and status as
         stored in the 'user.status' attribute.
    """
    ctx = gfal.creat_context()
    status = ctx.getxattr(surl_link, 'user.status')
    filename = surl_link.split('/')[-1]
    # green for staged/online replicas, red otherwise
    online = status in ('ONLINE_AND_NEARLINE', 'ONLINE')
    color = "\033[32m" if online else "\033[31m"
    if verbose:
        print('{:s} is {:s}{:s}\033[0m'.format(filename, color, status))
    return (filename, status.strip())
def stage_srm(surl, pintime, timeout, asynch=True):
    """Issue a bring_online (staging) request for `surl`.

    :param surl: SURL of the file to stage
    :param pintime: how long the replica should stay pinned
    :param timeout: staging operation timeout
    :param asynch: request asynchronous staging when True
    :return: the (errors, token) pair produced by gfal2
    """
    ctx = gfal.creat_context()
    errors, token = ctx.bring_online(surl, pintime, timeout, asynch)
    return errors, token
# ------------------------------------------------------------------------- # Parameters check # ------------------------------------------------------------------------- (options, args) = optParser.parse_args() # Check options validity if options.url = = '': optParser.error("Option --url is mandatory.") exit(1) outputToFile = options.output_file != '' # Define gfal2 context as a global variable global context context = gfal2.creat_context() # Method that format a stat.st_mode item into `ls -l` like permissions def mode_to_rights(st_mode) : # Variable containing the result permission string permstr = '' # Set the file type: # d for directory # l for symbolic link # - for files if stat.S_ISDIR(st_mode): permstr + = 'd' else: if stat.S_ISLNK(st_mode):
def setUp(self):
    """Create a fresh gfal2 context before each test."""
    self.context = gfal2.creat_context()
def test_creat_and_delete_instance(self):
    """A context can be created and explicitly deleted without errors."""
    gfal2.set_verbose(gfal2.verbose_level.debug)
    ctx = gfal2.creat_context()
    del ctx
def gfal_exec(method, args, nonerrors = {}, return_value = False):
    """
    GFAL2 execution function

    @param method        Name of the Gfal2Context method to execute.
    @param args          Tuple of arguments to pass to the method
    @param nonerrors     Dictionary of error code translation for non-errors.
    @param return_value  If True, simply return the return value of the function.

    @return (exit code, start time, finish time, error message, log string)

    NOTE(review): `nonerrors = {}` is a mutable default argument — harmless
    while it is only read, but worth confirming callers never mutate it.
    """
    start_time = None
    finish_time = None
    log = ''

    for attempt in xrange(5):
        # gfal2 knows to write to the logger. Redirect to StringIO and dump the full log at the end.
        stream = cStringIO.StringIO()
        LOG.handlers.pop()
        handler = logging.StreamHandler(stream)
        handler.setFormatter(logging.Formatter(fmt = '%(asctime)s: %(message)s'))
        LOG.addHandler(handler)

        start_time = int(time.time())

        try:
            gfal2.set_verbose(gfal2.verbose_level.verbose)
            context = gfal2.creat_context()
            # Dispatch the named Gfal2Context method with the fresh context.
            result = getattr(gfal2.Gfal2Context, method)(context, *args)
            finish_time = int(time.time())
        except gfal2.GError as err:
            if return_value:
                raise
            exitcode, msg = err.code, str(err)
            # Some error codes are only carried inside the message text.
            c = find_msg_code(msg)
            if c is not None:
                exitcode = c
            if exitcode in nonerrors:
                # Translated to success per the caller's nonerrors map.
                return 0, start_time, int(time.time()), nonerrors[exitcode], ''
            elif exitcode in irrecoverable_errors:
                break
        except Exception as exc:
            if return_value:
                raise
            exitcode, msg = -1, str(exc)
        else:
            exitcode, msg = 0, None
        finally:
            handler.flush()
            log_tmp = stream.getvalue().strip()
            # give a nice indent to each line
            log = ''.join('  %s\n' % line for line in log_tmp.split('\n'))
            stream.close()

        break

    if return_value:
        return result
    else:
        # all variables would be defined even when all attempts are exhausted
        return exitcode, start_time, finish_time, msg, log
def __init__(self):
    """Create the gfal2 context and the DIRAC file catalogue handle."""
    # set up gfal
    self.__gfalctxt = gfal2.creat_context()
    # setup DIRAC file catalogue
    self.__fc = FileCatalog()