def start(cls, spinner_type=None, sleep=0.5, running_message='Loading ', end_message='Done.'):
    """Start the progress spinner in a daemon thread.

    Args
        spinner_type: 0 for the plain ASCII spinner, 1 for the unicode (K2K)
            spinner. When None, chosen from the 'interface.unicode_term'
            config flag.
        sleep: delay (seconds) between spinner updates.
        running_message: text displayed while the spinner runs.
        end_message: text displayed when the spinner is stopped.

    Raises
        SDException: if a message is incorrect or spinner_type is unknown.
    """
    if spinner_type is None:
        # default to the ASCII spinner when the terminal has no unicode support
        spinner_type = 0 if sdconfig.config.get('interface', 'unicode_term') == '0' else 1

    # check
    if not cls.is_message_correct(running_message):
        raise SDException('SDPROGRE-001', 'Incorrect message')
    if not cls.is_message_correct(end_message):
        raise SDException('SDPROGRE-002', 'Incorrect message')

    cls._stop_event.clear()
    cls.end_message = end_message

    if spinner_type == 0:
        spinner = Spinner(running_message)
    elif spinner_type == 1:
        spinner = K2KSpinner(running_message)
    else:
        # BUGFIX: previously an unknown spinner_type left 'spinner' unbound,
        # causing an UnboundLocalError below; fail fast with a clear error.
        raise SDException('SDPROGRE-003', 'Incorrect spinner type (%s)' % spinner_type)

    spinner_thread = cls.MySpinnerThread(spinner, sleep, cls._stop_event)
    spinner_thread.setDaemon(True)  # don't block interpreter exit on the spinner
    spinner_thread.start()
def get_security_dir():
    """Resolve the '.esg' certificates directory according to 'security_dir_mode'.

    Returns
        The security directory path (string).

    Raises
        SDException: if HOME is required but unset, or the mode is unknown.
    """
    if security_dir_mode == sdconst.SECURITY_DIR_TMP:
        return "%s/.esg" % tmp_folder

    if security_dir_mode == sdconst.SECURITY_DIR_TMPUID:
        # one sub-folder per uid, so concurrent users don't clash
        return "%s/%s/.esg" % (tmp_folder, str(os.getuid()))

    if security_dir_mode == sdconst.SECURITY_DIR_HOME:
        if 'HOME' not in os.environ:
            raise SDException('SDCONFIG-120', "HOME env. var. must be set when 'security_dir_mode' is set to %s" % sdconst.SECURITY_DIR_HOME)
        return "%s/.esg" % os.environ['HOME']

    if security_dir_mode == sdconst.SECURITY_DIR_MIXED:
        # location depends on the execution context (interactive vs daemon)
        wia = sdtools.who_am_i()
        if wia == 'ihm':
            if 'HOME' not in os.environ:
                raise SDException('SDCONFIG-121', "HOME env. var. must be set when 'security_dir_mode' is set to %s in a IHM context" % sdconst.SECURITY_DIR_MIXED)
            return "%s/.esg" % os.environ['HOME']
        elif wia == 'daemon':
            return "%s/.esg" % tmp_folder
        else:
            assert False  # who_am_i() is expected to return 'ihm' or 'daemon'

    raise SDException('SDCONFIG-020', "Incorrect value for security_dir_mode (%s)" % security_dir_mode)
def check_version(conn):
    """Upgrade the database schema if database version does not match binary version.

    Note
        This func must be light as executed each time 'synda' starts
        (except for special case like 'synda -V').

    Raises
        SDException: if the db is too old to upgrade, or newer than the binary.
    """
    current_db_version = sddbversionutils.get_db_version(conn)

    if current_db_version is None:
        # this case is for when starting from scratch
        # (first startup or when db file as been removed)
        c = conn.cursor()
        c.execute("insert into version (version) values (?)", (sdapp.version, ))
        conn.commit()
        c.close()
    else:
        # NOTE(review): versions are compared lexicographically as strings,
        # which is only correct while components stay single-digit
        # (e.g. "2.10" < "2.9" lexicographically) — confirm versioning scheme.
        if current_db_version < "2.9":
            raise SDException("SDDBVERS-316", "Database version too old: cannot upgrade database")

        if sdapp.version == current_db_version:
            pass  # db version matches binary version, nothing to do
        elif sdapp.version < current_db_version:
            raise SDException("SDDBVERS-317", "Binary cannot be used with this database (binary version too old)")
        elif sdapp.version > current_db_version:
            upgrade_db(conn, current_db_version, sdapp.version)
def get_url(self):
    """Build the full search-API url (base url plus limit and offset filters).

    Raises
        SDException: if the index host placeholder is still present, or the
            url exceeds the configured maximum buffer size.
    """
    parts = [self._url, self.get_limit_filter(), self.get_offset_filter()]
    full_url = ''.join(parts)

    # sanity checks
    if sdconst.IDXHOSTMARK in full_url:
        raise SDException('SDATYPES-004', 'host must be set at this step (url=%s)' % full_url)

    # apache server doesn't support more than 4000 chars for HTTP GET buffer
    if len(full_url) > sdconfig.url_max_buffer_size:
        raise SDException("SDATYPES-003", "url is too long (%i)" % len(full_url))

    return full_url
def set_scalar(dquery, name, value):
    """Store a scalar facet value into the query dict.

    Scalar facets are stored as one-element lists (tricky, but keeps all
    facets homogeneous as vectors).

    Raises
        SDException: if 'value' is a list, or 'name' is already present.
    """
    if isinstance(value, list):
        raise SDException("SDDQUERY-003", "Incorrect type (%s)" % (name,))

    if name not in dquery:
        dquery[name] = [value]
    else:
        raise SDException("SDDQUERY-002", "Key already exist (%s)" % (name,))
def get_dataset_(not_found_raise_exception=False, **search_constraints):
    """Return exactly one dataset matching the given constraints.

    Args
        not_found_raise_exception: when True, raise instead of returning None
            on no match.

    Raises
        SDException: on no match (if requested) or on multiple matches.
    """
    matches = get_datasets(**search_constraints)
    count = len(matches)

    if count == 1:
        return matches[0]
    if count > 1:
        raise SDException("SYNCDDAO-004", "Too many results")

    # no match
    if not_found_raise_exception:
        raise SDException("SYNCDDAO-003", "Dataset not found")
    return None
def run(stream=None, path=None, parameter=None, index_host=None, dry_run=False, type_=sdconst.SA_TYPE_DATASET):
    """Run a single count-mode search-API query and return a Response object.

    Args
        stream / parameter: query source ('parameter' mode assumed when
            stream is None, see TAGJFJ4R4JKFFJD for more informations).
        dry_run: when True, only print the url and return an empty Response.
        type_: forced 'type' facet (dataset by default).

    Raises
        SDException: if no query, or more than one query, was built.
    """
    # BUGFIX: 'parameter' previously defaulted to a mutable list ([]), which
    # is shared across calls and gets polluted by add_forced_parameter();
    # use the None-sentinel idiom (same as sdquicksearch.run()).
    if parameter is None:
        parameter = []

    # type management
    if stream is not None:
        sddeferredbefore.add_forced_parameter(stream, 'type', type_)
    else:
        # if stream is None, we assume 'parameter' mode
        # (see TAGJFJ4R4JKFFJD for more informations)
        sddeferredbefore.add_forced_parameter(parameter, 'type', type_)

    queries = sdpipeline.build_queries(stream=stream, path=path, parameter=parameter, index_host=index_host, parallel=False, load_default=False, count=True)

    if len(queries) < 1:
        raise SDException("SDQSEARC-001", "No query to process")

    # we don't support multiple queries because of duplicate/intersection between queries
    # (i.e. which num_found attribute to use (from which query..))
    if len(queries) > 1:
        raise SDException("SDQSEARC-100", "Too much query (multi-query is not allowed in this module, use sdquicksearch instead)")

    query = queries[0]

    if dry_run:
        request = sdtypes.Request(url=query['url'], pagination=False)

        print('%s' % request.get_url())

        # debug
        #print 'Url: %s'%request.get_url()
        #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        return ws_call(query)  # return Response object
def update_file(file, commit=True, conn=sddb.conn):
    """Update one file row in the database.

    Args
        file: file object whose listed attributes are written back.
        commit: commit the transaction after the update.
        conn: database connection (module-level connection by default).

    Raises
        SDException: if the row is missing or the functional key is duplicated.
    """
    keys = ['status', 'error_msg', 'sdget_status', 'sdget_error_msg', 'start_date', 'end_date', 'duration', 'rate', 'priority']

    # 'url' needs to be present when 'sdnexturl' feature is enabled
    if sdconfig.next_url_on_error:
        keys.append('url')
        # for future: keys.append('searchapi_host')

    rowcount = sdsqlutils.update(file, keys, commit, conn)

    # check
    # BUGFIX: error paths referenced 'i__tr.file_id' but no 'i__tr' exists in
    # this scope (NameError); the intended object is the 'file' argument.
    if rowcount == 0:
        raise SDException("SYNCDDAO-121", "file not found (file_id=%i)" % (file.file_id,))
    elif rowcount > 1:
        raise SDException("SYNCDDAO-120", "duplicate functional primary key (file_id=%i)" % (file.file_id,))
def process_rfv_parameter(parameter, selection):
    """Parse one rfv parameter and attach it as a sub-selection.

    rfv means 'Realm Frequency n Variable'.

    Note
        "*" wildcard character is supported for realm and frequency and variable.

    Sample
        variable[atmos][*]=cl ta hus hur wap ua va zg clcalipso

    Raises
        SDException: if the parameter doesn't match the rfv syntax.
    """
    match = re.search('variables?\[(.+)\]\[(.+)\]="?([^"=]+)"?$', parameter)
    if match is None:
        raise SDException("SDPARSER-002", "incorrect parameter format (%s)" % parameter)

    facets = {
        "realm": [match.group(1)],
        "time_frequency": [match.group(2)],
        "variable": sdtools.split_values(match.group(3)),
    }

    # add sub-selection ("rfvsp" means "Realm Frequency Variable Special Parameter")
    selection.childs.append(Selection(facets=facets, filename="rfvsp"))
def process_parameter(parameter, selection):
    """Dispatch one selection parameter to the matching parser and store it.

    Raises
        SDException: if a special-form parameter matches no known syntax.
    """
    if is_sfg_parameter(parameter):
        # special-form parameters (rfv / ffv)
        if is_rfv_parameter(parameter):
            process_rfv_parameter(parameter, selection)
            return
        if is_ffv_parameter(parameter):
            process_ffv_parameter(parameter, selection)
            return
        raise SDException("SDPARSER-012", "incorrect parameter format (%s)" % parameter)

    if '=' in parameter:
        # key-value parameter
        (param_name, param_value) = parse_parameter(parameter)
    else:
        # as '=' is missing, we consider it's the parameter name that is
        # not present.
        # we keep the parameter value and we will try to guess the
        # corresponding parameter name using 'sdinference' module in a
        # downstream step.
        # until then, we store all those pending parameters in a dedicated key
        param_name = sdconst.PENDING_PARAMETER
        param_value = [parameter]

    add_parameter(param_name, param_value, selection)
def qualitycheck_ok(dataset_versions, d):
    """Accept or deny 'latest' promotion for dataset 'd' based on statistics.

    Returns
        False if 'd' don't seem ready to be promoted to 'latest',
        True if 'd' seems ready to be promoted to 'latest'.

    Raises
        SDException: if 'd' is already the latest-flagged version.
    """
    # retrieve stats for current latest flagged version
    latest_dataset = dataset_versions.get_dataset_with_latest_flag_set()
    current_version_stats = latest_dataset.statistics

    # retrieve stats for candidate version for 'latest' promotion
    candidate_stats = sddatasetquery.get_dataset_stats(d)

    # assert: candidate must not be the latest-flagged version itself
    if latest_dataset.dataset_id == d.dataset_id:
        raise SDException("SYDDFLAG-140", "fatal error (%i)" % d.dataset_id)

    # variable number quality check: deny promotion when the candidate's
    # variable count drops below half of the current latest version's
    if candidate_stats['variable_count'] < (current_version_stats['variable_count'] * 0.5):
        sdlog.info("SYDDFLAG-730", "%s" % d.get_full_local_path())
        return False

    # total file number quality check (disabled)
    #if candidate_stats.getFilesCount() < current_version_stats.getFilesCount(): # if file number decrease
    #    sdlog.info("SYDDFLAG-734","%s"%d.get_full_local_path())
    #    return False

    return True
def get_scalar(facets_group, name, default=None, type_=None):
    """Return a facet value as a scalar.

    Args
        type_: helper used to cast before returning the value.

    Returns
        Value as scalar, or 'default' when 'name' is absent.

    Raises
        SDException: if the stored vector holds more than one value.
    """
    if name not in facets_group:
        return default

    # tricky code because scalar facet are stored as vector
    raw = facets_group[name]
    if isinstance(raw, list):
        if len(raw) > 1:
            raise SDException("SDDQUERY-001", "Too much values for '%s' parameter (value='%s')" % (name, str(raw)))
        scalar = raw[0]
    else:
        scalar = raw

    return sdutils.cast(scalar, type_)
def complete(files):
    """Complete each item's 'status' (and 'priority' for files) from the local db.

    The if/else block below is because this module can be used to process
    different metadata type (File and Dataset).

    Raises
        SDException: on an unknown 'type' value.
    """
    for f in files:
        if f["type"] == sdconst.SA_TYPE_FILE:
            transfer = sdfiledao.get_file(f['file_functional_id'])

            # BUGFIX: was 'transfer<>None' — the '<>' operator is Python 2
            # only (removed in Python 3); 'is not None' is the correct idiom.
            if transfer is not None:
                f['status'] = transfer.status
                if sdpostpipelineutils.exists_attached_parameter(f, 'priority'):
                    # this is to allow setting priority using selection parameter (i.e. default priority can be overrided using selection parameter). It is usefull here for example when user wants to change priority (YES, a search-API request is needed in this case!).
                    f['priority'] = sdpostpipelineutils.get_attached_parameter(f, 'priority')
                else:
                    f['priority'] = transfer.priority
            else:
                f['status'] = sdconst.TRANSFER_STATUS_NEW
                if sdpostpipelineutils.exists_attached_parameter(f, 'priority'):
                    # this is to allow setting priority using selection parameter (i.e. default priority can be overrided using selection parameter). This is usefull here to set special priority for new files.
                    f['priority'] = sdpostpipelineutils.get_attached_parameter(f, 'priority')
                else:
                    f['priority'] = sdconst.DEFAULT_PRIORITY
        elif f["type"] == sdconst.SA_TYPE_DATASET:
            dataset = sddatasetdao.get_dataset(dataset_functional_id=f['dataset_functional_id'])

            if dataset is not None:
                f['status'] = dataset.status
            else:
                f['status'] = sdconst.DATASET_STATUS_NEW
        else:
            raise SDException('SDCOMPLE-001', 'Incorrect type (%s)' % f["type"])

    return files
def set_variable_when_empty():
    """Backfill the 'variable' column for transfers where it is not set.

    The variable name is extracted from the file's local path, e.g.
    MOHC/HadGEM2-ES/piControl/.../sfcWind/sfcWind_day_..._19091201-19191130.nc
    => 'sfcWind'. Rows are fetched and processed in blocks (optimisation not
    to load 300000 File objects in memory at once).

    Raises
        SDException: if a local path doesn't match the expected layout.
    """
    rege = re.compile("^(.+)/([^/]+)/([^/]+)/[^/]+$")
    i = 0

    # NOTE: removed a dead 'transfers = []' assignment that was immediately
    # overwritten by the query below.
    transfers = sdrebuildquery.get_transfers__variable_null()
    while len(transfers) > 0:  # loop while there are still rows with variable not set

        # extract variable name from local_path
        for t in transfers:  # loop over block of 1000
            rege_result = rege.match(t.getLocalPath())
            if rege_result is not None:  # was '!= None'; 'is not None' is the idiom
                t.variable = rege_result.group(3)  # sample => sfcWind
            else:
                raise SDException("SDREBUIL-010", "incorrect format")

        # update
        for t in transfers:  # loop over block of 1000
            sdfiledao.update(t, sddb.conn)
            i += 1

        sddb.conn.commit()

        transfers = sdrebuildquery.get_transfers__variable_null()

        SDProgressDot.print_char("|")

    # parenthesized prints work identically on Python 2 and 3
    print("")
    print("%i record updated" % i)
def add_selection(us):
    """Register selection 'us' in the database and decide whether it needs a fullscan.

    New selections and selections whose file checksum changed are flagged
    'fullscan'; unchanged selections keep their stored status.
    """
    # compute selection checksum from scratch
    l__file_checksum=sdutils.compute_checksum(us.get_selection_file_full_path())

    if not exists_selection(us):
        # add selection in database if missing
        us.set_checksum(l__file_checksum)
        us.set_status(sdconst.SELECTION_STATUS_NEW)
        us.set_fullscan(True)
        insertSelection(us) # warning: this modify us object (set PK)
    else:
        # selection already in database
        from_db_us=fetch_selection(us.get_filename()) # retrieve us from DB
        us.set_selection_id(from_db_us.get_selection_id()) # copy DB id

        # check if same checksums
        if l__file_checksum==from_db_us.get_checksum():
            # same checksum: file unchanged since last run

            # retrieve status
            us.set_status(from_db_us.get_status())
            us.set_checksum(from_db_us.get_checksum())

            if us.get_status()==sdconst.SELECTION_STATUS_NORMAL:
                # nothing to do here (let (a) and (b) decide if we need fullscan)
                pass
            elif us.get_status()==sdconst.SELECTION_STATUS_MODIFIED:
                us.set_fullscan(True)
            elif us.get_status()==sdconst.SELECTION_STATUS_NEW:
                us.set_fullscan(True)
            else:
                raise SDException("SYNDATSEL-071","unknown status")
        else:
            # checksum differ: file changed on disk since last run
            sdlog.info("SYNDASEL-197","%s selection has been modified (marked for fullscan)"%us.get_filename())
            us.set_checksum(l__file_checksum)   # update checksum
            us.set_status(sdconst.SELECTION_STATUS_MODIFIED) # update status
            update_selection(us)

    # add selection in selection list
    # TODO
    _selections[us.get_filename()]=us
def run(stream=None, path=None, parameter=None, index_host=None, post_pipeline_mode='file', dry_run=False):
    """Run search-API queries and return the resulting metadata.

    Args
        post_pipeline_mode: post-processing pipeline to apply on the result.
        dry_run: when True, only print the urls and return an empty Response.

    Raises
        SDException: if no query could be built.
    """
    if parameter is None:
        parameter = []

    queries = sdpipeline.build_queries(stream=stream, path=path, parameter=parameter, index_host=index_host, parallel=False, load_default=False)

    if len(queries) < 1:
        raise SDException("SDQSEARC-001", "No query to process")

    # cast needed as progress can be str (set from parameter) or bool (set programmaticaly)
    progress = sdsqueries.get_scalar(queries, 'progress', False, type_=bool)
    searchapi_host = sdsqueries.get_scalar(queries, 'searchapi_host')

    if dry_run:
        for query in queries:
            request = sdtypes.Request(url=query['url'], pagination=False)

            print('%s' % request.get_url())

            # debug
            #print 'Url: %s'%request.get_url()
            #print 'Attached parameters: %s'%query.get('attached_parameters')
        return sdtypes.Response()

    try:
        if progress:
            # waiting message => TODO: move into ProgressThread class
            sdtools.print_stderr(sdi18n.m0003(searchapi_host))
            ProgressThread.start(sleep=0.1, running_message='', end_message='Search completed.')  # spinner start

        mqr = process_queries(queries)
        metadata = mqr.to_metadata()

        sdlog.debug("SDQSEARC-002", "files-count=%d" % metadata.count())
        metadata = sdpipeline.post_pipeline(metadata, post_pipeline_mode)
        sdlog.debug("SDQSEARC-004", "files-count=%d" % metadata.count())

        return metadata
    finally:
        if progress:
            ProgressThread.stop()  # spinner stop
def run(files):
    """Pick the transfer url for each file from the 'protocol' attached parameter.

    Preference order per protocol (falling back with a log message when the
    preferred url is missing):
        globus : url_globus, url_gridftp, url_http
        gridftp: url_gridftp, url_http
        http   : url_http, url_gridftp
    Protocol-specific url entries are removed once 'url' is chosen.

    Raises
        SDException: on an unknown protocol value.
    """
    for f in files:
        proto = sdpostpipelineutils.get_attached_parameter(f, 'protocol', sdconst.TRANSFER_PROTOCOL_HTTP)

        if proto not in sdconst.TRANSFER_PROTOCOLS:
            raise SDException("SYNPROTO-004", "Incorrect protocol (%s)" % proto)

        if proto == sdconst.TRANSFER_PROTOCOL_GLOBUS:
            if 'url_globus' in f:
                f['url'] = f['url_globus']
            elif 'url_gridftp' in f:
                f['url'] = f['url_gridftp']
            elif 'url_http' in f:
                sdlog.warning('SYNPROTO-005', 'Fallback to http as globus url is missing')
                f['url'] = f['url_http']
        elif proto == sdconst.TRANSFER_PROTOCOL_GRIDFTP:
            if 'url_gridftp' in f:
                f['url'] = f['url_gridftp']
            elif 'url_http' in f:
                sdlog.debug('SYNPROTO-002', 'Fallback to http as gridftp url is missing (%s)' % f["title"])
                f['url'] = f['url_http']
        elif proto == sdconst.TRANSFER_PROTOCOL_HTTP:
            if 'url_http' in f:
                f['url'] = f['url_http']
            elif 'url_gridftp' in f:
                sdlog.warning('SYNPROTO-001', 'Fallback to gridftp as http url is missing')
                f['url'] = f['url_gridftp']
        else:
            raise SDException("SYNPROTO-003", "Incorrect protocol (%s)" % proto)

        # drop the per-protocol url entries now that 'url' is selected
        sdtools.remove_dict_items(f, ['url_globus', 'url_gridftp', 'url_http', 'url_opendap'])

    return files
def check_fields(files):
    """This func is to prevent user to set 'fields' attribute (this attribute is set only by the program, in specific cases).

    Raises
        SDException: as soon as one item carries a 'fields' key.
    """
    for f in files:
        if 'fields' not in f:
            continue
        raise SDException('SDFIPIPE-002', "'fields' parameter can't be used in 'file' pipeline (fields=%s)" % f['fields'])
def compute_RTT(remote_host, count=1):
    """Return the average round-trip time (ms) to 'remote_host' using 'ping'.

    Args
        count: how many ping used to compute the average RTT

    Raises
        SDException: if the ping command fails or its output can't be parsed.
    """
    (status, stdout, stderr) = sdutils.get_status_output('ping -q -c %i %s' % (count, remote_host), shell=True)
    if status != 0:
        raise SDException("SYNDARTT-002", "'ping' command failed (remote_host=%s,status=%i)" % (remote_host, status,))

    # parse the 'min/avg/max/mdev' summary line and keep the 'avg' field
    m = re.search('.*min/avg/max/mdev = ([0-9.]+)/([0-9.]+)/([0-9.]+)/([0-9.]+) ms.*', stdout, re.MULTILINE | re.DOTALL)
    if m is None:
        raise SDException("SYNDARTT-001", "'ping' output parsing error (%s)" % (stdout,))

    return float(m.group(2))
def compare_dn(datanode_1, datanode_2):
    """Return True if datanode_1 is 'nearer' than datanode_2.

    The distance metric is picked from the 'behaviour.nearest_mode' config
    option (geographic distance or round-trip time).

    Raises
        SDException: on an unknown nearest mode.
    """
    mode = sdconfig.config.get('behaviour', 'nearest_mode')

    if mode == 'geolocation':
        metric = get_distance
    elif mode == 'rtt':
        metric = get_RTT
    else:
        raise SDException("SDNEARES-001", "Incorrect nearest mode (%s)" % mode)

    return metric(datanode_1) < metric(datanode_2)
def __init__(self, url=None, pagination=True, limit=sdconst.SEARCH_API_CHUNKSIZE):
    """Initialize a search-API request.

    Args
        url: base request url (may be None and set later).
        pagination: when True, limit/offset filters are appended by the
            caller, so the url must not already carry a 'limit' keyword.
        limit: page size used for pagination.

    Raises
        SDException: if pagination is requested but the url already
            contains a limit keyword.
    """
    self._url = url
    self.pagination = pagination

    if self.pagination and sdtools.url_contains_limit_keyword(self._url):
        raise SDException("SDATYPES-008", "assert error (url=%s)" % self._url)

    self.offset = 0
    self.limit = limit
def get_dataset(path=None, dataset_id=None, dataset_functional_id=None, conn=sddb.conn):
    """Retrieve one dataset by exactly one of path / dataset_id / dataset_functional_id.

    Returns
        The Dataset object, or None when no row matches.

    Raises
        SDException: when zero or more than one search key is supplied.

    TODO: if possible, remove this func and use get_dataset_() instead
    """
    # Raise exception if having too much search keys
    count = sum(1 for va in (path, dataset_id, dataset_functional_id) if va is not None)
    if count > 1:
        raise SDException("SYNCDDAO-123", "Too much arguments (path=%s,dataset_id=%s,dataset_functional_id=%s)" % (path, dataset_id, dataset_functional_id, ))

    # SECURITY/ROBUSTNESS: use '?' placeholders instead of string-interpolated
    # SQL — avoids injection and quoting issues (e.g. a path containing a quote).
    if path is not None:
        q = "select * from dataset where path = ?"
        arg = path
    elif dataset_id is not None:
        q = "select * from dataset where dataset_id = ?"
        arg = dataset_id
    elif dataset_functional_id is not None:
        q = "select * from dataset where dataset_functional_id = ?"
        arg = dataset_functional_id
    else:
        raise SDException("SYNCDDAO-124", "incorrect arguments")

    d = None
    c = conn.cursor()
    c.execute(q, (arg,))
    rs = c.fetchone()
    if rs is not None:
        d = sdsqlutils.get_object_from_resultset(rs, Dataset)
    c.close()

    return d
def parse_parameter(parameter):
    """Split a 'name=value' (or 'name="value"') parameter into (name, values).

    Returns
        Tuple of (parameter name, list of values).

    Raises
        SDException: if the parameter doesn't match the expected syntax.
    """
    m = re.search('^([^=]+)="?([^"=]+)"?$', parameter)
    if m is None:
        raise SDException("SDPARSER-001", "incorrect format (%s)" % (parameter, ))

    name = m.group(1).strip()
    values = sdtools.split_values(m.group(2))
    return (name, values)
def run(files):
    """Choose the transfer url for each file according to the 'protocol' attached parameter.

    When only one of url_gridftp/url_http exists, it is used regardless of the
    requested protocol (with a log message on the mismatch). Url entries are
    removed from the file dict once 'url' is chosen.

    Raises
        SDException: on an unknown protocol value.
    """
    for f in files:
        proto = sdpostpipelineutils.get_attached_parameter(f, 'protocol', sdconst.TRANSFER_PROTOCOL_HTTP)

        if proto not in sdconst.TRANSFER_PROTOCOLS:
            raise SDException("SYNPROTO-004", "Incorrect protocol (%s)" % proto)

        have_gridftp = 'url_gridftp' in f
        have_http = 'url_http' in f

        if have_gridftp and have_http:
            # both urls available: honour the requested protocol
            if proto == sdconst.TRANSFER_PROTOCOL_GRIDFTP:
                f['url'] = f['url_gridftp']
            elif proto == sdconst.TRANSFER_PROTOCOL_HTTP:
                f['url'] = f['url_http']
            else:
                raise SDException("SYNPROTO-003", "Incorrect protocol (%s)" % proto)
        elif have_gridftp:
            # only gridftp
            if proto == sdconst.TRANSFER_PROTOCOL_HTTP:
                sdlog.warning('SYNPROTO-001', 'Fallback to gridftp as http url is missing')
            f['url'] = f['url_gridftp']
        elif have_http:
            # only http
            if proto == sdconst.TRANSFER_PROTOCOL_GRIDFTP:
                sdlog.debug('SYNPROTO-002', 'Fallback to http as gridftp url is missing (%s)' % f["title"])
            f['url'] = f['url_http']
        else:
            # no url available to download the file
            # (should not be here as sdremoverow takes care of those cases)
            assert False

        sdtools.remove_dict_items(f, ['url_gridftp', 'url_http', 'url_opendap'])

    return files
def list_to_scalar(value):
    """Return 'value' as a scalar, unwrapping one-element lists.

    Raises
        SDException: if 'value' is a list whose length is not exactly 1.
    """
    if isinstance(value, list):
        if len(value) != 1:
            # BUGFIX: the message was built as '"...(%s,%s)" % value, key',
            # which binds '%' before the comma (wrong arity for the format)
            # and references an undefined name 'key' — both would crash the
            # error path itself. Report the offending value instead.
            raise SDException("SYNDAXML-002", "Incorrect scalar value (%s)" % (value,))
        else:
            return value[0]  # transform to scalar and return
    else:
        # already scalar, return as is
        return value
def run(metadata, filter_name, filter_value, mode):
    """Filter metadata by keeping or removing files matching (filter_name, filter_value).

    Args
        mode: 'keep' or 'remove'.

    Raises
        SDException: on an unknown mode.
    """
    if mode == 'keep':
        func = keep_matching_files
    elif mode == 'remove':
        func = remove_matching_files
    else:
        raise SDException("SDSIMPLF-002", "Incorrect mode (%s)" % mode)

    po = sdpipelineprocessing.ProcessingObject(func, filter_name, filter_value)
    return sdpipelineprocessing.run_pipeline(metadata, po)
def selection_builder(filename):
    """Dev.

    Build a Selection object from a file in the "selection" folder.

    Raises
        SDException: if the selection file does not exist.
    """
    # selection files live in the "selection" folder
    selection_path = "%s/%s" % (g__selection_folder, filename)

    if not os.path.exists(selection_path):
        raise SDException("SYNDATSEL-099", "file not found: %s (use \"-l\" option to list available selections)" % selection_path)

    # create selection object (from file)
    return Selection(filename=filename, logger=get_logger())
def exists_parameter_value(name, value, conn=sddb.conn):
    """Return True if the (name, value) pair exists in the 'param' table.

    Raises
        SDException: if more than one matching row exists (broken uniqueness).
    """
    c = conn.cursor()
    c.execute("select count(1) from param where name = ? and value = ?", (name, value,))
    count = c.fetchone()[0]
    c.close()

    if count == 1:
        return True
    if count == 0:
        return False
    raise SDException("SYNCDDAO-829", "fatal error")
def use_file_timestamp_if_dataset_timestamp_is_missing(d):
    """Hack: borrow a file timestamp when the dataset has none in ESGF.

    Files of one dataset have not always the same timestamp, so the borrowed
    value is effectively random. We do not filter replica in the query below
    in case the master host is not up.

    Raises
        SDException: if the dataset has no file, or its file also lacks a
            timestamp.
    """
    if 'timestamp' in d:
        return  # dataset already has a timestamp, nothing to do

    search = [
        'limit=1',
        'fields=%s' % timestamp_fields,
        'type=File',
        'dataset_id=%s' % d['instance_id'],
    ]
    result = sdquicksearch.run(parameter=search, post_pipeline_mode=None)

    found = result.get_files()
    if len(found) == 0:
        raise SDException("SDTIMEST-011", "Dataset exist in ESGF, but is empty (%s)" % d['instance_id'])

    file = found[0]
    if 'timestamp' not in file:
        raise SDException("SDTIMEST-008", "Timestamp missing in both dataset and dataset's file(s) (%s)" % d['instance_id'])

    d['timestamp'] = file['timestamp']
    sdlog.info("SDTIMEST-001", "Dataset timestamp set from one dataset's file's timestamp (dataset_functional_id=%s,file_functional_id=%s)" % (d['instance_id'], file['instance_id']))
def parse_metadata(buffer): """Parse result for both type (Dataset and File).""" xmldoc = None l__files = [ ] # can be real file or dataset, depending on "type" input facet if buffer is None: raise SDException("SYNDAXML-001", "Buffer is empty") try: xmldoc = json.loads(buffer) except Exception, e: raise