def add_missing_attributes(files):
    # For some files, there is no 'tracking_id' attribute set. We have to set
    # it to NULL as this information is needed during database insertion
    # (otherwise a KeyError exception occurs).
    for f in files:
        if 'tracking_id' not in f:
            f['tracking_id'] = None

    # For some files, there is no 'checksum' attribute set. We have to set it
    # to NULL as this information is needed during database insertion
    # (otherwise a KeyError exception occurs).
    for f in files:
        if 'checksum' not in f:
            if sdconfig.log_domain_inconsistency:
                sdlog.warning("SDPRFIAT-001", "File has no checksum (%s)" % (f["file_functional_id"],), logger_name=sdconst.LOGGER_DOMAIN)
            f['checksum'] = None
            f['checksum_type'] = None

    return files
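# --- Usage sketch (hypothetical record, not from a real search-API response; assumes the
# --- surrounding module imports such as sdlog/sdconfig/sdconst are available) ---
def _add_missing_attributes_sketch():
    files = [{'file_functional_id': 'hypothetical.file.id'}]  # neither 'tracking_id' nor 'checksum' set
    add_missing_attributes(files)
    assert files[0]['tracking_id'] is None
    assert files[0]['checksum'] is None and files[0]['checksum_type'] is None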
def filter(files):
    keep = []
    reject = []

    if len(files) > 0:
        # retrieve type
        file_ = files[0]       # 'type' is the same for all files
        type_ = file_['type']  # 'type' itself IS scalar

        if type_ == 'File':
            for f in files:
                variable = f.get('variable', [])
                assert isinstance(variable, list)

                if len(variable) == 1:
                    keep.append(f)
                else:
                    reject.append(f)
                    sdlog.warning("SDPOSXPC-002", "WARNING: '%s' file is malformed ('variable' attribute contains too many values)." % f['id'], stderr=True)
        elif type_ == 'Dataset':
            # currently, there are no reject rules for the Dataset type, so we keep all of them
            for f in files:
                keep.append(f)

    return (keep, reject)
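# --- Usage sketch (hypothetical records; assumes sdlog is importable and configured):
# --- a 'File' record is kept only when its 'variable' list holds exactly one value ---
def _filter_usage_sketch():
    sample = [
        {'type': 'File', 'id': 'f1', 'variable': ['tas']},        # exactly one variable -> kept
        {'type': 'File', 'id': 'f2', 'variable': ['tas', 'pr']},  # several variables -> rejected (warning logged)
    ]
    keep, reject = filter(sample)
    assert len(keep) == 1 and len(reject) == 1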
def run_helper(queries):
    """
    Notes
        - 'queries' is not threadsafe (i.e. not a Queue), but it doesn't matter
          as the threads do not use it.
    """
    total_query_to_process = len(queries)

    sdlog.debug("SDPROXMT-003", "%d search-API queries to process (max_thread_per_host=%d,timeout=%d)" % (total_query_to_process, max_thread_per_host, sdconst.SEARCH_API_HTTP_TIMEOUT))

    while True:
        if sdconfig.proxymt_progress_stat:
            sdlog.info("SDPROXMT-033", "threads per host: %s" % ",".join(['%s=%s' % (host, len(searchAPIServices[host]['threadlist'])) for host in searchAPIServices.keys()]))

        if len(queries) > 0:
            distribute_queries(queries)
        else:
            # leave the loop only once all threads have completed
            if all_threads_completed():
                break

        # remove completed threads from the list
        for host in searchAPIServices.keys():
            li = []
            for t in searchAPIServices[host]['threadlist']:
                if t.is_alive():
                    li.append(t)
            searchAPIServices[host]['threadlist'] = li

        # log progress
        total_query_already_processed = total_query_to_process - len(queries)
        if total_query_to_process > 0:  # display progress only when there are a lot of queries
            if len(queries) > 0:        # display progress only while queries remain to be processed
                sdlog.info("SDPROXMT-004", "total_queries=%d, running_or_done_queries=%d, waiting_queries=%d" % (total_query_to_process, total_query_already_processed, len(queries)))

        # if all services are busy, we sleep to limit the loop speed
        # (note that all the code around the "sleep" call is there to detect system overload)
        sleep_time = 10
        warning_threshold = 5  # threshold not to emit a warning for every small load exceedance
        befo = time.time()
        time.sleep(sleep_time)
        afte = time.time()
        diff = afte - befo
        if diff > sleep_time + warning_threshold:
            sdlog.warning("SDPROXMT-005", "WARNING: system overload detected (sleep took %d seconds to complete)." % diff)

    # retrieve results from the output queue
    metadata = sdtypes.Metadata()
    while not __result_queue.empty():
        success = __result_queue.get(False)  # retrieve the result of ONE successful search-API call
        success.connect()  # TAGKLK434L3K34K
        metadata.slurp(success)  # warning: success is modified here

    # retrieve errors from the error queue and store them in a list
    errors = []
    while not __error_queue.empty():
        query = __error_queue.get(False)
        errors.append(query)

    return (metadata, errors)
def dataset_complete_event(project, model, dataset, commit=True):
    sdlog.log("SYDEVENT-004", "'dataset_complete_event' triggered (%s)" % dataset.dataset_functional_id, event_triggered_log_level)

    if project == 'CMIP5':
        (ds_path_output1, ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):
            d1 = sddatasetdao.get_dataset(path=ds_path_output1)
            d2 = sddatasetdao.get_dataset(path=ds_path_output2)
            if d1.status == sdconst.DATASET_STATUS_COMPLETE and d2.status == sdconst.DATASET_STATUS_COMPLETE:
                dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
                dataset_complete_output12_event(project, model, dataset_pattern, commit=commit)

                if d1.latest and d2.latest:
                    latest_dataset_complete_output12_event(project, model, dataset_pattern, commit=commit)
                elif not d1.latest and not d2.latest:
                    non_latest_dataset_complete_output12_event(project, model, dataset_pattern, commit=commit)
                else:
                    sdlog.warning("SYDEVENT-032", "Event not triggered as one product is latest while the other product is not")  # TODO: is this the right way to handle this case?
        else:
            dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
            dataset_complete_output12_event(project, model, dataset_pattern, commit=commit)

            if dataset.latest:
                latest_dataset_complete_output12_event(project, model, dataset_pattern, commit=commit)
            else:
                non_latest_dataset_complete_output12_event(project, model, dataset_pattern, commit=commit)

    # <<<--- 'latest' flag management related code begin

    # store the current 'latest' flag state
    old_latest = dataset.latest

    # TODO: check whether we switch the latest flag independently for each product
    # (meaning output1 latest can be 1 while output2 latest is 0) # tag4342342

    # compute the new 'latest' flag
    if not old_latest:
        # old state is not latest
        sddatasetflag.update_latest_flag(dataset)  # warning: this method modifies the dataset in memory (and in database too)
    else:
        # nothing to do concerning the 'latest' flag as the current dataset is already the latest
        # (the latest flag can only be switched off (i.e. to False) by *other* dataset versions, not by the dataset itself!)
        pass

    # store the new 'latest' flag state
    new_latest = dataset.latest

    # --->>> 'latest' flag management related code end

    # cascade 2
    if (not old_latest) and new_latest:
        dataset_latest_event(project, model, dataset.path, commit=commit)  # trigger 'dataset_latest' event
def check_coherency(facets_groups):
    for facets_group in facets_groups:
        if 'time_frequency' in facets_group:
            if 'ensemble' in facets_group:
                if "fx" in facets_group['time_frequency']:
                    if "r0i0p0" not in facets_group['ensemble']:
                        # print a warning, because 'r0i0p0' is the only valid choice for the 'fx' frequency
                        sdlog.warning("SYDCHECK-003", "'fx' frequency is used, but the ensemble does not include 'r0i0p0'")

    return facets_groups
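# --- Usage sketch (hypothetical facet selection; assumes sdlog is importable) ---
def _check_coherency_sketch():
    # 'fx' frequency requested without the mandatory 'r0i0p0' ensemble -> a warning is logged
    facets_groups = [{'time_frequency': ['fx'], 'ensemble': ['r1i1p1']}]
    return check_coherency(facets_groups)  # the groups are returned unchanged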
def remove_malformed_dataset_version(files):
    keep = []
    reject = []  # not used

    for f in files:
        if sdidtest.is_version_number(f["dataset_version"]):
            keep.append(f)
        else:
            sdlog.warning("SDPREPAR-003", "Incorrect dataset version ('%s')" % (f["dataset_functional_id"],), stderr=False)
            reject.append(f)

    return keep
def remove_empty_files(path):
    for p in sdtools.walk_backward_without_sibling(path):
        for name in os.listdir(p):
            f = '%s/%s' % (p, name)
            # the ignore() test prevents removing files at the top of the tree that are
            # not related to synda (e.g. hidden files in the HOME dir)
            if not ignore(f):
                if os.path.isfile(f) and not os.path.islink(f) and os.path.getsize(f) == 0:
                    try:
                        sdlog.info("SYNCLEAN-090", "Remove empty file (%s)" % (f,))
                        os.remove(f)
                    except Exception as e:
                        sdlog.warning("SYNCLEAN-040", "Error occurred during file deletion (%s,%s)" % (f, str(e)))
def _reload_parameters(parameters):
    for pname, pvalues in parameters.iteritems():
        for i, item in enumerate(pvalues):
            try:
                if item is None:
                    sddao.add_parameter_value(pname, None, commit=False)
                else:
                    sddao.add_parameter_value(pname, item.name, commit=False)
            except IntegrityError:
                sdlog.warning('SDDCACHE-003', 'Value {} has a duplicate in the db. Warn your datanode manager'.format(item.name))

    sddb.conn.commit()
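# --- Usage sketch (hypothetical cache content; assumes sddao/sddb are importable and the
# --- database is initialized; 'Item' is a stand-in for the real parameter value objects) ---
def _reload_parameters_sketch():
    import collections
    Item = collections.namedtuple('Item', 'name')
    parameters = {
        'title': [None],                                    # free parameter: single None value
        'experiment': [Item('historical'), Item('rcp85')],  # non-free parameter: named values
    }
    _reload_parameters(parameters)  # re-inserts every value, skipping duplicates via IntegrityError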
def run(files):
    for file in files:
        protocol = sdpostpipelineutils.get_attached_parameter(file, 'protocol', sdconst.TRANSFER_PROTOCOL_HTTP)

        if protocol not in sdconst.TRANSFER_PROTOCOLS:
            raise SDException("SYNPROTO-004", "Incorrect protocol (%s)" % protocol)

        if protocol == sdconst.TRANSFER_PROTOCOL_GLOBUS:
            if 'url_globus' in file:
                file['url'] = file['url_globus']
            elif 'url_gridftp' in file:
                file['url'] = file['url_gridftp']
            elif 'url_http' in file:
                sdlog.warning('SYNPROTO-005', 'Fallback to http as globus url is missing')
                file['url'] = file['url_http']
        elif protocol == sdconst.TRANSFER_PROTOCOL_GRIDFTP:
            if 'url_gridftp' in file:
                file['url'] = file['url_gridftp']
            elif 'url_http' in file:
                sdlog.debug('SYNPROTO-002', 'Fallback to http as gridftp url is missing (%s)' % file["title"])
                file['url'] = file['url_http']
        elif protocol == sdconst.TRANSFER_PROTOCOL_HTTP:
            if 'url_http' in file:
                file['url'] = file['url_http']
            elif 'url_gridftp' in file:
                sdlog.warning('SYNPROTO-001', 'Fallback to gridftp as http url is missing')
                file['url'] = file['url_gridftp']
        else:
            raise SDException("SYNPROTO-003", "Incorrect protocol (%s)" % protocol)

        sdtools.remove_dict_items(file, ['url_globus', 'url_gridftp', 'url_http', 'url_opendap'])

    return files
def remove_malformed_dataset_functional_id(files):
    """Remove files with a malformed dataset_functional_id.

    Note
        If this func fails to extract the dataset version from
        dataset_functional_id, the file is rejected.
    """
    keep = []
    reject = []  # not used

    for f in files:
        m = re.search(r"^(.*)\.([^.]*)$", f["dataset_functional_id"])
        if m is not None:
            keep.append(f)
        else:
            sdlog.warning("SDPREPAR-002", "Incorrect dataset_functional_id ('%s')" % (f["dataset_functional_id"],), stderr=False)
            reject.append(f)

    return keep
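# --- Regex sketch (hypothetical identifier): the last dot-separated field is taken as the version ---
def _dataset_functional_id_regex_sketch():
    m = re.search(r"^(.*)\.([^.]*)$", "cmip5.output1.MODEL.historical.mon.atmos.v20120101")
    assert m is not None
    assert m.group(1) == "cmip5.output1.MODEL.historical.mon.atmos"
    assert m.group(2) == "v20120101"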
def check_DRS_consistency(files):
    def remove_version_from_path(dataset_functional_id):
        """
        BEWARE: this func expects the last field of 'dataset_functional_id'
        to be the dataset version, no matter what the project is.
        """
        return re.sub(r'\.[^.]+$', '', dataset_functional_id)  # remove last field (version)

    for f in files:
        if "dataset_template" in f:
            # For some projects, the template is missing. In this case, we don't do the check.

            # TODO: maybe replace the '.' with the '/' character in the code below
            # (misleading because the variables below are called 'path' but do not contain '/')
            path_from_id = remove_version_from_path(f["dataset_functional_id"])
            path_from_template = f["dataset_template"] % f

            if path_from_id != path_from_template:
                sdlog.warning("SDCHKFIL-001", "inconsistency detected between metadata and search-API facet (path_from_id=%s,path_from_template=%s)" % (path_from_id, path_from_template))
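# --- Usage sketch (hypothetical record; assumes sdlog is importable): the template interpolated
# --- with the file's facets must match dataset_functional_id once the version field is removed ---
def _check_drs_consistency_sketch():
    f = {
        'dataset_functional_id': 'cmip5.output1.MODEL.historical.v20120101',
        'dataset_template': 'cmip5.%(product)s.%(model)s.%(experiment)s',
        'product': 'output1', 'model': 'MODEL', 'experiment': 'historical',
    }
    check_DRS_consistency([f])  # consistent -> no warning logged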
def add_missing_attributes(files):
    # For some files, there is no 'tracking_id' attribute set. We have to set
    # it to NULL as this information is needed during database insertion
    # (otherwise a KeyError exception occurs).
    for f in files:
        if 'tracking_id' not in f:
            f['tracking_id'] = None

    # For some files, there is no 'checksum' attribute set. We have to set it
    # to NULL as this information is needed during database insertion
    # (otherwise a KeyError exception occurs).
    for f in files:
        if 'checksum' not in f:
            sdlog.warning("SDPRFIAT-001", "File has no checksum (%s)" % (f["file_functional_id"],))
            f['checksum'] = None
            f['checksum_type'] = None

    return files
def extract_info_from_openid(openid):
    """Retrieve username, host and port information from an ESGF OpenID."""

    # openid check (see #44 for more info)
    for openid_host in invalid_openids:
        if openid_host in openid:
            sdlog.warning("SDOPENID-210", "Invalid openid (%s)" % openid)

    try:
        xrds_buf = sdnetutils.HTTP_GET_2(openid, timeout=10, verify=False)
        (hostname, port) = parse_XRDS(xrds_buf)
        username = parse_openid(openid)
        return (hostname, port, username)
    except Exception, e:
        sdlog.error("SDOPENID-200", "Error occurred while processing OpenID (%s)" % str(e))
        raise OpenIDProcessingException('SDOPENID-002', 'Error occurred while processing OpenID')
def run(files):
    for file in files:
        protocol = sdpostpipelineutils.get_attached_parameter(file, 'protocol', sdconst.TRANSFER_PROTOCOL_HTTP)

        if protocol not in sdconst.TRANSFER_PROTOCOLS:
            raise SDException("SYNPROTO-004", "Incorrect protocol (%s)" % protocol)

        if 'url_gridftp' in file and 'url_http' in file:
            if protocol == sdconst.TRANSFER_PROTOCOL_GRIDFTP:
                file['url'] = file['url_gridftp']
            elif protocol == sdconst.TRANSFER_PROTOCOL_HTTP:
                file['url'] = file['url_http']
            else:
                raise SDException("SYNPROTO-003", "Incorrect protocol (%s)" % protocol)
        elif 'url_gridftp' in file:
            # only gridftp
            if protocol == sdconst.TRANSFER_PROTOCOL_HTTP:
                sdlog.warning('SYNPROTO-001', 'Fallback to gridftp as http url is missing')
            file['url'] = file['url_gridftp']
        elif 'url_http' in file:
            # only http
            if protocol == sdconst.TRANSFER_PROTOCOL_GRIDFTP:
                sdlog.debug('SYNPROTO-002', 'Fallback to http as gridftp url is missing (%s)' % file["title"])
            file['url'] = file['url_http']
        else:
            # no url available to download the file
            # (should not be here as sdremoverow takes care of those cases)
            assert False

        sdtools.remove_dict_items(file, ['url_gridftp', 'url_http', 'url_opendap'])

    return files
def dataset_complete_event(project, model, dataset, commit=True):
    sdlog.log("SYDEVENT-004", "'dataset_complete_event' triggered (%s)" % dataset.dataset_functional_id, event_triggered_log_level)

    # not used for now
    """
    event=Event(name=sdconst.EVENT_DATASET_COMPLETE)
    event.project=project
    event.model=model
    event.dataset_pattern=dataset_pattern
    event.variable=''
    event.filename_pattern=''
    event.crea_date=sdtime.now()
    event.priority=sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(event,commit=commit)
    """

    # <<<--- 'latest' flag management related code begin

    # store the current 'latest' flag state
    old_latest = dataset.latest

    # TODO: check whether we switch the latest flag independently for each product
    # (meaning output1 latest can be 1 while output2 latest is 0) # tag4342342

    # compute the new 'latest' flag
    if not old_latest:
        # old state is not latest
        sddatasetflag.update_latest_flag(dataset)  # warning: this method modifies the dataset object in memory (and in database too)
    else:
        # nothing to do concerning the 'latest' flag as the current dataset is already the latest
        # (the latest flag can only be switched off (i.e. to False) by *other* dataset versions, not by the dataset itself!)
        pass

    # store the new 'latest' flag state
    new_latest = dataset.latest

    # --->>> 'latest' flag management related code end

    # cascade 1 (trigger dataset latest switch event)
    if (not old_latest) and new_latest:
        # the latest flag has been switched from false to true
        dataset_latest_event(project, model, dataset.path, commit=commit)  # trigger 'dataset_latest' event

    # cascade 2 (trigger latest dataset complete event)
    if dataset.latest:
        latest_dataset_complete_event(project, model, dataset.local_path, commit=commit)
    else:
        non_latest_dataset_complete_event(project, model, dataset.local_path, commit=commit)

    # cascade 3 (trigger output12 dataset complete event)
    if project == 'CMIP5':
        (ds_path_output1, ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):
            d1 = sddatasetdao.get_dataset(path=ds_path_output1)
            d2 = sddatasetdao.get_dataset(path=ds_path_output2)
            if d1.status == sdconst.DATASET_STATUS_COMPLETE and d2.status == sdconst.DATASET_STATUS_COMPLETE:
                dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
                dataset_complete_output12_event(project, model, dataset_pattern, commit=commit)
        else:
            # Only one product exists for this dataset.
            #
            # Not sure if this code is required. Basically, it says that if only one
            # product is present (output1 or output2), then 'output12' is considered
            # ready to be triggered (i.e. output12 does not require both output1 and
            # output2 to be present; it only requires that if both are present, they
            # must both be complete).
            dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
            dataset_complete_output12_event(project, model, dataset_pattern, commit=commit)

    # cascade 4 (trigger latest output12 dataset complete event)
    if project == 'CMIP5':
        (ds_path_output1, ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):
            d1 = sddatasetdao.get_dataset(path=ds_path_output1)
            d2 = sddatasetdao.get_dataset(path=ds_path_output2)
            if d1.status == sdconst.DATASET_STATUS_COMPLETE and d2.status == sdconst.DATASET_STATUS_COMPLETE:
                if d1.latest and d2.latest:
                    latest_output12_dataset_complete_event(project, model, dataset_pattern, commit=commit)
                elif not d1.latest and not d2.latest:
                    non_latest_dataset_complete_output12_event(project, model, dataset_pattern, commit=commit)
                else:
                    sdlog.warning("SYDEVENT-032", "Event not triggered as one product is latest while the other product is not")  # TODO: is this the right way to handle this case?
        else:
            # Only one product exists for this dataset.
            #
            # Not sure if this code is required. Basically, it says that if only one
            # product is present (output1 or output2), then 'output12' is considered
            # ready to be triggered (i.e. output12 does not require both output1 and
            # output2 to be present; it only requires that if both are present, they
            # must both be complete).
            if dataset.latest:
                latest_output12_dataset_complete_event(project, model, dataset_pattern, commit=commit)
            else:
                non_latest_dataset_complete_output12_event(project, model, dataset_pattern, commit=commit)
def _update_parameters(parameters):
    for pname, pvalues in parameters.iteritems():
        if len(pvalues) == 0:
            # This case means this is a parameter without any associated value.
            # It is likely to be a NON-free parameter which is present in solr
            # parameters, but not used by any dataset (TBC).
            # e.g. 'realm' and 'driving_ensemble' in the query below are of that kind
            # https://esg-devel.nsc.liu.se/esg-search/search?limit=0&facets=*&type=Dataset&fields=*&format=application%2Fsolr%2Bxml
            #
            # When we are here, items likely come from the TAG4353453453 step.
            #
            # We DON'T add the parameter name as it seems not to be used
            # (another reason we don't store this parameter is that currently,
            # a non-free parameter can only be added to the param table if it is
            # associated with at least two values. If it is associated with only
            # one value, that value has to be None, which means it's a free
            # parameter. Maybe we could associate a 'non-free parameter without
            # value' with NULL or '', but that's a hacky way to solve this issue.
            # Maybe the best option is to redesign the 'param' table from scratch.)
            pass
        elif len(pvalues) == 1:
            # This case means this is a free parameter (i.e. without predefined
            # value choices), e.g. 'title'. This is because a NON-free parameter
            # has at least two values (e.g. true or false), else it's a free
            # parameter aka a constant...
            #
            # When we are here, items likely come from the TAG543534563 step.
            #
            # We add the parameter name if it does not exist.
            if not sddao.exists_parameter_name(pname):
                sdtools.print_stderr('Add new parameter: %s' % pname)
                try:
                    sddao.add_parameter_value(pname, None)  # value is always None in this case
                except IntegrityError:
                    sdlog.warning('SDDCACHE-003', 'Value has a duplicate in the db. Warn your datanode manager')
        elif len(pvalues) > 1:
            # This case means this is a NON-free parameter (i.e. with predefined
            # value choices), e.g. 'experiment'.
            #
            # When we are here, items likely come from the TAG4353453453 step.
            #
            # We add the parameter value if it does not exist.
            for item in pvalues:
                if not sddao.exists_parameter_value(pname, item.name):
                    sdtools.print_stderr('Add new value for %s parameter: %s' % (pname, item.name))
                    try:
                        sddao.add_parameter_value(pname, item.name)
                    except IntegrityError:
                        sdlog.warning('SDDCACHE-003', 'Value {} has a duplicate in the db. Warn your datanode manager'.format(item.name))