Example #1
def add_missing_attributes(files):

    # For some files, there is no 'tracking_id' attribute set. We have to set
    # it to NULL as this information is needed during database insertion
    # (otherwise a KeyError exception occurs).
    for f in files:
        if 'tracking_id' not in f:
            f['tracking_id'] = None

    # For some files, there is no 'checksum' attribute set. We have to set it
    # to NULL as this information is needed during database insertion
    # (otherwise a KeyError exception occurs).
    for f in files:
        if 'checksum' not in f:

            if sdconfig.log_domain_inconsistency:
                sdlog.warning("SDPRFIAT-001",
                              "File have no checksum (%s)" %
                              (f["file_functional_id"], ),
                              logger_name=sdconst.LOGGER_DOMAIN)

            f['checksum'] = None
            f['checksum_type'] = None

    return files
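
A minimal usage sketch for the function above (hypothetical metadata records; assumes add_missing_attributes and the sd* modules it logs through are importable):

files = [
    {'file_functional_id': 'ds.v1.file1.nc', 'tracking_id': 'hdl:21.14100/abc',
     'checksum': 'deadbeef', 'checksum_type': 'md5'},
    {'file_functional_id': 'ds.v1.file2.nc'},  # lacks 'tracking_id' and 'checksum'
]

files = add_missing_attributes(files)

# The missing keys now exist and are set to None, so a later database insertion
# can reference them without raising KeyError.
assert files[1]['tracking_id'] is None
assert files[1]['checksum'] is None and files[1]['checksum_type'] is None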
Example #2
def filter(files):
    keep=[]
    reject=[]

    if len(files)>0:

        # retrieve type
        file_=files[0]      # 'type' is the same for all files
        type_=file_['type'] # 'type' itself IS scalar

        if type_=='File':

            for f in files:

                variable=f.get('variable',[])
                assert isinstance(variable,list)

                if len(variable)==1:
                    keep.append(f)
                else:
                    reject.append(f)

                    sdlog.warning("SDPOSXPC-002","WARNING: '%s' file is malformed ('variable' attribute contains too much values)."%f['id'],stderr=True)

        elif type_=='Dataset':
            # currently, there are no reject rules for the Dataset type, so we keep all of them

            for f in files:
                keep.append(f)

    return (keep,reject)
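
A minimal usage sketch for the function above (hypothetical search-API records):

files = [
    {'id': 'f1', 'type': 'File', 'variable': ['tas']},        # exactly one variable -> kept
    {'id': 'f2', 'type': 'File', 'variable': ['tas', 'pr']},  # several variables -> rejected, SDPOSXPC-002 logged
    {'id': 'f3', 'type': 'File'},                             # no 'variable' key -> empty list -> rejected
]

keep, reject = filter(files)
# keep contains the 'f1' record; reject contains the 'f2' and 'f3' records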
Example #3
def run_helper(queries):
    """
    notes
      - "queries" is non-threadsafe (i.e. not a Queue), but doesn't matter as threads do not use it
    """
    total_query_to_process=len(queries)

    sdlog.debug("SDPROXMT-003","%d search-API queries to process (max_thread_per_host=%d,timeout=%d)"%(total_query_to_process,max_thread_per_host,sdconst.SEARCH_API_HTTP_TIMEOUT))

    while True:
        if sdconfig.proxymt_progress_stat:
            sdlog.info("SDPROXMT-033","threads per host: %s"%",".join(['%s=%s'%(host,len(searchAPIServices[host]['threadlist'])) for host in searchAPIServices.keys()]))

        if len(queries)>0:
            distribute_queries(queries)
        else:
            # leave the loop only if all threads completed
            if all_threads_completed():
                break

        # remove completed threads from list
        for host in searchAPIServices.keys():
            li=[]
            for t in searchAPIServices[host]['threadlist']:
                if t.is_alive():
                    li.append(t)
            searchAPIServices[host]['threadlist']=li

        # log
        total_query_already_processed = total_query_to_process - len(queries)
        if total_query_to_process > 0: # display progress only when there are queries to process
            if len(queries) > 0: # display progress only while queries remain to be processed
                sdlog.info("SDPROXMT-004","total_queries=%d, running_or_done_queries=%d, waiting_queries=%d"%(total_query_to_process,total_query_already_processed,len(queries)))

        # if all services are busy, we sleep to limit loop speed
        # (note that all the code around the "sleep" call is to detect system overload)
        sleep_time=10
        warning_threshold=5 # threshold not to emit warning for every small load exceedance
        befo=time.time()
        time.sleep(sleep_time)
        afte=time.time()
        diff=afte-befo
        if diff>sleep_time+warning_threshold:
            sdlog.warning("SDPROXMT-005","WARNING: system overload detected (sleep takes %d second to complete)."%diff)

    # retrieve result from output queue
    metadata=sdtypes.Metadata()
    while not __result_queue.empty():
        success=__result_queue.get(False) # retrieve result from ONE successful search-API call
        success.connect() # TAGKLK434L3K34K
        metadata.slurp(success) # warning: success is modified here

    # retrieve error from output queue and insert them into a list
    errors=[]
    while not __error_queue.empty():
        query=__error_queue.get(False)
        errors.append(query)

    return (metadata,errors)
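
The loop above relies on module-level state; a sketch of the shapes it appears to assume (hypothetical host names, not the actual module definitions):

import Queue  # Python 2 standard library, matching the rest of this code

# per-host bookkeeping: the worker threads currently attached to each search-API host
searchAPIServices = {
    'esgf-node.example.org': {'threadlist': []},  # list of threading.Thread objects
    'esgf-data.example.org': {'threadlist': []},
}

__result_queue = Queue.Queue()  # one entry per successful search-API call
__error_queue = Queue.Queue()   # one failed query per entry

max_thread_per_host = 3  # used here only in the SDPROXMT-003 log line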
Example #4
def dataset_complete_event(project,model,dataset,commit=True):
    sdlog.log("SYDEVENT-004","'dataset_complete_event' triggered (%s)"%dataset.dataset_functional_id,event_triggered_log_level)

    if project=='CMIP5':
        (ds_path_output1,ds_path_output2)=sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):

            d1=sddatasetdao.get_dataset(path=ds_path_output1)
            d2=sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.status==sdconst.DATASET_STATUS_COMPLETE and d2.status==sdconst.DATASET_STATUS_COMPLETE:
                dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
                dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)

                if d1.latest and d2.latest:
                    latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
                elif not d1.latest and not d2.latest:
                    non_latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
                else:
                    sdlog.warning("SYDEVENT-032","Event not triggered as one product is latest while the other product is not") # TODO: is this the right way to handle this case ?
        else:
            dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
            dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)

            if dataset.latest:
                latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
            else:
                non_latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)

    # <<<--- 'latest' flag management related code begin

    # store current 'latest' flag state
    old_latest=dataset.latest

    # TODO: check if we switch the latest flag independently for each product (meaning output1 latest can be 1 while output2 latest is 0)
    # tag4342342

    # compute new 'latest' flag
    if not old_latest:
        # old state is not latest

        sddatasetflag.update_latest_flag(dataset) # warning: this method modifies the dataset in memory (and in database too)
    else:
        # nothing to do concerning the 'latest' flag as the current dataset is already the latest
        # (the latest flag can only be switched off (i.e. set to False) by *other* dataset versions, not by the dataset itself)
        pass

    # store new 'latest' flag state
    new_latest=dataset.latest

    # --->>> 'latest' flag management related code end


    # cascade 2
    if (not old_latest) and new_latest:
        dataset_latest_event(project,model,dataset.path,commit=commit) # trigger 'dataset_latest' event
Example #5
def check_coherency(facets_groups):
    for facets_group in facets_groups:
        if 'time_frequency' in facets_group:
            if 'ensemble' in facets_group:
                if "fx" in facets_group['time_frequency']:
                    if "r0i0p0" not in facets_group['ensemble']:

                        # print a warning, because 'r0i0p0' is the only choice for fx frequency
                        sdlog.warning("SYDCHECK-003","'fx' frequency is used, but ensemble do not include 'r0i0p0'")

    return facets_groups
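
A minimal usage sketch for the check above (hypothetical facet groups):

facets_groups = [
    {'time_frequency': ['mon'], 'ensemble': ['r1i1p1']},           # no 'fx' -> no warning
    {'time_frequency': ['fx'], 'ensemble': ['r1i1p1']},            # triggers the SYDCHECK-003 warning
    {'time_frequency': ['fx'], 'ensemble': ['r0i0p0', 'r1i1p1']},  # 'r0i0p0' present -> no warning
]

facets_groups = check_coherency(facets_groups)  # groups are returned unchanged; only warnings are emitted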
Example #6
def remove_malformed_dataset_version(files):
    keep=[]
    reject=[] # not used

    for f in files:
        if sdidtest.is_version_number(f["dataset_version"]):
            keep.append(f)
        else:
            sdlog.warning("SDPREPAR-003","Incorrect dataset version ('%s')"%(f["dataset_functional_id"],),stderr=False)
            reject.append(f)

    return keep
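
A minimal usage sketch for the filter above (hypothetical records; assumes sdidtest.is_version_number accepts the usual 'vYYYYMMDD' form and rejects 'latest'):

files = [
    {'dataset_functional_id': 'cmip5.output1.IPSL.tas.v20120101', 'dataset_version': 'v20120101'},
    {'dataset_functional_id': 'cmip5.output1.IPSL.tas.latest', 'dataset_version': 'latest'},
]

files = remove_malformed_dataset_version(files)
# only the first record is returned; the second is logged (SDPREPAR-003) and dropped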
Example #7
def remove_empty_files(path):
    for p in sdtools.walk_backward_without_sibling(path):
        for name in os.listdir(p):
            f = '%s/%s' % (p,name)
            if not ignore(f): # this is to avoid removing files at the top of the tree that are not related to synda (e.g. hidden files in the HOME dir)
                if os.path.isfile(f):
                    if not os.path.islink(f):
                        if os.path.getsize(f)==0:
                            try:
                                sdlog.info("SYNCLEAN-090","Remove empty file (%s)"%(f,))
                                os.remove(f)
                            except Exception as e:
                                sdlog.warning("SYNCLEAN-040","Error occurs during file deletion (%s,%s)"%(f,str(e)))
Example #8
def _reload_parameters(parameters):
    for pname, pvalues in parameters.iteritems():
        for item in pvalues:
            try:
                if item is None:
                    sddao.add_parameter_value(pname, None, commit=False)
                else:
                    sddao.add_parameter_value(pname, item.name, commit=False)
            except IntegrityError:
                sdlog.warning(
                    'SDDCACHE-003',
                    'Value {} has a duplicate in the db. Warn your datanode manager'
                    .format(item.name))
    sddb.conn.commit()
Example #9
def remove_malformed_dataset_version(files):
    keep = []
    reject = []  # not used

    for f in files:
        if sdidtest.is_version_number(f["dataset_version"]):
            keep.append(f)
        else:
            sdlog.warning("SDPREPAR-003",
                          "Incorrect dataset version ('%s')" %
                          (f["dataset_functional_id"], ),
                          stderr=False)
            reject.append(f)

    return keep
Example #10
def run(files):
    for file in files:
        protocol = sdpostpipelineutils.get_attached_parameter(
            file, 'protocol', sdconst.TRANSFER_PROTOCOL_HTTP)

        if protocol not in sdconst.TRANSFER_PROTOCOLS:
            raise SDException("SYNPROTO-004",
                              "Incorrect protocol (%s)" % protocol)

        if protocol == sdconst.TRANSFER_PROTOCOL_GLOBUS:
            if 'url_globus' in file:
                file['url'] = file['url_globus']
            elif 'url_gridftp' in file:
                file['url'] = file['url_gridftp']
            elif 'url_http' in file:
                sdlog.warning('SYNPROTO-005',
                              'Fallback to http as globus url is missing')
                file['url'] = file['url_http']

        elif protocol == sdconst.TRANSFER_PROTOCOL_GRIDFTP:
            if 'url_gridftp' in file:
                file['url'] = file['url_gridftp']
            elif 'url_http' in file:
                sdlog.debug(
                    'SYNPROTO-002',
                    'Fallback to http as gridftp url is missing (%s)' %
                    file["title"])
                file['url'] = file['url_http']

        elif protocol == sdconst.TRANSFER_PROTOCOL_HTTP:
            if 'url_http' in file:
                file['url'] = file['url_http']
            elif 'url_gridftp' in file:
                sdlog.warning('SYNPROTO-001',
                              'Fallback to gridftp as http url is missing')
                file['url'] = file['url_gridftp']

        else:
            raise SDException("SYNPROTO-003",
                              "Incorrect protocol (%s)" % protocol)

        sdtools.remove_dict_items(
            file, ['url_globus', 'url_gridftp', 'url_http', 'url_opendap'])

    return files
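
A minimal usage sketch for the protocol mapping above (hypothetical records; it assumes sdpostpipelineutils.get_attached_parameter reads a per-file 'attached_parameters' dict and that the sdconst protocol constants are the plain strings 'http', 'gridftp' and 'globus'):

files = [
    # gridftp requested and available -> 'url' becomes the gridftp one
    {'title': 'a.nc', 'attached_parameters': {'protocol': 'gridftp'},
     'url_gridftp': 'gsiftp://host/a.nc', 'url_http': 'http://host/a.nc'},
    # globus requested but only http available -> SYNPROTO-005 warning, http fallback
    {'title': 'b.nc', 'attached_parameters': {'protocol': 'globus'},
     'url_http': 'http://host/b.nc'},
]

files = run(files)
# each record now carries a single 'url' key; the url_globus/url_gridftp/url_http/url_opendap keys are removed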
Example #11
def remove_empty_files(path):
    for p in sdtools.walk_backward_without_sibling(path):
        for name in os.listdir(p):
            f = '%s/%s' % (p, name)
            # this is to avoid removing files at the top of the tree that are not related to synda (e.g. hidden files in the HOME dir)
            if not ignore(f):
                if os.path.isfile(f):
                    if not os.path.islink(f):
                        if os.path.getsize(f) == 0:
                            try:
                                sdlog.info("SYNCLEAN-090",
                                           "Remove empty file (%s)" % (f, ))
                                os.remove(f)
                            except Exception as e:
                                sdlog.warning(
                                    "SYNCLEAN-040",
                                    "Error occurs during file deletion (%s,%s)"
                                    % (f, str(e)))
Example #12
def remove_malformed_dataset_functional_id(files):
    """Remove files with malformed dataset_functional_id.

    Note
        If this func fails to extract the dataset version from dataset_functional_id,
        the file is rejected.
    """
    keep=[]
    reject=[] # not used

    for f in files:
        m=re.search("^(.*)\.([^.]*)$",f["dataset_functional_id"])
        if m!=None:
            keep.append(f)
        else:
            sdlog.warning("SDPREPAR-002","Incorrect dataset_functional_id ('%s')"%(f["dataset_functional_id"],),stderr=False)
            reject.append(f)

    return keep
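
A minimal usage sketch showing which ids the regex above accepts (hypothetical records):

files = [
    {'dataset_functional_id': 'cmip5.output1.IPSL.tas.v20120101'},  # ends with a dot-separated field -> kept
    {'dataset_functional_id': 'malformed_id_without_any_dot'},      # no dot, regex does not match -> rejected
]

files = remove_malformed_dataset_functional_id(files)
# only the first record remains; the second is logged (SDPREPAR-002) and dropped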
Example #13
def check_DRS_consistency(files):

    def remove_version_from_path(dataset_functional_id):
        """
        BEWARE: we expect in this func that the last field of the "dataset_functional_id" is
                the dataset version, no matter what the project is.
        """
        return re.sub(r'\.[^.]+$','',dataset_functional_id) # remove last field (version)

    for f in files:
        if "dataset_template" in f: # For some project, template is missing. In this case, we don"t do the check.

            # TODO: maybe replace '.' with '/' character in code below (i.e. misleading because variables below are called path, but do not contain '/')

            path_from_id=remove_version_from_path(f["dataset_functional_id"])
            path_from_template=f["dataset_template"]%f

            if path_from_id!=path_from_template:
                sdlog.warning("SDCHKFIL-001","inconsistency detected between metadata and search-API facet (path_from_id=%s,path_from_template=%s)"%(path_from_id,path_from_template))
Example #14
def add_missing_attributes(files):

    # For some files, there is no 'tracking_id' attribute set. We have to set
    # it to NULL as this information is needed during database insertion
    # (otherwise a KeyError exception occurs).
    for f in files:
        if 'tracking_id' not in f:
            f['tracking_id']=None

    # For some files, there is no 'checksum' attribute set. We have to set it
    # to NULL as this information is needed during database insertion
    # (otherwise a KeyError exception occurs).
    for f in files:
        if 'checksum' not in f:
            sdlog.warning("SDPRFIAT-001","File have no checksum (%s)"%(f["file_functional_id"],))
            f['checksum']=None
            f['checksum_type']=None

    return files
Example #15
def extract_info_from_openid(openid):
    """Retrieve username,host,port informations from ESGF openID."""

    # openid check (see #44 for more info)
    for openid_host in invalid_openids:
        if openid_host in openid:
            sdlog.warning("SDOPENID-210", "Invalid openid (%s)" % openid)

    try:
        xrds_buf = sdnetutils.HTTP_GET_2(openid, timeout=10, verify=False)
        (hostname, port) = parse_XRDS(xrds_buf)
        username = parse_openid(openid)
        return (hostname, port, username)
    except Exception as e:
        sdlog.error("SDOPENID-200",
                    "Error occurred while processing OpenID (%s)" % str(e))

        raise OpenIDProcessingException(
            'SDOPENID-002', 'Error occurred while processing OpenID')
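
A minimal call sketch (hypothetical openid URL; the function performs a real HTTP GET through sdnetutils, so this only works with network access to the identity provider):

try:
    hostname, port, username = extract_info_from_openid(
        'https://esgf-node.example.org/esgf-idp/openid/jdoe')
except OpenIDProcessingException:
    pass  # the SDOPENID-200 error has already been logged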
Example #16
def run(files):
    for file in files:
        protocol=sdpostpipelineutils.get_attached_parameter(file,'protocol',sdconst.TRANSFER_PROTOCOL_HTTP)

        if protocol not in sdconst.TRANSFER_PROTOCOLS:
            raise SDException("SYNPROTO-004","Incorrect protocol (%s)"%protocol)

        if 'url_gridftp' in file and 'url_http' in file:

            if protocol==sdconst.TRANSFER_PROTOCOL_GRIDFTP:
                file['url']=file['url_gridftp']
            elif protocol==sdconst.TRANSFER_PROTOCOL_HTTP:
                file['url']=file['url_http']
            else:
                raise SDException("SYNPROTO-003","Incorrect protocol (%s)"%protocol)

        elif 'url_gridftp' in file:
            # only gridftp

            if protocol==sdconst.TRANSFER_PROTOCOL_HTTP:
                sdlog.warning('SYNPROTO-001','Fallback to gridftp as http url is missing')

            file['url']=file['url_gridftp']

        elif 'url_http' in file:
            # only http
    
            if protocol==sdconst.TRANSFER_PROTOCOL_GRIDFTP:
                sdlog.debug('SYNPROTO-002','Fallback to http as gridftp url is missing (%s)'%file["title"])

            file['url']=file['url_http']

        else:
            # no url available to download the file
            # (should not be here as sdremoverow takes care of those cases)

            assert False


        sdtools.remove_dict_items(file,['url_gridftp', 'url_http', 'url_opendap'])

    return files
Example #17
def check_DRS_consistency(files):
    def remove_version_from_path(dataset_functional_id):
        """
        BEWARE: we expect in this func that the last field of the "dataset_functional_id" is
                the dataset version, no matter what the project is.
        """
        return re.sub(r'\.[^.]+$', '',
                      dataset_functional_id)  # remove last field (version)

    for f in files:
        if "dataset_template" in f:  # For some project, template is missing. In this case, we don"t do the check.

            # TODO: maybe replace '.' with '/' character in code below (i.e. misleading because variables below are called path, but do not contain '/')

            path_from_id = remove_version_from_path(f["dataset_functional_id"])
            path_from_template = f["dataset_template"] % f

            if path_from_id != path_from_template:
                sdlog.warning(
                    "SDCHKFIL-001",
                    "inconsistency detected between metadata and search-API facet (path_from_id=%s,path_from_template=%s)"
                    % (path_from_id, path_from_template))
Example #18
def remove_malformed_dataset_functional_id(files):
    """Remove files with malformed dataset_functional_id.

    Note
        If this func fails to extract the dataset version from dataset_functional_id,
        the file is rejected.
    """
    keep = []
    reject = []  # not used

    for f in files:
        m = re.search("^(.*)\.([^.]*)$", f["dataset_functional_id"])
        if m != None:
            keep.append(f)
        else:
            sdlog.warning("SDPREPAR-002",
                          "Incorrect dataset_functional_id ('%s')" %
                          (f["dataset_functional_id"], ),
                          stderr=False)
            reject.append(f)

    return keep
Example #19
def dataset_complete_event(project, model, dataset, commit=True):
    sdlog.log(
        "SYDEVENT-004", "'dataset_complete_event' triggered (%s)" %
        dataset.dataset_functional_id, event_triggered_log_level)

    # not used for now
    """
    event=Event(name=sdconst.EVENT_DATASET_COMPLETE)
    event.project=project
    event.model=model
    event.dataset_pattern=dataset_pattern
    event.variable=''
    event.filename_pattern=''
    event.crea_date=sdtime.now()
    event.priority=sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(event,commit=commit)
    """

    # <<<--- 'latest' flag management related code begin

    # store current 'latest' flag state
    old_latest = dataset.latest

    # TODO: check if we switch the latest flag independently for each product (meaning output1 latest can be 1 while output2 latest is 0)
    # tag4342342

    # compute new 'latest' flag
    if not old_latest:
        # old state is not latest

        sddatasetflag.update_latest_flag(
            dataset
        )  # warning: this method modifies the dataset object in memory (and in database too)
    else:
        # nothing to do concerning the 'latest' flag as the current dataset is already the latest
        # (the latest flag can only be switched off (i.e. set to False) by *other* dataset versions, not by the dataset itself)
        pass

    # store new 'latest' flag state
    new_latest = dataset.latest

    # --->>> 'latest' flag management related code end

    # cascade 1 (trigger dataset latest switch event)
    if (not old_latest) and new_latest:
        # latest flag has been switched from false to true

        dataset_latest_event(project, model, dataset.path,
                             commit=commit)  # trigger 'dataset_latest' event

    # cascade 2 (trigger latest dataset complete event)
    if dataset.latest:
        latest_dataset_complete_event(project,
                                      model,
                                      dataset.local_path,
                                      commit=commit)
    else:
        non_latest_dataset_complete_event(project,
                                          model,
                                          dataset.local_path,
                                          commit=commit)

    # cascade 3 (trigger output12 dataset complete event)
    if project == 'CMIP5':
        (ds_path_output1,
         ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(
                path=ds_path_output1) and sddatasetdao.exists_dataset(
                    path=ds_path_output2):

            d1 = sddatasetdao.get_dataset(path=ds_path_output1)
            d2 = sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.status == sdconst.DATASET_STATUS_COMPLETE and d2.status == sdconst.DATASET_STATUS_COMPLETE:
                dataset_pattern = sdproduct.replace_output12_product_with_wildcard(
                    dataset.local_path)
                dataset_complete_output12_event(project,
                                                model,
                                                dataset_pattern,
                                                commit=commit)
        else:
            # only one product exists for this dataset

            # not sure if this code is required.
            # basically, it says that if only one product is present (output1 or output2)
            # then the 'output12' event is considered ready to be triggered
            # (i.e. output12 does not require both output1 and output2 to be present;
            # it only requires that if both are present, they must both be complete)
            #
            dataset_pattern = sdproduct.replace_output12_product_with_wildcard(
                dataset.local_path)
            dataset_complete_output12_event(project,
                                            model,
                                            dataset_pattern,
                                            commit=commit)

    # cascade 4 (trigger latest output12 dataset complete event)
    if project == 'CMIP5':
        (ds_path_output1,
         ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(
                path=ds_path_output1) and sddatasetdao.exists_dataset(
                    path=ds_path_output2):

            d1 = sddatasetdao.get_dataset(path=ds_path_output1)
            d2 = sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.status == sdconst.DATASET_STATUS_COMPLETE and d2.status == sdconst.DATASET_STATUS_COMPLETE:
                if d1.latest and d2.latest:
                    latest_output12_dataset_complete_event(project,
                                                           model,
                                                           dataset_pattern,
                                                           commit=commit)
                elif not d1.latest and not d2.latest:
                    non_latest_dataset_complete_output12_event(project,
                                                               model,
                                                               dataset_pattern,
                                                               commit=commit)
                else:
                    sdlog.warning(
                        "SYDEVENT-032",
                        "Event not triggered as one product is latest while the other product is not"
                    )  # TODO: is this the right way to handle this case ?
        else:
            # only one product exists for this dataset

            # not sure if this code is required.
            # basically, it says that if only one product is present (output1 or output2)
            # then the 'output12' event is considered ready to be triggered
            # (i.e. output12 does not require both output1 and output2 to be present;
            # it only requires that if both are present, they must both be complete)
            #
            if dataset.latest:
                latest_output12_dataset_complete_event(project,
                                                       model,
                                                       dataset_pattern,
                                                       commit=commit)
            else:
                non_latest_dataset_complete_output12_event(project,
                                                           model,
                                                           dataset_pattern,
                                                           commit=commit)
Example #20
def dataset_complete_event(project,model,dataset,commit=True):
    sdlog.log("SYDEVENT-004","'dataset_complete_event' triggered (%s)"%dataset.dataset_functional_id,event_triggered_log_level)

    # not used for now
    """
    event=Event(name=sdconst.EVENT_DATASET_COMPLETE)
    event.project=project
    event.model=model
    event.dataset_pattern=dataset_pattern
    event.variable=''
    event.filename_pattern=''
    event.crea_date=sdtime.now()
    event.priority=sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(event,commit=commit)
    """

    # <<<--- 'latest' flag management related code begin

    # store current 'latest' flag state
    old_latest=dataset.latest

    # TODO: check if we switch the latest flag independently for each product (meaning output1 latest can be 1 while output2 latest is 0)
    # tag4342342

    # compute new 'latest' flag
    if not old_latest:
        # old state is not latest

        sddatasetflag.update_latest_flag(dataset) # warning: this method modifies the dataset object in memory (and in database too)
    else:
        # nothing to do concerning the 'latest' flag as the current dataset is already the latest
        # (the latest flag can only be switched off (i.e. set to False) by *other* dataset versions, not by the dataset itself)
        pass

    # store new 'latest' flag state
    new_latest=dataset.latest

    # --->>> 'latest' flag management related code end


    # cascade 1 (trigger dataset latest switch event)
    if (not old_latest) and new_latest:
        # latest flag has been switched from false to true

        dataset_latest_event(project,model,dataset.path,commit=commit) # trigger 'dataset_latest' event


    # cascade 2 (trigger latest dataset complete event)
    if dataset.latest:
        latest_dataset_complete_event(project,model,dataset.local_path,commit=commit)
    else:
        non_latest_dataset_complete_event(project,model,dataset.local_path,commit=commit)


    # cascade 3 (trigger output12 dataset complete event)
    if project=='CMIP5':
        (ds_path_output1,ds_path_output2)=sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):

            d1=sddatasetdao.get_dataset(path=ds_path_output1)
            d2=sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.status==sdconst.DATASET_STATUS_COMPLETE and d2.status==sdconst.DATASET_STATUS_COMPLETE:
                dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
                dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
        else:
            # only one product exists for this dataset

            # not sure if this code is required.
            # basically, it says that if only one product is present (output1 or output2)
            # then the 'output12' event is considered ready to be triggered
            # (i.e. output12 does not require both output1 and output2 to be present;
            # it only requires that if both are present, they must both be complete)
            #
            dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
            dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)


    # cascade 4 (trigger latest output12 dataset complete event)
    if project=='CMIP5':
        (ds_path_output1,ds_path_output2)=sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):

            d1=sddatasetdao.get_dataset(path=ds_path_output1)
            d2=sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.status==sdconst.DATASET_STATUS_COMPLETE and d2.status==sdconst.DATASET_STATUS_COMPLETE:
                if d1.latest and d2.latest:
                    latest_output12_dataset_complete_event(project,model,dataset_pattern,commit=commit)
                elif not d1.latest and not d2.latest:
                    non_latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
                else:
                    sdlog.warning("SYDEVENT-032","Event not triggered as one product is latest while the other product is not") # TODO: is this the right way to handle this case ?
        else:
            # only one product exists for this dataset

            # not sure if this code is required.
            # basically, it says that if only one product is present (output1 or output2)
            # then the 'output12' event is considered ready to be triggered
            # (i.e. output12 does not require both output1 and output2 to be present;
            # it only requires that if both are present, they must both be complete)
            #
            if dataset.latest:
                latest_output12_dataset_complete_event(project,model,dataset_pattern,commit=commit)
            else:
                non_latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
Example #21
def _update_parameters(parameters):
    for pname, pvalues in parameters.iteritems():
        if len(pvalues) == 0:
            # This case means this is a parameter without any associated value.

            # It is likely to be a NON-free parameter which is present in Solr
            # parameters, but not used by any dataset (TBC).
            # e.g. 'realm' and 'driving_ensemble' in the query below are of that kind
            # https://esg-devel.nsc.liu.se/esg-search/search?limit=0&facets=*&type=Dataset&fields=*&format=application%2Fsolr%2Bxml
            #
            # When we are here, items likely come from TAG4353453453 step
            #
            # We DON'T add the parameter name as it seems not to be used
            # (another reason we don't store this parameter is that currently,
            # non-free parameter can only be added in param table if they are
            # associated with at least two values. If they are associated with
            # only one value, it has to be None, and it means it's a free parameter.
            # Maybe we can associate 'non-free parameter without value' with
            # NULL or '', but it's a hacky way to solve this issue. Maybe best
            # is to redesign 'param' table from scratch)

            pass
        elif len(pvalues) == 1:
            # This case means this is a free parameter (i.e. without predefined
            # value choices) e.g. 'title'. This is because a NON-free parameter
            # has at least two values (e.g. true or false); otherwise it's a free
            # parameter, aka a constant...
            #
            # When we are here, items likely come from TAG543534563 step
            #
            # We add the parameter name if it does not already exist.

            if not sddao.exists_parameter_name(pname):
                sdtools.print_stderr('Add new parameter: %s' % pname)
                try:
                    sddao.add_parameter_value(
                        pname, None)  # value is always None in this case
                except IntegrityError:
                    sdlog.warning(
                        'SDDCACHE-003',
                        'Value has a duplicate in the db. Warn your datanode manager'
                    )

        elif len(pvalues) > 1:
            # This case means this is a NON-free parameter (i.e. with predefined
            # value choices) e.g. 'experiment'.
            #
            # When we are here, items likely come from TAG4353453453 step
            #
            # We add the parameter name if it does not already exist.

            for item in pvalues:
                if not sddao.exists_parameter_value(pname, item.name):
                    sdtools.print_stderr('Add new value for %s parameter: %s' %
                                         (pname, item.name))
                    try:
                        sddao.add_parameter_value(pname, item.name)
                    except IntegrityError:
                        sdlog.warning(
                            'SDDCACHE-003',
                            'Value {} has a duplicate in the db. Warn your datanode manager'
                            .format(item.name))
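
A sketch of the input shape the three branches above distinguish (hypothetical values; items are assumed to be small objects exposing a .name attribute, as both this function and _reload_parameters use them):

class Item(object):
    def __init__(self, name):
        self.name = name

parameters = {
    'realm': [],                                        # no value -> ignored (first branch)
    'title': [None],                                    # single value -> free parameter (second branch)
    'experiment': [Item('historical'), Item('rcp85')],  # several values -> non-free parameter (third branch)
}

_update_parameters(parameters)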