Example #1
def variable_complete_event(project,model,dataset,variable):
    sdlog.info("SYDEVENT-002","'variable_complete_event' triggered (%s,%s)"%(dataset.dataset_functional_id,variable))

    # cascade 1
    if dataset.status==sdconst.DATASET_STATUS_COMPLETE:
        dataset_complete_event(project,model,dataset) # trigger 'dataset complete' event

    # cascade 2
    if project=='CMIP5':

        assert '/output/' not in dataset.path

        (ds_path_output1,ds_path_output2)=sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):

            d1=sddatasetdao.get_dataset(path=ds_path_output1)
            d2=sddatasetdao.get_dataset(path=ds_path_output2)

            if sdvariable.is_variable_complete(d1.dataset_id,variable) and sdvariable.is_variable_complete(d2.dataset_id,variable):
                dataset_pattern=sdproduct.build_output12_dataset_pattern(dataset.local_path)
                variable_complete_output12_event(project,model,dataset_pattern,variable) # trigger event (cross dataset event)
        else:
            # we also trigger 'variable_complete_output12_event' when the variable exists in only one product (with a single product, the output12 condition is trivially satisfied)

            dataset_pattern=sdproduct.build_output12_dataset_pattern(dataset.local_path)
            variable_complete_output12_event(project,model,dataset_pattern,variable) # trigger event (cross dataset event)
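# Illustration (not from the source): a standalone sketch of the CMIP5
# output1/output2 pairing that sdproduct presumably performs above. The
# helper names and the '*' wildcard character are assumptions for
# illustration only.
def get_output12_dataset_paths_sketch(path):
    # 'cmip5/output1/...' -> ('cmip5/output1/...', 'cmip5/output2/...')
    return (path.replace('/output2/','/output1/'),
            path.replace('/output1/','/output2/'))

def build_output12_dataset_pattern_sketch(path):
    # replace the product segment with a wildcard so one pattern matches both products
    return path.replace('/output1/','/output*/').replace('/output2/','/output*/')

assert get_output12_dataset_paths_sketch('cmip5/output1/MIROC/tas')[1]=='cmip5/output2/MIROC/tas'
assert build_output12_dataset_pattern_sketch('cmip5/output2/MIROC/tas')=='cmip5/output*/MIROC/tas'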
Example #2
def dataset_latest_event(project,model,dataset_path,commit=True):
    # this event means a dataset has just been granted 'latest' status (i.e. it was not latest before and now is)

    sdlog.log("SYDEVENT-008","'dataset_latest_event' triggered (%s)"%dataset_path,event_triggered_log_level)

    # not used for now
    """
    event=Event(name=sdconst.EVENT_DATASET_LATEST)
    event.project=project
    event.model=model
    event.dataset_pattern=dataset_pattern
    event.variable=''
    event.filename_pattern=''
    event.crea_date=sdtime.now()
    event.priority=sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(event,commit=commit)
    """

    # cascade
    if project=='CMIP5':
        assert '/output/' not in dataset_path

        (ds_path_output1,ds_path_output2)=sdproduct.get_output12_dataset_paths(dataset_path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):

            d1=sddatasetdao.get_dataset(path=ds_path_output1)
            d2=sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.latest and d2.latest:
                dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset_path)
                output12_dataset_latest_event(project,model,dataset_pattern,commit=commit) # trigger event
        else:
            dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset_path)
            output12_dataset_latest_event(project,model,dataset_pattern,commit=commit) # trigger event
Example #3
def dataset_complete_event(project,model,dataset,commit=True):
    sdlog.log("SYDEVENT-004","'dataset_complete_event' triggered (%s)"%dataset.dataset_functional_id,event_triggered_log_level)

    if project=='CMIP5':
        (ds_path_output1,ds_path_output2)=sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):

            d1=sddatasetdao.get_dataset(path=ds_path_output1)
            d2=sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.status==sdconst.DATASET_STATUS_COMPLETE and d2.status==sdconst.DATASET_STATUS_COMPLETE:
                dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
                dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)

                if d1.latest and d2.latest:
                    latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
                elif not d1.latest and not d2.latest:
                    non_latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
                else:
                    sdlog.warning("SYDEVENT-032","Event not triggered as one product is latest while the other product is not") # TODO: is this the right way to handle this case ?
        else:
            dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
            dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)

            if dataset.latest:
                latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
            else:
                non_latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)

    # <<<--- 'latest' flag management related code begin

    # store current 'latest' flag state
    old_latest=dataset.latest

    # TODO: check whether we should switch the latest flag independently for each product (meaning output1 latest could be 1 while output2 latest is 0)
    # tag4342342

    # compute new 'latest' flag
    if not old_latest:
        # old state is not latest

        sddatasetflag.update_latest_flag(dataset) # warning: this method modifies the dataset in memory (and in database too)
    else:
        # nothing to do concerning the 'latest' flag as the current dataset is already the latest
        # (the latest flag can only be switched off (i.e. set to False) by *other* dataset versions, never by the dataset itself!)
        pass

    # store new 'latest' flag state
    new_latest=dataset.latest

    # --->>> 'latest' flag management related code end


    # cascade 2
    if (not old_latest) and new_latest:
        dataset_latest_event(project,model,dataset.path,commit=commit) # trigger 'dataset_latest' event
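# Illustration (not from the source): the 'latest' flag bookkeeping above
# reduces to detecting a False -> True transition. Standalone sketch; the
# stub class and updater callback are hypothetical.
class _DatasetStub(object):
    def __init__(self,latest):
        self.latest=latest

def latest_switched_on(dataset,update_latest_flag):
    old_latest=dataset.latest
    if not old_latest:
        update_latest_flag(dataset) # may switch dataset.latest in place
    return (not old_latest) and dataset.latest

assert latest_switched_on(_DatasetStub(False),lambda d:setattr(d,'latest',True))
assert not latest_switched_on(_DatasetStub(True),lambda d:None)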
Example #4
def variable_complete_event(project, model, dataset, variable, commit=True):
    sdlog.log(
        "SYDEVENT-002", "'variable_complete_event' triggered (%s,%s)" %
        (dataset.dataset_functional_id, variable), event_triggered_log_level)

    if sdconfig.is_event_enabled(sdconst.EVENT_VARIABLE_COMPLETE, project):
        event = Event(name=sdconst.EVENT_VARIABLE_COMPLETE)
        event.project = project
        event.model = model
        event.dataset_pattern = dataset.local_path
        event.variable = variable
        event.filename_pattern = ''
        event.crea_date = sdtime.now()
        event.priority = sdconst.DEFAULT_PRIORITY
        sdeventdao.add_event(event, commit=commit)

    # cascade 1 (trigger dataset event)
    if dataset.status == sdconst.DATASET_STATUS_COMPLETE:
        dataset_complete_event(project, model,
                               dataset)  # trigger 'dataset complete' event

    # cascade 2 (trigger variable output12 event)
    if project == 'CMIP5':

        if '/output/' in dataset.path:
            return

        (ds_path_output1,
         ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(
                path=ds_path_output1) and sddatasetdao.exists_dataset(
                    path=ds_path_output2):

            d1 = sddatasetdao.get_dataset(path=ds_path_output1)
            d2 = sddatasetdao.get_dataset(path=ds_path_output2)

            if sdvariable.is_variable_complete(
                    d1.dataset_id,
                    variable) and sdvariable.is_variable_complete(
                        d2.dataset_id, variable):
                dataset_pattern = sdproduct.replace_output12_product_with_wildcard(
                    dataset.local_path)
                variable_complete_output12_event(
                    project, model, dataset_pattern,
                    variable)  # trigger event (cross dataset event)
        else:
            # we also trigger 'variable_complete_output12_event' when the variable exists in only one product (with a single product, the output12 condition is trivially satisfied)

            dataset_pattern = sdproduct.replace_output12_product_with_wildcard(
                dataset.local_path)
            variable_complete_output12_event(
                project, model, dataset_pattern,
                variable)  # trigger event (cross dataset event)
Example #5
def dataset_latest_event(project, model, dataset_path, commit=True):
    # this event means a dataset has just been granted 'latest' status (i.e. it was not latest before and now is)

    sdlog.log("SYDEVENT-008",
              "'dataset_latest_event' triggered (%s)" % dataset_path,
              event_triggered_log_level)

    # not used for now
    """
    event=Event(name=sdconst.EVENT_DATASET_LATEST)
    event.project=project
    event.model=model
    event.dataset_pattern=dataset_pattern
    event.variable=''
    event.filename_pattern=''
    event.crea_date=sdtime.now()
    event.priority=sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(event,commit=commit)
    """

    # cascade
    if project == 'CMIP5':
        if '/output/' in dataset_path:
            return

        (ds_path_output1,
         ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset_path)
        if sddatasetdao.exists_dataset(
                path=ds_path_output1) and sddatasetdao.exists_dataset(
                    path=ds_path_output2):

            d1 = sddatasetdao.get_dataset(path=ds_path_output1)
            d2 = sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.latest and d2.latest:
                dataset_pattern = sdproduct.replace_output12_product_with_wildcard(
                    dataset_path)
                output12_dataset_latest_event(project,
                                              model,
                                              dataset_pattern,
                                              commit=commit)  # trigger event
        else:
            dataset_pattern = sdproduct.replace_output12_product_with_wildcard(
                dataset_path)
            output12_dataset_latest_event(project,
                                          model,
                                          dataset_pattern,
                                          commit=commit)  # trigger event
Example #6
def set_timestamp_when_empty__BATCH_MODE_1():
    """
    Retrieve *all* datasets from ESGF, then update local timestamp.

    Not used.
    """
    datasets=sddump.dump_ESGF(['type=Dataset','searchapi_host=esgf-data.dkrz.de'],'timestamp')

    sdlog.info("SDREBUIL-008","%i dataset(s) retrieved from ESGF."%len(datasets))
    sdlog.info("SDREBUIL-012","Start updating timestamp in local database.")

    for i,d in enumerate(datasets):

        if 'instance_id' in d: # this is because some datasets have no instance_id in ESGF!
            dataset=sddatasetdao.get_dataset(dataset_functional_id=d['instance_id'])
            if dataset is not None:
                if 'timestamp' in d: # this is because some datasets have no timestamp in ESGF!
                    dataset.timestamp=d['timestamp']
                    sddatasetdao.update_dataset(dataset,commit=False,keys=['timestamp'])

        SDProgressBar.print_progress_bar(len(datasets),i,title="Updating dataset's timestamp.. ")

    SDProgressBar.progress_complete()

    sddb.conn.commit()
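# Illustration (not from the source): the update loop above passes
# commit=False on every row and commits once at the end, which is the usual
# way to batch many small UPDATEs in sqlite3. Minimal standalone sketch:
import sqlite3

conn=sqlite3.connect(':memory:')
conn.execute('CREATE TABLE dataset (dataset_id INTEGER PRIMARY KEY, timestamp TEXT)')
conn.executemany('INSERT INTO dataset (timestamp) VALUES (?)',[(None,)]*3)
for dataset_id in (1,2,3):
    conn.execute('UPDATE dataset SET timestamp=? WHERE dataset_id=?',
                 ('2014-03-18 12:00:00',dataset_id)) # no per-row commit
conn.commit() # single commit for the whole batch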
Example #7
def complete(files):
    for f in files:

        # the if/else block below exists because this module can be used to process different metadata types (File and Dataset).
        if f["type"]==sdconst.SA_TYPE_FILE:
            transfer=sdfiledao.get_file(f['file_functional_id'])

            if transfer is not None:
                f['status']=transfer.status

                if sdpostpipelineutils.exists_attached_parameter(f,'priority'): # a selection parameter may override the default priority (useful when the user wants to change the priority of existing files; yes, a search-API request is needed in this case!)
                    f['priority']=sdpostpipelineutils.get_attached_parameter(f,'priority')
                else:
                    f['priority']=transfer.priority
            else:
                f['status']=sdconst.TRANSFER_STATUS_NEW

                if sdpostpipelineutils.exists_attached_parameter(f,'priority'): # a selection parameter may override the default priority (useful to set a special priority for new files)
                    f['priority']=sdpostpipelineutils.get_attached_parameter(f,'priority')
                else:
                    f['priority']=sdconst.DEFAULT_PRIORITY

        elif f["type"]==sdconst.SA_TYPE_DATASET:
            dataset=sddatasetdao.get_dataset(dataset_functional_id=f['dataset_functional_id'])

            if dataset is not None:
                f['status']=dataset.status
            else:
                f['status']=sdconst.DATASET_STATUS_NEW
        else:
            raise SDException('SDCOMPLE-001','Incorrect type (%s)'%f["type"])

    return files
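# Hypothetical usage sketch (not from the source): 'complete' expects a list
# of dicts carrying a 'type' plus the matching functional id; the field names
# below mirror the lookups in the function body, the id values are made up.
files=[
    {'type':sdconst.SA_TYPE_FILE,'file_functional_id':'cmip5.output1.MIROC.file.nc'},
    {'type':sdconst.SA_TYPE_DATASET,'dataset_functional_id':'cmip5.output1.MIROC.v20140318'},
]
files=complete(files) # each dict now also carries 'status' (and 'priority' for File entries)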
Example #8
def set_timestamp_when_empty__BATCH_MODE_1():
    """
    Retrieve *all* datasets from ESGF, then update local timestamp.

    Not used.
    """
    datasets = sddump.dump_ESGF(parameter=['searchapi_host=esgf-data.dkrz.de'],
                                fields=sdfields.get_timestamp_fields())

    sdlog.info("SDREBUIL-008",
               "%i dataset(s) retrieved from ESGF." % len(datasets))
    sdlog.info("SDREBUIL-012", "Start updating timestamp in local database.")

    for i, d in enumerate(datasets):

        if 'instance_id' in d:  # this is because some datasets have no instance_id in ESGF!
            dataset = sddatasetdao.get_dataset(
                dataset_functional_id=d['instance_id'])
            if dataset is not None:
                if 'timestamp' in d:  # this is because some datasets have no timestamp in ESGF!
                    dataset.timestamp = d['timestamp']
                    sddatasetdao.update_dataset(dataset,
                                                commit=False,
                                                keys=['timestamp'])

        SDProgressBar.print_progress_bar(
            len(datasets), i, title="Updating dataset's timestamp.. ")

    SDProgressBar.progress_complete()

    sddb.conn.commit()
Example #9
def populate_selection_transfer_junction():
    """
    populate "selection__transfer" association table

    WARNING: this method is only CMIP5 DRS compatible
    """
    sdlargequery.get_files_pagination__reset()

    transfer_without_selection=0
    transfer_without_dataset=0
    i=0
    transfers=sdlargequery.get_files_pagination() # loop over block (trick not to load 300000 CTransfer objects in memory..). Size is given by pagination_block_size
    while len(transfers)>0:
        for t in transfers:
            d=sddatasetdao.get_dataset(dataset_id=t.dataset_id)
            if d is not None:
                t.setDataset(d)
            else:
                insert_transfer_without_dataset(t)
                transfer_without_dataset+=1

                # we can't go on without dataset (contains() method needs it)
                continue

            # selection<=>transfer mapping and insertion in assoc table
            orphan=1 # this is to detect orphan transfers (i.e. transfers that don't belong to any selection)
            for us in get_Selections():

                # debug
                #print "%s<=>%s"%(t.getTransferID(),us.getSelectionID())

                if us.contains(t):

                    sddao.insert_selection_transfer_junction(t,us,_conn) # no commit inside
                    orphan=0

            if orphan==1:
                insert_transfer_without_selection(t)
                transfer_without_selection+=1


        _conn.commit() # commit block of insert_selection_transfer_junction calls

        # display progress
        #if i%100==0:
        SDProgressDot.print_char(".")

        i+=1

        transfers=sdlargequery.get_files_pagination()

    if transfer_without_selection>0:
        sdlog.warning("SDOPERAQ-032","%d transfer(s) not matching any selection found"%transfer_without_selection)

    if transfer_without_dataset>0:
        sdlog.warning("SDOPERAQ-033","%d missing dataset found (file exists but corresponding dataset is missing)"%transfer_without_dataset)
Example #10
def _get_dataset_details(dataset_functional_id):
    """Helper func."""
    d=sddatasetdao.get_dataset(dataset_functional_id=dataset_functional_id)

    d.dataset_versions=sdstatquery.get_dataset_versions(d,True) # retrieves all the versions of the dataset
    d.stats=sdstatquery.get_dataset_stats(d) 
    d.variables=sdvariable.get_variables_progress(d)
    d.files=sdfiledao.get_dataset_files(d)

    return d
Example #11
def _get_dataset_details(dataset_functional_id):
    """Helper func."""
    d=sddatasetdao.get_dataset(dataset_functional_id=dataset_functional_id)

    d.dataset_versions=sddatasetquery.get_dataset_versions(d,True) # retrieves all the versions of the dataset
    d.stats=sddatasetquery.get_dataset_stats(d) 
    d.variables=sdvariable.get_variables_progress(d)
    d.files=sdfiledao.get_dataset_files(d)

    return d
Example #12
def set_latest_flag(path):
    """This method is used to manually set the 'latest' flag."""

    d=sddatasetdao.get_dataset(path=path,raise_exception_if_not_found=False) # retrieve dataset from database
    if d is not None:
        if d.latest:
            print "'latest' flag is already set for this dataset"
        else:
            sddatasetflag.update_latest_flag(d,force_latest=True) # warning: this method modifies the dataset in memory (and in database too)
    else:
        print "Dataset not found"
Example #13
def dataset_latest_event(project,model,dataset_path,commit=True):
    # this event means a dataset has just been granted 'latest' status (i.e. it was not latest before and now is)

    sdlog.info("SYDEVENT-008","'dataset_latest_event' triggered (%s)"%dataset_path)

    # cascade
    if project=='CMIP5':
        assert '/output/' not in dataset_path

        (ds_path_output1,ds_path_output2)=sdproduct.get_output12_dataset_paths(dataset_path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):

            d1=sddatasetdao.get_dataset(path=ds_path_output1)
            d2=sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.latest and d2.latest:
                dataset_pattern=sdproduct.build_output12_dataset_pattern(dataset_path)
                dataset_latest_output12_event(project,model,dataset_pattern,commit=commit) # trigger event
        else:
            dataset_pattern=sdproduct.build_output12_dataset_pattern(dataset_path)
            dataset_latest_output12_event(project,model,dataset_pattern,commit=commit) # trigger event
Example #14
def get_file(file_functional_id=None):
    li = sdfiledao.get_files(file_functional_id=file_functional_id)

    if len(li) == 0:
        raise FileNotFoundException()
    else:
        f = li[0]

    # retrieve the dataset
    d = sddatasetdao.get_dataset(dataset_id=f.dataset_id)
    f.dataset = d

    return f
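# Hypothetical usage sketch (not from the source): the accessor raises
# FileNotFoundException instead of returning None, so callers handle the
# miss explicitly. The functional id below is made up.
try:
    f = get_file(file_functional_id='cmip5.output1.MIROC.tas.v20140318.tas_Amon_MIROC5.nc')
    print f.dataset.dataset_functional_id
except FileNotFoundException:
    print 'no such file in the local database'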
Example #15
def get_file(file_functional_id=None):
    li=sdfiledao.get_files(file_functional_id=file_functional_id)

    if len(li)==0:
        raise FileNotFoundException()
    else:
        f=li[0]

    # retrieve the dataset
    d=sddatasetdao.get_dataset(dataset_id=f.dataset_id)
    f.dataset=d

    return f
Example #16
def get_one_waiting_transfer():
    li=get_files(limit=1,status=sdconst.TRANSFER_STATUS_WAITING)

    if len(li)==0:
        raise NoTransferWaitingException()
    else:
        t=li[0]

    # retrieve the dataset
    d=sddatasetdao.get_dataset(dataset_id=t.dataset_id)
    t.dataset=d

    return t
Example #17
def get_one_waiting_transfer():
    li=sdfiledao.get_files(limit=1,status=sdconst.TRANSFER_STATUS_WAITING)

    if len(li)==0:
        raise NoTransferWaitingException()
    else:
        t=li[0]

    # retrieve the dataset
    d=sddatasetdao.get_dataset(dataset_id=t.dataset_id)
    t.dataset=d

    return t
Example #18
def variable_complete_event(project,model,dataset,variable,commit=True):
    sdlog.log("SYDEVENT-002","'variable_complete_event' triggered (%s,%s)"%(dataset.dataset_functional_id,variable),event_triggered_log_level)

    if sdconfig.is_event_enabled(sdconst.EVENT_VARIABLE_COMPLETE,project):
        event=Event(name=sdconst.EVENT_VARIABLE_COMPLETE)
        event.project=project
        event.model=model
        event.dataset_pattern=dataset.local_path
        event.variable=variable
        event.filename_pattern=''
        event.crea_date=sdtime.now()
        event.priority=sdconst.DEFAULT_PRIORITY
        sdeventdao.add_event(event,commit=commit)

    # cascade 1 (trigger dataset event)
    if dataset.status==sdconst.DATASET_STATUS_COMPLETE:
        dataset_complete_event(project,model,dataset) # trigger 'dataset complete' event

    # cascade 2 (trigger variable output12 event)
    if project=='CMIP5':

        assert '/output/' not in dataset.path

        (ds_path_output1,ds_path_output2)=sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):

            d1=sddatasetdao.get_dataset(path=ds_path_output1)
            d2=sddatasetdao.get_dataset(path=ds_path_output2)

            if sdvariable.is_variable_complete(d1.dataset_id,variable) and sdvariable.is_variable_complete(d2.dataset_id,variable):
                dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
                variable_complete_output12_event(project,model,dataset_pattern,variable) # trigger event (cross dataset event)
        else:
            # we also trigger 'variable_complete_output12_event' when the variable exists in only one product (with a single product, the output12 condition is trivially satisfied)

            dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
            variable_complete_output12_event(project,model,dataset_pattern,variable) # trigger event (cross dataset event)
Example #19
def fill_missing_dataset_timestamp(dataset_without_timestamp):
    """This funcs set the dataset timestamp.

    Notes
        - This func DO NOT commit.
        - In ESFG, timestamp differs from replica to replica, and so, as there
          is no dataset replica concept in 'sdt', it's really a hack, because
          we set the timestamp randomly (i.e. dataset's timestamp in
          Synda installation at user A may differ to dataset's timestamp
          in Synda installation at user B (because the timestamp for the
          dataset may have been retrieved from replica X in the case of user A
          and from replica Y in the case of user B (and X replica's timestamp
          may differ from Y replica's timestamp))). Anyway, in the end, we
          hope that the timestamp random is on a much smaller scale than the
          version-to-version time interval scale, so to be able to detect which
          version is the latest ! And yes: all this mess is because version exists
          in different formats ('v1', 'v20140318'..).
    """

    # Retrieve timestamps from ESGF
    # Note
    #     We do not filter replica in the query below in case the master host is not up
    result = sdquicksearch.run(parameter=[
        'limit=1',
        'fields=%s' % timestamp_fields, 'type=Dataset',
        'instance_id=%s' % dataset_without_timestamp.dataset_functional_id
    ],
                               post_pipeline_mode=None)
    li = result.get_files()

    # check if dataset has been found in ESGF
    if len(li) > 0:
        d = li[0]
    else:
        raise SDException(
            "SDTIMEST-800",
            "%s dataset does not exist in ESGF (or the index used does not list it)"
            % dataset_without_timestamp.dataset_functional_id)

    # use file's timestamp if dataset's timestamp is not set in ESGF
    # (this is needed, because some dataset in ESGF have NO timestamp...)
    use_file_timestamp_if_dataset_timestamp_is_missing(d)

    # update timestamp in DB
    dataset = sddatasetdao.get_dataset(dataset_functional_id=d['instance_id'])
    dataset.timestamp = d['timestamp']
    sddatasetdao.update_dataset(dataset, commit=False, keys=['timestamp'])
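# Illustration (not from the source): why the docstring above falls back on
# timestamps. Version strings come in mixed formats, so plain string
# comparison cannot order them reliably:
print 'v20140318' > 'v1'  # True (happens to look right)
print 'v2' > 'v20140318'  # also True as strings, yet 'v2' is an old-style counter, not a date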
Example #20
def file_():
    """This func perform a fake 'end of transfer' event."""

    sdlog.info("SDEVENTB-002", "Reset 'end of transfer' events")

    # check that only files with 'done' status exist
    li = sdfilequery.get_download_status()
    if len(li) > 1:
        raise SDException(
            'SDEVENTB-001',
            "Incorrect files status (status must be 'done' for all files before running this func)"
        )

    # reset files status from done to waiting
    sdmodifyquery.change_status(sdconst.TRANSFER_STATUS_DONE,
                                sdconst.TRANSFER_STATUS_WAITING)

    # reset dataset status to empty, and dataset 'latest' flag to false
    sdmodifyquery.wipeout_datasets_flags(status=sdconst.DATASET_STATUS_EMPTY)

    # mimic end of transfer
    dbpagination = sddbpagination.DBPagination()
    files = dbpagination.get_files()
    while len(files) > 0:
        for f in files:

            sdlog.info("SDEVENTB-003",
                       "trigger eot event on %s" % f.file_functional_id)

            # PAYLOAD

            # set status to done
            f.status = sdconst.TRANSFER_STATUS_DONE
            sdfiledao.update_file(f)

            # retrieve the dataset
            d = sddatasetdao.get_dataset(dataset_id=f.dataset_id)
            f.dataset = d

            # trigger end of transfer file event for all files
            sdevent.file_complete_event(f)

        sddb.conn.commit()  # commit block
        files = dbpagination.get_files()  # next block

        sdprogress.SDProgressDot.print_char(".")
Example #21
def set_latest_flag(path):
    """This method is used to manually set the 'latest' flag.

    Note
        Not used.
    """

    d = sddatasetdao.get_dataset(
        path=path,
        raise_exception_if_not_found=False)  # retrieve dataset from database
    if d is not None:
        if d.latest:
            print "'latest' flag is already set for this dataset"
        else:
            sddatasetflag.update_latest_flag(
                d, force_latest=True
            )  # warning: this method modifies the dataset in memory (and in database too)
    else:
        sdtools.print_stderr('Dataset not found')
Example #22
def file_():
    """This func perform a fake 'end of transfer' event."""

    sdlog.info("SDEVENTB-002","Reset 'end of transfer' events")

    # check that only files with 'done' status exist
    li=sdfilequery.get_download_status()
    if len(li)>1:
        raise SDException('SDEVENTB-001',"Incorrect files status (status must be 'done' for all files before running this func)")

    # reset files status from done to waiting
    sdmodifyquery.change_status(sdconst.TRANSFER_STATUS_DONE,sdconst.TRANSFER_STATUS_WAITING)

    # reset dataset status to empty, and dataset 'latest' flag to false
    sdmodifyquery.wipeout_datasets_flags(status=sdconst.DATASET_STATUS_EMPTY)

    # mimic end of transfer
    dbpagination=sddbpagination.DBPagination()
    files=dbpagination.get_files()
    while len(files)>0:
        for f in files:

            sdlog.info("SDEVENTB-003","trigger eot event on %s"%f.file_functional_id)

            # PAYLOAD

            # set status to done
            f.status=sdconst.TRANSFER_STATUS_DONE
            sdfiledao.update_file(f)

            # retrieve the dataset
            d=sddatasetdao.get_dataset(dataset_id=f.dataset_id)
            f.dataset=d

            # trigger end of transfer file event for all files
            sdevent.file_complete_event(f)


        sddb.conn.commit()             # commit block
        files=dbpagination.get_files() # next block

        sdprogress.SDProgressDot.print_char(".")
Example #23
def fill_missing_dataset_timestamp(dataset_without_timestamp):
    """This funcs set the dataset timestamp.

    Notes
        - This func DO NOT commit.
        - In ESFG, timestamp differs from replica to replica, and so, as there
          is no dataset replica concept in 'sdt', it's really a hack, because
          we set the timestamp randomly (i.e. dataset's timestamp in
          Synda installation at user A may differ to dataset's timestamp
          in Synda installation at user B (because the timestamp for the
          dataset may have been retrieved from replica X in the case of user A
          and from replica Y in the case of user B (and X replica's timestamp
          may differ from Y replica's timestamp))). Anyway, in the end, we
          hope that the timestamp random is on a much smaller scale than the
          version-to-version time interval scale, so to be able to detect which
          version is the latest ! And yes: all this mess is because version exists
          in different formats ('v1', 'v20140318'..).
    """

    # Retrieve timestamps from ESGF
    # Note
    #     We do not filter replica in the query below in case the master host is not up
    result=sdquicksearch.run(parameter=['limit=1','fields=%s'%timestamp_fields,'type=Dataset','instance_id=%s'%dataset_without_timestamp.dataset_functional_id],post_pipeline_mode=None)
    li=result.get_files()

    # check if dataset has been found in ESGF
    if len(li)>0:
        d=li[0]
    else:
        raise SDException("SDTIMEST-800","%s dataset does not exist in ESGF (or the index used does not list it)"%dataset_without_timestamp.dataset_functional_id)

    # use file's timestamp if dataset's timestamp is not set in ESGF
    # (this is needed, because some dataset in ESGF have NO timestamp...)
    use_file_timestamp_if_dataset_timestamp_is_missing(d)

    # update timestamp in DB
    dataset=sddatasetdao.get_dataset(dataset_functional_id=d['instance_id'])
    dataset.timestamp=d['timestamp']
    sddatasetdao.update_dataset(dataset,commit=False,keys=['timestamp'])
Example #24
def add_dataset(f):
    """
    Returns:
        dataset_id
    """
    d = sddatasetdao.get_dataset(dataset_functional_id=f.dataset_functional_id)
    if d is not None:

        # check dataset local path format
        #
        # (once a dataset has been created using one local_path format, it
        # cannot be changed anymore without removing the whole dataset and
        # restarting it from scratch).
        #
        if d.local_path != f.dataset_local_path:
            raise SDException(
                "SDENQUEU-008",
                "Incorrect local path format (existing_format=%s,new_format=%s)"
                % (d.local_path, f.dataset_local_path))

        # compute new dataset status
        if d.status == sdconst.DATASET_STATUS_IN_PROGRESS:
            d.status = sdconst.DATASET_STATUS_IN_PROGRESS

        elif d.status == sdconst.DATASET_STATUS_EMPTY:
            d.status = sdconst.DATASET_STATUS_EMPTY

        elif d.status == sdconst.DATASET_STATUS_COMPLETE:
            d.status = sdconst.DATASET_STATUS_IN_PROGRESS  # this means that a dataset may be "in-progress" and also "latest"

        # Note related to the "latest" dataset column
        #
        # Adding new files to a dataset may change its status, but doesn't
        # change the dataset's "latest" flag. This is because a dataset can
        # only be downgraded here ("complete" => "in-progress") or stay the
        # same, and a downgrade leaves the "latest" flag as is, whether it is
        # true or false.

        # "last_mod_date" is only modified here (i.e. it is not modified when
        # dataset's files status change). in other words, it changes only when
        # adding new files to it using this script.
        #
        d.last_mod_date = sdtime.now()

        sddatasetdao.update_dataset(d, commit=False)

        return d.dataset_id

    else:
        sdlog.info("SDENQUEU-002",
                   "create dataset (dataset_path=%s)" % (f.dataset_path))

        d = Dataset()

        d.local_path = f.dataset_local_path
        d.path = f.dataset_path
        d.path_without_version = f.dataset_path_without_version
        d.dataset_functional_id = f.dataset_functional_id
        d.template = f.dataset_template
        d.version = f.dataset_version
        d.project = f.project
        d.status = sdconst.DATASET_STATUS_EMPTY
        d.latest = False
        d.crea_date = sdtime.now()
        d.last_mod_date = sdtime.now()

        # non-mandatory attributes
        d.timestamp = f.dataset_timestamp if hasattr(
            f, 'dataset_timestamp') else None
        d.model = f.model if hasattr(f, 'model') else None

        return sddatasetdao.add_dataset(d, commit=False)
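# Illustration (not from the source): the if/elif ladder above amounts to this
# transition table for "new files were added to the dataset". The literal
# status strings are assumptions; the real code uses sdconst constants.
STATUS_ON_NEW_FILES = {
    'in-progress': 'in-progress',
    'empty': 'empty',
    'complete': 'in-progress',  # a complete dataset falls back to in-progress
}
assert STATUS_ON_NEW_FILES['complete'] == 'in-progress'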
Example #25
def dataset_complete_event(project,model,dataset,commit=True):
    sdlog.log("SYDEVENT-004","'dataset_complete_event' triggered (%s)"%dataset.dataset_functional_id,event_triggered_log_level)

    # not used for now
    """
    event=Event(name=sdconst.EVENT_DATASET_COMPLETE)
    event.project=project
    event.model=model
    event.dataset_pattern=dataset_pattern
    event.variable=''
    event.filename_pattern=''
    event.crea_date=sdtime.now()
    event.priority=sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(event,commit=commit)
    """

    # <<<--- 'latest' flag management related code begin

    # store current 'latest' flag state
    old_latest=dataset.latest

    # TODO: check whether we should switch the latest flag independently for each product (meaning output1 latest could be 1 while output2 latest is 0)
    # tag4342342

    # compute new 'latest' flag
    if not old_latest:
        # old state is not latest

        sddatasetflag.update_latest_flag(dataset) # warning: this method modifies the dataset object in memory (and in database too)
    else:
        # nothing to do concerning the 'latest' flag as the current dataset is already the latest
        # (the latest flag can only be switched off (i.e. set to False) by *other* dataset versions, never by the dataset itself!)
        pass

    # store new 'latest' flag state
    new_latest=dataset.latest

    # --->>> 'latest' flag management related code end


    # cascade 1 (trigger dataset latest switch event)
    if (not old_latest) and new_latest:
        # latest flag has been switched from false to true

        dataset_latest_event(project,model,dataset.path,commit=commit) # trigger 'dataset_latest' event


    # cascade 2 (trigger latest dataset complete event)
    if dataset.latest:
        latest_dataset_complete_event(project,model,dataset.local_path,commit=commit)
    else:
        non_latest_dataset_complete_event(project,model,dataset.local_path,commit=commit)


    # cascade 3 (trigger output12 dataset complete event)
    if project=='CMIP5':
        (ds_path_output1,ds_path_output2)=sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):

            d1=sddatasetdao.get_dataset(path=ds_path_output1)
            d2=sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.status==sdconst.DATASET_STATUS_COMPLETE and d2.status==sdconst.DATASET_STATUS_COMPLETE:
                dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
                dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
        else:
            # only one product exists for this dataset

            # not sure if this code is required.
            # basically, it says that if only one product is present (output1 or output2)
            # then the 'output12' is considered ready to be triggered
            # (i.e. output12 does not require both output1 and output2 to be present;
            # it only requires that, if both are present, they must both be complete)
            #
            dataset_pattern=sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
            dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)


    # cascade 4 (trigger latest output12 dataset complete event)
    # note: 'dataset_pattern' used below was computed in cascade 3 (same CMIP5 guard)
    if project=='CMIP5':
        (ds_path_output1,ds_path_output2)=sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):

            d1=sddatasetdao.get_dataset(path=ds_path_output1)
            d2=sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.status==sdconst.DATASET_STATUS_COMPLETE and d2.status==sdconst.DATASET_STATUS_COMPLETE:
                if d1.latest and d2.latest:
                    latest_output12_dataset_complete_event(project,model,dataset_pattern,commit=commit)
                elif not d1.latest and not d2.latest:
                    non_latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
                else:
                    sdlog.warning("SYDEVENT-032","Event not triggered as one product is latest while the other product is not") # TODO: is this the right way to handle this case ?
        else:
            # only one product exists for this dataset

            # not sure if this code is required.
            # basically, it says that if only one product is present (output1 or output2)
            # then the 'output12' is considered ready to be triggered
            # (i.e. output12 does not require both output1 and output2 to be present;
            # it only requires that, if both are present, they must both be complete)
            #
            if dataset.latest:
                latest_output12_dataset_complete_event(project,model,dataset_pattern,commit=commit)
            else:
                non_latest_dataset_complete_output12_event(project,model,dataset_pattern,commit=commit)
Example #26
def dataset_complete_event(project, model, dataset, commit=True):
    sdlog.log(
        "SYDEVENT-004", "'dataset_complete_event' triggered (%s)" %
        dataset.dataset_functional_id, event_triggered_log_level)

    # not used for now
    """
    event=Event(name=sdconst.EVENT_DATASET_COMPLETE)
    event.project=project
    event.model=model
    event.dataset_pattern=dataset_pattern
    event.variable=''
    event.filename_pattern=''
    event.crea_date=sdtime.now()
    event.priority=sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(event,commit=commit)
    """

    # <<<--- 'latest' flag management related code begin

    # store current 'latest' flag state
    old_latest = dataset.latest

    # TODO: check whether we should switch the latest flag independently for each product (meaning output1 latest could be 1 while output2 latest is 0)
    # tag4342342

    # compute new 'latest' flag
    if not old_latest:
        # old state is not latest

        sddatasetflag.update_latest_flag(
            dataset
        )  # warning: this method modifies the dataset object in memory (and in database too)
    else:
        # nothing to do concerning the 'latest' flag as the current dataset is already the latest
        # (the latest flag can only be switched off (i.e. set to False) by *other* dataset versions, never by the dataset itself!)
        pass

    # store new 'latest' flag state
    new_latest = dataset.latest

    # --->>> 'latest' flag management related code end

    # cascade 1 (trigger dataset latest switch event)
    if (not old_latest) and new_latest:
        # latest flag has been switched from false to true

        dataset_latest_event(project, model, dataset.path,
                             commit=commit)  # trigger 'dataset_latest' event

    # cascade 2 (trigger latest dataset complete event)
    if dataset.latest:
        latest_dataset_complete_event(project,
                                      model,
                                      dataset.local_path,
                                      commit=commit)
    else:
        non_latest_dataset_complete_event(project,
                                          model,
                                          dataset.local_path,
                                          commit=commit)

    # cascade 3 (trigger output12 dataset complete event)
    if project == 'CMIP5':
        (ds_path_output1,
         ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(
                path=ds_path_output1) and sddatasetdao.exists_dataset(
                    path=ds_path_output2):

            d1 = sddatasetdao.get_dataset(path=ds_path_output1)
            d2 = sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.status == sdconst.DATASET_STATUS_COMPLETE and d2.status == sdconst.DATASET_STATUS_COMPLETE:
                dataset_pattern = sdproduct.replace_output12_product_with_wildcard(
                    dataset.local_path)
                dataset_complete_output12_event(project,
                                                model,
                                                dataset_pattern,
                                                commit=commit)
        else:
            # only one product exists for this dataset

            # not sure if this code is required.
            # basically, it says that if only one product is present (output1 or output2)
            # then the 'output12' is considered ready to be triggered
            # (i.e. output12 does not require both output1 and output2 to be present;
            # it only requires that, if both are present, they must both be complete)
            #
            dataset_pattern = sdproduct.replace_output12_product_with_wildcard(
                dataset.local_path)
            dataset_complete_output12_event(project,
                                            model,
                                            dataset_pattern,
                                            commit=commit)

    # cascade 4 (trigger latest output12 dataset complete event)
    # note: 'dataset_pattern' used below was computed in cascade 3 (same CMIP5 guard)
    if project == 'CMIP5':
        (ds_path_output1,
         ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(
                path=ds_path_output1) and sddatasetdao.exists_dataset(
                    path=ds_path_output2):

            d1 = sddatasetdao.get_dataset(path=ds_path_output1)
            d2 = sddatasetdao.get_dataset(path=ds_path_output2)

            if d1.status == sdconst.DATASET_STATUS_COMPLETE and d2.status == sdconst.DATASET_STATUS_COMPLETE:
                if d1.latest and d2.latest:
                    latest_output12_dataset_complete_event(project,
                                                           model,
                                                           dataset_pattern,
                                                           commit=commit)
                elif not d1.latest and not d2.latest:
                    non_latest_dataset_complete_output12_event(project,
                                                               model,
                                                               dataset_pattern,
                                                               commit=commit)
                else:
                    sdlog.warning(
                        "SYDEVENT-032",
                        "Event not triggered as one product is latest while the other product is not"
                    )  # TODO: is this the right way to handle this case?
        else:
            # only one product exists for this dataset

            # not sure if this code is required.
            # basically, it says that if only one product is present (output1 or output2)
            # then the 'output12' is considered ready to be triggered
            # (i.e. output12 does not require both output1 and output2 to be present;
            # it only requires that, if both are present, they must both be complete)
            #
            if dataset.latest:
                latest_output12_dataset_complete_event(project,
                                                       model,
                                                       dataset_pattern,
                                                       commit=commit)
            else:
                non_latest_dataset_complete_output12_event(project,
                                                           model,
                                                           dataset_pattern,
                                                           commit=commit)
Example #27
def populate_selection_transfer_junction():
    """
    populate "selection__transfer" association table

    WARNING: this method is only CMIP5 DRS compatible

    TODO: not tested: check this method before use
    """
    dbpagination = sddbpagination.DBPagination()

    transfer_without_selection = 0
    transfer_without_dataset = 0
    i = 0
    transfers = dbpagination.get_files(
    )  # loop over block (trick not to load 300000 CTransfer objects in memory..). Size is given by pagination_block_size
    while len(transfers) > 0:
        for t in transfers:
            d = sddatasetdao.get_dataset(dataset_id=t.dataset_id)
            if d is not None:
                t.setDataset(d)
            else:
                insert_transfer_without_dataset(t)
                transfer_without_dataset += 1

                # we can't go on without dataset (contains() method needs it)
                continue

            # selection<=>transfer mapping and insertion in assoc table
            orphan = 1  # this is to detect orphan transfers (i.e. transfers that don't belong to any selection)
            for us in get_Selections():

                # debug
                #print "%s<=>%s"%(t.get_transfer_id(),us.get_selection_id())

                if us.contains(t):

                    sddao.insert_selection_transfer_junction(
                        t, us, _conn)  # no commit inside
                    orphan = 0

            if orphan == 1:
                insert_transfer_without_selection(t)
                transfer_without_selection += 1

        _conn.commit()  # commit block

        # display progress
        #if i%100==0:
        SDProgressDot.print_char(".")

        i += 1

        transfers = dbpagination.get_files()

    if transfer_without_selection > 0:
        sdlog.warning(
            "SDOPERAQ-032", "%d transfer(s) not matching any selection found" %
            transfer_without_selection)

    if transfer_without_dataset > 0:
        sdlog.warning(
            "SDOPERAQ-033",
            "%d missing dataset found (file exists but corresponding dataset is missing)"
            % transfer_without_dataset)
Example #28
def add_dataset(f):
    """
    Returns:
        dataset_id
    """
    d=sddatasetdao.get_dataset(dataset_functional_id=f.dataset_functional_id)
    if d is not None:

        # check dataset local path format
        #
        # (once a dataset has been created using one local_path format, it
        # cannot be changed anymore without removing the whole dataset and
        # restarting it from scratch).
        #
        if d.local_path!=f.dataset_local_path:
            raise SDException("SDENQUEU-008","Incorrect local path format (existing_format=%s,new_format=%s)"%(d.local_path,f.dataset_local_path))

        # compute new dataset status
        if d.status==sdconst.DATASET_STATUS_IN_PROGRESS:
            d.status=sdconst.DATASET_STATUS_IN_PROGRESS

        elif d.status==sdconst.DATASET_STATUS_EMPTY:
            d.status=sdconst.DATASET_STATUS_EMPTY

        elif d.status==sdconst.DATASET_STATUS_COMPLETE:
            d.status=sdconst.DATASET_STATUS_IN_PROGRESS # this means that a dataset may be "in-progress" and also "latest"


        # Note related to the "latest" dataset column
        #
        # Adding new files to a dataset may change its status, but doesn't
        # change the dataset's "latest" flag. This is because a dataset can
        # only be downgraded here ("complete" => "in-progress") or stay the
        # same, and a downgrade leaves the "latest" flag as is, whether it is
        # true or false.

        # "last_mod_date" is only modified here (i.e. it is not modified when
        # dataset's files status change). in other words, it changes only when
        # adding new files to it using this script.
        #
        d.last_mod_date=sdtime.now()


        sddatasetdao.update_dataset(d,commit=False)

        return d.dataset_id

    else:
        sdlog.info("SDENQUEU-002","create dataset (dataset_path=%s)"%(f.dataset_path))

        d=Dataset()

        d.local_path=f.dataset_local_path
        d.path=f.dataset_path
        d.path_without_version=f.dataset_path_without_version
        d.dataset_functional_id=f.dataset_functional_id
        d.template=f.dataset_template
        d.version=f.dataset_version
        d.project=f.project
        d.status=sdconst.DATASET_STATUS_EMPTY
        d.latest=False
        d.crea_date=sdtime.now()
        d.last_mod_date=sdtime.now()

        # non-mandatory attributes
        d.timestamp=f.dataset_timestamp if hasattr(f,'dataset_timestamp') else None
        d.model=f.model if hasattr(f,'model') else None

        return sddatasetdao.add_dataset(d,commit=False)