Пример #1
0
def update_datasets_status():
    """
    Refresh the status flag of every dataset.

    All datasets are fetched with ``sddatasetdao.get_datasets()`` and handed
    over to ``update_datasets_status_HELPER``.

    Notes
        - This func is meant to repair inconsistencies.
        - The 'latest' flag is not handled by this func.
    """
    sdlog.info("SYDDFLAG-186", "Update status for all datasets")
    update_datasets_status_HELPER(sddatasetdao.get_datasets())
Пример #2
0
def update_datasets_status():
    """
    Recompute the status flag for the full set of datasets.

    The datasets come from ``sddatasetdao.get_datasets()`` (called without
    any filter) and are passed to ``update_datasets_status_HELPER``.

    Notes
        - Use this func to fix inconsistencies.
        - This func leaves the 'latest' flag alone.
    """
    sdlog.info("SYDDFLAG-186", "Update status for all datasets")

    all_datasets = sddatasetdao.get_datasets()

    update_datasets_status_HELPER(all_datasets)
Пример #3
0
def update_complete_datasets_status():
    """
    Recompute the status flag of datasets currently marked as complete.

    Only datasets whose status is ``sdconst.DATASET_STATUS_COMPLETE`` are
    fetched and passed to ``update_datasets_status_HELPER``.

    Notes
        - This func is meant to repair inconsistencies, i.e. a dataset that
          has the complete status while some of its files are not 'done' yet.
        - The 'latest' flag is not handled by this func.
    """
    sdlog.info("SYDDFLAG-184", "Update complete datasets status")

    wanted_status = sdconst.DATASET_STATUS_COMPLETE
    update_datasets_status_HELPER(sddatasetdao.get_datasets(status=wanted_status))
Пример #4
0
def PROC0001():
    """Print the local path of every dataset whose version is obsolete.

    A dataset is reported when its version is not the most recent version
    number among the versions returned by
    ``sddatasetquery.get_dataset_versions``.

    Notes
     - The printed path uses the shell expansion pattern 'output{,1,2}', as a
       dataset can be split over the two products output1 and output2, and
       can also be in output.
     - Basic algorithm built on get_datasets(); for a smarter version
       selection, see PROC0005.
    """
    for d in sddatasetdao.get_datasets():
        # retrieve all the versions of the dataset
        versions = sddatasetquery.get_dataset_versions(d, True)

        # basic test: skip the dataset when it already is the most recent one
        if versions.ismostrecentversionnumber(d.version):
            continue

        # Single-argument print(...) gives the same output with the Python 2
        # print statement and with the Python 3 print function.
        print(d.get_full_local_path('output{,1,2}'))
Пример #5
0
def set_model_when_empty():
    """Set the model of each dataset from its name (fix for bug B0025).

    The model is the second '/'-separated component of the dataset name,
    e.g. 'HadGEM2-ES' in 'MOHC/HadGEM2-ES/rcp26/day/atmos/day/r1i1p1/v20110524'.
    Every dataset returned by ``sddatasetdao.get_datasets()`` is processed
    and written back with ``sdrebuildquery.update_dataset``; one progress dot
    is printed per dataset.

    Raises
        SDException (code SDREBUIL-120) when a dataset name does not match
        the expected layout.
    """
    # compiled once, as the pattern does not depend on the dataset
    name_pattern = re.compile('^[^/]*/([^/]*)/.*$')

    for d in sddatasetdao.get_datasets():
        m = name_pattern.search(d.name)

        if m is None:
            # NOTE(review): the name is read through 'd.name' above but
            # through 'd.getName()' here -- confirm both accessors exist.
            raise SDException("SDREBUIL-120","incorrect dataset format (%s)"%d.getName())

        # NOTE(review): 'model' is invoked as a method here; if it is a plain
        # attribute this should probably be an assignment -- confirm against
        # the dataset class.
        d.model(m.group(1))
        sdrebuildquery.update_dataset(d)

        SDProgressDot.print_char(".")
Пример #6
0
def update_complete_datasets_status():
    """
    Refresh the status flag for datasets that have the complete status.

    The datasets are selected with
    ``sddatasetdao.get_datasets(status=sdconst.DATASET_STATUS_COMPLETE)`` and
    then given to ``update_datasets_status_HELPER``.

    Notes
        - Use this func to fix inconsistencies where a dataset has the
          complete status although some of its files are not 'done' yet.
        - This func leaves the 'latest' flag alone.
    """
    sdlog.info("SYDDFLAG-184", "Update complete datasets status")

    update_datasets_status_HELPER(
        sddatasetdao.get_datasets(status=sdconst.DATASET_STATUS_COMPLETE)
    )
Пример #7
0
def update_incomplete_datasets_status():
    """
    Recompute the status flag of datasets whose status is incomplete.

    After error and waiting transfers have been removed (e.g. with the
    'synda reset' func), a dataset status can be left incorrect: it stays on
    'empty' or 'in-progress' although all its transfers are now 'done'.
    This func repairs that situation.

    Notes
        - The 'latest' flag is not handled by this func.
        - Nearly identical to the 'update_complete_datasets_status' func, but
          faster since complete datasets (the largest part of all the
          datasets) are not processed.

    TODO
        Also handle the 'latest' flag in this func
    """
    sdlog.info("SYDDFLAG-182", "Update incomplete datasets status")

    # 'empty' datasets are queried first, then 'in-progress' ones
    empty_datasets = sddatasetdao.get_datasets(
        status=sdconst.DATASET_STATUS_EMPTY)
    in_progress_datasets = sddatasetdao.get_datasets(
        status=sdconst.DATASET_STATUS_IN_PROGRESS)

    update_datasets_status_HELPER(empty_datasets + in_progress_datasets)
Пример #8
0
def update_datasets__status_and_latest():
    """
    Set status and latest flag for all datasets.

    For each dataset, the status is recomputed with
    ``compute_dataset_status`` and saved with ``sddatasetdao.update_dataset``;
    then, if the dataset is not flagged as latest, ``update_latest_flag`` is
    called on it. A progress dot is printed every second dataset.

    Return value
        Returns how many datasets have been modified, i.e. how many have a
        'latest' or 'status' value that differs from the one they had before
        being processed.

    Note
        This procedure must be run until no modifications remain (a run makes
        changes, which impact the next one, and so on. After a few runs, the
        graph traversal must be complete).
    """
    datasets_modified_count = 0

    for index, d in enumerate(sddatasetdao.get_datasets()):

        # remember the dataset state before any change
        previous_latest = d.latest
        previous_status = d.status

        # compute and store the new 'status' flag
        d.status = compute_dataset_status(d)
        sddatasetdao.update_dataset(d)

        # Compute the new 'latest' flag, based on its current value. Nothing
        # is done when the dataset already is the latest: the flag can only
        # be switched off (i.e. to False) by *other* versions of the dataset,
        # not by itself.
        if not d.latest:
            # warning: this modifies the dataset in memory (and in database too)
            update_latest_flag(d)

        # check if the dataset has changed
        if previous_latest != d.latest or previous_status != d.status:
            datasets_modified_count += 1

        # display progress (one dot every two datasets)
        if index % 2 == 0:
            SDProgressDot.print_char(".")

    # Terminate the progress line. print("") outputs an empty line with both
    # the Python 2 print statement and the Python 3 print function.
    print("")
    sdlog.info("SYDDFLAG-630",
               "modified datasets: %i" % datasets_modified_count)

    return datasets_modified_count
Пример #9
0
def PROC0001():
    """Print obsolete versions of datasets.

    For each dataset, all its versions are retrieved with
    ``sddatasetquery.get_dataset_versions``; the dataset's local path is
    printed when its version is not the most recent version number.

    Notes
     - A shell expansion pattern ('output{,1,2}') is used in the path, as a
       dataset can be split over the two products output1 and output2, and
       can also be in output.
     - Basic algorithm using the get_datasets() method.
     - Also see PROC0005, which offers a smarter version selection.
    """
    for d in sddatasetdao.get_datasets():
        # retrieve all the versions of the dataset
        all_versions = sddatasetquery.get_dataset_versions(d, True)

        is_most_recent = all_versions.ismostrecentversionnumber(d.version)

        if not is_most_recent:
            # Parenthesised single-argument print works identically as a
            # Python 2 statement and as a Python 3 function call.
            print(d.get_full_local_path('output{,1,2}'))
Пример #10
0
def fix_timestamp():
    """
    Fill in the timestamp of datasets that do not have one.

    Datasets whose timestamp is not set are fetched from
    ``sddatasetdao``, filtered through ``keep_recent_datasets``, and each
    remaining dataset is passed to
    ``sdtimestamp.fill_missing_dataset_timestamp``.

    Raises
        SDException, when ``fill_missing_dataset_timestamp`` fails with a
        code other than 'SDTIMEST-011', 'SDTIMEST-008' or 'SDTIMEST-800'
        (those three are logged and the dataset is skipped).
    """

    # HACK 1
    #
    # Once all insertions are done, we update 'dataset.timestamp' column (this
    # cannot be done in one step, because dataset 'timestamp' attribute doesn't
    # exist in file's attributes).
    #
    # 'timestamp' is mainly (only ?) needed by sddatasetversion.compare() func
    #
    # Indeed, this code is a hack that makes the workflow less readable
    # (i.e. 'search' then 'enqueue' then 'search' again). Maybe try to improve
    # this in the future. Still, it is not as bad as if 'search' triggered
    # 'search' recursively, because in our case, when the second search
    # starts, the first search is completed (AFAIR sdsearch is protected not
    # to permit recursion anyway).
    # But if needed, there is a way to trigger search recursively: use
    # sdquicksearch (also in this case, sdsearch can still be used for the top
    # level search (so resulting with a mix of sdsearch and sdquicksearch)).
    #
    # retrieve datasets with timestamp not set
    datasets_without_timestamp = sddatasetdao.get_datasets(timestamp=None)

    # HACK 2
    recent_datasets_without_timestamp = keep_recent_datasets(
        datasets_without_timestamp)

    dataset_count = len(recent_datasets_without_timestamp)
    if dataset_count == 0:
        # nothing to fix
        return

    sdlog.info("SDENQUEU-004",
               "Retrieving timestamp for %i dataset(s)." % dataset_count)

    # error codes that are only logged; any other code is fatal
    non_fatal_codes = ['SDTIMEST-011', 'SDTIMEST-008', 'SDTIMEST-800']

    for dataset_without_timestamp in recent_datasets_without_timestamp:
        try:
            sdtimestamp.fill_missing_dataset_timestamp(
                dataset_without_timestamp)
        except SDException as e:  # 'as' form is valid on Python 2.6+ and 3
            if e.code not in non_fatal_codes:
                # fatal error come here
                raise

            sdlog.info(
                "SDENQUEU-909",
                "Timestamp not set for '%s' dataset (%s)" %
                (dataset_without_timestamp.dataset_functional_id, str(e)))
Пример #11
0
def set_timestamp_when_empty__BATCH_MODE_2(project='CMIP5'):
    """
    Retrieve datasets from local database, then retrieve datasets from ESGF, then update local timestamp.

    Only datasets of the given project whose timestamp is not set are
    processed; each one is passed to
    ``sdtimestamp.fill_missing_dataset_timestamp``.

    Arguments
        project: project used to filter the datasets (default 'CMIP5').

    Raises
        SDException, when ``fill_missing_dataset_timestamp`` fails with a
        code other than 'SDTIMEST-011', 'SDTIMEST-008' or 'SDTIMEST-800'
        (those three are logged and the dataset is skipped).
    """
    # retrieve datasets with timestamp not set
    datasets_without_timestamp=sddatasetdao.get_datasets(project=project,timestamp=None)
    sdlog.info("SDREBUIL-004","Updating %i dataset(s) timestamp."%len(datasets_without_timestamp))

    # error codes that are only logged; any other code is fatal
    non_fatal_codes=['SDTIMEST-011','SDTIMEST-008','SDTIMEST-800']

    for dataset_without_timestamp in datasets_without_timestamp:
        try:
            sdtimestamp.fill_missing_dataset_timestamp(dataset_without_timestamp)
        except SDException as e: # 'as' form is valid on Python 2.6+ and 3
            if e.code not in non_fatal_codes:
                # fatal error come here
                raise

            sdlog.info("SDREBUIL-694","Timestamp not set for dataset (reason=%s,dataset=%s)"%(e.code,dataset_without_timestamp.dataset_functional_id))
Пример #12
0
def set_model_when_empty():
    """Fix B0025 bug by deriving each dataset's model from its name.

    The dataset name is expected to look like
    'MOHC/HadGEM2-ES/rcp26/day/atmos/day/r1i1p1/v20110524'; the text between
    the first and the second '/' ('HadGEM2-ES' in this sample) is used as the
    model. All datasets returned by ``sddatasetdao.get_datasets()`` are
    processed and saved with ``sdrebuildquery.update_dataset``. A progress
    dot is printed after each dataset.

    Raises
        SDException (code SDREBUIL-120) if a dataset name does not have the
        expected format.
    """
    # the pattern is loop-invariant, so it is compiled only once
    model_in_name = re.compile('^[^/]*/([^/]*)/.*$')

    for d in sddatasetdao.get_datasets():
        match = model_in_name.search(d.name)

        if match is None:
            # NOTE(review): 'd.name' is used above while 'd.getName()' is
            # used here -- verify that both exist on the dataset class.
            raise SDException("SDREBUIL-120",
                              "incorrect dataset format (%s)" % d.getName())

        model = match.group(1)

        # NOTE(review): this calls 'd.model' instead of assigning to it;
        # presumably a setter-like method -- confirm, otherwise this should
        # be 'd.model = model'.
        d.model(model)
        sdrebuildquery.update_dataset(d)

        SDProgressDot.print_char(".")
Пример #13
0
def get_old_versions_datasets():
    """Return old versions datasets list.

    A dataset is kept when its 'latest' flag equals False, a version with the
    'latest' flag set to True exists among its versions, and it is not higher
    than that latest version.

    Raises
        SDException (code SDSTAT-ERR042) if a dataset passing all the tests
        above still turns out to have the most recent version number.
    """
    old_versions=[]

    for d in sddatasetdao.get_datasets():
        # retrieve all the versions of the dataset
        versions=sddatasetquery.get_dataset_versions(d,True)

        # '== False' (rather than 'not d.latest') keeps the original
        # comparison: a value such as None does not compare equal to False
        if not (d.latest==False):
            continue # this version is the latest

        if not versions.exists_version_with_latest_flag_set_to_true():
            continue # no latest exists

        if versions.is_version_higher_than_latest(d):
            continue # version is higher than latest

        # assert: should never occur because of the previous tests
        if versions.is_most_recent_version_number(d):
            raise SDException("SDSTAT-ERR042","fatal error (version=%s,path_without_version=%s)"%(d.version,d.get_name_without_version()))

        old_versions.append(d)

    return old_versions
Пример #14
0
def update_datasets__status_and_latest():
    """
    Set status and latest flag for all datasets.

    Each dataset gets its status recomputed by ``compute_dataset_status`` and
    saved by ``sddatasetdao.update_dataset``. Datasets not flagged as latest
    are then passed to ``update_latest_flag``. One progress dot is printed
    for every two datasets.

    Return value
        Returns how many datasets have been modified (a dataset counts as
        modified when its 'latest' or 'status' value differs from the value
        it had before being processed).

    Note
        This procedure must be run until no modifications remain (a run makes
        changes, which impact the next one, and so on. After a few runs, the
        graph traversal must be complete).
    """
    datasets_modified_count=0

    for position,d in enumerate(sddatasetdao.get_datasets()):

        # store dataset current state
        latest_before=d.latest
        status_before=d.status

        # compute new 'status' flag
        d.status=compute_dataset_status(d)
        sddatasetdao.update_dataset(d)

        # Compute new 'latest' flag (the current value of the flag is tested
        # here). When the dataset already is the latest, there is nothing to
        # do: the flag can only be switched off (i.e. to False) by *other*
        # versions of the dataset, not by itself.
        if not d.latest:
            update_latest_flag(d) # warning: this method modifies the dataset in memory (and in database too)

        # check if the dataset has changed
        if latest_before!=d.latest or status_before!=d.status:
            datasets_modified_count+=1

        # display progress (one dot every two datasets)
        if position%2==0:
            SDProgressDot.print_char(".")

    # End the progress line; print("") yields an empty line both as a
    # Python 2 print statement and as a Python 3 function call.
    print("")
    sdlog.info("SYDDFLAG-630","modified datasets: %i"%datasets_modified_count)

    return datasets_modified_count
Пример #15
0
def add_files(files):
    """
    Add the given files, then fill in missing dataset timestamps.

    Each item of 'files' is expanded as keyword arguments into ``File`` and
    the resulting object is passed to ``add_file``. Afterwards, datasets
    whose timestamp is not set are fetched, filtered through
    ``keep_recent_datasets``, and passed one by one to
    ``sdtimestamp.fill_missing_dataset_timestamp``.

    Arguments
        files: iterable of mappings holding the keyword arguments of ``File``.

    Raises
        SDException, when ``fill_missing_dataset_timestamp`` fails with a
        code other than 'SDTIMEST-011', 'SDTIMEST-008' or 'SDTIMEST-800'
        (those three are logged and the dataset is skipped).
    """
    for f in files:
        add_file(File(**f))

    # HACK 1
    #
    # Once all insertions are done, we update 'dataset.timestamp' column (this
    # cannot be done in one step, because dataset 'timestamp' attribute doesn't
    # exist in file's attributes).
    #
    # 'timestamp' is mainly (only ?) needed by sddatasetversion.compare() func
    #
    # Indeed, this code is a hack that makes the workflow less readable
    # (i.e. 'search' then 'enqueue' then 'search' again). Maybe try to improve
    # this in the future. Still, it is not as bad as if 'search' triggered
    # 'search' recursively, because in our case, when the second search
    # starts, the first search is completed (AFAIR sdsearch is protected not
    # to permit recursion anyway).
    # But if needed, there is a way to trigger search recursively: use
    # sdquicksearch (also in this case, sdsearch can still be used for the top
    # level search (so resulting with a mix of sdsearch and sdquicksearch)).
    #
    # retrieve datasets with timestamp not set
    datasets_without_timestamp=sddatasetdao.get_datasets(timestamp=None)

    # HACK 2
    recent_datasets_without_timestamp=keep_recent_datasets(datasets_without_timestamp)

    dataset_count=len(recent_datasets_without_timestamp)
    if dataset_count==0:
        # no timestamp to retrieve
        return

    sdlog.info("SDENQUEU-004","Retrieving timestamp for %i dataset(s)."%dataset_count)

    # error codes that are only logged; any other code is fatal
    non_fatal_codes=['SDTIMEST-011','SDTIMEST-008','SDTIMEST-800']

    for dataset_without_timestamp in recent_datasets_without_timestamp:
        try:
            sdtimestamp.fill_missing_dataset_timestamp(dataset_without_timestamp)
        except SDException as e: # 'as' form is valid on Python 2.6+ and 3
            if e.code not in non_fatal_codes:
                # fatal error come here
                raise

            sdlog.info("SDENQUEU-909","Timestamp not set for dataset (reason=%s,dataset=%s)"%(e.code,dataset_without_timestamp.dataset_functional_id))
Пример #16
0
def update_incomplete_datasets_status():
    """
    Refresh the status flag for datasets with an incomplete status.

    Removing error and waiting transfers (e.g. with the 'synda reset' func)
    leaves the dataset status incorrect: it remains on 'empty' or
    'in-progress' whereas all transfers are now 'done'. This func fixes that
    problem.

    Notes
        - This func leaves the 'latest' flag alone.
        - This func is much like the 'update_complete_datasets_status' func,
          but runs faster because it skips complete datasets (which are the
          largest part of all the datasets).

    TODO
        Also handle the 'latest' flag in this func
    """
    sdlog.info("SYDDFLAG-182","Update incomplete datasets status")

    # 'empty' datasets first, then 'in-progress' ones
    incomplete_datasets=sddatasetdao.get_datasets(status=sdconst.DATASET_STATUS_EMPTY)
    incomplete_datasets=incomplete_datasets+sddatasetdao.get_datasets(status=sdconst.DATASET_STATUS_IN_PROGRESS)

    update_datasets_status_HELPER(incomplete_datasets)
Пример #17
0
def set_timestamp_when_empty__BATCH_MODE_2(project='CMIP5'):
    """
    Retrieve datasets from local database, then retrieve datasets from ESGF, then update local timestamp.

    The datasets processed are those of the given project whose timestamp is
    not set; ``sdtimestamp.fill_missing_dataset_timestamp`` is called on each
    of them.

    Arguments
        project: project used to filter the datasets (default 'CMIP5').

    Raises
        SDException, when ``fill_missing_dataset_timestamp`` fails with a
        code other than 'SDTIMEST-011', 'SDTIMEST-008' or 'SDTIMEST-800'
        (those three are logged and the dataset is skipped).
    """
    # retrieve datasets with timestamp not set
    datasets_without_timestamp = sddatasetdao.get_datasets(
        project=project, timestamp=None)

    sdlog.info(
        "SDREBUIL-004",
        "Updating %i dataset(s) timestamp." % len(datasets_without_timestamp))

    # error codes that are only logged; any other code is fatal
    non_fatal_codes = ['SDTIMEST-011', 'SDTIMEST-008', 'SDTIMEST-800']

    for dataset_without_timestamp in datasets_without_timestamp:
        try:
            sdtimestamp.fill_missing_dataset_timestamp(
                dataset_without_timestamp)
        except SDException as e:  # 'as' form is valid on Python 2.6+ and 3
            if e.code not in non_fatal_codes:
                # fatal error come here
                raise

            sdlog.info(
                "SDREBUIL-694",
                "Timestamp not set for dataset (reason=%s,dataset=%s)" %
                (e.code, dataset_without_timestamp.dataset_functional_id))
Пример #18
0
def get_old_versions_datasets():
    """Return the list of datasets that are old versions.

    A dataset is selected when all of the following hold: its 'latest' flag
    equals False, one of its versions has the 'latest' flag set to True, and
    it is not higher than that latest version.

    Raises
        SDException (code SDSTAT-042) if a selected dataset nevertheless has
        the most recent version number.
    """
    selected = []

    for d in sddatasetdao.get_datasets():
        # retrieve all the versions of the dataset
        dataset_versions = sddatasetquery.get_dataset_versions(d, True)

        # The explicit '== False' is kept (instead of 'not d.latest') so that
        # a value such as None, which does not compare equal to False, is
        # still skipped.
        if not (d.latest == False):
            continue  # this version is the latest

        if not dataset_versions.exists_version_with_latest_flag_set_to_true():
            continue  # there is no latest

        if dataset_versions.is_version_higher_than_latest(d):
            continue  # version is higher than latest

        # assert: should never occur because of the previous tests
        if dataset_versions.is_most_recent_version_number(d):
            error_message = (
                "fatal error (version=%s,path_without_version=%s)"
                % (d.version, d.get_name_without_version()))
            raise SDException("SDSTAT-042", error_message)

        selected.append(d)

    return selected