Example #1
def get_datasets(
    stream=None,
    parameter=None,
    post_pipeline_mode='dataset',
    dry_run=False
):  # TODO: maybe remove the parameter argument everywhere, as there is a mess in get_selection_file_buffer because of default/forced parameters (i.e. len(parameter) is non-zero even if no parameter args are set on the CLI!)

    if parameter is None:
        parameter = []

    assert (stream is None) or (
        len(parameter) < 1
    )  # this is to prevent using stream and parameter together
    assert post_pipeline_mode != 'file'

    if len(parameter) > 0:
        sddeferredbefore.add_forced_parameter(parameter, 'type', 'Dataset')
    elif stream is not None:
        sddeferredbefore.add_forced_parameter(stream, 'type', 'Dataset')

    result = sdquicksearch.run(stream=stream,
                               parameter=parameter,
                               post_pipeline_mode=post_pipeline_mode,
                               dry_run=dry_run)
    return result.get_files()
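A minimal call-site sketch for get_datasets above; it assumes the surrounding Synda modules (sdquicksearch, sddeferredbefore) are importable, and the facet constraints shown are purely illustrative.

# Hypothetical usage of get_datasets (Example #1). The facet values below are
# illustrative; 'stream' and 'parameter' are mutually exclusive, as enforced by
# the assert in the function body.
parameter = ['project=CMIP5', 'model=CNRM-CM5', 'variable=tas']
datasets = get_datasets(parameter=parameter, dry_run=False)
for dataset in datasets:
    print(dataset)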
Example #2
def get_files(stream=None,parameter=[],post_pipeline_mode='file',dry_run=False): # TODO: maybe remove the parameter argument everywhere, as there is a mess in get_selection_file_buffer because of default/forced parameters (i.e. len(parameter) is non-zero even if no parameter args are set on the CLI!)

    assert (stream is None) or (len(parameter)<1) # this is to prevent using stream and parameter together

    if len(parameter)>0:
        sddeferredbefore.add_forced_parameter(parameter,'type','File')
    elif stream is not None:
        sddeferredbefore.add_forced_parameter(stream,'type','File')

    result=sdquicksearch.run(stream=stream,parameter=parameter,post_pipeline_mode=post_pipeline_mode,dry_run=dry_run)
    return result.files
Example #3
def get_files(stream=None,parameter=[],dry_run=False):

    assert (stream is None) or (len(parameter)<1) # this is to prevent using stream and parameter together

    if len(parameter)>0:
        sddeferredbefore.add_forced_parameter(parameter,'type','File')
    elif stream is not None:
        sddeferredbefore.add_forced_parameter(stream,'type','File')

    files=sdlsearch.run(stream=stream,parameter=parameter,dry_run=dry_run)

    return files
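Examples #2 and #3 keep the mutable default parameter=[], which Python evaluates once and shares across calls; Example #1 already switched to a None sentinel. A minimal sketch of that safer signature applied to the file variant, shown as an assumption rather than the project's actual code:

def get_files(stream=None, parameter=None, dry_run=False):
    # Use a None sentinel instead of a mutable default list,
    # so the list is not shared between calls.
    if parameter is None:
        parameter = []

    assert (stream is None) or (len(parameter) < 1)  # stream and parameter are mutually exclusive

    if len(parameter) > 0:
        sddeferredbefore.add_forced_parameter(parameter, 'type', 'File')
    elif stream is not None:
        sddeferredbefore.add_forced_parameter(stream, 'type', 'File')

    return sdlsearch.run(stream=stream, parameter=parameter, dry_run=dry_run)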
Example #4
def run(stream=None,
        path=None,
        parameter=[],
        index_host=None,
        dry_run=False,
        type_=sdconst.SA_TYPE_DATASET):

    # type management
    if stream is not None:
        sddeferredbefore.add_forced_parameter(stream, 'type', type_)
    else:

        # if stream is None, we assume 'parameter' mode
        # (see TAGJFJ4R4JKFFJD for more information)
        sddeferredbefore.add_forced_parameter(parameter, 'type', type_)

    queries = sdpipeline.build_queries(stream=stream,
                                       path=path,
                                       parameter=parameter,
                                       index_host=index_host,
                                       parallel=False,
                                       load_default=False,
                                       count=True)

    if len(queries) < 1:
        raise SDException("SDQSEARC-001", "No query to process")

    # we don't support multiple queries because of duplicates/intersections between queries
    # (i.e. it is unclear which query's num_found attribute to use)
    if len(queries) > 1:
        raise SDException(
            "SDQSEARC-100",
            "Too much query (multi-query is not allowed in this module, use sdquicksearch instead)"
        )

    query = queries[0]

    if dry_run:
        request = sdtypes.Request(url=query['url'], pagination=False)

        print '%s' % request.get_url()

        # debug
        #print 'Url: %s'%request.get_url()
        #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        return ws_call(query)  # return Response object
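A usage sketch for the dry_run branch of run() above, assuming this module and its sdpipeline/sdtypes/sdconst dependencies are importable; the index host and facets are hypothetical values.

# Dry run: build the single query and print its URL without calling the index.
response = run(parameter=['project=CMIP5', 'variable=tas'],
               index_host='esgf-index.example.org',  # hypothetical index host
               dry_run=True,
               type_=sdconst.SA_TYPE_FILE)
# With dry_run=True an empty sdtypes.Response() is returned, so there is
# nothing to iterate over; without it, ws_call(query) returns the real Response.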
Example #5
def set_stream_type(args):
    import sddeferredbefore

    # Set the stream type (aka search-API 'type').
    #
    # Note that args.type_ is NOT the same thing as the stream type (aka
    # search-API type). args.type_ is only used locally to format the
    # listing presented to the user, while the stream type is the one sent
    # to the ESGF service to retrieve data. For example,
    # SA_TYPE_AGGREGATION is used by args.type_ to make some changes in
    # the output, but the search-API doesn't know about this type (i.e. for
    # most projects, you can't list anything by using this type). Also,
    # most modules of Synda behave the same way as the search-API: they
    # don't know about SA_TYPE_AGGREGATION. SA_TYPE_AGGREGATION is ONLY
    # used in Synda upstream code to make some local display
    # modifications.
    #
    # So what we do here is choose which search-API type we need
    # (dataset or file) for the listing type asked for by the user (i.e.
    # variable, dataset, or file).
    #
    # But note that in most cases the search-API 'type' will be overridden
    # later anyway, as it is forced in dedicated modules (e.g. in
    # sdrdataset, sdrfile, etc.).
    #
    # Also note that we 'force' (i.e. not just 'default') the parameter here,
    # to prevent the user from setting it. We do this because if the user uses
    # the '-f' option with type=Dataset, the display type would not match the
    # type of data fetched from the search-API.
    #
    if args.type_ in (sdconst.SA_TYPE_AGGREGATION, sdconst.SA_TYPE_DATASET):
        sddeferredbefore.add_forced_parameter(args.stream, 'type',
                                              sdconst.SA_TYPE_DATASET)
    elif args.type_ in (sdconst.SA_TYPE_FILE, ):
        sddeferredbefore.add_forced_parameter(args.stream, 'type',
                                              sdconst.SA_TYPE_FILE)
    else:
        raise sdexception.SDException('SDASYNDA-001',
                                      'Unknown type (%s)' % args.type_)
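The if/elif ladder above maps the user's listing type onto one of the two search-API types. The same decision can be expressed as a lookup table; this is only an illustrative rewrite of Example #5, not the project's code:

def set_stream_type(args):
    import sddeferredbefore

    # Map the local listing type (args.type_) onto the search-API 'type'
    # actually sent to the ESGF index.
    sa_type_by_listing_type = {
        sdconst.SA_TYPE_AGGREGATION: sdconst.SA_TYPE_DATASET,
        sdconst.SA_TYPE_DATASET: sdconst.SA_TYPE_DATASET,
        sdconst.SA_TYPE_FILE: sdconst.SA_TYPE_FILE,
    }
    try:
        sa_type = sa_type_by_listing_type[args.type_]
    except KeyError:
        raise sdexception.SDException('SDASYNDA-001', 'Unknown type (%s)' % args.type_)

    # 'force' (not just 'default') the parameter so the user cannot override it.
    sddeferredbefore.add_forced_parameter(args.stream, 'type', sa_type)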
Example #6
def set_stream_type(args):
    import sddeferredbefore

    # Set the stream type (aka search-API 'type').
    #
    # Note that args.type_ is NOT the same thing as the stream type (aka
    # search-API type). args.type_ is only used locally to format the
    # listing presented to the user, while the stream type is the one sent
    # to the ESGF service to retrieve data. For example,
    # SA_TYPE_AGGREGATION is used by args.type_ to make some changes in
    # the output, but the search-API doesn't know about this type (i.e. for
    # most projects, you can't list anything by using this type). Also,
    # most modules of Synda behave the same way as the search-API: they
    # don't know about SA_TYPE_AGGREGATION. SA_TYPE_AGGREGATION is ONLY
    # used in Synda upstream code to make some local display
    # modifications.
    #
    # So what we do here is choose which search-API type we need
    # (dataset or file) for the listing type asked for by the user (i.e.
    # variable, dataset, or file).
    #
    # But note that in most cases the search-API 'type' will be overridden
    # later anyway, as it is forced in dedicated modules (e.g. in
    # sdrdataset, sdrfile, etc.).
    #
    # Also note that we 'force' (i.e. not just 'default') the parameter here,
    # to prevent the user from setting it. We do this because if the user uses
    # the '-f' option with type=Dataset, the display type would not match the
    # type of data fetched from the search-API.
    #
    if args.type_ in (sdconst.SA_TYPE_AGGREGATION,sdconst.SA_TYPE_DATASET):
        sddeferredbefore.add_forced_parameter(args.stream,'type',sdconst.SA_TYPE_DATASET)
    elif args.type_ in (sdconst.SA_TYPE_FILE,):
        sddeferredbefore.add_forced_parameter(args.stream,'type',sdconst.SA_TYPE_FILE)
    else:
        from sdexception import SDException
        raise SDException('SDASYNDA-001','Unknown type (%s)'%args.type_)
Example #7
def run(stream=None,path=None,parameter=[],index_host=None,dry_run=False,type_=sdconst.SA_TYPE_DATASET):


    # type management
    if stream is not None:
        sddeferredbefore.add_forced_parameter(stream,'type',type_)
    else:

        # if stream is None, we assume 'parameter' mode
        # (see TAGJFJ4R4JKFFJD for more information)
        sddeferredbefore.add_forced_parameter(parameter,'type',type_)


    queries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,index_host=index_host,parallel=False,load_default=False,count=True)

    if len(queries)<1:
        raise SDException("SDQSEARC-001","No query to process")

    # we don't support multiple queries because of duplicates/intersections between queries
    # (i.e. it is unclear which query's num_found attribute to use)
    if len(queries)>1:
        raise SDException("SDQSEARC-100","Too much query (multi-query is not allowed in this module, use sdquicksearch instead)")

    query=queries[0]

    if dry_run:
        request=sdtypes.Request(url=query['url'],pagination=False)

        print '%s'%request.get_url()

        # debug
        #print 'Url: %s'%request.get_url()
        #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        return ws_call(query) # return Response object
Example #8
            #
            # So what we do here is choose which search-API type we need
            # (dataset or file) for the listing type asked for by the user
            # (i.e. variable, dataset, or file).
            #
            # But note that in most cases the search-API 'type' will be
            # overridden later anyway, as it is forced in dedicated modules
            # (e.g. in sdrdataset, sdrfile, etc.).
            #
            # Also note that we 'force' (i.e. not just 'default') the
            # parameter here, to prevent the user from setting it. We do this
            # because if the user uses the '-f' option with type=Dataset, the
            # display type would not match the type of data fetched from the
            # search-API.
            #
            if args.type_ in (sdconst.SA_TYPE_AGGREGATION,sdconst.SA_TYPE_DATASET):
                sddeferredbefore.add_forced_parameter(stream,'type',sdconst.SA_TYPE_DATASET)
            elif args.type_ in (sdconst.SA_TYPE_FILE,):
                sddeferredbefore.add_forced_parameter(stream,'type',sdconst.SA_TYPE_FILE)
            else:
                from sdexception import SDException
                raise SDException('SDASYNDA-001','Unknown type (%s)'%args.type_)

            args.stream=stream # hack: pass 'stream' object downstream as a standalone argument (not inside args)

            import sdtsaction
            sdtsaction.actions[args.action](args)

        elif args.action in ['remove','install','stat']:
            # those actions systematically trigger full search (i.e. limit keyword cannot be used here)

            # check
Example #9
def pexec(args):
    import sdsearch, sdpporder, sddb, syndautils, sdconst, sdpostpipelineutils, sdhistorydao, sddeferredbefore, sddomainutils

    if args.order_name=='cdf':
        selection_filename=None

        # use search-api operator to build datasets list
        stream=syndautils.get_stream(subcommand=args.subcommand,selection_file=args.selection_file,no_default=args.no_default)
        sddeferredbefore.add_forced_parameter(stream,'type','Dataset')

        dataset_found_count=0
        order_variable_count=0
        order_dataset_count=0
        for facets_group in stream: # we need to process each facets_group one by one because of TAG45345JK3J53K
            
            metadata=sdsearch.run(stream=[facets_group],post_pipeline_mode='dataset') # TAGJ43KJ234JK

            dataset_found_count+=metadata.count()

            if metadata.count() > 0:

                # WART
                # (gets overwritten at each iteration, but not a big deal as it is always the same value)
                if selection_filename is None: # keep the first value found (i.e. if the last facets_group is empty but previous ones are not, don't keep the last value, which would be None)

                    dataset=metadata.get_one_file()
                    selection_filename=sdpostpipelineutils.get_attached_parameter__global([dataset],'selection_filename') # note that if no files are found at all for this selection (no matter the status), then the filename will be blank

                for d in metadata.get_files(): # warning: load list in memory
                    if d['status']==sdconst.DATASET_STATUS_COMPLETE:

                        # TAG45J4K45JK

                        # first, send cdf variable order
                        # (note: total number of variable event is given by: "total+=#variable for each ds")
                        for v in d['variable']:
                            if v in facets_group['variable']: # TAG45345JK3J53K (we check here that the variable has been asked for in the first place)
                                order_variable_count+=1

                                # hack
                                if sddomainutils.is_one_var_per_ds(d['project']): # maybe move this test at TAG45J4K45JK line, and replace 'EVENT_CDF_VARIABLE_O' by a dataset level event (note however that the choice about passing 'EVENT_CDF_VARIABLE_O' event as variable or dataset is arbitrary, both work. But passing as variable is a bit strange as variable appears in both dataset_pattern and variable columns)
                                    e_names=[sdconst.EVENT_CDF_INT_VARIABLE_O, sdconst.EVENT_CDF_COR_VARIABLE_O]

                                    # this case is a bit awkward as we have 'variable' in both dataset_pattern and variable columns..

                                else:
                                    e_names=[sdconst.EVENT_CDF_INT_VARIABLE_N, sdconst.EVENT_CDF_COR_VARIABLE_N]

                                for e_name in e_names:
                                    sdpporder.submit(e_name,d['project'],d['model'],d['local_path'],variable=v,commit=False)

                        # second, send cdf dataset order
                        if d['project'] in sdconst.PROJECT_WITH_ONE_VARIABLE_PER_DATASET:

                            # we do not trigger 'dataset' level event in this case
                            pass
                        else:                        

                            order_dataset_count+=1

                            e_names=[sdconst.EVENT_CDF_INT_DATASET, sdconst.EVENT_CDF_COR_DATASET]
                            for e_name in e_names:
                                    sdpporder.submit(e_name,d['project'],d['model'],d['local_path'],commit=False)

        sddb.conn.commit()

        if dataset_found_count>0:
            if order_dataset_count==0 and order_variable_count==0:
                print_stderr("Data not ready (data must be already downloaded before performing pexec task): operation cancelled")   
            else:
                sdhistorydao.add_history_line(sdconst.ACTION_PEXEC,selection_filename)

                print_stderr("Post-processing task successfully submitted (order_dataset_count=%d,order_variable_count=%d)"%(order_dataset_count,order_variable_count))
        else:
            print_stderr('Data not found')

    elif args.order_name=='cds':
        selection_filename = None

        # use search-api operator to build datasets list
        stream = syndautils.get_stream(subcommand=args.subcommand, selection_file=args.selection_file, no_default=args.no_default)
        sddeferredbefore.add_forced_parameter(stream, 'type', 'Dataset')

        dataset_found_count = 0
        order_variable_count = 0
        for facets_group in stream:  # we need to process each facets_group one by one because of TAG45345JK3J53K

            metadata = sdsearch.run(stream=[facets_group], post_pipeline_mode='dataset')  # TAGJ43KJ234JK

            dataset_found_count += metadata.count()

            if metadata.count() > 0:

                # WART
                # (gets overwritten at each iteration, but not a big deal as it is always the same value)
                if selection_filename is None:  # keep the first value found (i.e. if the last facets_group is empty but previous ones are not, don't keep the last value, which would be None)

                    dataset = metadata.get_one_file()
                    selection_filename = sdpostpipelineutils.get_attached_parameter__global([dataset], 'selection_filename')  # note that if no files are found at all for this selection (no matter the status), then the filename will be blank

                for d in metadata.get_files():  # warning: load list in memory
                    if d['status'] == sdconst.DATASET_STATUS_COMPLETE:

                        # TAG45J4K45JK

                        # send cds variable order
                        # (note: total number of variable event is given by: "total+=#variable for each ds")
                        for v in d['variable']:
                            if v in facets_group['variable']:  # TAG45345JK3J53K (we check here that the variable has been asked for in the first place)
                                order_variable_count += 1
                                sdpporder.submit(sdconst.EVENT_CDS_VARIABLE, d['project'], d['model'], d['local_path'], variable=v, commit=False)

        sddb.conn.commit()

        if dataset_found_count > 0:
            if order_variable_count == 0:
                print_stderr("Data not ready (data must be already downloaded before performing pexec task): operation cancelled")
            else:
                sdhistorydao.add_history_line(sdconst.ACTION_PEXEC, selection_filename)

                print_stderr(
                    "Post-processing task successfully submitted (order_variable_count=%d)" % (order_variable_count))
        else:
            print_stderr('Data not found')

    else:
        print_stderr("Invalid order name ('%s')"%args.order_name)
        return 1

    return 0
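A minimal invocation sketch for pexec above, assuming the Synda modules it imports are installed; the attributes on the args namespace mirror what the function reads, and their values here are illustrative only.

import argparse

# Hypothetical args namespace with the attributes pexec() reads.
args = argparse.Namespace(
    order_name='cdf',                    # 'cdf' or 'cds'
    subcommand='pexec',
    selection_file='my_selection.txt',   # illustrative selection file
    no_default=False,
)
rc = pexec(args)  # returns 0 on success, 1 for an invalid order name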
Example #10
def pexec(args):
    import sdsearch, sdpporder, sddb, syndautils, sdconst, sdpostpipelineutils, sdhistorydao, sddeferredbefore, sddomainutils

    if args.order_name == 'cdf':
        selection_filename = None

        # use search-api operator to build datasets list
        stream = syndautils.get_stream(subcommand=args.subcommand,
                                       selection_file=args.selection_file,
                                       no_default=args.no_default)
        sddeferredbefore.add_forced_parameter(stream, 'type', 'Dataset')

        dataset_found_count = 0
        order_variable_count = 0
        order_dataset_count = 0
        for facets_group in stream:  # we need to process each facets_group one by one because of TAG45345JK3J53K

            metadata = sdsearch.run(
                stream=[facets_group],
                post_pipeline_mode='dataset')  # TAGJ43KJ234JK

            dataset_found_count += metadata.count()

            if metadata.count() > 0:

                # WART
                # (gets overwritten at each iteration, but not a big deal as it is always the same value)
                if selection_filename is None:  # keep the first value found (i.e. if the last facets_group is empty but previous ones are not, don't keep the last value, which would be None)

                    dataset = metadata.get_one_file()
                    selection_filename = sdpostpipelineutils.get_attached_parameter__global(
                        [dataset], 'selection_filename'
                    )  # note that if no files are found at all for this selection (no matter the status), then the filename will be blank

                for d in metadata.get_files():  # warning: load list in memory
                    if d['status'] == sdconst.DATASET_STATUS_COMPLETE:

                        # TAG45J4K45JK

                        # first, send cdf variable order
                        # (note: total number of variable event is given by: "total+=#variable for each ds")
                        for v in d['variable']:
                            if v in facets_group[
                                    'variable']:  # TAG45345JK3J53K (we check here that the variable has been asked for in the first place)
                                order_variable_count += 1

                                # hack
                                if sddomainutils.is_one_var_per_ds(
                                        d['project']
                                ):  # maybe move this test at TAG45J4K45JK line, and replace 'EVENT_CDF_VARIABLE_O' by a dataset level event (note however that the choice about passing 'EVENT_CDF_VARIABLE_O' event as variable or dataset is arbitrary, both work. But passing as variable is a bit strange as variable appears in both dataset_pattern and variable columns)
                                    e_names = [
                                        sdconst.EVENT_CDF_INT_VARIABLE_O,
                                        sdconst.EVENT_CDF_COR_VARIABLE_O
                                    ]

                                    # this case is a bit awkward as we have 'variable' in both dataset_pattern and variable columns..

                                else:
                                    e_names = [
                                        sdconst.EVENT_CDF_INT_VARIABLE_N,
                                        sdconst.EVENT_CDF_COR_VARIABLE_N
                                    ]

                                for e_name in e_names:
                                    sdpporder.submit(e_name,
                                                     d['project'],
                                                     d['model'],
                                                     d['local_path'],
                                                     variable=v,
                                                     commit=False)

                        # second, send cdf dataset order
                        if d['project'] in sdconst.PROJECT_WITH_ONE_VARIABLE_PER_DATASET:

                            # we do not trigger 'dataset' level event in this case
                            pass
                        else:

                            order_dataset_count += 1

                            e_names = [
                                sdconst.EVENT_CDF_INT_DATASET,
                                sdconst.EVENT_CDF_COR_DATASET
                            ]
                            for e_name in e_names:
                                sdpporder.submit(e_name,
                                                 d['project'],
                                                 d['model'],
                                                 d['local_path'],
                                                 commit=False)

        sddb.conn.commit()

        if dataset_found_count > 0:
            if order_dataset_count == 0 and order_variable_count == 0:
                print_stderr(
                    "Data not ready (data must be already downloaded before performing pexec task): operation cancelled"
                )
            else:
                sdhistorydao.add_history_line(sdconst.ACTION_PEXEC,
                                              selection_filename)

                print_stderr(
                    "Post-processing task successfully submitted (order_dataset_count=%d,order_variable_count=%d)"
                    % (order_dataset_count, order_variable_count))
        else:
            print_stderr('Data not found')

    elif args.order_name == 'cds':
        selection_filename = None

        # use search-api operator to build datasets list
        stream = syndautils.get_stream(subcommand=args.subcommand,
                                       selection_file=args.selection_file,
                                       no_default=args.no_default)
        sddeferredbefore.add_forced_parameter(stream, 'type', 'Dataset')

        dataset_found_count = 0
        order_variable_count = 0
        for facets_group in stream:  # we need to process each facets_group one by one because of TAG45345JK3J53K

            metadata = sdsearch.run(
                stream=[facets_group],
                post_pipeline_mode='dataset')  # TAGJ43KJ234JK

            dataset_found_count += metadata.count()

            if metadata.count() > 0:

                # WART
                # (gets overwritten at each iteration, but not a big deal as it is always the same value)
                if selection_filename is None:  # keep the first value found (i.e. if the last facets_group is empty but previous ones are not, don't keep the last value, which would be None)

                    dataset = metadata.get_one_file()
                    selection_filename = sdpostpipelineutils.get_attached_parameter__global(
                        [dataset], 'selection_filename'
                    )  # note that if no files are found at all for this selection (no matter the status), then the filename will be blank

                for d in metadata.get_files():  # warning: load list in memory
                    if d['status'] == sdconst.DATASET_STATUS_COMPLETE:

                        # TAG45J4K45JK

                        # send cds variable order
                        # (note: total number of variable event is given by: "total+=#variable for each ds")
                        for v in d['variable']:
                            if v in facets_group[
                                    'variable']:  # TAG45345JK3J53K (we check here that the variable has been asked for in the first place)
                                order_variable_count += 1
                                sdpporder.submit(sdconst.EVENT_CDS_VARIABLE,
                                                 d['project'],
                                                 d['model'],
                                                 d['local_path'],
                                                 variable=v,
                                                 commit=False)

        sddb.conn.commit()

        if dataset_found_count > 0:
            if order_variable_count == 0:
                print_stderr(
                    "Data not ready (data must be already downloaded before performing pexec task): operation cancelled"
                )
            else:
                sdhistorydao.add_history_line(sdconst.ACTION_PEXEC,
                                              selection_filename)

                print_stderr(
                    "Post-processing task successfully submitted (order_variable_count=%d)"
                    % (order_variable_count))
        else:
            print_stderr('Data not found')

    else:
        print_stderr("Invalid order name ('%s')" % args.order_name)
        return 1

    return 0
Example #11
def force_type(stream,type_):
    import sddeferredbefore

    # we 'force' (i.e. we do not just set as 'default') the parameter here,
    # to prevent the user from setting it
    sddeferredbefore.add_forced_parameter(stream,'type',type_)
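A usage sketch for force_type, assuming a stream built with syndautils.get_stream() as in Example #9; the constant comes from the sdconst module used throughout these examples.

# Force the search-API 'type' on an existing stream so the user cannot override it.
stream = syndautils.get_stream(subcommand=args.subcommand,
                               selection_file=args.selection_file,
                               no_default=args.no_default)
force_type(stream, sdconst.SA_TYPE_DATASET)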