示例#1
0
def search(args):
    """Route the search request to the file/variable/dataset routine matching args.type_."""
    import sdearlystreamutils,sdstream

    if args.replica:
        # replica mode: keep replicas in the result and turn off nearest-host resolution
        import sddeferredafter
        sdstream.set_scalar(args.stream,'keep_replica','true')
        sddeferredafter.add_forced_parameter(args.stream,'nearest','false')

    # forward the optional timestamp boundaries as 'from'/'to' facets
    for facet,boundary in (('from',args.timestamp_left_boundary),('to',args.timestamp_right_boundary)):
        if boundary is not None:
            sdstream.set_scalar(args.stream,facet,boundary)

    if args.type_==sdconst.SA_TYPE_FILE:
        file_search(args)
    elif args.type_==sdconst.SA_TYPE_AGGREGATION:
        # HACK: projects with one variable per dataset reuse the dataset printing routine
        if sdearlystreamutils.is_one_variable_per_dataset_project(args.stream):
            dataset_search(args)
        else:
            variable_search(args)
    elif args.type_==sdconst.SA_TYPE_DATASET:
        dataset_search(args)
示例#2
0
def build_queries(stream=None,selection=None,path=None,parameter=None,index_host=None,load_default=None,query_type='remote',dry_run=False,parallel=True,count=False):
    """This pipeline adds 'path', 'parameter' and 'selection' input types to
    the standalone query pipeline.

    Args:
        stream: pre-built facet stream; when given, selection/path/parameter
            are not used to build it
        selection: selection object; built from path/parameter when None
        count: when True, force 'limit' to 0 so no file is returned

    Returns:
        squeries (Serialized queries) # TODO: maybe rename stream to dqueries
    """

    if parameter is None:
        parameter=[]

    if stream is None:

        if selection is None:
            # note: local renamed from 'buffer' to stop shadowing the builtin
            selection_buffer=sdbuffer.get_selection_file_buffer(path=path,parameter=parameter)
            selection=sdparse.build(selection_buffer,load_default=load_default)

        stream=selection.merge_facets()

    # at this point, stream contains all possible parameters sources (file,stdin,cli..)

    if count:
        # in this mode, we don't want to return any files, so we force limit to
        # 0 just in case this option has been set by the user

        sddeferredafter.add_forced_parameter(stream,'limit','0')

    queries=sdquerypipeline.run(stream,index_host=index_host,query_type=query_type,dry_run=dry_run,parallel=parallel)
    return queries
示例#3
0
def search(args):
    """Dispatch a search request to the routine matching args.type_.

    Args:
        args: parsed CLI arguments; must carry 'stream', 'replica', 'type_'
            and the optional timestamp boundary attributes.
    """
    import sdearlystreamutils, sdstream

    if args.replica:
        import sddeferredafter
        # keep replicas in the result set and disable nearest-host selection
        sdstream.set_scalar(args.stream, 'keep_replica', 'true')
        sddeferredafter.add_forced_parameter(args.stream, 'nearest', 'false')

    # timestamp filters
    if args.timestamp_left_boundary is not None:
        sdstream.set_scalar(args.stream, 'from', args.timestamp_left_boundary)
    if args.timestamp_right_boundary is not None:
        sdstream.set_scalar(args.stream, 'to', args.timestamp_right_boundary)

    if args.type_ == sdconst.SA_TYPE_FILE:
        file_search(args)
    elif args.type_ == sdconst.SA_TYPE_AGGREGATION:
        move_to_dataset_printing_routine = sdearlystreamutils.is_one_variable_per_dataset_project(
            args.stream)  # HACK
        if move_to_dataset_printing_routine:
            # one var exist per dataset for this project

            dataset_search(args)
        else:
            # many var exist per dataset for this project

            variable_search(args)

    elif args.type_ == sdconst.SA_TYPE_DATASET:
        dataset_search(args)
示例#4
0
def variable_search(args):
    import sddeferredafter, sdrdataset, sdrvariable

    sddeferredafter.add_default_parameter(args.stream,'limit',15) # note: in variable mode, total number of row is given by: "total+=#variable for each ds"
    sddeferredafter.add_forced_parameter(args.stream,'fields',variable_light_fields)

    datasets=sdrdataset.get_datasets(stream=args.stream,dry_run=args.dry_run)

    if len(datasets)==0:
        print "Variable not found"
    else:
        sdrvariable.print_list(datasets)
示例#5
0
def variable_search(args):
    import sddeferredafter, sdrdataset, sdrvariable, sdfields, sdearlystreamutils

    sddeferredafter.add_default_parameter(args.stream,'limit',args.limit) # TAGJ43JK3J43

    lpcme=sdearlystreamutils.test_facet_value_early(args.stream,'local_path_format','custom') # lpcme means 'Local Path Custom Mode Enabled'
    fields_=sdfields.get_all_variable_fields() if lpcme else sdfields.get_variable_light_fields()
    sddeferredafter.add_forced_parameter(args.stream,'fields',fields_)

    datasets=sdrdataset.get_datasets(stream=args.stream,dry_run=args.dry_run)

    if len(datasets)==0:
        print "Variable not found"
    else:
        sdrvariable.print_list(datasets,args.limit) # TAGJ43JK3J43
示例#6
0
def dataset_search(args):
    """Search for datasets and print the result list (no output in dry-run mode)."""
    import sddeferredafter, sdrdataset, sdstream

    sddeferredafter.add_default_parameter(args.stream,'limit',100) # add default limit
    sddeferredafter.add_forced_parameter(args.stream,'fields',dataset_light_fields)

    datasets=sdrdataset.get_datasets(stream=args.stream,dry_run=args.dry_run)

    if args.dry_run:
        return

    if not datasets:
        print_stderr('Dataset not found')
    elif args.replica:
        sdrdataset.print_replica_list(datasets)
    else:
        sdrdataset.print_list(datasets)
示例#7
0
def build_queries(stream=None,
                  selection=None,
                  path=None,
                  parameter=None,
                  index_host=None,
                  load_default=None,
                  query_type='remote',
                  dry_run=False,
                  parallel=True,
                  count=False):
    """This pipeline add 'path', 'parameter' and 'selection' input type to the
    standalone query pipeline.

    Returns:
        squeries (Serialized queries) # TODO: maybe rename stream to dqueries
    """

    parameter = [] if parameter is None else parameter

    if stream is None:
        # build the stream from the selection (itself possibly built from
        # path/parameter)
        if selection is None:
            raw = sdbuffer.get_selection_file_buffer(path=path,
                                                     parameter=parameter)
            selection = sdparse.build(raw, load_default=load_default)
        stream = selection.merge_facets()

    # stream now aggregates every parameter source (file, stdin, cli..)

    if count:
        # count mode must not return any file: force limit to 0 in case the
        # user set this option explicitly
        sddeferredafter.add_forced_parameter(stream, 'limit', '0')

    return sdquerypipeline.run(stream,
                               index_host=index_host,
                               query_type=query_type,
                               dry_run=dry_run,
                               parallel=parallel)
示例#8
0
def dataset_search(args):
    """Search datasets and print them; dry-run mode produces no listing."""
    import sddeferredafter, sdrdataset, sdstream, sdfields, sdearlystreamutils

    sddeferredafter.add_default_parameter(args.stream,'limit',args.limit)

    # 'Local Path Custom Mode Enabled': custom mode needs every dataset field
    if sdearlystreamutils.test_facet_value_early(args.stream,'local_path_format','custom'):
        selected_fields=sdfields.get_all_dataset_fields()
    else:
        selected_fields=sdfields.get_dataset_light_fields()
    sddeferredafter.add_forced_parameter(args.stream,'fields',selected_fields)

    datasets=sdrdataset.get_datasets(stream=args.stream,dry_run=args.dry_run)

    if args.dry_run:
        return

    if not datasets:
        print_stderr('Dataset not found')
    elif args.replica:
        sdrdataset.print_replica_list(datasets)
    else:
        sdrdataset.print_list(datasets)
示例#9
0
def variable_search(args):
    """Search for variables and print one row per matching variable.

    Prints "Variable not found" on stdout when no dataset matches.
    """
    import sddeferredafter, sdrdataset, sdrvariable, sdfields, sdearlystreamutils

    sddeferredafter.add_default_parameter(args.stream, 'limit',
                                          args.limit)  # TAGJ43JK3J43

    lpcme = sdearlystreamutils.test_facet_value_early(
        args.stream, 'local_path_format',
        'custom')  # lpcme means 'Local Path Custom Mode Enabled'
    # custom local-path mode needs every variable field, else the light set
    fields_ = sdfields.get_all_variable_fields(
    ) if lpcme else sdfields.get_variable_light_fields()
    sddeferredafter.add_forced_parameter(args.stream, 'fields', fields_)

    datasets = sdrdataset.get_datasets(stream=args.stream,
                                       dry_run=args.dry_run)

    if len(datasets) == 0:
        print "Variable not found"
    else:
        sdrvariable.print_list(datasets, args.limit)  # TAGJ43JK3J43
示例#10
0
def dump_ESGF(parameter=None,
              selection_file=None,
              fields=None,
              dry_run=False,
              playback=None,
              record=None,
              no_default=True,
              type_='Dataset'):
    """This func dumps fields for all ESGF matching files/datasets.

    Initially designed to batch update attribute in Synda database
    (e.g. when a new attribute is decided to be stored in Synda,
    all already downloaded files metadata must be updated).

    Args:
        fields: search-api fields to retrieve (mandatory).

    Raises:
        ValueError: if 'fields' is not provided.
    """
    # Fail fast: 'fields' is mandatory. Was an 'assert', which is stripped
    # under 'python -O' and would let None slip through; also checked before
    # building the stream so we do no work on invalid input.
    if fields is None:
        raise ValueError("'fields' argument is mandatory")

    stream = sdstreamutils.get_stream(parameter=parameter,
                                      selection_file=selection_file,
                                      no_default=no_default)

    # exclude replicas and pin the requested record type and field list
    sddeferredafter.add_forced_parameter(stream, 'replica', 'false')

    sddeferredafter.add_forced_parameter(stream, 'type', type_)

    sddeferredafter.add_forced_parameter(stream, 'fields', fields)

    metadata = sdsearch.run(stream=stream,
                            post_pipeline_mode=None,
                            dry_run=dry_run,
                            playback=playback,
                            record=record)
    return metadata.get_files()
示例#11
0
def search(args):
    """Dispatch the search to the file/dataset/variable routine for args.type_."""

    if args.replica:
        # replica mode: keep replicas and disable nearest-host selection
        import sdstream, sddeferredafter
        sdstream.set_scalar(args.stream,'keep_replica','true')
        sddeferredafter.add_forced_parameter(args.stream,'nearest','false')

    if args.type_==sdconst.SA_TYPE_FILE:
        file_search(args)
    elif args.type_==sdconst.SA_TYPE_AGGREGATION:
        # HACK: projects with a single variable per dataset are rendered with
        # the dataset printing routine
        if syndautils.is_one_variable_per_dataset_project(args):
            dataset_search(args)
        else:
            variable_search(args)
    elif args.type_==sdconst.SA_TYPE_DATASET:
        dataset_search(args)
示例#12
0
def dataset_search(args):
    """Search for datasets and display them; dry-run mode prints nothing."""
    import sddeferredafter, sdrdataset, sdstream, sdfields, sdearlystreamutils

    sddeferredafter.add_default_parameter(args.stream, 'limit', args.limit)

    # custom local-path mode requires the full field set
    # ('lpcme' = Local Path Custom Mode Enabled)
    lpcme = sdearlystreamutils.test_facet_value_early(args.stream,
                                                      'local_path_format',
                                                      'custom')
    if lpcme:
        fields_ = sdfields.get_all_dataset_fields()
    else:
        fields_ = sdfields.get_dataset_light_fields()
    sddeferredafter.add_forced_parameter(args.stream, 'fields', fields_)

    datasets = sdrdataset.get_datasets(stream=args.stream,
                                       dry_run=args.dry_run)

    if not args.dry_run:
        if not datasets:
            print_stderr('Dataset not found')
        else:
            printer = sdrdataset.print_replica_list if args.replica else sdrdataset.print_list
            printer(datasets)
示例#13
0
File: sddump.py  Project: Prodiguer/synda
def dump_ESGF(parameter=None,selection_file=None,fields=None,dry_run=False,playback=None,record=None,no_default=True,type_='Dataset'):
    """This func dumps fields for all ESGF matching files/datasets.

    Initially designed to batch update attribute in Synda database
    (e.g. when a new attribute is decided to be stored in Synda,
    all already downloaded files metadata must be updated).
    """
    stream=sdstreamutils.get_stream(parameter=parameter,selection_file=selection_file,no_default=no_default)

    # exclude replicas and pin the requested record type
    for name,value in (('replica','false'),('type',type_)):
        sddeferredafter.add_forced_parameter(stream,name,value)

    assert fields is not None
    sddeferredafter.add_forced_parameter(stream,'fields',fields)

    metadata=sdsearch.run(stream=stream,post_pipeline_mode=None,dry_run=dry_run,playback=playback,record=record)
    return metadata.get_files()
示例#14
0
def get(args):
    """Implement the 'synda get' subcommand: download ESGF files to a flat folder.

    Two modes, selected by the presence of a 'url' facet in the stream:
      - no url: query the search-api for matching files, then download them;
      - url(s): download the urls directly, bypassing the search-api operator.

    Returns:
        0 on success, 1 on error (conflicting options, missing OpenID,
        no matching file, or a failed download).
    """
    import sdlogon, sdrfile, sddeferredafter, sddirectdownload, syndautils, humanize, sdconfig, os, sdconst, sdearlystreamutils

    # hack
    # see TAG43534FSFS
    if args.quiet:
        args.verbosity=0

    # these two options are mutually exclusive
    if args.verify_checksum and args.network_bandwidth_test:
        print_stderr("'verify_checksum' option cannot be set when 'network_bandwidth_test' option is set.")
        return 1

    stream=syndautils.get_stream(subcommand=args.subcommand,parameter=args.parameter,selection_file=args.selection_file)


    if args.openid and args.password:
        # use credential from CLI

        oid=args.openid
        pwd=args.password
    else:
        # use credential from file

        if sdconfig.is_openid_set():
            oid=sdconfig.openid
            pwd=sdconfig.password
        else:
            print_stderr('Error: OpenID not set in configuration file (%s).'%sdconfig.credential_file)

            return 1

    # retrieve certificate
    sdlogon.renew_certificate(oid,pwd,force_renew_certificate=False)


    http_client=sdconst.HTTP_CLIENT_URLLIB if args.urllib2 else sdconst.HTTP_CLIENT_WGET

    # local_path
    #
    # 'synda get' subcommand currently force local_path to the following construct:
    # '<dest_folder>/<filename>' (i.e. you can't use DRS tree in-between). This may
    # change in the future.
    #
    if args.dest_folder is None:
        local_path_prefix=os.getcwd() # current working directory
    else:
        local_path_prefix=args.dest_folder

    # BEWARE
    #
    # when set in CLI parameter, url is usually an ESGF facet, and as so should
    # be sent to the search-api as other facets
    # BUT
    # we want a special behaviour here (i.e. with 'synda get' command) with url:
    # if url is set by user, we DON'T call search-api operator. Instead, we
    # download the url directly.

    urls=sdearlystreamutils.get_facet_values_early(stream,'url')
    if len(urls)==0:
        # no url in stream: switch to search-api operator mode

        sddeferredafter.add_default_parameter(stream,'limit',5)
        sddeferredafter.add_forced_parameter(stream,'local_path_format','notree')

        files=sdrfile.get_files(stream=stream,post_pipeline_mode='file',dry_run=args.dry_run) # yes: this is the second time we run sdinference filter, but it doesn't hurt as sdinference is idempotent

        if not args.dry_run:
            if len(files)>0:

                # compute metric
                total_size=sum(int(f['size']) for f in files)
                total_size=humanize.naturalsize(total_size,gnu=False)

                print_stderr('%i file(s) will be downloaded for a total size of %s.'%(len(files),total_size))

                status=sddirectdownload.run(files,
                                            args.timeout,
                                            args.force,
                                            http_client,
                                            local_path_prefix,
                                            verify_checksum=args.verify_checksum,
                                            network_bandwidth_test=args.network_bandwidth_test,
                                            debug=True,
                                            verbosity=args.verbosity,
                                            buffered=False,
                                            hpss=args.hpss)

                if status!=0:
                    return 1

            else:
                print_stderr("File not found")
                return 1
        else:
            # dry-run: just list the files that would be downloaded
            for f in files:
                size=humanize.naturalsize(f['size'],gnu=False)
                print '%-12s %s'%(size,f['filename'])

    elif len(urls)>0:
        # url(s) found in stream: search-api operator not needed (download url directly)

        # TAGDSFDF432F
        if args.verify_checksum:
            print_stderr("To perform checksum verification, ESGF file identifier (e.g. title, id, tracking id..)  must be used instead of file url.")
            return 1

        # TODO: to improve genericity, maybe merge this block into the previous one (i.e. url CAN be used as a search key in the search-api (but not irods url))

        # build minimal file dicts from the raw urls (flat local path)
        files=[]
        for url in urls:

            filename=os.path.basename(url)
            local_path=filename

            f=dict(local_path=local_path,url=url)

            files.append(f)

        status=sddirectdownload.run(files,
                                    args.timeout,
                                    args.force,
                                    http_client,
                                    local_path_prefix,
                                    verify_checksum=args.verify_checksum, # see above at TAGDSFDF432F
                                    network_bandwidth_test=args.network_bandwidth_test,
                                    debug=True,
                                    verbosity=args.verbosity,
                                    buffered=False,
                                    hpss=args.hpss)

        if status!=0:
            return 1

    else:
        # unreachable: len(urls) is either 0 or >0
        assert False

    return 0
示例#15
0
def get(args):
    """Implement the 'synda get' subcommand: download ESGF files to a flat folder.

    Two modes, selected by the presence of a 'url' facet in the stream:
      - no url: query the search-api for matching files, then download them;
      - url(s): download the urls directly, bypassing the search-api operator.

    Returns:
        0 on success, 1 on error (conflicting options, missing OpenID,
        no matching file, or a failed download).
    """
    import sdlogon, sdrfile, sddeferredafter, sddirectdownload, syndautils, humanize, sdconfig, os, sdconst, sdearlystreamutils

    # hack
    # see TAG43534FSFS
    if args.quiet:
        args.verbosity = 0

    # these two options are mutually exclusive
    if args.verify_checksum and args.network_bandwidth_test:
        print_stderr(
            "'verify_checksum' option cannot be set when 'network_bandwidth_test' option is set."
        )
        return 1

    stream = syndautils.get_stream(subcommand=args.subcommand,
                                   parameter=args.parameter,
                                   selection_file=args.selection_file)

    if args.openid and args.password:
        # use credential from CLI

        oid = args.openid
        pwd = args.password
    else:
        # use credential from file

        if sdconfig.is_openid_set():
            oid = sdconfig.openid
            pwd = sdconfig.password
        else:
            print_stderr('Error: OpenID not set in configuration file (%s).' %
                         sdconfig.credential_file)

            return 1

    # retrieve certificate
    sdlogon.renew_certificate(oid, pwd, force_renew_certificate=False)

    http_client = sdconst.HTTP_CLIENT_URLLIB if args.urllib2 else sdconst.HTTP_CLIENT_WGET

    # local_path
    #
    # 'synda get' subcommand currently force local_path to the following construct:
    # '<dest_folder>/<filename>' (i.e. you can't use DRS tree in-between). This may
    # change in the future.
    #
    if args.dest_folder is None:
        local_path_prefix = os.getcwd()  # current working directory
    else:
        local_path_prefix = args.dest_folder

    # BEWARE
    #
    # when set in CLI parameter, url is usually an ESGF facet, and as so should
    # be sent to the search-api as other facets
    # BUT
    # we want a special behaviour here (i.e. with 'synda get' command) with url:
    # if url is set by user, we DON'T call search-api operator. Instead, we
    # download the url directly.

    urls = sdearlystreamutils.get_facet_values_early(stream, 'url')
    if len(urls) == 0:
        # no url in stream: switch to search-api operator mode

        sddeferredafter.add_default_parameter(stream, 'limit', 5)
        sddeferredafter.add_forced_parameter(stream, 'local_path_format',
                                             'notree')

        files = sdrfile.get_files(
            stream=stream, post_pipeline_mode='file', dry_run=args.dry_run
        )  # yes: this is the second time we run sdinference filter, but it doesn't hurt as sdinference is idempotent

        if not args.dry_run:
            if len(files) > 0:

                # compute metric
                total_size = sum(int(f['size']) for f in files)
                total_size = humanize.naturalsize(total_size, gnu=False)

                print_stderr(
                    '%i file(s) will be downloaded for a total size of %s.' %
                    (len(files), total_size))

                status = sddirectdownload.run(
                    files,
                    args.timeout,
                    args.force,
                    http_client,
                    local_path_prefix,
                    verify_checksum=args.verify_checksum,
                    network_bandwidth_test=args.network_bandwidth_test,
                    debug=True,
                    verbosity=args.verbosity,
                    buffered=False,
                    hpss=args.hpss)

                if status != 0:
                    return 1

            else:
                print_stderr("File not found")
                return 1
        else:
            # dry-run: just list the files that would be downloaded
            for f in files:
                size = humanize.naturalsize(f['size'], gnu=False)
                print '%-12s %s' % (size, f['filename'])

    elif len(urls) > 0:
        # url(s) found in stream: search-api operator not needed (download url directly)

        # TAGDSFDF432F
        if args.verify_checksum:
            print_stderr(
                "To perform checksum verification, ESGF file identifier (e.g. title, id, tracking id..)  must be used instead of file url."
            )
            return 1

        # TODO: to improve genericity, maybe merge this block into the previous one (i.e. url CAN be used as a search key in the search-api (but not irods url))

        # build minimal file dicts from the raw urls (flat local path)
        files = []
        for url in urls:

            filename = os.path.basename(url)
            local_path = filename

            f = dict(local_path=local_path, url=url)

            files.append(f)

        status = sddirectdownload.run(
            files,
            args.timeout,
            args.force,
            http_client,
            local_path_prefix,
            verify_checksum=args.verify_checksum,  # see above at TAGDSFDF432F
            network_bandwidth_test=args.network_bandwidth_test,
            debug=True,
            verbosity=args.verbosity,
            buffered=False,
            hpss=args.hpss)

        if status != 0:
            return 1

    else:
        # unreachable: len(urls) is either 0 or >0
        assert False

    return 0
示例#16
0
        raise SDException("SDQSEARC-002","Number of returned files reach maximum limit")

    result.add_attached_parameters(query.get('attached_parameters',{}))
    return result

if __name__ == '__main__':
    # standalone CLI entry point: parse facets from the command line, run the
    # search and print either the matches or (with -c) just the match count
    prog=os.path.basename(__file__)
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, epilog="""examples of use\n%s"""%sdi18n.m0002(prog))

    parser.add_argument('parameter',nargs='+',help=sdi18n.m0001)

    parser.add_argument('-c','--count',action='store_true',help='Count how many found files')
    parser.add_argument('-f','--format',choices=['raw','line','indent'],default='indent')
    parser.add_argument('-i','--index_host')
    parser.add_argument('-m','--post_pipeline_mode',default='file')
    parser.add_argument('-y','--dry_run',action='store_true')
    parser.add_argument('-1','--print_only_one_item',action='store_true')

    args = parser.parse_args()

    if args.count:
        # in this mode, we don't want to return any files, so we force limit to 0 just in case this option has been set by the user
        sddeferredafter.add_forced_parameter(args.parameter,'limit','0')

    result=run(parameter=args.parameter,index_host=args.index_host,post_pipeline_mode=args.post_pipeline_mode,dry_run=args.dry_run)

    if args.count:
        # count-only mode: print the number of matches instead of the file list
        print "%i"%result.num_found
    else:
        sdprint.print_format(result.files,args.format,args.print_only_one_item)