Пример #1
0
def test_source_processing(api, args, resume,
                           name=None, csv_properties=None,
                           session_file=None, path=None, log=None):
    """Creating or retrieving a test data source from input arguments

    """
    test_source = None
    fields = None
    if csv_properties is None:
        csv_properties = {}
    if args.test_set and args.remote:
        # If resuming, try to extract args.source form log files
        if resume:
            message = u.dated("Test source not found. Resuming.\n")
            resume, args.test_source = c.checkpoint(
                c.is_source_created, path, suffix="_test", debug=args.debug,
                message=message, log_file=session_file, console=args.verbosity)

        if not resume:
            source_args = r.set_source_args(args, name=name,
                                            data_set_header=args.test_header)
            test_source = r.create_source(args.test_set, source_args, args,
                                          api, path, session_file, log,
                                          source_type="test")

    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.test_source:
        test_source = bigml.api.get_source_id(args.test_source)

    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if test_source:
        test_source = r.get_source(test_source, api, args.verbosity,
                                   session_file)
        if 'source_parser' in test_source['object']:
            source_parser = test_source['object']['source_parser']
            if 'missing_tokens' in source_parser:
                csv_properties['missing_tokens'] = (
                    source_parser['missing_tokens'])
            if 'locale' in source_parser:
                csv_properties['data_locale'] = source_parser['locale']
                if (args.user_locale is not None and
                        bigml_locale(args.user_locale) ==
                        source_parser['locale']):
                    args.user_locale = None

        fields = Fields(test_source['object']['fields'], **csv_properties)

        if (args.field_attributes_ or args.types_ or args.user_locale
                or args.json_args.get('source')):
            # avoid updating project_id in source
            project_id, args.project_id = args.project_id, None
            test_source_args = r.set_source_args(args, fields=fields)
            test_source = r.update_source(test_source, test_source_args, args,
                                          api, session_file)
            args.project_id = project_id
            fields = Fields(test_source['object']['fields'], **csv_properties)

    return test_source, resume, csv_properties, fields
Пример #2
0
def source_processing(
    api, args, resume, csv_properties=None, multi_label_data=None, session_file=None, path=None, log=None
):
    """Creating or retrieving a data source from input arguments

    """
    source = None
    fields = None
    if args.training_set or (hasattr(args, "evaluate") and args.evaluate and args.test_set):
        # If resuming, try to extract args.source form log files

        if resume:
            message = u.dated("Source not found. Resuming.\n")
            resume, args.source = c.checkpoint(
                c.is_source_created,
                path,
                debug=args.debug,
                message=message,
                log_file=session_file,
                console=args.verbosity,
            )

    # If neither a previous source, dataset or model are provided.
    # we create a new one. Also if --evaluate and test data are provided
    # we create a new dataset to test with.
    data_set, data_set_header = r.data_to_source(args)
    if data_set is not None:
        source_args = r.set_source_args(args, multi_label_data=multi_label_data, data_set_header=data_set_header)
        source = r.create_source(data_set, source_args, args, api, path, session_file, log)

    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.source:
        source = bigml.api.get_source_id(args.source)

    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if source:
        source = r.get_source(source, api, args.verbosity, session_file)
        if "source_parser" in source["object"]:
            source_parser = source["object"]["source_parser"]
            if "missing_tokens" in source_parser:
                csv_properties["missing_tokens"] = source_parser["missing_tokens"]
            if "locale" in source_parser:
                csv_properties["data_locale"] = source_parser["locale"]
                # No changes if user locale is the one in the source.
                if args.user_locale is not None and bigml_locale(args.user_locale) == source_parser["locale"]:
                    args.user_locale = None
        fields = Fields(source["object"]["fields"], **csv_properties)

        if args.field_attributes_ or args.types_ or args.user_locale or args.json_args.get("source"):
            source_args = r.set_source_args(args, fields=fields)
            source = r.update_source(source, source_args, args, api, session_file)
            fields = Fields(source["object"]["fields"], **csv_properties)

    return source, resume, csv_properties, fields
Пример #3
0
def test_source_processing(api,
                           args,
                           resume,
                           name=None,
                           csv_properties=None,
                           session_file=None,
                           path=None,
                           log=None):
    """Creating or retrieving a test data source from input arguments

    """
    test_source = None
    fields = None
    if csv_properties is None:
        csv_properties = {}
    if args.test_set and args.remote:
        # If resuming, try to extract args.source form log files
        if resume:
            message = u.dated("Test source not found. Resuming.\n")
            resume, args.test_source = c.checkpoint(c.is_source_created,
                                                    path,
                                                    suffix="_test",
                                                    debug=args.debug,
                                                    message=message,
                                                    log_file=session_file,
                                                    console=args.verbosity)

        if not resume:
            source_args = r.set_source_args(args,
                                            name=name,
                                            data_set_header=args.test_header)
            test_source = r.create_source(args.test_set,
                                          source_args,
                                          args,
                                          api,
                                          path,
                                          session_file,
                                          log,
                                          source_type="test")

    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.test_source:
        test_source = bigml.api.get_source_id(args.test_source)

    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if test_source:
        test_source = r.get_source(test_source, api, args.verbosity,
                                   session_file)
        if 'source_parser' in test_source['object']:
            source_parser = test_source['object']['source_parser']
            if 'missing_tokens' in source_parser:
                csv_properties['missing_tokens'] = (
                    source_parser['missing_tokens'])
            if 'locale' in source_parser:
                csv_properties['data_locale'] = source_parser['locale']
                if (args.user_locale is not None and bigml_locale(
                        args.user_locale) == source_parser['locale']):
                    args.user_locale = None

        fields = Fields(test_source['object']['fields'], **csv_properties)

        if (args.field_attributes_ or args.types_ or args.user_locale
                or args.json_args.get('source')):
            # avoid updating project_id in source
            project_id, args.project_id = args.project_id, None
            test_source_args = r.set_source_args(args, fields=fields)
            test_source = r.update_source(test_source, source_args, args, api,
                                          session_file)
            args.project_id = project_id
            fields = Fields(source['object']['fields'], **csv_properties)

    return test_source, resume, csv_properties, fields
Пример #4
0
def source_processing(api,
                      args,
                      resume,
                      csv_properties=None,
                      multi_label_data=None,
                      session_file=None,
                      path=None,
                      log=None):
    """Creating or retrieving a data source from input arguments

    """
    source = None
    fields = None
    if (args.training_set or
        (hasattr(args, "evaluate") and args.evaluate and args.test_set)):
        # If resuming, try to extract args.source form log files

        if resume:
            message = u.dated("Source not found. Resuming.\n")
            resume, args.source = c.checkpoint(c.is_source_created,
                                               path,
                                               debug=args.debug,
                                               message=message,
                                               log_file=session_file,
                                               console=args.verbosity)

    # If neither a previous source, dataset or model are provided.
    # we create a new one. Also if --evaluate and test data are provided
    # we create a new dataset to test with.
    data_set, data_set_header = r.data_to_source(args)
    if data_set is not None:
        # Check if there's a created project for it
        args.project_id = pp.project_processing(api,
                                                args,
                                                resume,
                                                session_file=session_file,
                                                path=path,
                                                log=log)
        source_args = r.set_source_args(args,
                                        multi_label_data=multi_label_data,
                                        data_set_header=data_set_header)
        source = r.create_source(data_set, source_args, args, api, path,
                                 session_file, log)

    # If a source is provided either through the command line or in resume
    # steps, we use it.
    elif args.source:
        source = bigml.api.get_source_id(args.source)

    # If we already have source, we check that is finished, extract the
    # fields, and update them if needed.
    if source:
        source = r.get_source(source, api, args.verbosity, session_file)
        if 'source_parser' in source['object']:
            source_parser = source['object']['source_parser']
            if 'missing_tokens' in source_parser:
                csv_properties['missing_tokens'] = (
                    source_parser['missing_tokens'])
            if 'locale' in source_parser:
                csv_properties['data_locale'] = source_parser['locale']
                # No changes if user locale is the one in the source.
                if (args.user_locale is not None and bigml_locale(
                        args.user_locale) == source_parser['locale']):
                    args.user_locale = None
        fields = Fields(source['object']['fields'], **csv_properties)

        if (args.field_attributes_ or args.types_ or args.user_locale
                or args.json_args.get('source')):
            # avoid updating project_id in source
            project_id, args.project_id = args.project_id, None
            source_args = r.set_source_args(args, fields=fields)
            source = r.update_source(source, source_args, args, api,
                                     session_file)
            args.project_id = project_id
            fields = Fields(source['object']['fields'], **csv_properties)

    return source, resume, csv_properties, fields