Пример #1
0
def train(args, params):
    '''
    Train the model, report its final metric to NNI and save it to disk.

    The model built from ``params`` is fitted on the training split with the
    two targets ``[Y1_train, Y2_train]`` and validated on the test split.
    The last value returned by ``model.evaluate`` (``subcat_acc``) is logged,
    printed and reported to NNI as the final result.  The architecture is
    then written to ``model-<sequence id>.json`` and the weights to
    ``model-<sequence id>.h5`` in the current working directory.

    Args:
        args: object exposing ``dataset_name``, ``num_train`` and ``epochs``.
        params: value forwarded unchanged to ``build_model``.
    '''
    model = build_model(params)
    X_train, Y1_train, Y2_train, X_test, Y1_test, Y2_test = load_dataset(
        args.dataset_name, args.num_train)

    print('Fitting model...')
    model.fit(
        X_train, [Y1_train, Y2_train],
        epochs=args.epochs,
        verbose=1,
        validation_data=(X_test, [Y1_test, Y2_test]),
        callbacks=[SendMetrics(),
                   TensorBoard(log_dir=TENSORBOARD_DIR)])

    # Only the last of the five evaluation values is used below.
    _, _, _, _, subcat_acc = model.evaluate(X_test, [Y1_test, Y2_test],
                                            verbose=0)
    # '%s' rather than '%d': '%d' would truncate a fractional metric to its
    # integer part (e.g. 0.93 -> 0).
    LOG.debug('Final result is: %s', subcat_acc)
    nni.report_final_result(subcat_acc)
    # print() does not interpolate extra arguments, so format explicitly.
    print('Final result is: %s' % subcat_acc)

    model_id = nni.get_sequence_id()
    # serialize model to JSON
    model_json = model.to_json()
    with open("model-{}.json".format(model_id), "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights("model-{}.h5".format(model_id))
    print("Saved model to disk")
Пример #2
0
def train(params):
    '''
    Train the MNIST model, report its final metric to NNI and save it.

    The model created from ``params`` is fitted for 10 epochs using
    ``params['batch_size']`` and validated on the test split.  The second
    value returned by ``model.evaluate`` is logged and reported to NNI as the
    final result.  The architecture and weights are then written to
    ``./ckpt/model-<sequence id>.json`` and ``./ckpt/model-<sequence id>.h5``.

    Args:
        params: mapping with at least a ``'batch_size'`` entry; it is also
            forwarded unchanged to ``create_mnist_model``.
    '''
    import os  # local import: only needed to make sure ./ckpt exists

    x_train, y_train, x_test, y_test = load_mnist_data()
    model = create_mnist_model(params)

    epochs = 10
    model.fit(x_train,
              y_train,
              batch_size=params['batch_size'],
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test),
              callbacks=[SendMetrics()])

    _, acc = model.evaluate(x_test, y_test, verbose=0)
    # '%s' rather than '%d': '%d' would truncate a fractional metric to its
    # integer part (e.g. 0.98 -> 0).
    logger.debug('Final result is: %s', acc)
    nni.report_final_result(acc)

    # Create the checkpoint directory up front so that a finished training
    # run is not lost to a FileNotFoundError when saving.
    os.makedirs('./ckpt', exist_ok=True)

    model_id = nni.get_sequence_id()
    model_json = model.to_json()
    with open('./ckpt/model-{}.json'.format(model_id), 'w') as json_file:
        json_file.write(model_json)
    model.save_weights('./ckpt/model-{}.h5'.format(model_id))
Пример #3
0
def setup_experiment(
        runtime_config: RuntimeConfig,
        enable_nni: bool = False,
        logger_blacklist: Optional[List[str]] = None) -> RuntimeConfig:
    """Prepare output directories and logging, then register the runtime config.

    Steps, in order:

    1. Call ``setup_distributed_training()`` and seed with
       ``runtime_config.seed``.
    2. Resolve ``runtime_config.output_dir`` when it is ``None``: the
       ``PT_OUTPUT_DIR`` environment variable if set, otherwise ``./outputs``.
    3. When ``enable_nni`` is true and the NNI experiment id is not
       ``'STANDALONE'``, nest the output directory under
       ``<experiment id>/<sequence id>``.
    4. Create the output directory and, when they are ``None``, default and
       create ``checkpoint_dir`` (``<output_dir>/checkpoints``) and
       ``tb_log_dir`` (``<output_dir>/tb``).
    5. Reset logging, log to ``<output_dir>/stdout.log`` at DEBUG or INFO
       depending on ``runtime_config.debug``, and mute every logger named in
       ``logger_blacklist``.
    6. Store the config in the module-level ``_runtime_config``.

    Args:
        runtime_config: configuration object; it is mutated in place.
        enable_nni: whether to import ``nni`` and nest the output directory
            per experiment/trial.
        logger_blacklist: names of loggers to mute; defaults to ``['numba']``.

    Returns:
        The same ``runtime_config`` object, with its directories filled in.
    """
    global _runtime_config

    if logger_blacklist is None:
        logger_blacklist = ['numba']
    setup_distributed_training()
    seed_everything(runtime_config.seed)

    if runtime_config.output_dir is None:
        runtime_config.output_dir = Path(
            os.environ.get('PT_OUTPUT_DIR', './outputs'))

    if enable_nni:
        import nni
        experiment_id = nni.get_experiment_id()
        if experiment_id != 'STANDALONE':
            runtime_config.output_dir = (runtime_config.output_dir /
                                         experiment_id /
                                         str(nni.get_sequence_id()))

    # parents=True is required: under NNI the path gains two new levels
    # (<experiment id>/<sequence id>), and PT_OUTPUT_DIR may itself be nested.
    runtime_config.output_dir.mkdir(parents=True, exist_ok=True)

    if runtime_config.checkpoint_dir is None:
        runtime_config.checkpoint_dir = runtime_config.output_dir / 'checkpoints'
        runtime_config.checkpoint_dir.mkdir(parents=True, exist_ok=True)

    if runtime_config.tb_log_dir is None:
        runtime_config.tb_log_dir = runtime_config.output_dir / 'tb'
        runtime_config.tb_log_dir.mkdir(parents=True, exist_ok=True)

    reset_logger()
    setup_logger(
        '',
        log_file=(runtime_config.output_dir / 'stdout.log').as_posix(),
        log_level=logging.DEBUG if runtime_config.debug else logging.INFO)
    for logger_name in logger_blacklist:
        mute_logger(logger_name)

    _runtime_config = runtime_config

    return runtime_config
Пример #4
0
    parser.add_argument('--BatchSize', default='32', type=int)
    # device parameter
    parser.add_argument('--Device', default='0', type=str)
    # version control
    parser.add_argument('--CodeVersion', default='V')
    # Merge parameter
    parser.add_argument('--MergeIndex', default=6, type=int)
    parser.add_argument('--MergeWay', default='sum', type=str)
    return parser


# Parse the command-line options into a plain dict so they can be merged with
# the parameters supplied by NNI below.
parser = stmeta_param_parser()
args = vars(parser.parse_args())

# Fetch the parameter set and the sequence id from NNI.
nni_params = nni.get_next_parameter()
nni_sid = nni.get_sequence_id()
if nni_params:
    # NNI-supplied values override the command-line ones, and the sequence id
    # is appended to the code version (presumably to keep trials apart;
    # confirm against how `code_version` is used further down).
    args.update(nni_params)
    args['CodeVersion'] += str(nni_sid)

model_dir = os.path.join('model_dir', args['City'])
# Version tag built from: the first letter of each '-'-separated graph name,
# K, L, the code version, and MergeIndex * 5.
code_version = 'ST_MMGCN_{}_K{}L{}_{}_F{}'.format(
    ''.join([e[0] for e in args['Graph'].split('-')]), args['K'], args['L'],
    args['CodeVersion'],
    int(args['MergeIndex']) * 5)

deviceIDs = GPUtil.getAvailable(order='memory',
                                limit=2,
                                maxLoad=1,
                                maxMemory=0.7,
                                includeNan=False,
Пример #5
0
    parser.add_argument('--Device', default='0,1', type=str)
    # version control
    parser.add_argument('--Group', default='Xian')
    parser.add_argument('--CodeVersion', default='ParamTuner')
    return parser


# Parse the command-line options into a plain dict so they can be merged with
# the parameters supplied by NNI below.
parser = cpt_stmeta_param_parser()
args = vars(parser.parse_args())

# NNI-supplied values override the command-line ones. `or {}` keeps the script
# usable if no parameter set is returned (None) instead of failing in update().
args.update(nni.get_next_parameter() or {})

model_dir = os.path.join(model_dir_path, args['Group'])
# Version tag built from: the first letter of each '-'-separated graph name,
# K, L, and the code version suffixed with the NNI sequence id.
# The sequence id is an integer, so it must be converted before being
# concatenated to the CodeVersion string (str + int raises TypeError).
code_version = 'CPT_STMeta_{}_K{}L{}_{}'.format(
    ''.join([e[0] for e in args['Graph'].split('-')]), args['K'], args['L'],
    args['CodeVersion'] + str(nni.get_sequence_id()))

# Config data loader
data_loader = NodeTrafficLoader(
    dataset=args['Dataset'],
    city=args['City'],
    data_range=args['DataRange'],
    train_data_length=args['TrainDays'],
    test_ratio=0.1,
    C_T=int(args['CT']),
    P_T=int(args['PT']),
    T_T=int(args['TT']),
    TI=args['TI'],
    TD=args['TD'],
    TC=args['TC'],
    normalize=True if args['Normalize'] == 'True' else False,
Пример #6
0
 def test_get_sequence_id(self):
     """Check that `nni.get_sequence_id()` returns 0 in this test setup."""
     sequence_id = nni.get_sequence_id()
     self.assertEqual(sequence_id, 0)
Пример #7
0
                f.seek(0)
                lines = f.readlines()
            lock.release()
            if lines:
                break

        if len(lines) > args.slave:
            x = random.randint(1, args.slave)
            json_and_id_str = lines[-x].replace("\n", "")
        else:
            json_and_id_str = lines[-1].replace("\n", "")

        with open(
                experiment_path + "/trials/" + str(nni.get_trial_id()) +
                "/output.log", "a+") as f:
            f.write("sequence_id=" + str(nni.get_sequence_id()) + "\n")
        json_and_id = dict((l.split('=') for l in json_and_id_str.split('+')))
        if str(json_and_id['history']) == "True":
            socket.send_pyobj({
                "type": "generated_parameter",
                "parameters": json_and_id['json_out'],
                "father_id": int(json_and_id['father_id']),
                "parameter_id": int(nni.get_sequence_id())
            })
            message = socket.recv_pyobj()
        elif str(json_and_id['history']) == "False":
            socket.send_pyobj({"type": "generated_parameter"})
            message = socket.recv_pyobj()
        RCV_CONFIG = json_and_id['json_out']

        start_time = time.time()
Пример #8
0
    def _start_mlflow_run(self, run_params: Dict[str, Any],
                          pipeline: Pipeline):
        """ Log basic informations to MLFlow about pipeline if this pipeline is tagged with 'train' (creates a new MLFLow experiment and/or run named after training pipeline if it doesn't exists yet)
        NOTE: If NNI is in dry run mode (mode used to generate NNI Classic NAS search space JSON file from a model which contains NNI NAS Mutables `LayerChoice` and/or `InputChoice`) we avoid creating any new MLFlow experiment/run nor logging anything else to mlflow during this dry run

        Args:
            run_params: Run parameters; the keys 'tags', 'pipeline_name' and 'run_id' are read here and every truthy entry is logged as an MLFlow parameter.
            pipeline: Pipeline being run; its nodes' tags, JSON form, description and datasets are logged.

        Git-related failures (see the `except` clause below) are logged as a warning and do not interrupt the run.
        """
        # Seed the reduction with an empty set so that a pipeline without any node yields an empty tag set instead of raising `TypeError`
        node_tags = functools.reduce(set.union,
                                     [n.tags for n in pipeline.nodes], set())
        if not deepcv.meta.nni_tools.is_nni_gen_search_space_mode() and (
                'train' in run_params['tags'] or 'train' in node_tags):
            if mlflow.active_run() is None:
                # Create MLFlow run in an experiment named after pipeline involved in training and log various pipeline/datasets informations to mlflow. If we are running an NNI hp/nas search, mlflow experiment and run will be named after NNI experiment and trial ids for better consitency.
                # TODO: find another way to name experiment as pipeline name is only available when running `kedro run --pipeline=<pipeline_name>` (e.g. special tag to node after which experiment is named)

                if not deepcv.meta.nni_tools.is_nni_run_standalone(
                ):  # 'STANDALONE' is NNI default experiment ID if python process haven't been started by NNI
                    nni_experiment = nni.get_experiment_id()
                    mlflow.set_experiment(nni_experiment)
                    mlflow.start_run(run_name=nni.get_trial_id())
                    # Flag indicating whether we are using NNI HP or Classic NAS API (Hyperparameter and/or Classic Neural Architecture search using NNI)
                    mlflow.set_tag('nni_standalone_mode', False)
                    mlflow.set_tag('nni_experiment_id', nni_experiment)
                    mlflow.set_tag('nni_trial_id', nni.get_trial_id())
                    mlflow.set_tag('nni_sequence_id', nni.get_sequence_id())
                else:
                    pipeline_name = run_params['pipeline_name'].lower(
                    ) if run_params['pipeline_name'] else 'default'
                    mlflow.set_experiment(
                        f'{self.project_ctx.project_name.lower()}_{pipeline_name}'
                    )
                    mlflow.start_run(
                        run_name=
                        f'{pipeline_name.lower()}_run_{run_params["run_id"]}')
                    mlflow.set_tag('nni_standalone_mode', True)

            # Log basic informations about Kedro training pipeline to mlflow
            mlflow.set_tags({
                f'kedro_node_tag_{i}': tag
                for i, tag in enumerate(node_tags)
            })
            mlflow.log_params({n: v for n, v in run_params.items() if v})
            mlflow.log_param('pipeline.json', pipeline.to_json())
            mlflow.log_param('pipeline.describe', pipeline.describe())
            mlflow.log_param('pipeline.pipeline_datasets',
                             pipeline.data_sets())
            # The following code creates special mlflow tags about current repository infos, which is not done by mlflow when starting an MLFlow run from code instead of from `mlflow run` command
            # Code inspired from [`mlflow.projects._create_run`](https://www.mlflow.org/docs/latest/_modules/mlflow/projects.html) which doesn't seems to be called by `mlflow.start_run`
            tags = {
                mlflow.utils.mlflow_tags.MLFLOW_SOURCE_NAME:
                self.project_ctx.package_name,
                mlflow.utils.mlflow_tags.MLFLOW_SOURCE_TYPE:
                mlflow.entities.SourceType.to_string(
                    mlflow.entities.SourceType.PROJECT),
                mlflow.utils.mlflow_tags.MLFLOW_PROJECT_ENTRY_POINT:
                inspect.getsourcefile(type(self.project_ctx))
            }
            try:
                repo = git.Repo(self.project_ctx.project_path,
                                search_parent_directories=True)
                # Prefer the 'origin' remote, else fall back to the first remote (if any)
                git_repo_url = repo.remote(
                ).url if 'origin' in repo.remotes else (
                    repo.remotes[0].url if len(repo.remotes) > 0 else '')
                git_repo_url = re.sub(
                    r'git@([.\w]+):', r'https://\1/',
                    git_repo_url)  # Convert SSH git URL to http URL
                # Remove an exact '.git' suffix only: `str.rstrip('.git')` strips any trailing '.', 'g', 'i' or 't' characters and would mangle repository names such as 'widget' into 'widge'
                if git_repo_url.endswith('.git'):
                    git_repo_url = git_repo_url[:-len('.git')]
                mlflow.log_param(
                    'commit_url',
                    git_repo_url + f'/commit/{repo.head.commit.hexsha}/')

                # We also set MLFLOW_SOURCE_NAME to repo URL so that MLFlow web UI is able to parse it and render commit and source hyperlinks (MLFLow only supports github URLs for now)
                tags.update({
                    mlflow.utils.mlflow_tags.MLFLOW_SOURCE_NAME:
                    git_repo_url
                    if git_repo_url else self.project_ctx.project_name,
                    mlflow.utils.mlflow_tags.MLFLOW_GIT_BRANCH:
                    repo.active_branch.name,
                    mlflow.utils.mlflow_tags.MLFLOW_GIT_REPO_URL:
                    git_repo_url,
                    mlflow.utils.mlflow_tags.MLFLOW_GIT_COMMIT:
                    repo.head.commit.hexsha
                })

                # Change mlflow user to be git repository user instead of system user (if any git user is specified)
                git_config_reader = repo.config_reader()
                git_config_reader.read()
                user = git_config_reader.get_value('user',
                                                   'name',
                                                   default=None)
                email = git_config_reader.get_value('user',
                                                    'email',
                                                    default=None)
                if user or email:
                    tags[mlflow.utils.mlflow_tags.MLFLOW_USER] = (
                        str(user) + (f' <{email}>' if email else '')
                    ) if user else str(email)
            except (ImportError, OSError, ValueError, IOError, KeyError,
                    git.GitError, configparser.Error) as e:
                logging.warning(
                    f'Failed to import Git or to get repository informations. Error: {e}'
                )

            mlflow.set_tags(tags)
Пример #9
0
     f1 = open(experiment_path + "/graph.txt", "a+")
     f1.seek(0)
     lines = f1.readlines()
     f1.close()
     lock.release()
     if lines:
         break
 json_and_id_str = lines[-1].replace("\n", "")  #逆序读取并记录,数据组成字典
 json_and_id = dict(
     (l.split('=') for l in json_and_id_str.split('+')))
 if str(json_and_id['history']) == "True":
     socket.send_pyobj({
         "type": "generated_parameter",
         "parameters": json_and_id['json_out'],
         "father_id": int(json_and_id['father_id']),
         "parameter_id": int(nni.get_sequence_id())
     })
     f11 = open('/root/log', 'a+')
     f11.write('histtory is True so \nsend parameters')
     f11.close()
     message = socket.recv_pyobj()
     f11 = open('/root/log', 'a+')
     f11.write('recv message: ' + str(message) + '\n')
     f11.close()
 elif str(json_and_id['history']) == "False":
     socket.send_pyobj({"type": "generated_parameter"})
     f11 = open('/root/log', 'a+')
     f11.write('history is false so \nsend generated_parameter\n')
     f11.close()
     message = socket.recv_pyobj()
     f11 = open('/root/log', 'a+')
Пример #10
0
def is_nni_run_standalone() -> bool:
    """ Tell whether NNI reports standalone values: experiment and trial ids both equal to 'STANDALONE' and a sequence id of 0 """
    # Checks run in the same order as a short-circuiting `and` chain: experiment id, trial id, then sequence id
    if nni.get_experiment_id() != r'STANDALONE':
        return False
    if nni.get_trial_id() != r'STANDALONE':
        return False
    return nni.get_sequence_id() == 0
Пример #11
0
                        help='set tolerance of termination criterion')

    parser.add_argument('--verbose',
                        type=int,
                        default=0,
                        help='train verbosity level (0 is less verbosity)')
    parser.add_argument('--debug',
                        action="store_true",
                        help="info debug information")
    parser.add_argument('--log-to-file',
                        type=bool,
                        default=False,
                        help="Save log to file")
    args = vars(parser.parse_args())  # return as dictionary

    args['id'] = nni.get_sequence_id()
    log_level = logging.DEBUG if args['debug'] else logging.INFO
    if args['log_to_file']:
        log_file = os.path.join(args['output'],
                                'execution-{}.log'.format(args['id']))
        logging.basicConfig(filename=log_file, level=log_level)
    else:
        logging.basicConfig(level=log_level)

    RECEIVED_PARAMS = nni.get_next_parameter()
    has_nni = True
    if RECEIVED_PARAMS is None:
        """this only occurs if you call this python module from the command line with using nnictl
        """
        RECEIVED_PARAMS = default_params()
        has_nni = False