Exemplo n.º 1
0
def train(config, trainfile, testfile):
    """Train, evaluate and export the estimator built from ``esmm_model_fn``.

    Training files are grouped into five shards by the number that follows
    the last ``-`` in their file name (40 consecutive numbers per shard).
    For every epoch the model is trained on each shard in turn and evaluated
    on all dev files after each shard.  The model is exported as a SavedModel
    once all epochs are done.

    Args:
        config: (configparser) All the hyperparameters for training.  The
            ``input``, ``train`` and ``model`` sections are read.
        trainfile: Comma-separated list of directories with training files.
        testfile: Comma-separated list of directories with dev files.

    Raises:
        IndexError: If a training file number maps to a shard index outside
            the five available shards.
    """
    # NOTE(review): the returned keys are not used in this function; the call
    # is kept in case get_norm_keys has side effects -- TODO confirm.
    keys = get_norm_keys(config['input'].get('conf'))

    shard_count = 5
    files_per_shard = 40
    train_dirs = trainfile.split(',')
    train_files = [[] for _ in range(shard_count)]
    for train_dir in train_dirs:
        for f in os.listdir(train_dir):
            if f != "_SUCCESS":
                # Integer floor division keeps the index an exact int.
                ind = int(f.split('-')[-1]) // files_per_shard
                train_files[ind].append(os.path.join(train_dir, f))

    dev_dirs = testfile.split(',')
    dev_files = [
        os.path.join(dev_dir, f) for dev_dir in dev_dirs
        for f in os.listdir(dev_dir) if f != "_SUCCESS"
    ]

    # The feature config file is named by the 'spec' key of the 'input'
    # section; it holds bucket boundaries and similar information.
    feature_config = configparser.ConfigParser()
    feature_config.read(config['input']['spec'])
    # Features are processed per feature column; handling differs by type
    # (e.g. numeric values, embeddings).
    columns, spec = FCGen.GetFeatureSpec(feature_config)

    batch_size = int(config['train']['batch_size'])

    conf = tf.ConfigProto()
    conf.gpu_options.allow_growth = True

    os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    run_config = tf.estimator.RunConfig().replace(
        model_dir=config['train'].get('model_dir', 'model_dir'),
        save_checkpoints_secs=3600,
        session_config=conf)
    dynamic = config['train']['dynamic'] == 'true'
    print("dynamic:", dynamic)
    logging.info("Creating model...")
    # Define the model
    hidden_units = [int(n) for n in config['model']['hidden_units'].split(',')]
    learning_rate = float(config['model']['learning_rate'])
    ctr_reg = float(config['model'].get('ctr_reg', '1e-4'))
    cvr_reg = float(config['model'].get('cvr_reg', '1e-4'))
    ctcvr_loss_weight = float(config['model'].get('ctcvr_loss_weight', '1.0'))
    model = tf.estimator.Estimator(
        model_fn=esmm_model_fn,
        params={
            'cat_columns': columns['cat'],
            'val_columns': columns['val'],
            'dnn_columns': list(columns['dnn'].values()),
            'weight_columns': list(columns['weight'].values()),
            'column_to_field': {},
            'hidden_units': hidden_units,
            'learning_rate': learning_rate,
            'ctr_reg': ctr_reg,
            'cvr_reg': cvr_reg,
            'reg': 1e-4,
            'ctcvr_loss_weight': ctcvr_loss_weight,
            'model': config['model']['model'],
            'embed_dim': int(config['model']['embedding_dim']),
            'dynamic': dynamic,
        },
        config=run_config)

    # Train and evaluate.  An empty or missing 'max_step' means no step limit.
    max_steps = config['train'].get('max_step', '')
    if max_steps == '':
        max_steps = None
    else:
        max_steps = int(max_steps)

    logging.info("training...")
    epochs = int(config['train'].get('epochs', '1'))
    eval_input_fn = lambda: input_fn(
        dev_files, spec, False, batch_size, mt=True)
    for i in range(epochs):
        logging.info("{}th training...".format(i + 1))
        for j, shard in enumerate(train_files):
            # model.train runs the input_fn before returning, so closing over
            # the loop variable is safe here.
            model.train(input_fn=lambda: input_fn(
                shard, spec, True, batch_size, mt=True),
                        steps=max_steps)

            results = model.evaluate(input_fn=eval_input_fn)

            # Running evaluation number across all epochs and shards; the
            # previous "i + j + 1" repeated numbers from the second epoch on.
            logging.info("{}th test results...".format(
                i * len(train_files) + j + 1))
            for key in sorted(results):
                print('%s: %s' % (key, results[key]))

    model.export_savedmodel(
        export_dir_base=config['train'].get('export_dir', 'export_dir'),
        serving_input_receiver_fn=lambda: input_receiver(spec),
        strip_default_attrs=True)
Exemplo n.º 2
0
def train(config, trainfile, testfile):
    """Train, evaluate and export the estimator built from ``esmm_model_fn``.

    Training data is split into shards.  With ``config['train']['source']``
    equal to ``'hdfs'`` each shard is a list of glob patterns (one per
    directory) under a fixed HDFS location and is read via
    ``input_fn_pattern``; otherwise files are listed from a fixed local
    prefix, bucketed by the number after the last ``-`` in their name, and
    read via ``input_fn``.  For every epoch the model is trained on each
    shard and evaluated on the dev files after each shard, then exported as
    a SavedModel at the end.

    Args:
        config: (configparser) All the hyperparameters for training.  The
            ``input``, ``train`` and ``model`` sections are read.
        trainfile: Comma-separated directory names, relative to the HDFS
            location or the local prefix.
        testfile: Comma-separated dev directory names, relative to the local
            prefix (dev files are always listed locally).

    Raises:
        IndexError: If a local training file number maps to a shard index
            outside the four available shards.
    """
    prefix = "/data/home/graywang/esmm/tfrecords/rt_mt"
    train_dirs = trainfile.split(',')
    cluster = "hdfs://ss-sng-dc-v2/stage/outface/SNG/g_sng_qqmusic_develop/g_sng_qqmusic_develop/timmili/gray_temp/"
    from_hdfs = config['train']['source'] == 'hdfs'
    if from_hdfs:
        # One glob pattern per directory and shard: part-r-000* .. part-r-004*
        train_files = [[] for _ in range(5)]
        for train_dir in train_dirs:
            for i in range(5):
                train_files[i].append(cluster + train_dir + "/part-r-00" +
                                      str(i) + "*")
    else:
        train_files = [[] for _ in range(4)]
        for train_dir in train_dirs:
            # List the directory once; the divisor depends only on the entry
            # count of the directory, not on the individual file.
            names = os.listdir(prefix + "/" + train_dir)
            div = 125 if len(names) > 250 else 50
            for f in names:
                if f != "_SUCCESS":
                    ind = int(f.split('-')[-1]) // div
                    train_files[ind].append(os.path.join(prefix, train_dir, f))
    logging.info('train directory: {}'.format(train_dirs))
    logging.info('train files: {}'.format(reprlib.repr(train_files)))

    dev_dirs = testfile.split(',')
    dev_files = [
        os.path.join(prefix, dev_dir, f) for dev_dir in dev_dirs
        for f in os.listdir(prefix + "/" + dev_dir) if f != "_SUCCESS"
    ]
    logging.info('dev directory: {}'.format(dev_dirs))
    logging.info('dev files: {}'.format(reprlib.repr(dev_files)))

    # The feature config file is named by the 'spec' key of the 'input'
    # section; it holds bucket boundaries and similar information.
    feature_config = configparser.ConfigParser()
    feature_config.read(config['input']['spec'])
    # Features are processed per feature column; handling differs by type
    # (e.g. numeric values, embeddings).
    columns, spec, dimension_config = FCGen.GetFeatureSpec(feature_config)

    batch_size = int(config['train']['batch_size'])

    conf = tf.ConfigProto()
    conf.gpu_options.allow_growth = True
    # NOTE(review): this discards the dimension_config returned by
    # GetFeatureSpec above, so the model always receives an empty dict --
    # confirm this is intended.
    dimension_config = {}
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    run_config = tf.estimator.RunConfig().replace(
        model_dir=config['train'].get('model_dir', 'model_dir'),
        save_checkpoints_secs=3600,
        session_config=conf)
    dynamic = config['train']['dynamic'] == 'true'
    # Warm-start every variable from 'warm_dir' when one is configured.
    warm_dir = config['train'].get('warm_dir', '')
    if len(warm_dir) > 1:
        ws = tf.estimator.WarmStartSettings(ckpt_to_initialize_from=warm_dir,
                                            vars_to_warm_start=".*")
    else:
        ws = None
    print("dynamic:", dynamic)
    logging.info("Creating model...")
    # Define the model
    hidden_units = [int(n) for n in config['model']['hidden_units'].split(',')]
    learning_rate = float(config['model']['learning_rate'])
    ctr_reg = float(config['model'].get('ctr_reg', '1e-4'))
    cvr_reg = float(config['model'].get('cvr_reg', '1e-4'))
    ctcvr_loss_weight = float(config['model'].get('ctcvr_loss_weight', '1.0'))
    model = tf.estimator.Estimator(
        model_fn=esmm_model_fn,
        params={
            'cat_columns': columns['cat'],
            'val_columns': columns['val'],
            'dnn_columns': list(columns['dnn'].values()),
            'weight_columns': list(columns['weight'].values())[0],
            'column_to_field': {},
            'hidden_units': hidden_units,
            'learning_rate': learning_rate,
            'ctr_reg': ctr_reg,
            'cvr_reg': cvr_reg,
            'reg': 1e-4,
            'dimension_config': dimension_config,
            'ctcvr_loss_weight': ctcvr_loss_weight,
            'model': config['model']['model'],
            'embed_dim': int(config['model']['embedding_dim']),
            'expert_num': int(config['model']['expert_num']),
            'expert_unit': int(config['model']['expert_unit']),
            'dynamic': dynamic,
        },
        config=run_config,
        warm_start_from=ws)

    # Train and evaluate
    # NOTE(review): max_steps is parsed (and validated as an int) but is not
    # passed to model.train below, so 'max_step' currently has no effect on
    # training -- confirm whether a step limit should be applied.
    max_steps = config['train'].get('max_step', '')
    if max_steps == '':
        max_steps = None
    else:
        max_steps = int(max_steps)

    logging.info("training...")
    epochs = int(config['train'].get('epochs', '1'))
    # Glob patterns need the pattern-aware reader; local paths use input_fn.
    input_func = input_fn_pattern if from_hdfs else input_fn
    eval_input_fn = lambda: input_fn(
        dev_files, spec, False, batch_size, mt=True)
    for i in range(epochs):
        logging.info("{}th training...".format(i + 1))
        for j, shard in enumerate(train_files):
            # model.train runs the input_fn before returning, so closing over
            # the loop variable is safe here.
            model.train(input_fn=lambda: input_func(
                shard, spec, True, batch_size, mt=True))

            results = model.evaluate(input_fn=eval_input_fn)

            # Running evaluation number across all epochs and shards; the
            # previous "i + j + 1" repeated numbers from the second epoch on.
            logging.info("{}th test results...".format(
                i * len(train_files) + j + 1))
            for key in sorted(results):
                print('%s: %s' % (key, results[key]))
    model.export_savedmodel(
        export_dir_base=config['train'].get('export_dir', 'export_dir'),
        serving_input_receiver_fn=lambda: input_receiver(spec),
        strip_default_attrs=True)
Exemplo n.º 3
0
def train(config, trainfile, testfile):
  """Evaluate the ``esmm_model_fn`` estimator on one dev file and export it.

  Despite its name, this variant never calls ``model.train``: the training
  files are only collected into five buckets and logged.

  Args:
    config: (configparser) All the hyperparameters; the ``input``, ``train``
      and ``model`` sections are read.
    trainfile: Comma-separated list of directories with training files.
    testfile: Comma-separated list of directories with dev files; only the
      first file found is evaluated.
  """
  # NOTE(review): the returned keys are not used in this function.
  keys = get_norm_keys(config['input'].get('conf'))

  train_dirs = trainfile.split(',')
  train_files = [[], [], [], [], []]
  for directory in train_dirs:
    for name in os.listdir(directory):
      if name == "_SUCCESS":
        continue
      # Bucket by the number after the last '-', 40 numbers per bucket.
      part_no = int(name.split('-')[-1])
      train_files[int(part_no / 40)].append(os.path.join(directory, name))
  logging.info('train directory: {}'.format(train_dirs))
  logging.info('train files: {}'.format(reprlib.repr(train_files)))

  dev_dirs = testfile.split(',')
  dev_files = []
  for directory in dev_dirs:
    for name in os.listdir(directory):
      if name != "_SUCCESS":
        dev_files.append(os.path.join(directory, name))
  logging.info('dev directory: {}'.format(dev_dirs))
  logging.info('dev files: {}'.format(reprlib.repr(dev_files)))

  # The feature config file is named by the 'spec' key of the 'input'
  # section; it holds bucket boundaries and similar information.
  feature_config = configparser.ConfigParser()
  feature_config.read(config['input']['spec'])
  # Features are processed per feature column; handling differs by type
  # (e.g. numeric values, embeddings).
  columns, spec = FCGen.GetFeatureSpec(feature_config)

  batch_size = int(config['train']['batch_size'])

  conf = tf.ConfigProto()
  conf.gpu_options.allow_growth = True

  os.environ["CUDA_VISIBLE_DEVICES"] = "5"
  run_config = tf.estimator.RunConfig().replace(
      model_dir=config['train'].get('model_dir', 'model_dir'),
      session_config=conf)

  logging.info("Creating model...")
  # Define the model
  model_section = config['model']
  hidden_units = [int(n) for n in model_section['hidden_units'].split(',')]
  learning_rate = float(model_section['learning_rate'])
  ctr_reg = float(model_section.get('ctr_reg', '1e-6'))
  cvr_reg = float(model_section.get('cvr_reg', '1e-4'))
  ctcvr_loss_weight = float(model_section.get('ctcvr_loss_weight', '1.0'))
  params = {
      'dnn_columns': list(columns['dnn'].values()),
      'linear_columns': list(columns['linear'].values()),
      'weight_columns': list(columns['weight'].values()),
      'hidden_units': hidden_units,
      'learning_rate': learning_rate,
      'ctr_reg': ctr_reg,
      'cvr_reg': cvr_reg,
      'ctcvr_loss_weight': ctcvr_loss_weight,
      'model': config['model']['model'],
  }
  model = tf.estimator.Estimator(
      model_fn=esmm_model_fn, params=params, config=run_config)

  def eval_input_fn():
    # Only the first dev file is evaluated.
    return input_fn(dev_files[0:1], spec, False, batch_size, mt=True)

  def serving_receiver_fn():
    return input_receiver(spec)

  print(model.evaluate(input_fn=eval_input_fn))
  model.export_savedmodel(
      export_dir_base=config['train'].get('export_dir', 'export_dir'),
      serving_input_receiver_fn=serving_receiver_fn,
      strip_default_attrs=True)
Exemplo n.º 4
0
def train(config, trainfile, testfile):
    """Train, evaluate and export a ``DNNLinearCombinedClassifier``.

    Every epoch trains on all training files and then evaluates on all dev
    files, printing each metric.  The model is exported as a SavedModel once
    all epochs are done.

    Args:
        config: (configparser) All the hyperparameters for training.  The
            ``input``, ``train``, ``dnn_model`` and ``linear_model``
            sections are read.
        trainfile: Comma-separated list of directories with training files.
        testfile: Comma-separated list of directories with dev files.
    """
    train_dirs = trainfile.split(',')
    train_files = []
    for directory in train_dirs:
        for name in os.listdir(directory):
            if name != "_SUCCESS":
                train_files.append(os.path.join(directory, name))
    logging.info('train directory: {}'.format(train_dirs))
    logging.info('train files: {}'.format(reprlib.repr(train_files)))

    dev_dirs = testfile.split(',')
    dev_files = []
    for directory in dev_dirs:
        for name in os.listdir(directory):
            if name != "_SUCCESS":
                dev_files.append(os.path.join(directory, name))
    logging.info('dev directory: {}'.format(dev_dirs))
    logging.info('dev files: {}'.format(reprlib.repr(dev_files)))

    # The feature config file is named by the 'spec' key of the 'input'
    # section; it holds bucket boundaries and similar information.
    feature_config = configparser.ConfigParser()
    feature_config.read(config['input']['spec'])
    # Features are processed per feature column; handling differs by type
    # (e.g. numeric values, embeddings).
    columns, spec = FCGen.GetFeatureSpec(feature_config)

    batch_size = int(config['train']['batch_size'])

    conf = tf.ConfigProto()
    conf.gpu_options.allow_growth = True

    os.environ["CUDA_VISIBLE_DEVICES"] = "5"
    run_config = tf.estimator.RunConfig().replace(session_config=conf)

    logging.info("Creating model...")
    # Define the model
    hidden_units = [
        int(n) for n in config['dnn_model']['hidden_units'].split(',')
    ]
    # The dropout value is parsed (and validated as a float) but is not
    # passed to the classifier in this variant.
    raw_dropout = config['dnn_model'].get('dropout', '')
    dropout = None if raw_dropout == '' else float(raw_dropout)

    model_dir = config['train'].get('model_dir', 'model_dir')
    linear_columns = columns['linear']
    linear_optimizer = tf.train.FtrlOptimizer(
        learning_rate=float(config['linear_model']['learning_rate']),
        l1_regularization_strength=0.01,
        l2_regularization_strength=0.01)
    # Drop dnn_feature_columns below if there are no dnn columns.
    dnn_columns = columns['dnn']
    # Must stay enabled when a weight column exists.
    weight_column = columns['weight'][0]
    dnn_optimizer = tf.train.AdagradOptimizer(
        learning_rate=float(config['dnn_model']['learning_rate']),
        initial_accumulator_value=0.1,
        use_locking=False)
    model = tf.estimator.DNNLinearCombinedClassifier(
        config=run_config,
        model_dir=model_dir,
        linear_feature_columns=linear_columns,
        linear_optimizer=linear_optimizer,
        dnn_feature_columns=dnn_columns,
        dnn_hidden_units=hidden_units,
        weight_column=weight_column,
        dnn_optimizer=dnn_optimizer,
        batch_norm=True,
        loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)

    # Train and evaluate.  An empty or missing 'max_step' means no step limit.
    raw_max_steps = config['train'].get('max_step', '')
    max_steps = None if raw_max_steps == '' else int(raw_max_steps)
    epochs = int(config['train']['epochs'])

    def train_input_fn():
        return input_fn(
            train_files, spec, shuffle=True, batch_size=batch_size)

    def eval_input_fn():
        return input_fn(
            dev_files, spec, shuffle=False, batch_size=batch_size)

    for epoch in range(epochs):
        logging.info("training...")
        model.train(input_fn=train_input_fn, steps=max_steps)

        results = model.evaluate(input_fn=eval_input_fn)

        logging.info("results...")
        for metric in sorted(results):
            print('{}th {}: {}'.format(epoch + 1, metric, results[metric]))

    def serving_receiver_fn():
        return input_receiver(spec)

    model.export_savedmodel(
        export_dir_base=config['train'].get('export_dir', 'export_dir'),
        serving_input_receiver_fn=serving_receiver_fn,
        strip_default_attrs=True)
Exemplo n.º 5
0
def train(config, hdfs_prefix, ftime, gap, ckpt_dir, export_dir, metric_dir):
    """Train a ``BoostedTreesClassifier``, write its metrics and export it.

    Training and dev data are addressed by glob patterns
    ``<hdfs_prefix>/<YYYYMMDD>/train/part-r-*`` and
    ``<hdfs_prefix>/<YYYYMMDD>/test/part-r-*`` for each of the ``gap`` days
    preceding ``ftime``.  Every epoch trains on all training patterns and
    evaluates on all dev patterns.  The ``auc`` and ``loss`` of the last
    evaluation are written to ``<metric_dir>/metrics_info.json`` and the
    model is exported as a SavedModel.

    Args:
        config: (configparser) All the hyperparameters for training.  The
            ``input`` and ``train`` sections are read.
        hdfs_prefix: Path prefix under which the per-day data directories
            live.
        ftime: Reference date as a ``%Y%m%d`` string; the day itself is not
            included in the data.
        gap: Number of days before ``ftime`` to include.
        ckpt_dir: Directory passed to the estimator as ``model_dir``.
        export_dir: Base directory for the exported SavedModel.
        metric_dir: Directory in which ``metrics_info.json`` is written.

    Raises:
        ValueError: If ``config['train']['epochs']`` is smaller than 1, since
            no evaluation would run and there would be no metrics to report.
    """
    train_files = []
    dev_files = []
    cur_date = datetime.datetime.strptime(ftime, "%Y%m%d")
    for i in range(1, gap + 1):
        dest_date = (cur_date + datetime.timedelta(days=-i)).strftime("%Y%m%d")
        train_files.append(hdfs_prefix + "/" + dest_date + "/train/part-r-*")
        dev_files.append(hdfs_prefix + "/" + dest_date + "/test/part-r-*")
    logging.info('train files: {}'.format(reprlib.repr(train_files)))

    logging.info('dev files: {}'.format(reprlib.repr(dev_files)))

    # The feature config file is named by the 'spec' key of the 'input'
    # section; it holds bucket boundaries and similar information.
    feature_config = configparser.ConfigParser()
    feature_config.read(config['input']['spec'])
    # Features are processed per feature column; handling differs by type
    # (e.g. numeric values, embeddings).
    columns, spec = FCGen.GetFeatureSpec(feature_config)

    batch_size = int(config['train']['batch_size'])
    # Fail fast: with zero epochs the metrics below would never be computed
    # (previously this surfaced as a NameError after the model was built).
    epochs = int(config['train']['epochs'])
    if epochs < 1:
        raise ValueError(
            "config['train']['epochs'] must be >= 1, got {}".format(epochs))

    conf = tf.ConfigProto()
    conf.gpu_options.allow_growth = True

    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    run_config = tf.estimator.RunConfig(save_checkpoints_secs=1800).replace(
        session_config=conf)

    logging.info("Creating model...")
    # Define the model
    model = tf.estimator.BoostedTreesClassifier(
        config=run_config,
        n_batches_per_layer=1000,
        n_trees=100,
        learning_rate=0.2,
        l1_regularization=0.01,
        l2_regularization=0.01,
        max_depth=10,
        model_dir=ckpt_dir,
        feature_columns=list(columns['linear'].values()),
        # Must stay enabled when a weight column exists.
        weight_column=list(columns['weight'].values())[0])

    # Train and evaluate; the metrics of the last epoch are the ones reported.
    for _ in range(epochs):
        logging.info("training...")
        model.train(input_fn=lambda: input_fn(
            train_files, spec, shuffle=True, batch_size=batch_size))

        results = model.evaluate(input_fn=lambda: input_fn(
            dev_files, spec, shuffle=False, batch_size=batch_size))
        auc = float(results["auc"])
        logloss = float(results["loss"])

    # Metric values are serialized as strings alongside a declared type.
    index = [{
        "name": "auc",
        "type": "float",
        "value": str(auc)
    }, {
        "name": "logloss",
        "type": "float",
        "value": str(logloss)
    }]
    file_name = metric_dir + "/metrics_info.json"
    with open(file_name, 'w') as file_obj:
        json.dump(index, file_obj)
    model.export_savedmodel(
        export_dir_base=export_dir,
        serving_input_receiver_fn=lambda: input_receiver(spec),
        strip_default_attrs=True)
Exemplo n.º 6
0
def train(config, trainfile, testfile):
    """Train once, evaluate and export a ``DNNLinearCombinedClassifier``.

    The model is trained on every file of the training directory, evaluated
    on every file of the dev directory (each metric is printed), and then
    exported as a SavedModel.

    Args:
        config: (configparser) All the hyperparameters for training.  The
            ``input``, ``train``, ``dnn_model`` and ``linear_model``
            sections are read.
        trainfile: Directory holding the training files.
        testfile: Directory holding the dev files.
    """
    train_dir = trainfile
    train_files = []
    for name in os.listdir(train_dir):
        if name != "_SUCCESS":
            train_files.append(os.path.join(train_dir, name))
    logging.info('train directory: {}'.format(train_dir))
    logging.info('train files: {}'.format(reprlib.repr(train_files)))

    dev_dir = testfile
    dev_files = []
    for name in os.listdir(dev_dir):
        if name != "_SUCCESS":
            dev_files.append(os.path.join(dev_dir, name))
    logging.info('dev directory: {}'.format(dev_dir))
    logging.info('dev files: {}'.format(reprlib.repr(dev_files)))

    # The feature config file is named by the 'spec' key of 'input'.
    feature_config = configparser.ConfigParser()
    feature_config.read(config['input']['spec'])
    columns, spec = FCGen.GetFeatureSpec(feature_config)

    batch_size = int(config['train']['batch_size'])

    conf = tf.ConfigProto()
    conf.gpu_options.allow_growth = True

    os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    run_config = tf.estimator.RunConfig().replace(session_config=conf)

    logging.info("Creating model...")
    # Define the model
    hidden_units = [
        int(n) for n in config['dnn_model']['hidden_units'].split(',')
    ]
    # An empty or missing 'dropout' disables dropout.
    raw_dropout = config['dnn_model'].get('dropout', '')
    dropout = None if raw_dropout == '' else float(raw_dropout)
    print(columns['weight'][0])

    # Keyword arguments are evaluated in the order listed here.
    estimator_kwargs = dict(
        config=run_config,
        model_dir=config['train'].get('model_dir', 'model_dir'),
        linear_feature_columns=columns['linear'],
        linear_optimizer=tf.train.FtrlOptimizer(
            learning_rate=float(config['linear_model']['learning_rate']),
            l1_regularization_strength=float(config['linear_model']['l1_reg']),
            l2_regularization_strength=float(
                config['linear_model']['l2_reg'])),
        dnn_feature_columns=columns['dnn'],
        dnn_hidden_units=hidden_units,
        weight_column=columns['weight'][0],
        dnn_optimizer=tf.train.AdamOptimizer(
            learning_rate=float(config['dnn_model']['learning_rate'])),
        dnn_dropout=dropout,
        loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)
    model = tf.estimator.DNNLinearCombinedClassifier(**estimator_kwargs)

    # Train and evaluate.  An empty or missing 'max_step' means no step limit.
    raw_max_steps = config['train'].get('max_step', '')
    max_steps = None if raw_max_steps == '' else int(raw_max_steps)

    def train_input_fn():
        return input_fn(
            train_files, spec, shuffle=True, batch_size=batch_size)

    def eval_input_fn():
        return input_fn(
            dev_files, spec, shuffle=False, batch_size=batch_size)

    def serving_receiver_fn():
        return input_receiver(spec)

    logging.info("training...")
    model.train(input_fn=train_input_fn, steps=max_steps)

    results = model.evaluate(input_fn=eval_input_fn)

    logging.info("results...")
    for metric in sorted(results):
        print('%s: %s' % (metric, results[metric]))

    model.export_savedmodel(
        export_dir_base=config['train'].get('export_dir', 'export_dir'),
        serving_input_receiver_fn=serving_receiver_fn,
        strip_default_attrs=True)