Example #1
def main():
    """
    """
    logger = setLogger()
    currencies_responses = get_responses(200)
    logger.debug("Got responses")
    currencies_soups = collect_soups(currencies_responses)
    logger.debug("Got soups")
    curr_short_rate_names = collect_short_rate_names(currencies_soups)
    logger.debug("Collected short rate names")
    short_rate_names_to_parse = prepare_for_parse(curr_short_rate_names)
    logger.debug("Got short rate names to parse")
    short_rate_names_responses = get_short_rate_names_responses(
        short_rate_names_to_parse)
    logger.debug("Got short rate names responses")
    short_rate_names_trees = collect_short_rate_names_trees(
        short_rate_names_responses)
    logger.debug("Collected short rate names trees")
    hash_table = create_hash_table(short_rate_names_trees,
                                   short_rate_names_to_parse)
    logger.debug("Created hash_table")
    create_currencies_tables('userdb',
                             curr_short_rate_names,
                             hash_table,
                             )
    logger.debug("Created currencies table")
    task.react(graph_id_parser, (curr_short_rate_names,))
Example #2
class Model_RandomForest:
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)
    
    # constructor
    def __init__(self,param):
        self.model = GridSearchCV(RandomForestClassifier(random_state=0),
                                  param,
                                  cv=3,
                                  verbose=1,
                                  return_train_score=False)

    # Machine learning (training)
    def fit(self,x_train,y_train):
        self.log.info('fit start')
        self.model.fit(x_train, y_train)

        self.log.info('fit end')

    # Feature importances of the best estimator
    def grid_search_feature_importances(self,getList):
        return pd.DataFrame({"feature":getList,"importance":self.model.best_estimator_.feature_importances_}).sort_values(by="importance",ascending=False)

    # Best parameters
    def grid_search_best_params(self):
        return self.model.best_params_

    # Best cross-validation
    def grid_search_best_score(self):
        return self.model.best_score_

    # Get predictions
    def predict(self,test_data):
        return self.model.predict(test_data).astype(int)
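
# A minimal usage sketch for Model_RandomForest above (not part of the original
# example; the parameter grid and the iris data are hypothetical stand-ins, and
# the class's own imports are assumed to be in scope):
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

x, y = load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

param = {"n_estimators": [50, 100], "max_depth": [5, 10, None]}
clf = Model_RandomForest(param)
clf.fit(x_train, y_train)
print(clf.grid_search_best_params(), clf.grid_search_best_score())
y_pred = clf.predict(x_test)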
Example #3
class Model:
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # constructor
    def __init__(self):
        self.model = RandomForestRegressor(n_estimators=100)

    # Set model parameters
    def set_param(self, param):
        # Placeholder for future use; no implementation yet (pass)
        pass

    # Machine learning (training)
    def fit(self, x_train, y_train):
        self.model = self.model.fit(x_train, y_train)

    # Get predictions
    def predict(self, test_data):
        return self.model.predict(test_data).astype(int)

    # Evaluation (RMSE)
    def predictScore(self, y_true, y_pred):
        return np.sqrt(mean_squared_error(y_true, y_pred))
Example #4
class SubmitCsv:
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # constructor
    def __init__(self, file_path):
        self.file_path = file_path

    # Write the CSV
    def to_csv(self, param_header, param):
        submit_file = open(self.file_path, "w", newline="")
        file_object = csv.writer(submit_file)
        file_object.writerow(param_header)
        file_object.writerows(param)
        submit_file.close()
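
# A minimal usage sketch for SubmitCsv above (not part of the original example;
# the output path, header, and rows are hypothetical):
submit = SubmitCsv("./output/submit.csv")
submit.to_csv(["ID", "item_cnt_month"], [["0", 1.0], ["1", 0.0]])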
Example #5
class Model_SVM:
    #xgb_model = xgb.XGBRegressor(objective="reg:linear", random_state=42)
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # constructor
    def __init__(self, param):
        self.model = GridSearchCV(SVC(kernel='linear', random_state=None),
                                  param,
                                  cv=2,
                                  verbose=1,
                                  return_train_score=False)

    # Machine learning (training)
    def fit(self, x_train, y_train):
        self.log.info('fit start')
        self.model.fit(x_train, y_train)

        self.log.info('fit end')

    # Feature importances of the best estimator
    def grid_search_feature_importances(self, getList):
        return pd.DataFrame({
            "feature":
            getList,
            "importance":
            self.model.best_estimator_.feature_importances_
        }).sort_values(by="importance", ascending=False)

    # Best parameters
    def grid_search_best_params(self):
        return self.model.best_params_

    # Best cross-validation
    def grid_search_best_score(self):
        return self.model.best_score_

    # Get predictions
    def predict(self, test_data):
        return self.model.predict(test_data).astype(int)
Example #6
class Model:
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)
    
    # constructor
    def __init__(self):
        self.model = RandomForestRegressor(n_estimators=100)

    # Set model parameters
    def set_param(self,param):
        # Placeholder for future use; no implementation yet (pass)
        pass

    # Machine learning (training)
    def fit(self,x_train,y_train):
        self.model = self.model.fit(x_train, y_train)

    # Get predictions
    def predict(self,test_data):
        return self.model.predict(test_data)

    # Get feature importances
    def get_feature_importances(self,x_train):
        x = x_train.columns.values
        y = self.model.feature_importances_
        return pd.DataFrame({"feature":x,"importance":y}).sort_values(by="importance",ascending=False)
    
    # Evaluation (RMSE)
    def predictScore(self,y_true,y_pred):
        rmse_val = np.sqrt(
            np.mean(
                np.square(
                    np.array(y_true - y_pred)
                )
            )
        )
        return rmse_val
Example #7
    log(ERROR, data)

def fatal(data):
    log(FATAL, data)

def none(data):
    # Uh...what?
    pass

if __name__ == "__main__":
    # Unit test/example usage:
    import logger

    # Set the logging type you want to use (stdout logging):
    #logger.setLogger(FileLogger(sys.stdout))
    logger.setLogger(NcursesLogger())

    # Set the most verbose you want to log (TRACE, DEBUG, INFO, WARN, ERROR, FATAL, NONE)
    logger.setLogLevel(logger.TRACE)

    # Log a message:
    #logger.log(logger.INFO, "logger!")

    time.sleep(0.01)
    logger.info("This is a long line, it's pretty long, butitalso hasbig wordsthat areprobably hardtobreak oninan easywayforthe ncurseslib, sowhatdoes itdo then?")
    logger.info("aa " + "a"*70 + "B")

    for i in range(20):
        logger.info("iteration #%d/20" % i)
        time.sleep(0.3)
Example #8
def main():
    # Parse the arguments received from command line
    parser = argparse.ArgumentParser(description="Train a social LSTM")
    parser.add_argument(
        "modelParams",
        type=str,
        help=
        "Path to the file or folder with the parameters of the experiments",
    )
    parser.add_argument(
        "-l",
        "--logLevel",
        help="logging level of the logger. Default is INFO",
        metavar="level",
        type=str,
    )
    parser.add_argument(
        "-f",
        "--logFolder",
        help=
        "path to the folder where to save the logs. If None, logs are only printed in stderr",
        type=str,
        metavar="path",
    )
    args = parser.parse_args()

    if os.path.isdir(args.modelParams):
        names_experiments = os.listdir(args.modelParams)
        experiments = [
            os.path.join(args.modelParams, experiment)
            for experiment in names_experiments
        ]
    else:
        experiments = [args.modelParams]

    for experiment in experiments:
        # Load the parameters
        hparams = utils.YParams(experiment)
        # Define the logger
        setLogger(hparams, args, PHASE)

        remainSpaces = 29 - len(hparams.name)
        logging.info(
            "\n" +
            "--------------------------------------------------------------------------------\n"
            + "|                            Training experiment: " +
            hparams.name + " " * remainSpaces + "|\n" +
            "--------------------------------------------------------------------------------\n"
        )

        trajectory_size = hparams.obsLen + hparams.predLen

        logging.info("Loading the training datasets...")
        train_loader = utils.DataLoader(
            hparams.dataPath,
            hparams.trainDatasets,
            hparams.trainMaps,
            hparams.semanticMaps,
            hparams.trainMapping,
            hparams.homography,
            num_labels=hparams.numLabels,
            delimiter=hparams.delimiter,
            skip=hparams.skip,
            max_num_ped=hparams.maxNumPed,
            trajectory_size=trajectory_size,
            neighborood_size=hparams.neighborhoodSize,
        )
        logging.info("Loading the validation datasets...")
        val_loader = utils.DataLoader(
            hparams.dataPath,
            hparams.validationDatasets,
            hparams.validationMaps,
            hparams.semanticMaps,
            hparams.validationMapping,
            hparams.homography,
            num_labels=hparams.numLabels,
            delimiter=hparams.delimiter,
            skip=hparams.skip,
            max_num_ped=hparams.maxNumPed,
            trajectory_size=trajectory_size,
            neighborood_size=hparams.neighborhoodSize,
        )

        logging.info(
            "Creating the training and validation dataset pipeline...")
        dataset = utils.TrajectoriesDataset(
            train_loader,
            val_loader=val_loader,
            batch=False,
            shuffle=hparams.shuffle,
            prefetch_size=hparams.prefetchSize,
        )

        hparams.add_hparam("learningRateSteps", train_loader.num_sequences)

        logging.info("Creating the model...")
        start = time.time()
        model = SocialModel(dataset, hparams, phase=PHASE)
        end = time.time() - start
        logging.debug("Model created in {:.2f}s".format(end))

        # Define the path to where save the model and the checkpoints
        save_model = False
        if hparams.modelFolder:
            save_model = True
            model_folder = os.path.join(hparams.modelFolder, hparams.name)
            if not os.path.exists(model_folder):
                os.makedirs(model_folder)
                os.makedirs(os.path.join(model_folder, "checkpoints"))
            model_path = os.path.join(model_folder, hparams.name)
            checkpoints_path = os.path.join(model_folder, "checkpoints",
                                            hparams.name)
            # Create the saver
            saver = tf.train.Saver()

        # Zero padding
        padding = len(str(train_loader.num_sequences))

        # ============================ START TRAINING ============================

        with tf.Session() as sess:
            logging.info(
                "\n" +
                "--------------------------------------------------------------------------------\n"
                +
                "|                                Start training                                |\n"
                +
                "--------------------------------------------------------------------------------\n"
            )
            # Initialize all the variables in the graph
            sess.run(tf.global_variables_initializer())

            for epoch in range(hparams.epochs):
                logging.info("Starting epoch {}".format(epoch + 1))

                # ==================== TRAINING PHASE ====================

                # Initialize the iterator of the training dataset
                sess.run(dataset.init_train)

                for sequence in range(train_loader.num_sequences):
                    start = time.time()
                    loss, _ = sess.run([model.loss, model.train_optimizer])
                    end = time.time() - start

                    logging.info(
                        "{:{width}d}/{} epoch: {} time/Batch = {:.2f}s. Loss = {:.4f}"
                        .format(
                            sequence + 1,
                            train_loader.num_sequences,
                            epoch + 1,
                            end,
                            loss,
                            width=padding,
                        ))

                # ==================== VALIDATION PHASE ====================

                logging.info(" ========== Validation ==========")
                # Initialize the iterator of the validation dataset
                sess.run(dataset.init_val)
                loss_val = 0

                for _ in range(val_loader.num_sequences):
                    loss = sess.run(model.loss)
                    loss_val += loss

                mean_val = loss_val / val_loader.num_sequences

                logging.info("Epoch: {}. Validation loss = {:.4f}".format(
                    epoch + 1, mean_val))

                # Save the model
                if save_model:
                    logging.info("Saving model...")
                    saver.save(
                        sess,
                        checkpoints_path,
                        global_step=epoch + 1,
                        write_meta_graph=False,
                    )
                    logging.info("Model saved...")
            # Save the final model
            if save_model:
                saver.save(sess, model_path)
        tf.reset_default_graph()
Example #9
            ys = val[args.y]
            zs = val[args.z]
            ax.scatter(xs, ys, zs, c=c, s=100, label=i)
        buildLegend(ax, cmap)

    else:
        # Else just plot.
        xs = dat[args.x]
        ys = dat[args.y]
        zs = dat[args.z]
        ax.scatter(xs, ys, zs, s=100)

    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_zlabel(zlab)

    galaxySavefig(fig, args.fig)


if __name__ == "__main__":
    # Command line options
    args = getOptions()

    logger = logging.getLogger()
    if args.debug:
        sl.setLogger(logger, logLevel="debug")
    else:
        sl.setLogger(logger)

    main(args)
Example #10
class DataLoad:
    ####################################################
    # Constant declarations
    ####################################################
    #FILE_TRAIN_CSV = './input/sales_train.csv'
    FILE_TRAIN_CSV = './input/sales_train_v2.csv'
    #FILE_TEST_CSV = './input/test.csv'
    FILE_TEST_CSV = './input/test2.csv'

    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # constructor
    def __init__(self):
        # The constructor does nothing in particular
        pass

    # Load training data
    def read_train_csv(self):
        self.log.info('read_train_csv start')

        #date	date_block_num	shop_id	item_id	item_price	item_cnt_day
        #14.01.2013	0	2	11330	149	1

        ## Load training data
        tmp_df = pd.read_csv(self.FILE_TRAIN_CSV,
                             header=0,
                             dtype={
                                 'date': 'str',
                                 'date_block_num': 'int',
                                 'shop_id': 'int',
                                 'item_id': 'int',
                                 'item_price': 'float',
                                 'item_cnt_day': 'float'
                             })

        tmp_df = tmp_df.groupby(['date_block_num', 'shop_id', 'item_id'],
                                as_index=False).agg({
                                    #'item_price':np.mean,
                                    'item_cnt_day': np.sum
                                })

        self.df = tmp_df

        self.log.info('read_train_csv end')

    # Load test data
    def read_test_csv(self):
        self.log.info('read_test_csv start')

        #ID	shop_id	item_id
        #20400	2	5037

        ## Load test data
        tmp_df = pd.read_csv(self.FILE_TEST_CSV,
                             header=0,
                             dtype={
                                 'ID': 'str',
                                 'shop_id': 'int',
                                 'item_id': 'int'
                             })

        self.df_test = tmp_df

        self.log.info('read_test_csv end')

    # Get training data
    def getTrainValues(self):
        return self.df

    # Get test data
    def getTestValues(self):
        return self.df_test
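
# A minimal usage sketch for DataLoad above (not part of the original example;
# it assumes the CSV files named in the class constants exist):
dl = DataLoad()
dl.read_train_csv()
dl.read_test_csv()
train_df = dl.getTrainValues()
test_df = dl.getTestValues()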
Example #11
    header = np.array(['PC{}'.format(x + 1) for x in range(loadings.shape[1])])
    compoundIndex = np.hstack([df_wide.index.name, df_wide.index])
    sampleIndex = np.hstack(['sampleID', df_wide.columns])

    # Create loadings output
    loadHead = np.vstack([header, loadings])
    loadIndex = np.column_stack([sampleIndex, loadHead])
    loadOut = np.vstack([block, loadIndex])

    # Create scores output
    scoreHead = np.vstack([header, scores])
    scoreIndex = np.column_stack([compoundIndex, scoreHead])
    scoreOut = np.vstack([block, scoreIndex])

    # Save output
    np.savetxt(args.lname, loadOut, fmt='%s', delimiter='\t')
    np.savetxt(args.sname, scoreOut, fmt='%s', delimiter='\t')


if __name__ == '__main__':
    # Command line options
    args = getOptions()

    logger = logging.getLogger()
    if args.debug:
        sl.setLogger(logger, logLevel='debug')
    else:
        sl.setLogger(logger)

    main(args)
Example #12
class DataLoad:
    ####################################################
    # Constant declarations
    ####################################################
    FILE_TRAIN_CSV = '../input/sales_train_v2.csv'
    FILE_TEST_CSV = '../input/test.csv'
    # FILE_TRAIN_CSV = '../input/sales_train_sample.csv'
    # FILE_TEST_CSV = '../input/test_sample.csv'
    FILE_ITEM_CATEGORIES_CSV = '../input/item_categories.csv'
    FILE_ITEMS_CSV = '../input/items.csv'
    # FILE_SHOPS_CSV = '../input/shops.csv'

    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # constructor
    def __init__(self, windows_size):
        # Load the CSV data
        self.log.info('DataLoad constructor start')

        ## Load test data
        test_df_csv = pd.read_csv(self.FILE_TEST_CSV,
                                  header=0,
                                  dtype={
                                      'ID': 'str',
                                      'shop_id': 'int',
                                      'item_id': 'int'
                                  })

        ## Load training data
        train_df_csv = pd.read_csv(self.FILE_TRAIN_CSV,
                                   header=0,
                                   dtype={
                                       'date': 'str',
                                       'date_block_num': 'int',
                                       'shop_id': 'int',
                                       'item_id': 'int',
                                       'item_price': 'float',
                                       'item_cnt_day': 'float'
                                   })

        ## Load FILE_ITEM_CATEGORIES_CSV data
        item_categories_df_csv = pd.read_csv(self.FILE_ITEM_CATEGORIES_CSV,
                                             header=0,
                                             dtype={
                                                 'item_category_name': 'str',
                                                 'item_category_id': 'int'
                                             })

        ## Load FILE_ITEMS_CSV data
        items_df_csv = pd.read_csv(self.FILE_ITEMS_CSV,
                                   header=0,
                                   dtype={
                                       'item_name': 'str',
                                       'item_id': 'int',
                                       'item_category_id': 'int'
                                   })

        # # Load FILE_SHOPS_CSV data
        # shops_df_csv = pd.read_csv(self.FILE_SHOPS_CSV, header=0,
        #     dtype = {
        #         'shop_name':'str',
        #         'shop_id':'int'})

        # Add the daily sales amount
        train_df_csv['item_sales_day'] = train_df_csv[
            'item_price'] * train_df_csv['item_cnt_day']

        # Align the test data with the training data
        train_df = pd.DataFrame()
        for i in range(35):
            tmp = test_df_csv[['shop_id', 'item_id']]
            tmp['date_block_num'] = i
            train_df = pd.concat([train_df, tmp], axis=0)

        # Aggregate the sales count and sales amount per item
        item_mon_train_df = train_df_csv.groupby(
            ['date_block_num', 'item_id'], as_index=False).agg({
                'item_sales_day':
                np.sum,
                'item_cnt_day':
                np.sum
            }).rename(
                columns={
                    'item_cnt_day': 'only_item_cnt_month',
                    'item_sales_day': 'only_item_sales_month'
                })

        # Compute the maximum price per item
        item_max_train_df = train_df_csv.groupby(
            ['date_block_num', 'item_id'], as_index=False).agg({
                'item_price':
                np.max
            }).rename(columns={'item_price': 'item_price_max'})

        # Compute the mean price per shop and item
        mean_train_df = train_df_csv.groupby(
            ['date_block_num', 'shop_id', 'item_id'], as_index=False).agg({
                'item_price':
                np.mean
            }).rename(columns={'item_price': 'item_price_mean'})

        # Aggregate by month
        mon_train_df = train_df_csv.groupby(
            ['date_block_num', 'shop_id', 'item_id'], as_index=False).agg({
                'item_sales_day':
                np.sum,
                'item_cnt_day':
                np.sum
            }).rename(columns={
                'item_cnt_day': 'item_cnt_month',
                'item_sales_day': 'item_sales_month'
            })

        # Clip to the range 0-20
        mon_train_df['item_cnt_month'] = mon_train_df['item_cnt_month'].clip(
            0, 20)

        # Merge in the per-item sales count/amount and the per-item maximum price
        mon_train_df = pd.merge(mon_train_df,
                                item_mon_train_df,
                                on=['date_block_num', 'item_id'],
                                how='left').fillna(0)
        mon_train_df = pd.merge(mon_train_df,
                                item_max_train_df,
                                on=['date_block_num', 'item_id'],
                                how='left').fillna(0)
        mon_train_df = pd.merge(mon_train_df,
                                mean_train_df,
                                on=['date_block_num', 'shop_id', 'item_id'],
                                how='left').fillna(0)

        # Merge the monthly aggregation results into the test data
        train_df = pd.merge(train_df,
                            mon_train_df,
                            on=['date_block_num', 'shop_id', 'item_id'],
                            how='left').fillna(0)

        # Compute the discount rate
        train_df['discount_rate'] = train_df[
            train_df["item_price_max"] != 0]["item_price_mean"] / train_df[
                train_df["item_price_max"] != 0]["item_price_max"]
        train_df.loc[((train_df["item_price_max"] == 0) &
                      (train_df["item_price_mean"] != 0)),
                     "discount_rate"] = 1  # 値がない場合は1とする
        train_df.loc[((train_df["item_price_max"] == 0) &
                      (train_df["item_price_mean"] == 0)), "discount_rate"] = 0

        # Sort by shop_id, item_id, date_block_num
        train_df = train_df.sort_values(
            ['shop_id', 'item_id', 'date_block_num'],
            ascending=[True, True, True]).reset_index(drop=True)

        # Keep the data for building lag features
        lag_df = train_df

        # Merge the monthly sales counts (lag columns) into the test data
        lagNum = list(range(1, windows_size))
        lagNum.append(13)
        for i in lagNum:
            train_df = pd.concat([
                train_df,
                lag_df.shift(i).rename(
                    columns={'item_cnt_month': 'lag' + str(i)})['lag' + str(i)]
            ],
                                 axis=1)

        for i in lagNum:
            train_df = pd.concat([
                train_df,
                lag_df.shift(i).rename(
                    columns={'item_sales_month': 'lag_sales' +
                             str(i)})['lag_sales' + str(i)]
            ],
                                 axis=1)

        # The sales amount is no longer needed once the lags are created, so drop it
        train_df = train_df.drop(columns=['item_sales_month'])

        # Replace N/A with 0
        train_df = train_df.fillna(0)

        # Add a month-over-month (MoM) column
        train_df['MoM'] = train_df[train_df["lag2"] != 0]["lag1"] / train_df[
            train_df["lag2"] != 0]["lag2"]
        train_df.loc[((train_df["lag2"] == 0) & (train_df["lag1"] != 0)),
                     "MoM"] = 1  # 前月の値がない場合は1とする
        train_df.loc[((train_df["lag2"] == 0) & (train_df["lag1"] == 0)),
                     "MoM"] = 0

        # Add a year-over-year (YoY) column
        train_df['YoY'] = train_df[train_df["lag13"] != 0]["lag1"] / train_df[
            train_df["lag13"] != 0]["lag13"]
        train_df.loc[((train_df["lag13"] == 0) & (train_df["lag1"] != 0)),
                     "YoY"] = 1  # 前年の値がない場合は1とする
        train_df.loc[((train_df["lag13"] == 0) & (train_df["lag1"] == 0)),
                     "YoY"] = 0

        # # Join shops
        # train_df = pd.merge(train_df, shops_df_csv, on='shop_id', how='left')

        # Join items
        train_df = pd.merge(train_df,
                            items_df_csv[['item_id', 'item_category_id']],
                            on='item_id',
                            how='left')

        # Join item_categories
        # Append " - " to the end so that every row can be split
        item_categories_df_csv['item_category_name'] = pd.DataFrame({
            'item_category_name':
            item_categories_df_csv['item_category_name'] + " - filler"
        })
        train_df = pd.merge(train_df,
                            item_categories_df_csv,
                            on='item_category_id',
                            how='left')

        # Split item_category_name on " - "
        train_df['big_category_name'] = train_df['item_category_name'].map(
            lambda x: x.split(' - ')[0])
        train_df['small_category_name'] = train_df['item_category_name'].map(
            lambda x: x.split(' - ')[1])
        train_df = train_df.drop(columns=['item_category_name'])

        # Normalize big_category_name values
        train_df.loc[train_df['big_category_name'] ==
                     'Чистые носители (шпиль)',
                     'big_category_name'] = 'Чистые носители '
        train_df.loc[train_df['big_category_name'] ==
                     'Чистые носители (штучные)',
                     'big_category_name'] = 'Чистые носители'
        # train_df.loc[train_df['big_category_name']=='Игры Android','big_category_name'] = 'Игры'
        # train_df.loc[train_df['big_category_name']=='Игры MAC','big_category_name'] = 'Игры'
        # train_df.loc[train_df['big_category_name']=='Игры PC','big_category_name'] = 'Игры'
        train_df.loc[train_df['big_category_name'] ==
                     'Карты оплаты (Кино, Музыка, Игры)',
                     'big_category_name'] = 'Карты оплаты'

        # Check how well the categories were consolidated
        self.log.info(train_df['big_category_name'].value_counts())

        # Apply the LabelEncoder
        le = LabelEncoder()
        # train_df['shop_name'] = pd.DataFrame({'shop_name':le.fit_transform(train_df['shop_name'])})
        # train_df['item_name'] = pd.DataFrame({'item_name':le.fit_transform(train_df['item_name'])})
        train_df['big_category_name'] = pd.DataFrame({
            'big_category_name':
            le.fit_transform(train_df['big_category_name'])
        })
        train_df['small_category_name'] = pd.DataFrame({
            'small_category_name':
            le.fit_transform(train_df['small_category_name'])
        })

        # Combine item_id and shop_id into a unique number
        train_df['unique_no'] = train_df['item_id'] * 100 + train_df['shop_id']
        train_df = train_df.drop(columns=['item_id'])
        train_df = train_df.drop(columns=['shop_id'])

        self.df = train_df
        self.test_df = test_df_csv

        self.log.info('DataLoad constructor end')

    # Get training data
    def getTrainValues(self):
        return self.df

    # Get test data
    def getTestValues(self):
        return self.test_df
Example #13

def fatal(data):
    log(FATAL, data)


def none(data):
    # Uh...what?
    pass


if __name__ == "__main__":
    # Unit test/example usage:
    import logger

    # Set the logging type you want to use (stdout logging):
    logger.setLogger(FileLogger(sys.stdout))

    # Set the most verbose you want to log (TRACE, DEBUG, INFO, WARN, ERROR, FATAL, NONE)
    logger.setLogLevel(logger.TRACE)

    # Log a message:
    logger.log(logger.INFO, "logger!")

    time.sleep(0.01)

    # Alternatively, use
    logger.error("errrrr")

    logger.trace("some trace data: %d - %f - %s" % (5, 8.3, 'cows'))
Example #14
def fatal(data):
    log(FATAL, data)


def none(data):
    # Uh...what?
    pass


if __name__ == "__main__":
    # Unit test/example usage:
    import logger

    # Set the logging type you want to use (stdout logging):
    #logger.setLogger(FileLogger(sys.stdout))
    logger.setLogger(NcursesLogger())

    # Set the most verbose you want to log (TRACE, DEBUG, INFO, WARN, ERROR, FATAL, NONE)
    logger.setLogLevel(logger.TRACE)

    # Log a message:
    #logger.log(logger.INFO, "logger!")

    time.sleep(0.01)
    logger.info(
        "This is a long line, it's pretty long, butitalso hasbig wordsthat areprobably hardtobreak oninan easywayforthe ncurseslib, sowhatdoes itdo then?"
    )
    logger.info("aa " + "a" * 70 + "B")

    for i in range(20):
        logger.info("iteration #%d/20" % i)
Example #15
####################################################
# Imports
####################################################
import data_load
import model_RandomForest as model
import submit_csv
import logger
import logging

####################################################
# Logger declaration
####################################################
log = logging.getLogger(__name__)
logger.setLogger(log)

####################################################
# Load data
####################################################
log.info('start read data')
# Training data
train_dl = data_load.DataLoad("./input/train.csv")
# Test data
test_dl = data_load.DataLoad("./input/test.csv")

log.info('end read data')

####################################################
# Analysis
####################################################
log.info('start analysis')
Example #16
    if hasattr(dat, 'group'):
        logger.info('Plotting sample distributions by group')
        legend1 = pltByTrt(dat, ax1)
    else:
        logger.info('Plotting sample distributions')
        pltBySample(dat, ax1)

    # Create Legend
    handles, labels = ax1.get_legend_handles_labels()
    ax1.legend(handles, labels, ncol=5, loc='upper right', fontsize=10)

    # Create second legend if there is group information
    if hasattr(dat, 'group'):
        ax1.add_artist(legend1)

    # Plot boxplot of samples
    pltBoxplot(dat, ax2)

    plt.savefig(args.ofig, format='pdf')
    mpld3.save_html(fig, args.ofig2, template_type='simple')


if __name__ == '__main__':
    # Command line options
    args = getOptions()

    logger = logging.getLogger()
    sl.setLogger(logger)

    main(args)
Example #17
def importTargets(options):
    logger.setLogger("console")

    finalTargets = []
    ignoredTargets = []
    partialImport = False
    fetchStep = commands.BuildStep("fetch", commands.__getFetchCommand(None))
    unpackStep = commands.BuildStep("unpack", commands.__getUnpackCommand(None))

    tempDir = tempfile.mkdtemp(prefix="mixdown-")
    options.downloadDir = os.path.join(tempDir, "mdDownloads")

    while len(options.targetsToImport) != 0:
        target = options.targetsToImport.pop(0)

        logger.writeMessage("Analyzing target...", target.name)
        logger.writeMessage("Extracting target...", target.name)

        target.outputPath = os.path.join(tempDir, target.name)
        target.currBuildStep = fetchStep
        if not commands.buildStepActor(target, options, None):
            utilityFunctions.removeDir(tempDir)
            return None, False
        target.currBuildStep = unpackStep
        if not commands.buildStepActor(target, options, None):
            utilityFunctions.removeDir(tempDir)
            return None, False

        #Generate build files and find possible dependencies
        possibleDeps = []
        if cmake.isCMakeProject(target.path):
            logger.writeMessage("CMake project found...", target.name)
            logger.writeMessage("Analyzing for dependencies...", target.name)
            possibleDeps = cmake.getDependencies(target.path, target.name)
        elif autoTools.isAutoToolsProject(target.path):
            logger.writeMessage("Auto Tools project found...", target.name)
            if not os.path.exists(os.path.join(target.path, "configure")):
                if not autoTools.generateConfigureFiles(target.path, target.name):
                    utilityFunctions.removeDir(tempDir)
                    return None, False
            logger.writeMessage("Analyzing for dependencies...", target.name)
            possibleDeps = autoTools.getDependencies(target.path, target.name)
            if possibleDeps == None:
                target.comment = "Unable to parse 'configure --help' output. MixDown cannot determine dependencies for this target."
                logger.writeError(target.comment, target.name)
                partialImport = True
                possibleDeps = []
        elif make.isMakeProject(target.path):
            target.comment = "Make project found. MixDown cannot determine dependencies from Make projects."
            logger.writeError(target.comment, target.name)
            partialImport = True
        else:
            target.comment = "Unknown build system found.  MixDown cannot determine dependencies or build commands."
            logger.writeError(target.comment, target.name)
            partialImport = True

        #Find actual dependencies
        for possibleDependency in possibleDeps:
            if getTarget(possibleDependency, finalTargets + options.targetsToImport):
                logger.writeMessage("Known dependency found (" + possibleDependency + ")", target.name)
                target.dependsOn.append(possibleDependency)
                continue
            elif options.interactive and possibleDependency in ignoredTargets:
                logger.writeMessage("Previously ignored dependency found (" + possibleDependency + ")", target.name)
                continue

            if searchForPossibleAliasInList(possibleDependency, finalTargets + options.targetsToImport, options.interactive):
                target.dependsOn.append(possibleDependency)
            elif not options.interactive:
                logger.writeMessage("Ignoring unknown dependency (" + possibleDependency + ")", target.name)
            else:
                logger.writeMessage("Unknown dependency found (" + possibleDependency + ")", target.name)
                userInput = raw_input(possibleDependency + ": Input location, target name, or blank to ignore:").strip()
                if userInput == "":
                    ignoredTargets.append(possibleDependency)
                elif os.path.isfile(userInput) or os.path.isdir(userInput) or utilityFunctions.isURL(userInput):
                    name = target.targetPathToName(userInput)
                    if name == "":
                        return None, False
                    newTarget = target.Target(name, userInput)
                    options.targetsToImport.append(newTarget)
                    if target.normalizeName(possibleDependency) != target.normalizeName(userInput):
                        newTarget.aliases.append(possibleDependency)
                    target.dependsOn.append(possibleDependency)
                else:
                    aliasTarget = getTarget(userInput, finalTargets + options.targetsToImport, possibleDependency)
                    if aliasTarget != None:
                        logger.writeMessage("Alias added (" + userInput + ")", aliasTarget.name)
                        target.dependsOn.append(possibleDependency)
                    else:
                        aliasLocation = raw_input(userInput + ": Target name not found in any known targets.  Location of new target:").strip()
                        if os.path.isfile(aliasLocation) or os.path.isdir(aliasLocation) or utilityFunctions.isURL(aliasLocation):
                            name = target.targetPathToName(aliasLocation)
                            if name == "":
                                return None, False
                            newTarget = target.Target(name, aliasLocation)
                            notReviewedTargets.append(newTarget)
                            if target.normalizeName(possibleDependency) != target.normalizeName(aliasLocation):
                                newTarget.aliases.append(possibleDependency)
                            target.dependsOn.append(possibleDependency)
                        else:
                            logger.writeError(userInput + ": Alias location not understood.", exitProgram=True)

        finalTargets.append(target)

    #Create project for targets
    projects = project.Project("ProjectNameNotDetermined", finalTargets)

    if not projects.examine(options):
        logger.writeError("Project failed examination", exitProgram=True)
    if not projects.validate(options):
        logger.writeError("Project failed validation", exitProgram=True)

    mainTargetPath = projects.targets[0].origPath
    if utilityFunctions.isURL(mainTargetPath):
        mainTargetPath = utilityFunctions.URLToFilename(mainTargetPath)
    mainTargetName, mainTargetVersion = utilityFunctions.splitFileName(mainTargetPath)
    if mainTargetVersion != "":
        projects.name = mainTargetName + "-" + mainTargetVersion
    else:
        projects.name = mainTargetName
    projects.path = projects.name + ".md"

    for target in projects.targets:
        target.outputPath = ""

    if projects.examine(options):
        logger.writeMessage("\nFinal targets...\n\n" + str(projects))
        projects.write()

    utilityFunctions.removeDir(tempDir)
    return projects, partialImport
Example #18
    for target_id in targets:
        print "targets: ", targets

        factories = conn.factories( target_id )
        print "factories: ", factories

        for factory_id, ident in factories:
            print "factory_id ", factory_id, ident
            instances = conn.instances( ident )
            print "instances: ", instances

            for instance_id in instances:
                print "info %s" % instance_id
                info = conn.instance_info(instance_id)
                print info
                assert info['scout'] == True

                print "upgrade elite"
                res = conn.instance_upgrade(instance_id)
                print "res: %s" % res

                info = conn.instance_info(instance_id)
                print info
                if res:
                    assert info['upgradable'] == True

if __name__ == "__main__":
    import logger
    logger.setLogger(debug=True)
    test()
Example #19
def importTargets(options):
    logger.setLogger("console")

    finalTargets = []
    ignoredTargets = []
    partialImport = False
    fetchStep = commands.BuildStep("fetch", commands.__getFetchCommand(None))
    unpackStep = commands.BuildStep("unpack",
                                    commands.__getUnpackCommand(None))

    tempDir = tempfile.mkdtemp(prefix="mixdown-")
    options.downloadDir = os.path.join(tempDir, "mdDownloads")

    while len(options.targetsToImport) != 0:
        target = options.targetsToImport.pop(0)

        logger.writeMessage("Analyzing target...", target.name)
        logger.writeMessage("Extracting target...", target.name)

        target.outputPath = os.path.join(tempDir, target.name)
        target.currBuildStep = fetchStep
        if not commands.buildStepActor(target, options, None):
            utilityFunctions.removeDir(tempDir)
            return None, False
        target.currBuildStep = unpackStep
        if not commands.buildStepActor(target, options, None):
            utilityFunctions.removeDir(tempDir)
            return None, False

        #Generate build files and find possible dependencies
        possibleDeps = []
        if cmake.isCMakeProject(target.path):
            logger.writeMessage("CMake project found...", target.name)
            logger.writeMessage("Analyzing for dependencies...", target.name)
            possibleDeps = cmake.getDependencies(target.path, target.name)
        elif autoTools.isAutoToolsProject(target.path):
            logger.writeMessage("Auto Tools project found...", target.name)
            if not os.path.exists(os.path.join(target.path, "configure")):
                if not autoTools.generateConfigureFiles(
                        target.path, target.name):
                    utilityFunctions.removeDir(tempDir)
                    return None, False
            logger.writeMessage("Analyzing for dependencies...", target.name)
            possibleDeps = autoTools.getDependencies(target.path, target.name)
            if possibleDeps == None:
                target.comment = "Unable to parse 'configure --help' output. MixDown cannot determine dependencies for this target."
                logger.writeError(target.comment, target.name)
                partialImport = True
                possibleDeps = []
        elif make.isMakeProject(target.path):
            target.comment = "Make project found. MixDown cannot determine dependencies from Make projects."
            logger.writeError(target.comment, target.name)
            partialImport = True
        else:
            target.comment = "Unknown build system found.  MixDown cannot determine dependencies or build commands."
            logger.writeError(target.comment, target.name)
            partialImport = True

        #Find actual dependencies
        for possibleDependency in possibleDeps:
            if getTarget(possibleDependency,
                         finalTargets + options.targetsToImport):
                logger.writeMessage(
                    "Known dependency found (" + possibleDependency + ")",
                    target.name)
                target.dependsOn.append(possibleDependency)
                continue
            elif options.interactive and possibleDependency in ignoredTargets:
                logger.writeMessage(
                    "Previously ignored dependency found (" +
                    possibleDependency + ")", target.name)
                continue

            if searchForPossibleAliasInList(
                    possibleDependency, finalTargets + options.targetsToImport,
                    options.interactive):
                target.dependsOn.append(possibleDependency)
            elif not options.interactive:
                logger.writeMessage(
                    "Ignoring unknown dependency (" + possibleDependency + ")",
                    target.name)
            else:
                logger.writeMessage(
                    "Unknown dependency found (" + possibleDependency + ")",
                    target.name)
                userInput = raw_input(
                    possibleDependency +
                    ": Input location, target name, or blank to ignore:"
                ).strip()
                if userInput == "":
                    ignoredTargets.append(possibleDependency)
                elif os.path.isfile(userInput) or os.path.isdir(
                        userInput) or utilityFunctions.isURL(userInput):
                    name = target.targetPathToName(userInput)
                    if name == "":
                        return None, False
                    newTarget = target.Target(name, userInput)
                    options.targetsToImport.append(newTarget)
                    if target.normalizeName(
                            possibleDependency) != target.normalizeName(
                                userInput):
                        newTarget.aliases.append(possibleDependency)
                    target.dependsOn.append(possibleDependency)
                else:
                    aliasTarget = getTarget(
                        userInput, finalTargets + options.targetsToImport,
                        possibleDependency)
                    if aliasTarget != None:
                        logger.writeMessage("Alias added (" + userInput + ")",
                                            aliasTarget.name)
                        target.dependsOn.append(possibleDependency)
                    else:
                        aliasLocation = raw_input(
                            userInput +
                            ": Target name not found in any known targets.  Location of new target:"
                        ).strip()
                        if os.path.isfile(aliasLocation) or os.path.isdir(
                                aliasLocation) or utilityFunctions.isURL(
                                    aliasLocation):
                            name = target.targetPathToName(aliasLocation)
                            if name == "":
                                return None, False
                            newTarget = target.Target(name, aliasLocation)
                            notReviewedTargets.append(newTarget)
                            if target.normalizeName(possibleDependency
                                                    ) != target.normalizeName(
                                                        aliasLocation):
                                newTarget.aliases.append(possibleDependency)
                            target.dependsOn.append(possibleDependency)
                        else:
                            logger.writeError(
                                userInput + ": Alias location not understood.",
                                exitProgram=True)

        finalTargets.append(target)

    #Create project for targets
    projects = project.Project("ProjectNameNotDetermined", finalTargets)

    if not projects.examine(options):
        logger.writeError("Project failed examination", exitProgram=True)
    if not projects.validate(options):
        logger.writeError("Project failed validation", exitProgram=True)

    mainTargetPath = projects.targets[0].origPath
    if utilityFunctions.isURL(mainTargetPath):
        mainTargetPath = utilityFunctions.URLToFilename(mainTargetPath)
    mainTargetName, mainTargetVersion = utilityFunctions.splitFileName(
        mainTargetPath)
    if mainTargetVersion != "":
        projects.name = mainTargetName + "-" + mainTargetVersion
    else:
        projects.name = mainTargetName
    projects.path = projects.name + ".md"

    for target in projects.targets:
        target.outputPath = ""

    if projects.examine(options):
        logger.writeMessage("\nFinal targets...\n\n" + str(projects))
        projects.write()

    utilityFunctions.removeDir(tempDir)
    return projects, partialImport
Example #20
class DataLoad:
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # Constant declarations
    CABIN_FARE_SAMPLING = 10  # bin width for the per-cabin fare
    
    # constructor
    def __init__(self, file_path):
        self.log.info('init start')

        # Load training data
        tmp_df = pd.read_csv(file_path, header=0)
        # Create a LabelEncoder instance
        le = LabelEncoder()

        # Data editing and imputation
        # Sex(Gender)
        # Convert "Sex" to be a dummy variable (female = 0, Male = 1)
        tmp_df["Gender"] = tmp_df["Sex"].map({"female": 0, "male": 1}).astype(int)

        # honorific
        # Names carry an honorific, which likely affects survival rate, so add an honorific column
        # Split on the comma/period delimiters
        tmp_name = tmp_df['Name'].str.split(', |. ', expand=True)

        # Add the column
        tmp_df['Honorific'] = tmp_name[1]
        tmp_df.loc[tmp_df.Honorific.isnull(), "Honorific"] = "None"

        # Fit the label encoder
        le = le.fit(tmp_df['Honorific'])
        # Convert the labels to integers
        tmp_df['Honorific'] = le.transform(tmp_df['Honorific'])

        # Age
        # Age distributions likely differ by ticket class, so compute the median age per class
        median_age1 = tmp_df[tmp_df["Pclass"] == 1]["Age"].dropna().median()
        median_age2 = tmp_df[tmp_df["Pclass"] == 2]["Age"].dropna().median()
        median_age3 = tmp_df[tmp_df["Pclass"] == 3]["Age"].dropna().median()
        self.log.debug("Age median_age1={} median_age2={} median_age3={}".format(median_age1,median_age2,median_age3))

        if len(tmp_df.Age[tmp_df.Age.isnull()]) > 0:
            # Use loc to fill the missing Age values with the class medians
            tmp_df.loc[(tmp_df.Age.isnull())&(tmp_df["Pclass"] == 1), "Age"] = median_age1
            tmp_df.loc[(tmp_df.Age.isnull())&(tmp_df["Pclass"] == 2), "Age"] = median_age2
            tmp_df.loc[(tmp_df.Age.isnull())&(tmp_df["Pclass"] == 3), "Age"] = median_age3
        
        # Parch (number of parents/children aboard)
        # This column reportedly excludes nannies, so change 0 to 1 for passengers aged 15 or under, who would not travel alone
        tmp_df.loc[(tmp_df["Age"] <= 15)&(tmp_df["Parch"] == 0), "Parch"] = 1

        # Column counting how many passengers share each ticket
        for TicketValue in set(tmp_df["Ticket"].values):
            TicketCnt = (tmp_df["Ticket"] == TicketValue).sum()
            tmp_df.loc[(tmp_df["Ticket"] == TicketValue), "TicketCnt"] = TicketCnt
        
        # Team
        # The ticket count and the companion count do not always match, so information such as friends is probably missing from the data
        # Companions (family or friends) likely raise the survival rate through cooperation, so add a team-size column
        
        # SibSp and Parch exclude the passenger, so add 1; take the larger of this and the ticket count as the team size
        tmp_df["Team"] = (tmp_df["SibSp"] + tmp_df["Parch"] + 1) 
        tmp_df.loc[tmp_df["Team"] < tmp_df["TicketCnt"], "Team"] = tmp_df["TicketCnt"]

        # TravelAlone
        # Survival differs between solo travellers, small groups (under 8) and large groups (8 or more), so give each its own flag
        tmp_df['TravelAlone'] = 0
        tmp_df.loc[tmp_df["Team"] == 1, "TravelAlone"] = 1

        # SmallGroup
        tmp_df['SmallGroup'] = 0
        tmp_df.loc[(tmp_df["Team"] > 1)&(tmp_df["Team"] < 8), "SmallGroup"] = 1

        # BigGroup
        tmp_df['BigGroup'] = 0
        tmp_df.loc[tmp_df["Team"] >= 8, "BigGroup"] = 1
        
        # Fare
        # The fare appears to be the total across the ticket, so convert it back to a per-person fare
        tmp_df["Fare"] = tmp_df["Fare"] / tmp_df["TicketCnt"]
        # Treat fare = 0 as missing as well
        tmp_df.loc[tmp_df["Fare"] == 0, "Fare"] = None
        # After converting the fare, compute the median per ticket class
        median_fare = tmp_df["Fare"].dropna().median()
        median_fare1 = tmp_df[tmp_df["Pclass"] == 1]["Fare"].dropna().median()
        median_fare2 = tmp_df[tmp_df["Pclass"] == 2]["Fare"].dropna().median()
        median_fare3 = tmp_df[tmp_df["Pclass"] == 3]["Fare"].dropna().median()
        self.log.debug("Fare median_fare={} median_fare1={} median_fare2={} median_fare3={}".format(median_fare,median_fare1,median_fare2,median_fare3))
        
        if len(tmp_df.Fare[tmp_df.Fare.isnull()]) > 0:
            # Use loc to fill the missing Fare values with the class medians
            tmp_df.loc[(tmp_df.Fare.isnull())&(tmp_df["Pclass"] == 1), "Fare"] = median_fare1
            tmp_df.loc[(tmp_df.Fare.isnull())&(tmp_df["Pclass"] == 2), "Fare"] = median_fare2
            tmp_df.loc[(tmp_df.Fare.isnull())&(tmp_df["Pclass"] == 3), "Fare"] = median_fare3
        
        # Some tickets cover multiple cabins, so also compute the fare per cabin
        tmp_df["CabinCnt"] = tmp_df["Cabin"].str.count(" ")+1
        tmp_df.loc[(tmp_df.Cabin.isnull()), "CabinCnt"] = 1
        # Keep the per-cabin fare binned by the CABIN_FARE_SAMPLING step
        tmp_df["CabinFare"] = self.CABIN_FARE_SAMPLING * (((tmp_df["Fare"] / tmp_df["CabinCnt"]) // self.CABIN_FARE_SAMPLING) + 1)
        
        # Cabin (cabin number)
        # Map Cabin to A*:0, B*:1, C*:2, D*:3, E*:4, F*:5, G*:6, T*:7 (when several letters appear, use the highest deck)
        tmp_df.loc[(tmp_df.Cabin.notnull())&(tmp_df["Cabin"].str.contains("A")), "CabinRank"] = 0
        tmp_df.loc[(tmp_df.Cabin.notnull())&(tmp_df["Cabin"].str.contains("B")), "CabinRank"] = 1
        tmp_df.loc[(tmp_df.Cabin.notnull())&(tmp_df["Cabin"].str.contains("C")), "CabinRank"] = 2
        tmp_df.loc[(tmp_df.Cabin.notnull())&(tmp_df["Cabin"].str.contains("D")), "CabinRank"] = 3
        tmp_df.loc[(tmp_df.Cabin.notnull())&(tmp_df["Cabin"].str.contains("E")), "CabinRank"] = 4
        tmp_df.loc[(tmp_df.Cabin.notnull())&(tmp_df["Cabin"].str.contains("F")), "CabinRank"] = 5
        tmp_df.loc[(tmp_df.Cabin.notnull())&(tmp_df["Cabin"].str.contains("G")), "CabinRank"] = 6
        tmp_df.loc[(tmp_df.Cabin.notnull())&(tmp_df["Cabin"].str.contains("T")), "CabinRank"] = 7
        
        # Cabin has many missing values, so estimate it from the other information
        before_median_CabinRank = 8  # initialize to 8 for now
        for CabinFareValue in sorted(set(tmp_df["CabinFare"].values)):
            # Compute the median CabinRank for this per-cabin fare bin
            median_CabinRank = tmp_df[tmp_df["CabinFare"] == CabinFareValue]["CabinRank"].dropna().median()
        
            # If it cannot be obtained (null), substitute the previous level
            if np.isnan(median_CabinRank):
                median_CabinRank = before_median_CabinRank
            
            # Set the median
            tmp_df.loc[(tmp_df.CabinRank.isnull())&(tmp_df["CabinFare"] == CabinFareValue), "CabinRank"] = median_CabinRank
            self.log.debug("median_CabinRank CabinFareValue={} median_CabinRank={}".format(CabinFareValue,median_CabinRank))
            # Keep the previous level
            before_median_CabinRank = median_CabinRank

        # Embarked (port of embarkation)
        # Fill missing values with "S", the most common value
        # Map Embarked to S:0, C:1, Q:2
        tmp_df.loc[(tmp_df.Embarked.isnull()), "Embarked"] = "S"
        tmp_df["Embarked_NUM"] = tmp_df["Embarked"].map({"S": 0, "C": 1, "Q": 2}).astype(int)


        self.df = tmp_df

        self.log.info('init end')

    # Get the requested columns
    def getValues(self,param):
        return self.df[param].values
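
# A minimal usage sketch for DataLoad above (not part of the original example;
# the CSV path and the column list are hypothetical):
dl = DataLoad("./input/train.csv")
x = dl.getValues(["Pclass", "Gender", "Age", "Fare", "CabinRank", "Embarked_NUM"])
y = dl.getValues("Survived")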
Example #21
File: logger.py  Project: ewust/blokus
    log(ERROR, data)

def fatal(data):
    log(FATAL, data)

def none(data):
    # Uh...what?
    pass 


if __name__ == "__main__":
    # Unit test/example usage:
    import logger

    # Set the logging type you want to use (stdout logging):
    logger.setLogger(FileLogger(sys.stdout))

    # Set the most verbose you want to log (TRACE, DEBUG, INFO, WARN, ERROR, FATAL, NONE)
    logger.setLogLevel(logger.TRACE)

    # Log a message:
    logger.log(logger.INFO, "logger!")

    time.sleep(0.01)

    # Alternatively, use 
    logger.error("errrrr")

    logger.trace("some trace data: %d - %f - %s" % (5, 8.3, 'cows'))

Example #22
def main():
    parser = argparse.ArgumentParser(
        description="Sample new trajectories with a social LSTM")
    parser.add_argument(
        "modelParams",
        type=str,
        help=
        "Path to the file or folder with the parameters of the experiments",
    )
    parser.add_argument(
        "-l",
        "--logLevel",
        help="logging level of the logger. Default is INFO",
        metavar="level",
        type=str,
    )
    parser.add_argument(
        "-f",
        "--logFolder",
        help=
        "path to the folder where to save the logs. If None, logs are only printed in stderr",
        metavar="path",
        type=str,
    )
    parser.add_argument(
        "-ns",
        "--noSaveCoordinates",
        help="Flag to not save the predicted and ground truth coordinates",
        action="store_true",
    )
    args = parser.parse_args()

    if os.path.isdir(args.modelParams):
        names_experiments = os.listdir(args.modelParams)
        experiments = [
            os.path.join(args.modelParams, experiment)
            for experiment in names_experiments
        ]
    else:
        experiments = [args.modelParams]

    # Table will show the metrics of each experiment
    results = BeautifulTable()
    results.column_headers = ["Name experiment", "ADE", "FDE"]

    for experiment in experiments:
        # Load the parameters
        hparams = utils.YParams(experiment)
        # Define the logger
        setLogger(hparams, args, PHASE)

        remainSpaces = 29 - len(hparams.name)
        logging.info(
            "\n" +
            "--------------------------------------------------------------------------------\n"
            + "|                            Sampling experiment: " +
            hparams.name + " " * remainSpaces + "|\n" +
            "--------------------------------------------------------------------------------\n"
        )

        trajectory_size = hparams.obsLen + hparams.predLen

        saveCoordinates = False
        if args.noSaveCoordinates is True:
            saveCoordinates = False
        elif hparams.saveCoordinates:
            saveCoordinates = hparams.saveCoordinates

        if saveCoordinates:
            coordinates_path = os.path.join("coordinates", hparams.name)
            if not os.path.exists("coordinates"):
                os.makedirs("coordinates")

        logging.info("Loading the test datasets...")
        test_loader = utils.DataLoader(
            hparams.dataPath,
            hparams.testDatasets,
            hparams.testMaps,
            hparams.semanticMaps,
            hparams.testMapping,
            hparams.homography,
            num_labels=hparams.numLabels,
            delimiter=hparams.delimiter,
            skip=hparams.skip,
            max_num_ped=hparams.maxNumPed,
            trajectory_size=trajectory_size,
            neighborood_size=hparams.neighborhoodSize,
        )

        logging.info("Creating the test dataset pipeline...")
        dataset = utils.TrajectoriesDataset(
            test_loader,
            val_loader=None,
            batch=False,
            shuffle=hparams.shuffle,
            prefetch_size=hparams.prefetchSize,
        )

        logging.info("Creating the model...")
        start = time.time()
        model = SocialModel(dataset, hparams, phase=PHASE)
        end = time.time() - start
        logging.debug("Model created in {:.2f}s".format(end))

        # Define the path to the file that contains the variables of the model
        model_folder = os.path.join(hparams.modelFolder, hparams.name)
        model_path = os.path.join(model_folder, hparams.name)

        # Create a saver
        saver = tf.train.Saver()

        # Add to the computation graph the evaluation functions
        ade_sequence = utils.average_displacement_error(
            model.new_pedestrians_coordinates[-hparams.predLen:],
            model.pedestrians_coordinates[-hparams.predLen:],
            model.num_peds_frame,
        )

        fde_sequence = utils.final_displacement_error(
            model.new_pedestrians_coordinates[-1],
            model.pedestrians_coordinates[-1],
            model.num_peds_frame,
        )

        ade = 0
        fde = 0
        coordinates_predicted = []
        coordinates_gt = []
        peds_in_sequence = []

        # Zero padding
        padding = len(str(test_loader.num_sequences))

        # ============================ START SAMPLING ============================

        with tf.Session() as sess:
            # Restore the model trained
            saver.restore(sess, model_path)

            # Initialize the iterator of the sample dataset
            sess.run(dataset.init_train)

            logging.info(
                "\n" +
                "--------------------------------------------------------------------------------\n"
                +
                "|                                Start sampling                                |\n"
                +
                "--------------------------------------------------------------------------------\n"
            )

            for seq in range(test_loader.num_sequences):
                logging.info("Sample trajectory number {:{width}d}/{}".format(
                    seq + 1, test_loader.num_sequences, width=padding))

                ade_value, fde_value, coordinates_pred_value, coordinates_gt_value, num_peds = sess.run(
                    [
                        ade_sequence,
                        fde_sequence,
                        model.new_pedestrians_coordinates,
                        model.pedestrians_coordinates,
                        model.num_peds_frame,
                    ])
                ade += ade_value
                fde += fde_value
                coordinates_predicted.append(coordinates_pred_value)
                coordinates_gt.append(coordinates_gt_value)
                peds_in_sequence.append(num_peds)

            ade = ade / test_loader.num_sequences
            fde = fde / test_loader.num_sequences
            logging.info("Sampling finished. ADE: {:.4f} FDE: {:.4f}".format(
                ade, fde))
            results.append_row([hparams.name, ade, fde])

            if saveCoordinates:
                coordinates_predicted = np.array(coordinates_predicted)
                coordinates_gt = np.array(coordinates_gt)
                saveCoords(
                    coordinates_predicted,
                    coordinates_gt,
                    peds_in_sequence,
                    hparams.predLen,
                    coordinates_path,
                )
        tf.reset_default_graph()
    logging.info("\n{}".format(results))