def main():
    """Scrape the currency pages, collect short rate names, and build the
    currency tables."""
    logger = setLogger()
    currencies_responses = get_responses(200)
    logger.debug("Got responses")
    currencies_soups = collect_soups(currencies_responses)
    logger.debug("Got soups")
    curr_short_rate_names = collect_short_rate_names(currencies_soups)
    logger.debug("Collected short rate names")
    short_rate_names_to_parse = prepare_for_parse(curr_short_rate_names)
    logger.debug("Got short rate names to parse")
    short_rate_names_responses = get_short_rate_names_responses(
        short_rate_names_to_parse)
    logger.debug("Got short rate names responses")
    short_rate_names_trees = collect_short_rate_names_trees(
        short_rate_names_responses)
    logger.debug("Collected short rate names trees")
    hash_table = create_hash_table(short_rate_names_trees,
                                   short_rate_names_to_parse)
    logger.debug("Created hash_table")
    create_currencies_tables('userdb', curr_short_rate_names, hash_table)
    logger.debug("Created currencies table")
    task.react(graph_id_parser, (curr_short_rate_names,))
class Model_RandomForest:
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # Constructor
    def __init__(self, param):
        self.model = GridSearchCV(RandomForestClassifier(random_state=0),
                                  param, cv=3, verbose=1,
                                  return_train_score=False)

    # Train the model
    def fit(self, x_train, y_train):
        self.log.info('fit start')
        self.model.fit(x_train, y_train)
        self.log.info('fit end')

    # Feature importances of the best estimator
    def grid_search_feature_importances(self, getList):
        return pd.DataFrame({
            "feature": getList,
            "importance": self.model.best_estimator_.feature_importances_
        }).sort_values(by="importance", ascending=False)

    # Best parameters
    def grid_search_best_params(self):
        return self.model.best_params_

    # Best cross-validation score
    def grid_search_best_score(self):
        return self.model.best_score_

    # Get predictions
    def predict(self, test_data):
        return self.model.predict(test_data).astype(int)
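# Usage sketch (illustrative, not from the original source): the `param`
# argument above is an ordinary GridSearchCV parameter grid. A self-contained
# demonstration of the same search pattern on synthetic data:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

rng = np.random.RandomState(0)
X, y = rng.rand(60, 4), rng.randint(0, 2, 60)
search = GridSearchCV(RandomForestClassifier(random_state=0),
                      {"n_estimators": [10, 50], "max_depth": [2, None]},
                      cv=3, return_train_score=False)
search.fit(X, y)
print(search.best_params_, search.best_score_)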
class Model:
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # Constructor
    def __init__(self):
        self.model = RandomForestRegressor(n_estimators=100)

    # Set model parameters
    def set_param(self, param):
        # Reserved for future use; not implemented yet.
        pass

    # Train the model
    def fit(self, x_train, y_train):
        self.model = self.model.fit(x_train, y_train)

    # Get predictions
    def predict(self, test_data):
        return self.model.predict(test_data).astype(int)

    # Evaluation (RMSE)
    def predictScore(self, y_true, y_pred):
        return np.sqrt(mean_squared_error(y_true, y_pred))
class SubmitCsv:
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # Constructor
    def __init__(self, file_path):
        self.file_path = file_path

    # Write the submission CSV
    def to_csv(self, param_header, param):
        with open(self.file_path, "w", newline="") as submit_file:
            file_object = csv.writer(submit_file)
            file_object.writerow(param_header)
            file_object.writerows(param)
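# Usage sketch (illustrative): SubmitCsv wraps the plain csv.writer pattern
# shown below. The file name and rows here are hypothetical.
import csv

with open("submit_demo.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["ID", "item_cnt_month"])
    writer.writerows([["0", 1.0], ["1", 0.0]])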
class Model_SVM:
    # xgb_model = xgb.XGBRegressor(objective="reg:linear", random_state=42)
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # Constructor
    def __init__(self, param):
        self.model = GridSearchCV(SVC(kernel='linear', random_state=None),
                                  param, cv=2, verbose=1,
                                  return_train_score=False)

    # Train the model
    def fit(self, x_train, y_train):
        self.log.info('fit start')
        self.model.fit(x_train, y_train)
        self.log.info('fit end')

    # Feature weights of the best estimator.
    # Note: SVC has no feature_importances_ attribute; for a linear kernel
    # the per-feature weights live in coef_, so the magnitudes of coef_[0]
    # (valid for binary classification) are used here instead.
    def grid_search_feature_importances(self, getList):
        return pd.DataFrame({
            "feature": getList,
            "importance": np.abs(self.model.best_estimator_.coef_[0])
        }).sort_values(by="importance", ascending=False)

    # Best parameters
    def grid_search_best_params(self):
        return self.model.best_params_

    # Best cross-validation score
    def grid_search_best_score(self):
        return self.model.best_score_

    # Get predictions
    def predict(self, test_data):
        return self.model.predict(test_data).astype(int)
class Model:
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # Constructor
    def __init__(self):
        self.model = RandomForestRegressor(n_estimators=100)

    # Set model parameters
    def set_param(self, param):
        # Reserved for future use; not implemented yet.
        pass

    # Train the model
    def fit(self, x_train, y_train):
        self.model = self.model.fit(x_train, y_train)

    # Get predictions
    def predict(self, test_data):
        return self.model.predict(test_data)

    # Get feature importances
    def get_feature_importances(self, x_train):
        x = x_train.columns.values
        y = self.model.feature_importances_
        return pd.DataFrame({"feature": x, "importance": y}).sort_values(
            by="importance", ascending=False)

    # Evaluation (RMSE)
    def predictScore(self, y_true, y_pred):
        rmse_val = np.sqrt(np.mean(np.square(np.array(y_true - y_pred))))
        return rmse_val
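# Usage sketch (illustrative): predictScore computes RMSE,
# sqrt(mean((y_true - y_pred)^2)), which matches the
# mean_squared_error-based variant in the other Model class. Standalone check:
import numpy as np
from sklearn.metrics import mean_squared_error

y_true = np.array([3.0, 5.0, 2.0])
y_pred = np.array([2.5, 5.0, 4.0])
rmse = np.sqrt(np.mean(np.square(y_true - y_pred)))
assert np.isclose(rmse, np.sqrt(mean_squared_error(y_true, y_pred)))
print(rmse)  # ~1.1902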
def error(data):
    log(ERROR, data)

def fatal(data):
    log(FATAL, data)

def none(data):
    # Uh...what?
    pass

if __name__ == "__main__":
    # Unit test/example usage:
    import logger

    # Set the logging type you want to use (ncurses logging here;
    # stdout logging is the commented-out alternative):
    # logger.setLogger(FileLogger(sys.stdout))
    logger.setLogger(NcursesLogger())

    # Set the most verbose level you want to log
    # (TRACE, DEBUG, INFO, WARN, ERROR, FATAL, NONE)
    logger.setLogLevel(logger.TRACE)

    # Log a message:
    # logger.log(logger.INFO, "logger!")
    time.sleep(0.01)
    logger.info("This is a long line, it's pretty long, butitalso hasbig "
                "wordsthat areprobably hardtobreak oninan easywayforthe "
                "ncurseslib, sowhatdoes itdo then?")
    logger.info("aa " + "a" * 70 + "B")
    for i in range(20):
        logger.info("iteration #%d/20" % i)
        time.sleep(0.3)
def main():
    # Parse the arguments received from the command line
    parser = argparse.ArgumentParser(description="Train a social LSTM")
    parser.add_argument(
        "modelParams",
        type=str,
        help="Path to the file or folder with the parameters of the experiments",
    )
    parser.add_argument(
        "-l",
        "--logLevel",
        help="logging level of the logger. Default is INFO",
        metavar="level",
        type=str,
    )
    parser.add_argument(
        "-f",
        "--logFolder",
        help="path to the folder where to save the logs. If None, logs are only printed in stderr",
        type=str,
        metavar="path",
    )
    args = parser.parse_args()

    if os.path.isdir(args.modelParams):
        names_experiments = os.listdir(args.modelParams)
        experiments = [
            os.path.join(args.modelParams, experiment)
            for experiment in names_experiments
        ]
    else:
        experiments = [args.modelParams]

    for experiment in experiments:
        # Load the parameters
        hparams = utils.YParams(experiment)
        # Define the logger
        setLogger(hparams, args, PHASE)

        remainSpaces = 29 - len(hparams.name)
        logging.info(
            "\n"
            + "--------------------------------------------------------------------------------\n"
            + "| Training experiment: " + hparams.name + " " * remainSpaces + "|\n"
            + "--------------------------------------------------------------------------------\n"
        )

        trajectory_size = hparams.obsLen + hparams.predLen

        logging.info("Loading the training datasets...")
        train_loader = utils.DataLoader(
            hparams.dataPath,
            hparams.trainDatasets,
            hparams.trainMaps,
            hparams.semanticMaps,
            hparams.trainMapping,
            hparams.homography,
            num_labels=hparams.numLabels,
            delimiter=hparams.delimiter,
            skip=hparams.skip,
            max_num_ped=hparams.maxNumPed,
            trajectory_size=trajectory_size,
            neighborood_size=hparams.neighborhoodSize,
        )
        logging.info("Loading the validation datasets...")
        val_loader = utils.DataLoader(
            hparams.dataPath,
            hparams.validationDatasets,
            hparams.validationMaps,
            hparams.semanticMaps,
            hparams.validationMapping,
            hparams.homography,
            num_labels=hparams.numLabels,
            delimiter=hparams.delimiter,
            skip=hparams.skip,
            max_num_ped=hparams.maxNumPed,
            trajectory_size=trajectory_size,
            neighborood_size=hparams.neighborhoodSize,
        )
        logging.info("Creating the training and validation dataset pipeline...")
        dataset = utils.TrajectoriesDataset(
            train_loader,
            val_loader=val_loader,
            batch=False,
            shuffle=hparams.shuffle,
            prefetch_size=hparams.prefetchSize,
        )

        hparams.add_hparam("learningRateSteps", train_loader.num_sequences)

        logging.info("Creating the model...")
        start = time.time()
        model = SocialModel(dataset, hparams, phase=PHASE)
        end = time.time() - start
        logging.debug("Model created in {:.2f}s".format(end))

        # Define the path where to save the model and the checkpoints.
        # save_model defaults to False so the checks below cannot hit an
        # undefined name when hparams.modelFolder is not set.
        save_model = False
        if hparams.modelFolder:
            save_model = True
            model_folder = os.path.join(hparams.modelFolder, hparams.name)
            if not os.path.exists(model_folder):
                os.makedirs(model_folder)
                os.makedirs(os.path.join(model_folder, "checkpoints"))
            model_path = os.path.join(model_folder, hparams.name)
            checkpoints_path = os.path.join(model_folder, "checkpoints",
                                            hparams.name)
            # Create the saver
            saver = tf.train.Saver()

        # Zero padding for the sequence counter
        padding = len(str(train_loader.num_sequences))

        # ============================ START TRAINING ============================

        with tf.Session() as sess:
            logging.info(
                "\n"
                + "--------------------------------------------------------------------------------\n"
                + "|                                Start training                                |\n"
                + "--------------------------------------------------------------------------------\n"
            )
            # Initialize all the variables in the graph
            sess.run(tf.global_variables_initializer())

            for epoch in range(hparams.epochs):
                logging.info("Starting epoch {}".format(epoch + 1))

                # ==================== TRAINING PHASE ====================
                # Initialize the iterator of the training dataset
                sess.run(dataset.init_train)

                for sequence in range(train_loader.num_sequences):
                    start = time.time()
                    loss, _ = sess.run([model.loss, model.train_optimizer])
                    end = time.time() - start
                    logging.info(
                        "{:{width}d}/{} epoch: {} time/Batch = {:.2f}s. Loss = {:.4f}".format(
                            sequence + 1,
                            train_loader.num_sequences,
                            epoch + 1,
                            end,
                            loss,
                            width=padding,
                        ))

                # ==================== VALIDATION PHASE ====================
                logging.info(" ========== Validation ==========")
                # Initialize the iterator of the validation dataset
                sess.run(dataset.init_val)
                loss_val = 0

                for _ in range(val_loader.num_sequences):
                    loss = sess.run(model.loss)
                    loss_val += loss

                mean_val = loss_val / val_loader.num_sequences
                logging.info("Epoch: {}. Validation loss = {:.4f}".format(
                    epoch + 1, mean_val))

                # Save the model
                if save_model:
                    logging.info("Saving model...")
                    saver.save(
                        sess,
                        checkpoints_path,
                        global_step=epoch + 1,
                        write_meta_graph=False,
                    )
                    logging.info("Model saved...")

            # Save the final model
            if save_model:
                saver.save(sess, model_path)
        tf.reset_default_graph()
            ys = val[args.y]
            zs = val[args.z]
            ax.scatter(xs, ys, zs, c=c, s=100, label=i)
        buildLegend(ax, cmap)
    else:
        # Else just plot.
        xs = dat[args.x]
        ys = dat[args.y]
        zs = dat[args.z]
        ax.scatter(xs, ys, zs, s=100)

    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    ax.set_zlabel(zlab)  # was set_zlabel(ylab); the z-axis needs its own label

    galaxySavefig(fig, args.fig)


if __name__ == "__main__":
    # Command line options
    args = getOptions()

    logger = logging.getLogger()
    if args.debug:
        sl.setLogger(logger, logLevel="debug")
    else:
        sl.setLogger(logger)

    main(args)
class DataLoad:
    ####################################################
    # Constant declarations
    ####################################################
    # FILE_TRAIN_CSV = './input/sales_train.csv'
    FILE_TRAIN_CSV = './input/sales_train_v2.csv'
    # FILE_TEST_CSV = './input/test.csv'
    FILE_TEST_CSV = './input/test2.csv'

    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # Constructor
    def __init__(self):
        # No work is done in the constructor.
        pass

    # Load the training data
    def read_train_csv(self):
        self.log.info('read_train_csv start')

        # date        date_block_num  shop_id  item_id  item_price  item_cnt_day
        # 14.01.2013  0               2        11330    149         1
        ## Load training data
        tmp_df = pd.read_csv(self.FILE_TRAIN_CSV,
                             header=0,
                             dtype={
                                 'date': 'str',
                                 'date_block_num': 'int',
                                 'shop_id': 'int',
                                 'item_id': 'int',
                                 'item_price': 'float',
                                 'item_cnt_day': 'float'
                             })
        tmp_df = tmp_df.groupby(['date_block_num', 'shop_id', 'item_id'],
                                as_index=False).agg({
                                    # 'item_price': np.mean,
                                    'item_cnt_day': np.sum
                                })
        self.df = tmp_df
        self.log.info('read_train_csv end')

    # Load the test data
    def read_test_csv(self):
        self.log.info('read_test_csv start')

        # ID     shop_id  item_id
        # 20400  2        5037
        ## Load test data
        tmp_df = pd.read_csv(self.FILE_TEST_CSV,
                             header=0,
                             dtype={
                                 'ID': 'str',
                                 'shop_id': 'int',
                                 'item_id': 'int'
                             })
        self.df_test = tmp_df
        self.log.info('read_test_csv end')

    # Get the training data
    def getTrainValues(self):
        return self.df

    # Get the test data
    def getTestValues(self):
        return self.df_test
    header = np.array(['PC{}'.format(x + 1) for x in range(loadings.shape[1])])
    compoundIndex = np.hstack([df_wide.index.name, df_wide.index])
    sampleIndex = np.hstack(['sampleID', df_wide.columns])

    # Create loadings output
    loadHead = np.vstack([header, loadings])
    loadIndex = np.column_stack([sampleIndex, loadHead])
    loadOut = np.vstack([block, loadIndex])

    # Create scores output
    scoreHead = np.vstack([header, scores])
    scoreIndex = np.column_stack([compoundIndex, scoreHead])
    scoreOut = np.vstack([block, scoreIndex])

    # Save output
    np.savetxt(args.lname, loadOut, fmt='%s', delimiter='\t')
    np.savetxt(args.sname, scoreOut, fmt='%s', delimiter='\t')


if __name__ == '__main__':
    # Command line options
    args = getOptions()

    logger = logging.getLogger()
    if args.debug:
        sl.setLogger(logger, logLevel='debug')
    else:
        sl.setLogger(logger)

    main(args)
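# Usage sketch (illustrative): the loadings/scores tables above are built by
# stacking a header row and an index column around the data matrix. Minimal
# standalone demo with hypothetical names:
import numpy as np

data = np.arange(4).reshape(2, 2)
header = np.array(['PC1', 'PC2'])
index = np.array(['sampleID', 's1', 's2'])
out = np.column_stack([index, np.vstack([header, data])])
np.savetxt('demo.tsv', out, fmt='%s', delimiter='\t')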
class DataLoad:
    ####################################################
    # Constant declarations
    ####################################################
    FILE_TRAIN_CSV = '../input/sales_train_v2.csv'
    FILE_TEST_CSV = '../input/test.csv'
    # FILE_TRAIN_CSV = '../input/sales_train_sample.csv'
    # FILE_TEST_CSV = '../input/test_sample.csv'
    FILE_ITEM_CATEGORIES_CSV = '../input/item_categories.csv'
    FILE_ITEMS_CSV = '../input/items.csv'
    # FILE_SHOPS_CSV = '../input/shops.csv'

    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # Constructor
    def __init__(self, windows_size):
        # Load the CSV data
        self.log.info('DataLoad constructor start')

        ## Load test data
        test_df_csv = pd.read_csv(self.FILE_TEST_CSV,
                                  header=0,
                                  dtype={
                                      'ID': 'str',
                                      'shop_id': 'int',
                                      'item_id': 'int'
                                  })

        ## Load training data
        train_df_csv = pd.read_csv(self.FILE_TRAIN_CSV,
                                   header=0,
                                   dtype={
                                       'date': 'str',
                                       'date_block_num': 'int',
                                       'shop_id': 'int',
                                       'item_id': 'int',
                                       'item_price': 'float',
                                       'item_cnt_day': 'float'
                                   })

        ## Load FILE_ITEM_CATEGORIES_CSV data
        item_categories_df_csv = pd.read_csv(self.FILE_ITEM_CATEGORIES_CSV,
                                             header=0,
                                             dtype={
                                                 'item_category_name': 'str',
                                                 'item_category_id': 'int'
                                             })

        ## Load FILE_ITEMS_CSV data
        items_df_csv = pd.read_csv(self.FILE_ITEMS_CSV,
                                   header=0,
                                   dtype={
                                       'item_name': 'str',
                                       'item_id': 'int',
                                       'item_category_id': 'int'
                                   })

        # ## Load FILE_SHOPS_CSV data
        # shops_df_csv = pd.read_csv(self.FILE_SHOPS_CSV, header=0,
        #                            dtype={
        #                                'shop_name': 'str',
        #                                'shop_id': 'int'})

        # Add daily sales revenue
        train_df_csv['item_sales_day'] = train_df_csv['item_price'] * train_df_csv['item_cnt_day']

        # Align the train data with the test data
        # (every test shop/item pair for each of the 35 months)
        train_df = pd.DataFrame()
        for i in range(35):
            tmp = test_df_csv[['shop_id', 'item_id']]
            tmp['date_block_num'] = i
            train_df = pd.concat([train_df, tmp], axis=0)

        # Aggregate unit sales and revenue per item
        item_mon_train_df = train_df_csv.groupby(
            ['date_block_num', 'item_id'], as_index=False).agg({
                'item_sales_day': np.sum,
                'item_cnt_day': np.sum
            }).rename(columns={
                'item_cnt_day': 'only_item_cnt_month',
                'item_sales_day': 'only_item_sales_month'
            })

        # Compute the maximum price per item
        item_max_train_df = train_df_csv.groupby(
            ['date_block_num', 'item_id'], as_index=False).agg({
                'item_price': np.max
            }).rename(columns={'item_price': 'item_price_max'})

        # Compute the mean price per shop/item
        mean_train_df = train_df_csv.groupby(
            ['date_block_num', 'shop_id', 'item_id'], as_index=False).agg({
                'item_price': np.mean
            }).rename(columns={'item_price': 'item_price_mean'})

        # Aggregate by month
        mon_train_df = train_df_csv.groupby(
            ['date_block_num', 'shop_id', 'item_id'], as_index=False).agg({
                'item_sales_day': np.sum,
                'item_cnt_day': np.sum
            }).rename(columns={
                'item_cnt_day': 'item_cnt_month',
                'item_sales_day': 'item_sales_month'
            })

        # Clip to the range 0-20
        mon_train_df['item_cnt_month'] = mon_train_df['item_cnt_month'].clip(0, 20)

        # Merge in the per-item unit sales/revenue, per-item max price,
        # and per-shop mean price
        mon_train_df = pd.merge(mon_train_df,
                                item_mon_train_df,
                                on=['date_block_num', 'item_id'],
                                how='left').fillna(0)
        mon_train_df = pd.merge(mon_train_df,
                                item_max_train_df,
                                on=['date_block_num', 'item_id'],
                                how='left').fillna(0)
        mon_train_df = pd.merge(mon_train_df,
                                mean_train_df,
                                on=['date_block_num', 'shop_id', 'item_id'],
                                how='left').fillna(0)

        # Merge the monthly aggregates into the test-aligned data
        train_df = pd.merge(train_df,
                            mon_train_df,
                            on=['date_block_num', 'shop_id', 'item_id'],
                            how='left').fillna(0)

        # Compute the discount rate
        train_df['discount_rate'] = train_df[
            train_df["item_price_max"] != 0]["item_price_mean"] / train_df[
                train_df["item_price_max"] != 0]["item_price_max"]
        train_df.loc[((train_df["item_price_max"] == 0) &
                      (train_df["item_price_mean"] != 0)),
                     "discount_rate"] = 1  # Use 1 when there is no max price
        train_df.loc[((train_df["item_price_max"] == 0) &
                      (train_df["item_price_mean"] == 0)),
                     "discount_rate"] = 0

        # Sort by shop_id, item_id, date_block_num
        train_df = train_df.sort_values(
            ['shop_id', 'item_id', 'date_block_num'],
            ascending=[True, True, True]).reset_index(drop=True)

        # Keep the data for building lags
        lag_df = train_df

        # Add lagged monthly unit sales to the test-aligned data
        lagNum = list(range(1, windows_size))
        lagNum.append(13)
        for i in lagNum:
            train_df = pd.concat([
                train_df,
                lag_df.shift(i).rename(
                    columns={'item_cnt_month': 'lag' + str(i)})['lag' + str(i)]
            ], axis=1)
        for i in lagNum:
            train_df = pd.concat([
                train_df,
                lag_df.shift(i).rename(
                    columns={'item_sales_month': 'lag_sales' + str(i)})
                ['lag_sales' + str(i)]
            ], axis=1)

        # Revenue is no longer needed once the lags exist, so drop it
        train_df = train_df.drop(columns=['item_sales_month'])

        # Replace N/A with 0
        train_df = train_df.fillna(0)

        # Add the month-over-month (MoM) column
        train_df['MoM'] = train_df[train_df["lag2"] != 0]["lag1"] / train_df[
            train_df["lag2"] != 0]["lag2"]
        train_df.loc[((train_df["lag2"] == 0) & (train_df["lag1"] != 0)),
                     "MoM"] = 1  # Use 1 when there is no previous-month value
        train_df.loc[((train_df["lag2"] == 0) & (train_df["lag1"] == 0)),
                     "MoM"] = 0

        # Add the year-over-year (YoY) column
        train_df['YoY'] = train_df[train_df["lag13"] != 0]["lag1"] / train_df[
            train_df["lag13"] != 0]["lag13"]
        train_df.loc[((train_df["lag13"] == 0) & (train_df["lag1"] != 0)),
                     "YoY"] = 1  # Use 1 when there is no previous-year value
        train_df.loc[((train_df["lag13"] == 0) & (train_df["lag1"] == 0)),
                     "YoY"] = 0

        # # Join shops
        # train_df = pd.merge(train_df, shops_df_csv, on='shop_id', how='left')

        # Join items
        train_df = pd.merge(train_df,
                            items_df_csv[['item_id', 'item_category_id']],
                            on='item_id',
                            how='left')

        # Join item_categories.
        # Append " - filler" so every row can be split on " - "
        item_categories_df_csv['item_category_name'] = pd.DataFrame({
            'item_category_name':
            item_categories_df_csv['item_category_name'] + " - filler"
        })
        train_df = pd.merge(train_df,
                            item_categories_df_csv,
                            on='item_category_id',
                            how='left')

        # Split item_category_name on " - "
        train_df['big_category_name'] = train_df['item_category_name'].map(
            lambda x: x.split(' - ')[0])
        train_df['small_category_name'] = train_df['item_category_name'].map(
            lambda x: x.split(' - ')[1])
        train_df = train_df.drop(columns=['item_category_name'])

        # Normalize big_category_name variants
        train_df.loc[train_df['big_category_name'] ==
                     'Чистые носители (шпиль)',
                     'big_category_name'] = 'Чистые носители'
        train_df.loc[train_df['big_category_name'] ==
                     'Чистые носители (штучные)',
                     'big_category_name'] = 'Чистые носители'
        # train_df.loc[train_df['big_category_name'] == 'Игры Android', 'big_category_name'] = 'Игры'
        # train_df.loc[train_df['big_category_name'] == 'Игры MAC', 'big_category_name'] = 'Игры'
        # train_df.loc[train_df['big_category_name'] == 'Игры PC', 'big_category_name'] = 'Игры'
        train_df.loc[train_df['big_category_name'] ==
                     'Карты оплаты (Кино, Музыка, Игры)',
                     'big_category_name'] = 'Карты оплаты'

        # Check how well the categories aggregated
        self.log.info(train_df['big_category_name'].value_counts())

        # Apply LabelEncoder
        le = LabelEncoder()
        # train_df['shop_name'] = pd.DataFrame({'shop_name': le.fit_transform(train_df['shop_name'])})
        # train_df['item_name'] = pd.DataFrame({'item_name': le.fit_transform(train_df['item_name'])})
        train_df['big_category_name'] = pd.DataFrame({
            'big_category_name':
            le.fit_transform(train_df['big_category_name'])
        })
        train_df['small_category_name'] = pd.DataFrame({
            'small_category_name':
            le.fit_transform(train_df['small_category_name'])
        })

        # Combine item_id and shop_id into a unique number
        train_df['unique_no'] = train_df['item_id'] * 100 + train_df['shop_id']
        train_df = train_df.drop(columns=['item_id'])
        train_df = train_df.drop(columns=['shop_id'])

        self.df = train_df
        self.test_df = test_df_csv
        self.log.info('DataLoad constructor end')

    # Get the training data
    def getTrainValues(self):
        return self.df

    # Get the test data
    def getTestValues(self):
        return self.test_df
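# Usage sketch (illustrative): the lag features above come from
# DataFrame.shift on the frame sorted by shop_id/item_id/date_block_num.
# Minimal demo; note that shifting the whole frame, as the class does,
# lets values leak across shop/item boundaries at the first rows of each group.
import pandas as pd

df = pd.DataFrame({'item_cnt_month': [1.0, 2.0, 3.0, 4.0]})
df['lag1'] = df['item_cnt_month'].shift(1)
df['lag2'] = df['item_cnt_month'].shift(2)
print(df.fillna(0))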
def fatal(data):
    log(FATAL, data)

def none(data):
    # Uh...what?
    pass

if __name__ == "__main__":
    # Unit test/example usage:
    import logger

    # Set the logging type you want to use (stdout logging):
    logger.setLogger(FileLogger(sys.stdout))

    # Set the most verbose level you want to log
    # (TRACE, DEBUG, INFO, WARN, ERROR, FATAL, NONE)
    logger.setLogLevel(logger.TRACE)

    # Log a message:
    logger.log(logger.INFO, "logger!")
    time.sleep(0.01)

    # Alternatively, use
    logger.error("errrrr")
    logger.trace("some trace data: %d - %f - %s" % (5, 8.3, 'cows'))
####################################################
# Imports
####################################################
import data_load
import model_RandomForest as model
import submit_csv
import logger
import logging

####################################################
# Logger declaration
####################################################
log = logging.getLogger(__name__)
logger.setLogger(log)

####################################################
# Data loading
####################################################
log.info('start read data')

# Training data
train_dl = data_load.DataLoad("./input/train.csv")
# Test data
test_dl = data_load.DataLoad("./input/test.csv")

log.info('end read data')

####################################################
# Analysis
####################################################
log.info('start analysis')
    if hasattr(dat, 'group'):
        logger.info('Plotting sample distributions by group')
        legend1 = pltByTrt(dat, ax1)
    else:
        logger.info('Plotting sample distributions')
        pltBySample(dat, ax1)

    # Create legend
    handles, labels = ax1.get_legend_handles_labels()
    ax1.legend(handles, labels, ncol=5, loc='upper right', fontsize=10)

    # Create a second legend if there is group information
    if hasattr(dat, 'group'):
        ax1.add_artist(legend1)

    # Plot boxplot of samples
    pltBoxplot(dat, ax2)

    plt.savefig(args.ofig, format='pdf')
    mpld3.save_html(fig, args.ofig2, template_type='simple')


if __name__ == '__main__':
    # Command line options
    args = getOptions()

    logger = logging.getLogger()
    sl.setLogger(logger)

    main(args)
def importTargets(options):
    logger.setLogger("console")
    finalTargets = []
    ignoredTargets = []
    partialImport = False

    fetchStep = commands.BuildStep("fetch", commands.__getFetchCommand(None))
    unpackStep = commands.BuildStep("unpack", commands.__getUnpackCommand(None))

    tempDir = tempfile.mkdtemp(prefix="mixdown-")
    options.downloadDir = os.path.join(tempDir, "mdDownloads")

    while len(options.targetsToImport) != 0:
        target = options.targetsToImport.pop(0)
        logger.writeMessage("Analyzing target...", target.name)
        logger.writeMessage("Extracting target...", target.name)

        target.outputPath = os.path.join(tempDir, target.name)
        target.currBuildStep = fetchStep
        if not commands.buildStepActor(target, options, None):
            utilityFunctions.removeDir(tempDir)
            return None, False
        target.currBuildStep = unpackStep
        if not commands.buildStepActor(target, options, None):
            utilityFunctions.removeDir(tempDir)
            return None, False

        # Generate build files and find possible dependencies
        possibleDeps = []
        if cmake.isCMakeProject(target.path):
            logger.writeMessage("CMake project found...", target.name)
            logger.writeMessage("Analyzing for dependencies...", target.name)
            possibleDeps = cmake.getDependencies(target.path, target.name)
        elif autoTools.isAutoToolsProject(target.path):
            logger.writeMessage("Auto Tools project found...", target.name)
            if not os.path.exists(os.path.join(target.path, "configure")):
                if not autoTools.generateConfigureFiles(target.path, target.name):
                    utilityFunctions.removeDir(tempDir)
                    return None, False
            logger.writeMessage("Analyzing for dependencies...", target.name)
            possibleDeps = autoTools.getDependencies(target.path, target.name)
            if possibleDeps is None:
                target.comment = "Unable to parse 'configure --help' output. MixDown cannot determine dependencies for this target."
                logger.writeError(target.comment, target.name)
                partialImport = True
                possibleDeps = []
        elif make.isMakeProject(target.path):
            target.comment = "Make project found. MixDown cannot determine dependencies from Make projects."
            logger.writeError(target.comment, target.name)
            partialImport = True
        else:
            target.comment = "Unknown build system found. MixDown cannot determine dependencies or build commands."
            logger.writeError(target.comment, target.name)
            partialImport = True

        # Find actual dependencies
        for possibleDependency in possibleDeps:
            if getTarget(possibleDependency, finalTargets + options.targetsToImport):
                logger.writeMessage("Known dependency found (" + possibleDependency + ")", target.name)
                target.dependsOn.append(possibleDependency)
                continue
            elif options.interactive and possibleDependency in ignoredTargets:
                logger.writeMessage("Previously ignored dependency found (" + possibleDependency + ")", target.name)
                continue

            if searchForPossibleAliasInList(possibleDependency, finalTargets + options.targetsToImport, options.interactive):
                target.dependsOn.append(possibleDependency)
            elif not options.interactive:
                logger.writeMessage("Ignoring unknown dependency (" + possibleDependency + ")", target.name)
            else:
                logger.writeMessage("Unknown dependency found (" + possibleDependency + ")", target.name)
                userInput = raw_input(possibleDependency + ": Input location, target name, or blank to ignore:").strip()
                if userInput == "":
                    ignoredTargets.append(possibleDependency)
                elif os.path.isfile(userInput) or os.path.isdir(userInput) or utilityFunctions.isURL(userInput):
                    name = target.targetPathToName(userInput)
                    if name == "":
                        return None, False
                    newTarget = target.Target(name, userInput)
                    options.targetsToImport.append(newTarget)
                    if target.normalizeName(possibleDependency) != target.normalizeName(userInput):
                        newTarget.aliases.append(possibleDependency)
                    target.dependsOn.append(possibleDependency)
                else:
                    aliasTarget = getTarget(userInput, finalTargets + options.targetsToImport, possibleDependency)
                    if aliasTarget is not None:
                        logger.writeMessage("Alias added (" + userInput + ")", aliasTarget.name)
                        target.dependsOn.append(possibleDependency)
                    else:
                        aliasLocation = raw_input(userInput + ": Target name not found in any known targets. Location of new target:").strip()
                        if os.path.isfile(aliasLocation) or os.path.isdir(aliasLocation) or utilityFunctions.isURL(aliasLocation):
                            name = target.targetPathToName(aliasLocation)
                            if name == "":
                                return None, False
                            newTarget = target.Target(name, aliasLocation)
                            notReviewedTargets.append(newTarget)
                            if target.normalizeName(possibleDependency) != target.normalizeName(aliasLocation):
                                newTarget.aliases.append(possibleDependency)
                            target.dependsOn.append(possibleDependency)
                        else:
                            logger.writeError(userInput + ": Alias location not understood.", exitProgram=True)

        finalTargets.append(target)

    # Create project for targets
    projects = project.Project("ProjectNameNotDetermined", finalTargets)
    if not projects.examine(options):
        logger.writeError("Project failed examination", exitProgram=True)
    if not projects.validate(options):
        logger.writeError("Project failed validation", exitProgram=True)

    mainTargetPath = projects.targets[0].origPath
    if utilityFunctions.isURL(mainTargetPath):
        mainTargetPath = utilityFunctions.URLToFilename(mainTargetPath)
    mainTargetName, mainTargetVersion = utilityFunctions.splitFileName(mainTargetPath)
    if mainTargetVersion != "":
        projects.name = mainTargetName + "-" + mainTargetVersion
    else:
        projects.name = mainTargetName
    projects.path = projects.name + ".md"

    for target in projects.targets:
        target.outputPath = ""

    if projects.examine(options):
        logger.writeMessage("\nFinal targets...\n\n" + str(projects))
        projects.write()

    utilityFunctions.removeDir(tempDir)
    return projects, partialImport
    for target_id in targets:
        print "targets: ", targets
        factories = conn.factories(target_id)
        print "factories: ", factories
        for factory_id, ident in factories:
            print "factory_id ", factory_id, ident
            instances = conn.instances(ident)
            print "instances: ", instances
            for instance_id in instances:
                print "info %s" % instance_id
                info = conn.instance_info(instance_id)
                print info
                assert info['scout'] == True
                print "upgrade elite"
                res = conn.instance_upgrade(instance_id)
                print "res: %s" % res
                info = conn.instance_info(instance_id)
                print info
                if res:
                    assert info['upgradable'] == True


if __name__ == "__main__":
    import logger
    logger.setLogger(debug=True)
    test()
class DataLoad:
    ####################################################
    # Logger declaration
    ####################################################
    log = logging.getLogger(__name__)
    logger.setLogger(log)

    # Constant declarations
    CABIN_FARE_SAMPLING = 10  # Step width for the per-cabin fare buckets

    # Constructor
    def __init__(self, file_path):
        self.log.info('init start')

        # Load training data
        tmp_df = pd.read_csv(file_path, header=0)

        # Create a LabelEncoder instance
        le = LabelEncoder()

        # Data editing and imputation

        # Sex (Gender)
        # Convert "Sex" to a dummy variable (female = 0, male = 1)
        tmp_df["Gender"] = tmp_df["Sex"].map({"female": 0, "male": 1}).astype(int)

        # Honorific
        # Names include an honorific, which likely affects survival,
        # so add an Honorific column.
        # Split on ", " or ". "
        tmp_name = tmp_df['Name'].str.split(', |. ', expand=True)
        # Add the column
        tmp_df['Honorific'] = tmp_name[1]
        tmp_df.loc[tmp_df.Honorific.isnull(), "Honorific"] = "None"
        # Fit the labels
        le = le.fit(tmp_df['Honorific'])
        # Convert the labels to integers
        tmp_df['Honorific'] = le.transform(tmp_df['Honorific'])

        # Age
        # Ticket classes likely attract different age groups,
        # so compute the median age per ticket class.
        median_age1 = tmp_df[tmp_df["Pclass"] == 1]["Age"].dropna().median()
        median_age2 = tmp_df[tmp_df["Pclass"] == 2]["Age"].dropna().median()
        median_age3 = tmp_df[tmp_df["Pclass"] == 3]["Age"].dropna().median()
        self.log.debug("Age median_age1={} median_age2={} median_age3={}".format(
            median_age1, median_age2, median_age3))
        if len(tmp_df.Age[tmp_df.Age.isnull()]) > 0:
            # Use .loc to place the class median where Age is missing
            tmp_df.loc[(tmp_df.Age.isnull()) & (tmp_df["Pclass"] == 1), "Age"] = median_age1
            tmp_df.loc[(tmp_df.Age.isnull()) & (tmp_df["Pclass"] == 2), "Age"] = median_age2
            tmp_df.loc[(tmp_df.Age.isnull()) & (tmp_df["Pclass"] == 3), "Age"] = median_age3

        # Parch (number of parents/children aboard)
        # Nannies are reportedly not included in this column, so change 0 to 1
        # for passengers aged 15 or under, who would not travel alone.
        tmp_df.loc[(tmp_df["Age"] <= 15) & (tmp_df["Parch"] == 0), "Parch"] = 1

        # Column counting identical tickets
        for TicketValue in set(tmp_df["Ticket"].values):
            TicketCnt = (tmp_df["Ticket"] == TicketValue).sum()
            tmp_df.loc[(tmp_df["Ticket"] == TicketValue), "TicketCnt"] = TicketCnt

        # Team
        # Ticket counts and companion counts do not always match, so there is
        # probably information (friends, etc.) missing from the data.
        # Companions (family or friends) should raise survival odds through
        # cooperation, so add a team-size column.
        # SibSp and Parch exclude the passenger, so add 1; take the larger of
        # that and the ticket count as the team size.
        tmp_df["Team"] = (tmp_df["SibSp"] + tmp_df["Parch"] + 1)
        tmp_df.loc[tmp_df["Team"] < tmp_df["TicketCnt"], "Team"] = tmp_df["TicketCnt"]

        # TravelAlone
        # Survival rates differ between solo travelers, small groups (< 8),
        # and large groups (>= 8), so give each its own flag.
        tmp_df['TravelAlone'] = 0
        tmp_df.loc[tmp_df["Team"] == 1, "TravelAlone"] = 1
        # SmallGroup
        tmp_df['SmallGroup'] = 0
        tmp_df.loc[(tmp_df["Team"] > 1) & (tmp_df["Team"] < 8), "SmallGroup"] = 1
        # BigGroup
        tmp_df['BigGroup'] = 0
        tmp_df.loc[tmp_df["Team"] >= 8, "BigGroup"] = 1

        # Fare
        # The fare appears to be summed over all tickets in a group,
        # so convert it back to a per-person fare.
        tmp_df["Fare"] = tmp_df["Fare"] / tmp_df["TicketCnt"]
        # Treat Fare == 0 as missing too
        tmp_df.loc[tmp_df["Fare"] == 0, "Fare"] = None
        # After the conversion, compute the median per ticket class
        median_fare = tmp_df["Fare"].dropna().median()
        median_fare1 = tmp_df[tmp_df["Pclass"] == 1]["Fare"].dropna().median()
        median_fare2 = tmp_df[tmp_df["Pclass"] == 2]["Fare"].dropna().median()
        median_fare3 = tmp_df[tmp_df["Pclass"] == 3]["Fare"].dropna().median()
        self.log.debug(
            "Fare median_fare={} median_fare1={} median_fare2={} median_fare3={}".format(
                median_fare, median_fare1, median_fare2, median_fare3))
        if len(tmp_df.Fare[tmp_df.Fare.isnull()]) > 0:
            # Use .loc to place the class median where Fare is missing
            tmp_df.loc[(tmp_df.Fare.isnull()) & (tmp_df["Pclass"] == 1), "Fare"] = median_fare1
            tmp_df.loc[(tmp_df.Fare.isnull()) & (tmp_df["Pclass"] == 2), "Fare"] = median_fare2
            tmp_df.loc[(tmp_df.Fare.isnull()) & (tmp_df["Pclass"] == 3), "Fare"] = median_fare3

        # Some tickets cover several cabins, so also compute the per-cabin fare
        tmp_df["CabinCnt"] = tmp_df["Cabin"].str.count(" ") + 1
        tmp_df.loc[(tmp_df.Cabin.isnull()), "CabinCnt"] = 1
        # Bucket the per-cabin fare using the CABIN_FARE_SAMPLING step width
        tmp_df["CabinFare"] = self.CABIN_FARE_SAMPLING * (
            ((tmp_df["Fare"] / tmp_df["CabinCnt"]) // self.CABIN_FARE_SAMPLING) + 1)

        # Cabin (cabin number)
        # Map the cabin deck to a rank: A*: 0, B*: 1, C*: 2, D*: 3, E*: 4,
        # F*: 5, G*: 6, T*: 7 (when several are present, use the higher deck).
        tmp_df.loc[(tmp_df.Cabin.notnull()) & (tmp_df["Cabin"].str.contains("A")), "CabinRank"] = 0
        tmp_df.loc[(tmp_df.Cabin.notnull()) & (tmp_df["Cabin"].str.contains("B")), "CabinRank"] = 1
        tmp_df.loc[(tmp_df.Cabin.notnull()) & (tmp_df["Cabin"].str.contains("C")), "CabinRank"] = 2
        tmp_df.loc[(tmp_df.Cabin.notnull()) & (tmp_df["Cabin"].str.contains("D")), "CabinRank"] = 3
        tmp_df.loc[(tmp_df.Cabin.notnull()) & (tmp_df["Cabin"].str.contains("E")), "CabinRank"] = 4
        tmp_df.loc[(tmp_df.Cabin.notnull()) & (tmp_df["Cabin"].str.contains("F")), "CabinRank"] = 5
        tmp_df.loc[(tmp_df.Cabin.notnull()) & (tmp_df["Cabin"].str.contains("G")), "CabinRank"] = 6
        tmp_df.loc[(tmp_df.Cabin.notnull()) & (tmp_df["Cabin"].str.contains("T")), "CabinRank"] = 7

        # Cabin has many missing values, so infer them from other information
        before_median_CabinRank = 8  # Initialize with 8 for now
        for CabinFareValue in sorted(set(tmp_df["CabinFare"].values)):
            # Median CabinRank for each per-cabin fare bucket
            median_CabinRank = tmp_df[tmp_df["CabinFare"] == CabinFareValue]["CabinRank"].dropna().median()
            # If unavailable (null), substitute the previous level
            if np.isnan(median_CabinRank):
                median_CabinRank = before_median_CabinRank
            # Set the median
            tmp_df.loc[(tmp_df.CabinRank.isnull()) & (tmp_df["CabinFare"] == CabinFareValue),
                       "CabinRank"] = median_CabinRank
            self.log.debug("median_CabinRank CabinFareValue={} median_CabinRank={}".format(
                CabinFareValue, median_CabinRank))
            # Keep the previous level
            before_median_CabinRank = median_CabinRank

        # Embarked (port of embarkation)
        # Fill missing values with "S", the most common value.
        # Map Embarked as S: 0, C: 1, Q: 2.
        tmp_df.loc[(tmp_df.Embarked.isnull()), "Embarked"] = "S"
        tmp_df["Embarked_NUM"] = tmp_df["Embarked"].map({"S": 0, "C": 1, "Q": 2}).astype(int)

        self.df = tmp_df
        self.log.info('init end')

    # Get the requested columns
    def getValues(self, param):
        return self.df[param].values
def main():
    parser = argparse.ArgumentParser(
        description="Sample new trajectories with a social LSTM")
    parser.add_argument(
        "modelParams",
        type=str,
        help="Path to the file or folder with the parameters of the experiments",
    )
    parser.add_argument(
        "-l",
        "--logLevel",
        help="logging level of the logger. Default is INFO",
        metavar="level",
        type=str,
    )
    parser.add_argument(
        "-f",
        "--logFolder",
        help="path to the folder where to save the logs. If None, logs are only printed in stderr",
        metavar="path",
        type=str,
    )
    parser.add_argument(
        "-ns",
        "--noSaveCoordinates",
        help="Flag to not save the predicted and ground truth coordinates",
        action="store_true",
    )
    args = parser.parse_args()

    if os.path.isdir(args.modelParams):
        names_experiments = os.listdir(args.modelParams)
        experiments = [
            os.path.join(args.modelParams, experiment)
            for experiment in names_experiments
        ]
    else:
        experiments = [args.modelParams]

    # Table that will show the metrics of each experiment
    results = BeautifulTable()
    results.column_headers = ["Name experiment", "ADE", "FDE"]

    for experiment in experiments:
        # Load the parameters
        hparams = utils.YParams(experiment)
        # Define the logger
        setLogger(hparams, args, PHASE)

        remainSpaces = 29 - len(hparams.name)
        logging.info(
            "\n"
            + "--------------------------------------------------------------------------------\n"
            + "| Sampling experiment: " + hparams.name + " " * remainSpaces + "|\n"
            + "--------------------------------------------------------------------------------\n"
        )

        trajectory_size = hparams.obsLen + hparams.predLen

        saveCoordinates = False
        if args.noSaveCoordinates is True:
            saveCoordinates = False
        elif hparams.saveCoordinates:
            saveCoordinates = hparams.saveCoordinates

        if saveCoordinates:
            coordinates_path = os.path.join("coordinates", hparams.name)
            if not os.path.exists("coordinates"):
                os.makedirs("coordinates")

        logging.info("Loading the test datasets...")
        test_loader = utils.DataLoader(
            hparams.dataPath,
            hparams.testDatasets,
            hparams.testMaps,
            hparams.semanticMaps,
            hparams.testMapping,
            hparams.homography,
            num_labels=hparams.numLabels,
            delimiter=hparams.delimiter,
            skip=hparams.skip,
            max_num_ped=hparams.maxNumPed,
            trajectory_size=trajectory_size,
            neighborood_size=hparams.neighborhoodSize,
        )
        logging.info("Creating the test dataset pipeline...")
        dataset = utils.TrajectoriesDataset(
            test_loader,
            val_loader=None,
            batch=False,
            shuffle=hparams.shuffle,
            prefetch_size=hparams.prefetchSize,
        )

        logging.info("Creating the model...")
        start = time.time()
        model = SocialModel(dataset, hparams, phase=PHASE)
        end = time.time() - start
        logging.debug("Model created in {:.2f}s".format(end))

        # Define the path to the file that contains the variables of the model
        model_folder = os.path.join(hparams.modelFolder, hparams.name)
        model_path = os.path.join(model_folder, hparams.name)
        # Create a saver
        saver = tf.train.Saver()

        # Add the evaluation functions to the computation graph
        ade_sequence = utils.average_displacement_error(
            model.new_pedestrians_coordinates[-hparams.predLen:],
            model.pedestrians_coordinates[-hparams.predLen:],
            model.num_peds_frame,
        )
        fde_sequence = utils.final_displacement_error(
            model.new_pedestrians_coordinates[-1],
            model.pedestrians_coordinates[-1],
            model.num_peds_frame,
        )

        ade = 0
        fde = 0
        coordinates_predicted = []
        coordinates_gt = []
        peds_in_sequence = []

        # Zero padding for the sequence counter
        padding = len(str(test_loader.num_sequences))

        # ============================ START SAMPLING ============================

        with tf.Session() as sess:
            # Restore the trained model
            saver.restore(sess, model_path)
            # Initialize the iterator of the sample dataset
            sess.run(dataset.init_train)

            logging.info(
                "\n"
                + "--------------------------------------------------------------------------------\n"
                + "|                                Start sampling                                |\n"
                + "--------------------------------------------------------------------------------\n"
            )

            for seq in range(test_loader.num_sequences):
                logging.info("Sample trajectory number {:{width}d}/{}".format(
                    seq + 1, test_loader.num_sequences, width=padding))

                ade_value, fde_value, coordinates_pred_value, coordinates_gt_value, num_peds = sess.run(
                    [
                        ade_sequence,
                        fde_sequence,
                        model.new_pedestrians_coordinates,
                        model.pedestrians_coordinates,
                        model.num_peds_frame,
                    ])

                ade += ade_value
                fde += fde_value
                coordinates_predicted.append(coordinates_pred_value)
                coordinates_gt.append(coordinates_gt_value)
                peds_in_sequence.append(num_peds)

            ade = ade / test_loader.num_sequences
            fde = fde / test_loader.num_sequences
            logging.info("Sampling finished. ADE: {:.4f} FDE: {:.4f}".format(ade, fde))
            results.append_row([hparams.name, ade, fde])

            if saveCoordinates:
                coordinates_predicted = np.array(coordinates_predicted)
                coordinates_gt = np.array(coordinates_gt)
                saveCoords(
                    coordinates_predicted,
                    coordinates_gt,
                    peds_in_sequence,
                    hparams.predLen,
                    coordinates_path,
                )
        tf.reset_default_graph()

    logging.info("\n{}".format(results))
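# Illustrative note (not from the original source): ADE is the mean L2
# distance between prediction and ground truth over all predicted timesteps,
# and FDE is the L2 distance at the final timestep. Toy example with one
# pedestrian over two timesteps:
import numpy as np

pred = np.array([[0.0, 0.0], [1.0, 1.0]])
gt = np.array([[0.0, 1.0], [1.0, 3.0]])
ade = np.mean(np.linalg.norm(pred - gt, axis=1))  # (1.0 + 2.0) / 2 = 1.5
fde = np.linalg.norm(pred[-1] - gt[-1])           # 2.0
print(ade, fde)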