def main(): """ main path of execution """ # parse arguments args = parseArguments() # get and iterate images path = os.path.join(args.image_path, '**') images = glob.glob(os.path.join(path, '*_footprint.tif')) for image in images: # extract uid from image pathname uid = getUniqueId(image) if uid is not None: # create image chips getImageChips(image, os.path.join(args.out_path, uid), crops=[1536, 2048, 3072, 4096], resize=512) return
def main(): """ main path of execution """ # parse arguments args = parseArguments() df = pd.read_csv( args.inventory_file ) aoi = AoI() # iterate across images images = glob.glob( os.path.join( os.path.join( args.image_path, '**' ), '*.tif' ) ) for image in images: # get unique id from pathname uid = getUniqueId( image ) if uid is not None: # get corresponding record in data table record = df[ df[ 'uid' ] == uid ] if len( record ) == 1: # get geojson files for uid path = os.path.join( args.footprint_path, '{}'.format( uid ) ) footprints = glob.glob( os.path.join( path, '{}*.geojson' ).format( uid ) ) if len( footprints ) > 0: # create output path out_path = os.path.join( args.out_path, uid ) if not os.path.exists( out_path ): os.makedirs( out_path ) # get output pathname out_pathname = os.path.join( out_path, '{uid}_footprint.tif'.format ( uid=uid ) ) if not os.path.exists( out_pathname ): print ( 'processing: {}'.format( out_pathname ) ) # get footprint binary mask - if sufficient data available epsg = aoi.getEpsg( ( record[ 'longitude' ].iloc[0], record[ 'latitude'].iloc[0] ) ) out = getGeocodedMask( image, footprints, '-of GTiff -t_srs epsg:{epsg} -co TILED=YES -co COMPRESS=DEFLATE'.format( epsg=epsg ), out_path ) # created utm masked image if out is not None: getFootprintImage( out, out_pathname ) return
def getImageDataFrame(image_path, crops=(1536, 2048, 3072, 4096)):
    """ compile image chips and their uids into a dataframe """

    # get image chips selected by crop size - iterate the crops argument
    # rather than a hardcoded list so callers can restrict the selection
    path = os.path.join(image_path, '**')
    data = {'image': [], 'uid': []}
    for crop in crops:
        data['image'].extend(glob.glob(os.path.join(path, '*_footprint_{}_*.jpg'.format(crop))))

    # get uids and convert dict to dataframe
    data['uid'] = [getUniqueId(image) for image in data['image']]
    return pd.DataFrame.from_dict(data)
def getImageDataFrame(image_path):
    """ compile the two largest image chips per sub-directory into a dataframe """

    # get image chips - sorted filenames place the largest crop sizes last
    sub_dirs = glob.glob(os.path.join(image_path, '*'))
    data = {'image': [], 'uid': []}
    for sub_dir in sub_dirs:
        images = sorted(glob.glob(os.path.join(sub_dir, '*.jpg')))
        data['image'].extend(images[-2:])

    # get uids and convert dict to dataframe
    data['uid'] = [getUniqueId(image) for image in data['image']]
    return pd.DataFrame.from_dict(data)
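# Either variant returns a two-column dataframe ('image', 'uid'). A usage
# sketch follows - splitting on uid rather than on individual chips, so chips
# from one site never straddle the train / test boundary (the grouping
# strategy and the 'data/images' path are assumptions):

from sklearn.model_selection import train_test_split

df = getImageDataFrame('data/images')

# split unique site ids, then select the chips belonging to each side
train_ids, test_ids = train_test_split(df['uid'].unique(), test_size=0.2, random_state=42)
df_train = df[df['uid'].isin(train_ids)]
df_test = df[df['uid'].isin(test_ids)]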
def main(): """ main path of execution """ # parse arguments args = parseArguments() args.image_size = 256 # load pre-trained model from file model, model_type = loadFromFile(args.model_path) # select preprocess_input wrapper module = importlib.import_module( 'keras.applications.{}'.format(model_type)) preprocess_input = module.preprocess_input datagen = ImageDataGenerator(preprocessing_function=preprocess_input) scaler = MinMaxScaler() # plot sample size plots and loss diagnostics plotSampleSizes(args.data_path) plotDiagnostics(args.model_path) # read dataframe and normalise target df_train = pd.read_csv(os.path.join(args.data_path, 'train.csv')) df_train['target'] = scaler.fit_transform(df_train[['target']]) #df_train = getPrediction( datagen, model, df_train, os.path.join( args.data_path, 'train' ) ) # read dataframe and normalise target df_test = pd.read_csv(os.path.join(args.data_path, 'test.csv')) df_test['target'] = scaler.transform(df_test[['target']]) #df_test = getPrediction( datagen, model, df_test, os.path.join( args.data_path, 'test' ) ) # plot regression #plotRegression( [ df_train, df_test ] ) # finally run model against unlabelled images - unknown capacity path = os.path.join(args.data_path, 'unlabelled') it = datagen.flow_from_directory(path, classes=['test'], color_mode='rgb', shuffle=False, batch_size=1, target_size=(args.image_size, args.image_size)) # evaluate probabilities y_pred = model.predict_generator(it) # compile results records = [] for idx, filename in enumerate(it.filenames): # assign label and confidence records.append({ 'uid': getUniqueId(filename), 'capacity': float(np.exp(y_pred[idx])) }) # convert to dataframe df = pd.DataFrame.from_dict(records) # compute mean for each uid - drop duplicates df['mean'] = df.groupby(['uid']).capacity.transform('mean') df.drop_duplicates(subset='uid', keep='first', inplace=True) df = df.drop(columns=['capacity']) for idx, row in df.iterrows(): print(row['uid'], row['mean']) # create figure fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 6)) axes.set_title( 'Model Predicted Capacity for Unlabelled Cement Factory Sites') axes.set_ylabel('Mt / year') axes.set_xticks(range(0, len(df))) axes.tick_params(axis='both', which='major', labelsize=8) axes.set_xticklabels(df['uid'].tolist(), rotation=90) axes.plot(df['mean'].tolist()) # show figure fig.tight_layout(rect=[0, 0.05, 1, 0.95]) plt.show() return
def main(): """ main path of execution """ # parse arguments args = parseArguments() args.image_size = 256 # load model model, args.model = loadFromFile(args.model_path) plotSampleSizes(args.data_path) # select preprocess_input wrapper module = importlib.import_module('keras.applications.{}'.format( args.model)) preprocess_input = module.preprocess_input # create test generator and compute results datagen = ImageDataGenerator(preprocessing_function=preprocess_input) train_cm, train_wronguns = getResults(model, datagen, args, 'train') test_cm, test_wronguns = getResults(model, datagen, args, 'test') # plot confusion matrices plotConfusionMatrix([train_cm, test_cm]) # finally run model against unlabelled images - unknown production type path = os.path.join(args.data_path, 'unlabelled') it = datagen.flow_from_directory(path, classes=['test'], color_mode='rgb', shuffle=False, batch_size=1, target_size=(args.image_size, args.image_size)) # evaluate probabilities y_pred = model.predict_generator(it) # compile results records = [] for idx, filename in enumerate(it.filenames): # assign label and confidence label = 'wet' if y_pred[idx] > 0.5 else 'dry' confidence = 'high' if y_pred[idx] > 0.9 or y_pred[idx] < 0.1 else 'low' records.append({ 'uid': getUniqueId(filename), 'label': label, 'probability': y_pred[idx], 'confidence': confidence }) # convert to dataframe df = pd.DataFrame.from_dict(records) df.sort_values('label') print('Total number of unseen images: {}'.format(len(it.filenames))) print('High confidence dry predictions: {}'.format( len(df[(df['confidence'] == 'high') & (df['label'] == 'dry')]))) print('High confidence wet predictions: {}'.format( len(df[(df['confidence'] == 'high') & (df['label'] == 'wet')]))) for idx, row in df.iterrows(): print(row['uid'], row['probability'], row['label'], row['confidence']) # create figure fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(4, 4)) for idx, s in enumerate(['dry', 'wet']): df_subset = df[(df['label'] == s)] count = [ len(df_subset[(df_subset['confidence'] == 'high')]), len(df_subset[(df_subset['confidence'] == 'low')]) ] axes[idx].barh(['high', 'low'], count) axes[idx].set_title('Predictions: {}'.format(s)) # show figure fig.tight_layout(rect=[0, 0.05, 1, 0.95]) plt.show() return