def calculate_features(im_ms, cloud_mask, im_bool):
    """
    Calculates a range of features on the image that are used for the
    supervised classification. The features include spectral normalized-difference
    indices and the standard deviation of the image.

    KV WRL 2018

    Arguments:
    -----------
    im_ms: np.array
        RGB + downsampled NIR and SWIR
    cloud_mask: np.array
        2D cloud mask with True where cloud pixels are
    im_bool: np.array
        2D array of boolean indicating where on the image to calculate the features

    Returns:
    -----------
    features: np.array
        matrix containing each feature (columns) calculated for all
        the pixels (rows) indicated in im_bool
    """

    # add all the multispectral bands
    features = np.expand_dims(im_ms[im_bool, 0], axis=1)
    for k in range(1, im_ms.shape[2]):
        feature = np.expand_dims(im_ms[im_bool, k], axis=1)
        features = np.append(features, feature, axis=-1)
    # NIR-G
    im_NIRG = SDS_tools.nd_index(im_ms[:, :, 3], im_ms[:, :, 1], cloud_mask)
    features = np.append(features, np.expand_dims(im_NIRG[im_bool], axis=1), axis=-1)
    # SWIR-G
    im_SWIRG = SDS_tools.nd_index(im_ms[:, :, 4], im_ms[:, :, 1], cloud_mask)
    features = np.append(features, np.expand_dims(im_SWIRG[im_bool], axis=1), axis=-1)
    # NIR-R
    im_NIRR = SDS_tools.nd_index(im_ms[:, :, 3], im_ms[:, :, 2], cloud_mask)
    features = np.append(features, np.expand_dims(im_NIRR[im_bool], axis=1), axis=-1)
    # SWIR-NIR
    im_SWIRNIR = SDS_tools.nd_index(im_ms[:, :, 4], im_ms[:, :, 3], cloud_mask)
    features = np.append(features, np.expand_dims(im_SWIRNIR[im_bool], axis=1), axis=-1)
    # B-R
    im_BR = SDS_tools.nd_index(im_ms[:, :, 0], im_ms[:, :, 2], cloud_mask)
    features = np.append(features, np.expand_dims(im_BR[im_bool], axis=1), axis=-1)
    # calculate the standard deviation of the individual bands
    for k in range(im_ms.shape[2]):
        im_std = SDS_tools.image_std(im_ms[:, :, k], 1)
        features = np.append(features, np.expand_dims(im_std[im_bool], axis=1), axis=-1)
    # calculate the standard deviation of the spectral indices
    for im_index in [im_NIRG, im_SWIRG, im_NIRR, im_SWIRNIR, im_BR]:
        im_std = SDS_tools.image_std(im_index, 1)
        features = np.append(features, np.expand_dims(im_std[im_bool], axis=1), axis=-1)

    return features
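# For context, SDS_tools.nd_index presumably computes a normalized-difference
# index, nd = (b1 - b2) / (b1 + b2), with cloud pixels set to NaN. Below is a
# minimal sketch of that assumption (not the actual SDS_tools implementation);
# it only relies on numpy, which is already imported as np in this module.
def nd_index_sketch(im1, im2, cloud_mask):
    # cast to float so the division produces NaN/inf instead of raising
    b1 = im1.astype(float)
    b2 = im2.astype(float)
    with np.errstate(divide='ignore', invalid='ignore'):
        nd = (b1 - b2) / (b1 + b2)
    # discard cloudy pixels
    nd[cloud_mask] = np.nan
    return nd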
def merge_overlapping_images(metadata, inputs):
    """
    Merge simultaneous overlapping images that cover the area of interest.
    When the area of interest is located at the boundary between 2 images,
    there will be overlap between the 2 images and both will be downloaded
    from Google Earth Engine. This function merges the 2 images, so that the
    area of interest is covered by only 1 image.

    KV WRL 2018

    Arguments:
    -----------
    metadata: dict
        contains all the information about the satellite images that were downloaded
    inputs: dict with the following keys
        'sitename': str
            name of the site
        'polygon': list
            polygon containing the lon/lat coordinates to be extracted,
            longitudes in the first column and latitudes in the second column,
            there are 5 pairs of lat/lon with the fifth point equal to the first point:
            ```
            polygon = [[[151.3, -33.7],[151.4, -33.7],[151.4, -33.8],[151.3, -33.8],
                        [151.3, -33.7]]]
            ```
        'dates': list of str
            list that contains 2 strings with the initial and final dates in
            format 'yyyy-mm-dd':
            ```
            dates = ['1987-01-01', '2018-01-01']
            ```
        'sat_list': list of str
            list that contains the names of the satellite missions to include:
            ```
            sat_list = ['L5', 'L7', 'L8', 'S2']
            ```
        'filepath': str
            filepath to the directory where the images are downloaded

    Returns:
    -----------
    metadata_updated: dict
        updated metadata
    """

    # only for Sentinel-2 at this stage (not sure if this is needed for Landsat images)
    sat = 'S2'
    filepath = os.path.join(inputs['filepath'], inputs['sitename'])
    filenames = metadata[sat]['filenames']
    # find the pairs of images that are within 5 minutes of each other
    time_delta = 5 * 60  # 5 minutes in seconds
    dates = metadata[sat]['dates'].copy()
    pairs = []
    for i, date in enumerate(metadata[sat]['dates']):
        # dummy value so it does not match itself again
        dates[i] = pytz.utc.localize(datetime(1, 1, 1) + timedelta(days=i + 1))
        # calculate the time difference to all the other acquisition dates
        time_diff = np.array([np.abs((date - _).total_seconds()) for _ in dates])
        # find the matching times and add them to the pairs list
        boolvec = time_diff <= time_delta
        if np.sum(boolvec) == 0:
            continue
        else:
            idx_dup = np.where(boolvec)[0][0]
            pairs.append([i, idx_dup])

    # for each pair of images, create a mask and add no_data into the .tif files
    # (this is needed before merging the .tif files)
    for i, pair in enumerate(pairs):
        fn_im = []
        for index in range(len(pair)):
            # get the filenames of all the files corresponding to each image in the pair
            fn_im.append([os.path.join(filepath, 'S2', '10m', filenames[pair[index]]),
                          os.path.join(filepath, 'S2', '20m', filenames[pair[index]].replace('10m', '20m')),
                          os.path.join(filepath, 'S2', '60m', filenames[pair[index]].replace('10m', '60m')),
                          os.path.join(filepath, 'S2', 'meta', filenames[pair[index]].replace('_10m', '').replace('.tif', '.txt'))])
            # read that image
            im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single(fn_im[index], sat, False)
            # im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9)

            # in Sentinel-2 images close to the edge of the scene there are artefacts:
            # squares with constant pixel intensities. They need to be masked in the
            # raster (GEOTIFF). This can be done using the image standard deviation,
            # which is close to 0 over the artefacts.
            if len(im_ms) > 0:
                # calculate the image std for the first 10m band
                im_std = SDS_tools.image_std(im_ms[:, :, 0], 1)
                # convert to binary
                im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
                # dilate to fill the edges (which have high std)
                mask10 = morphology.dilation(im_binary, morphology.square(3))
                # mask all 10m bands
                for k in range(im_ms.shape[2]):
                    im_ms[mask10, k] = np.nan
                # mask the 10m .tif file (add no_data where the mask is True)
                SDS_tools.mask_raster(fn_im[index][0], mask10)
                # create another mask for the 20m band (SWIR1)
                im_std = SDS_tools.image_std(im_extra, 1)
                im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
                mask20 = morphology.dilation(im_binary, morphology.square(3))
                im_extra[mask20] = np.nan
                # mask the 20m .tif file (im_extra)
                SDS_tools.mask_raster(fn_im[index][1], mask20)
                # use the 20m mask to create a mask for the 60m QA band (by resampling)
                mask60 = ndimage.zoom(mask20, zoom=1 / 3, order=0)
                mask60 = transform.resize(mask60, im_QA.shape, mode='constant', order=0, preserve_range=True)
                mask60 = mask60.astype(bool)
                # mask the 60m .tif file (im_QA)
                SDS_tools.mask_raster(fn_im[index][2], mask60)
            else:
                continue

            # make a figure for quality control
            # fig, ax = plt.subplots(2, 2, tight_layout=True)
            # ax[0,0].imshow(im_RGB)
            # ax[0,0].set_title('RGB original')
            # ax[1,0].imshow(mask10)
            # ax[1,0].set_title('Mask 10m')
            # ax[0,1].imshow(mask20)
            # ax[0,1].set_title('Mask 20m')
            # ax[1,1].imshow(mask60)
            # ax[1,1].set_title('Mask 60m')

        # once both .tif files of the pair have been masked with no_data,
        # merge them using gdal_merge
        fn_merged = os.path.join(filepath, 'merged.tif')

        # merge the masked 10m bands and remove the duplicate file
        gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][0], fn_im[1][0]])
        os.chmod(fn_im[0][0], 0o777)
        os.remove(fn_im[0][0])
        os.chmod(fn_im[1][0], 0o777)
        os.remove(fn_im[1][0])
        os.chmod(fn_merged, 0o777)
        os.rename(fn_merged, fn_im[0][0])

        # merge the masked 20m band (SWIR band)
        gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][1], fn_im[1][1]])
        os.chmod(fn_im[0][1], 0o777)
        os.remove(fn_im[0][1])
        os.chmod(fn_im[1][1], 0o777)
        os.remove(fn_im[1][1])
        os.chmod(fn_merged, 0o777)
        os.rename(fn_merged, fn_im[0][1])

        # merge the QA band (60m band)
        gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][2], fn_im[1][2]])
        os.chmod(fn_im[0][2], 0o777)
        os.remove(fn_im[0][2])
        os.chmod(fn_im[1][2], 0o777)
        os.remove(fn_im[1][2])
        os.chmod(fn_merged, 0o777)
        os.rename(fn_merged, fn_im[0][2])

        # remove the metadata .txt file of the duplicate image
        os.chmod(fn_im[1][3], 0o777)
        os.remove(fn_im[1][3])

    print('%d pairs of overlapping Sentinel-2 images were merged' % len(pairs))

    # update the metadata dict
    metadata_updated = copy.deepcopy(metadata)
    idx_removed = []
    idx_kept = []
    for pair in pairs:
        idx_removed.append(pair[1])
    for idx in np.arange(0, len(metadata[sat]['dates'])):
        if idx not in idx_removed:
            idx_kept.append(idx)
    for key in metadata_updated[sat].keys():
        metadata_updated[sat][key] = [metadata_updated[sat][key][_] for _ in idx_kept]

    return metadata_updated
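# SDS_tools.image_std, used above with radius=1, presumably returns the local
# (moving-window) standard deviation of the image, which is what makes the
# constant-intensity edge artefacts stand out as near-zero values. A minimal
# sketch of that assumption, using the identity var = E[x^2] - E[x]^2 over a
# (2*radius+1) x (2*radius+1) window; the real helper may treat NaN padding
# differently. It relies on np and scipy's ndimage, both used in this module.
def image_std_sketch(image, radius):
    win = radius * 2 + 1
    im = image.astype(float)
    im_mean = ndimage.uniform_filter(im, size=win)
    im_sqr_mean = ndimage.uniform_filter(im ** 2, size=win)
    # clip small negative values caused by floating-point error before the sqrt
    return np.sqrt(np.maximum(im_sqr_mean - im_mean ** 2, 0))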
def merge_overlapping_images(metadata, inputs):
    """
    Merge simultaneous overlapping images that cover the area of interest.
    When the area of interest is located at the boundary between 2 images,
    there will be overlap between the 2 images and both will be downloaded
    from Google Earth Engine. This function merges the 2 images, so that the
    area of interest is covered by only 1 image.

    KV WRL 2018

    Arguments:
    -----------
    metadata: dict
        contains all the information about the satellite images that were downloaded
    inputs: dict with the following keys
        'sitename': str
            name of the site
        'polygon': list
            polygon containing the lon/lat coordinates to be extracted,
            longitudes in the first column and latitudes in the second column,
            there are 5 pairs of lat/lon with the fifth point equal to the first point:
            ```
            polygon = [[[151.3, -33.7],[151.4, -33.7],[151.4, -33.8],[151.3, -33.8],
                        [151.3, -33.7]]]
            ```
        'dates': list of str
            list that contains 2 strings with the initial and final dates in
            format 'yyyy-mm-dd':
            ```
            dates = ['1987-01-01', '2018-01-01']
            ```
        'sat_list': list of str
            list that contains the names of the satellite missions to include:
            ```
            sat_list = ['L5', 'L7', 'L8', 'S2']
            ```
        'filepath': str
            filepath to the directory where the images are downloaded

    Returns:
    -----------
    metadata_updated: dict
        updated metadata
    """

    # only for Sentinel-2 at this stage (not sure if this is needed for Landsat images)
    sat = 'S2'
    filepath = os.path.join(inputs['filepath'], inputs['sitename'])
    filenames = metadata[sat]['filenames']
    # find the pairs of images that are within 5 minutes of each other
    time_delta = 5 * 60  # 5 minutes in seconds
    dates = metadata[sat]['dates'].copy()
    pairs = []
    for i, date in enumerate(metadata[sat]['dates']):
        # dummy value so it does not match itself again
        dates[i] = pytz.utc.localize(datetime(1, 1, 1) + timedelta(days=i + 1))
        # calculate the time difference to all the other acquisition dates
        time_diff = np.array([np.abs((date - _).total_seconds()) for _ in dates])
        # find the matching times and add them to the pairs list
        boolvec = time_diff <= time_delta
        if np.sum(boolvec) == 0:
            continue
        else:
            idx_dup = np.where(boolvec)[0][0]
            pairs.append([i, idx_dup])
    # because there could be triplicates in S2 images, adjust the pairs for consecutive merges
    for i in range(1, len(pairs)):
        if pairs[i - 1][1] == pairs[i][0]:
            pairs[i][0] = pairs[i - 1][0]
    # check also for quadruplicates and remove them
    pair_first = [_[0] for _ in pairs]
    for idx in np.unique(pair_first):
        # a quadruplicate means trying to merge the same image with a successive one 3 times
        if sum(pair_first == idx) == 3:
            # remove the last image: 3 .tif files + the .txt file
            idx_last = [pairs[_] for _ in np.where(pair_first == idx)[0]][-1][1]
            fn_im = [os.path.join(filepath, 'S2', '10m', filenames[idx_last]),
                     os.path.join(filepath, 'S2', '20m', filenames[idx_last].replace('10m', '20m')),
                     os.path.join(filepath, 'S2', '60m', filenames[idx_last].replace('10m', '60m')),
                     os.path.join(filepath, 'S2', 'meta', filenames[idx_last].replace('_10m', '').replace('.tif', '.txt'))]
            for k in range(4):
                os.chmod(fn_im[k], 0o777)
                os.remove(fn_im[k])
            # remove that pair from the list
            pairs.pop(np.where(pair_first == idx)[0][-1])

    # for each pair of images, first check if one image completely contains the other;
    # in that case keep the larger image, otherwise merge the two images
    for i, pair in enumerate(pairs):
        # get the filenames of all the files corresponding to each image in the pair
        fn_im = []
        for index in range(len(pair)):
            fn_im.append([os.path.join(filepath, 'S2', '10m', filenames[pair[index]]),
                          os.path.join(filepath, 'S2', '20m', filenames[pair[index]].replace('10m', '20m')),
                          os.path.join(filepath, 'S2', '60m', filenames[pair[index]].replace('10m', '60m')),
                          os.path.join(filepath, 'S2', 'meta', filenames[pair[index]].replace('_10m', '').replace('.tif', '.txt'))])
        # get the polygon of the first image
        polygon0 = SDS_tools.get_image_bounds(fn_im[0][0])
        im_epsg0 = metadata[sat]['epsg'][pair[0]]
        # get the polygon of the second image
        polygon1 = SDS_tools.get_image_bounds(fn_im[1][0])
        im_epsg1 = metadata[sat]['epsg'][pair[1]]
        # check that both images have the same epsg
        if im_epsg0 != im_epsg1:
            print('WARNING: there was an error as two S2 images do not have the same epsg,' +
                  ' please open an issue on Github at https://github.com/kvos/CoastSat/issues' +
                  ' and include your script so we can find out what happened.')
            break
        # check if one image contains the other one
        if polygon0.contains(polygon1):
            # if polygon0 contains polygon1, remove the files of polygon1
            for k in range(4):  # remove the 3 .tif files + the .txt file
                os.chmod(fn_im[1][k], 0o777)
                os.remove(fn_im[1][k])
            # print('removed 1')
            continue
        elif polygon1.contains(polygon0):
            # if polygon1 contains polygon0, remove the files of polygon0
            for k in range(4):  # remove the 3 .tif files + the .txt file
                os.chmod(fn_im[0][k], 0o777)
                os.remove(fn_im[0][k])
            # print('removed 0')
            # adjust the order in case of triplicates
            if i + 1 < len(pairs):
                if pairs[i + 1][0] == pair[0]:
                    pairs[i + 1][0] = pairs[i][1]
            continue
        # otherwise merge the two images after masking the nodata values
        else:
            for index in range(len(pair)):
                # read the image
                im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single(fn_im[index], sat, False)
                # in Sentinel-2 images close to the edge of the scene there are artefacts:
                # squares with constant pixel intensities. They need to be masked in the
                # raster (GEOTIFF). This can be done using the image standard deviation,
                # which is close to 0 over the artefacts.
                if len(im_ms) > 0:
                    # calculate the image std for the first 10m band
                    im_std = SDS_tools.image_std(im_ms[:, :, 0], 1)
                    # convert to binary
                    im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
                    # dilate to fill the edges (which have high std)
                    mask10 = morphology.dilation(im_binary, morphology.square(3))
                    # mask the 10m .tif file (add no_data where the mask is True)
                    SDS_tools.mask_raster(fn_im[index][0], mask10)
                    # now calculate the mask for the 20m band (SWIR1);
                    # for older versions of the ee api, calculate the image std again
                    if int(ee.__version__[-3:]) <= 201:
                        # calculate the std to create another mask for the 20m band (SWIR1)
                        im_std = SDS_tools.image_std(im_extra, 1)
                        im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
                        mask20 = morphology.dilation(im_binary, morphology.square(3))
                    # for newer versions, just resample the mask made for the 10m bands
                    else:
                        # create the mask for the 20m band (SWIR1) by resampling the 10m one
                        mask20 = ndimage.zoom(mask10, zoom=1 / 2, order=0)
                        mask20 = transform.resize(mask20, im_extra.shape, mode='constant', order=0, preserve_range=True)
                        mask20 = mask20.astype(bool)
                    # mask the 20m .tif file (im_extra)
                    SDS_tools.mask_raster(fn_im[index][1], mask20)
                    # create a mask for the 60m QA band by resampling the 20m one
                    mask60 = ndimage.zoom(mask20, zoom=1 / 3, order=0)
                    mask60 = transform.resize(mask60, im_QA.shape, mode='constant', order=0, preserve_range=True)
                    mask60 = mask60.astype(bool)
                    # mask the 60m .tif file (im_QA)
                    SDS_tools.mask_raster(fn_im[index][2], mask60)

                    # make a figure for quality control/debugging
                    # im_RGB = SDS_preprocess.rescale_image_intensity(im_ms[:,:,[2,1,0]], cloud_mask, 99.9)
                    # fig, ax = plt.subplots(2, 3, tight_layout=True)
                    # ax[0,0].imshow(im_RGB)
                    # ax[0,0].set_title('RGB original')
                    # ax[1,0].imshow(mask10)
                    # ax[1,0].set_title('Mask 10m')
                    # ax[0,1].imshow(mask20)
                    # ax[0,1].set_title('Mask 20m')
                    # ax[1,1].imshow(mask60)
                    # ax[1,1].set_title('Mask 60m')
                    # ax[0,2].imshow(im_QA)
                    # ax[0,2].set_title('Im QA')
                    # ax[1,2].imshow(im_nodata)
                    # ax[1,2].set_title('Im nodata')
                else:
                    continue

            # once all the .tif files have been masked with no_data, merge them using gdal_merge
            fn_merged = os.path.join(filepath, 'merged.tif')
            for k in range(3):
                # merge the masked bands
                gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][k], fn_im[1][k]])
                # remove the old files
                os.chmod(fn_im[0][k], 0o777)
                os.remove(fn_im[0][k])
                os.chmod(fn_im[1][k], 0o777)
                os.remove(fn_im[1][k])
                # rename the new file
                fn_new = fn_im[0][k].split('.')[0] + '_merged.tif'
                os.chmod(fn_merged, 0o777)
                os.rename(fn_merged, fn_new)

            # open both metadata files
            metadict0 = dict([])
            with open(fn_im[0][3], 'r') as f:
                metadict0['filename'] = f.readline().split('\t')[1].replace('\n', '')
                metadict0['acc_georef'] = float(f.readline().split('\t')[1].replace('\n', ''))
                metadict0['epsg'] = int(f.readline().split('\t')[1].replace('\n', ''))
            metadict1 = dict([])
            with open(fn_im[1][3], 'r') as f:
                metadict1['filename'] = f.readline().split('\t')[1].replace('\n', '')
                metadict1['acc_georef'] = float(f.readline().split('\t')[1].replace('\n', ''))
                metadict1['epsg'] = int(f.readline().split('\t')[1].replace('\n', ''))
            # if either image has no georeferencing accuracy, flag the merged image as well
            if np.any(np.array([metadict0['acc_georef'], metadict1['acc_georef']]) == -1):
                metadict0['acc_georef'] = -1
            # add the new name
            metadict0['filename'] = metadict0['filename'].split('.')[0] + '_merged.tif'
            # remove the old metadata .txt files
            os.chmod(fn_im[0][3], 0o777)
            os.remove(fn_im[0][3])
            os.chmod(fn_im[1][3], 0o777)
            os.remove(fn_im[1][3])
            # rewrite the .txt file with a new metadata file
            fn_new = fn_im[0][3].split('.')[0] + '_merged.txt'
            with open(fn_new, 'w') as f:
                for key in metadict0.keys():
                    f.write('%s\t%s\n' % (key, metadict0[key]))

            # update the filenames list (in case there are triplicates)
            filenames[pair[0]] = metadict0['filename']

    print('%d out of %d Sentinel-2 images were merged (overlapping or duplicate)' %
          (len(pairs), len(filenames)))

    # update the metadata dict
    metadata_updated = get_metadata(inputs)

    return metadata_updated
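# The metadata .txt files parsed above are assumed, based on the inline parsing
# code, to hold three tab-separated key/value lines in a fixed order:
#
#   filename\t<image filename>
#   acc_georef\t<georeferencing accuracy, float, -1 if unknown>
#   epsg\t<epsg code, int>
#
# A small helper capturing that read pattern (a sketch mirroring the inline
# parsing, not an official CoastSat API):
def read_metadata_txt_sketch(fn):
    metadict = {}
    with open(fn, 'r') as f:
        metadict['filename'] = f.readline().split('\t')[1].strip()
        metadict['acc_georef'] = float(f.readline().split('\t')[1])
        metadict['epsg'] = int(f.readline().split('\t')[1])
    return metadict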
def merge_overlapping_images(metadata, inputs):
    """
    When the area of interest is located at the boundary between 2 images, there
    will be overlap between the 2 images and both will be downloaded from Google
    Earth Engine. This function merges the 2 images, so that the area of interest
    is covered by only 1 image.

    KV WRL 2018

    Arguments:
    -----------
    metadata: dict
        contains all the information about the satellite images that were downloaded
    inputs: dict
        dictionary that contains the following fields:
        'sitename': str
            string containing the name of the site
        'polygon': list
            polygon containing the lon/lat coordinates to be extracted,
            longitudes in the first column and latitudes in the second column,
            there are 5 pairs of lat/lon with the fifth point equal to the first point.
            e.g. [[[151.3, -33.7],[151.4, -33.7],[151.4, -33.8],[151.3, -33.8],
            [151.3, -33.7]]]
        'dates': list of str
            list that contains 2 strings with the initial and final dates in
            format 'yyyy-mm-dd' e.g. ['1987-01-01', '2018-01-01']
        'sat_list': list of str
            list that contains the names of the satellite missions to include
            e.g. ['L5', 'L7', 'L8', 'S2']
        'filepath': str
            filepath to the directory where the images are downloaded

    Returns:
    -----------
    metadata_updated: dict
        updated metadata with the information of the merged images
    """

    # only for Sentinel-2 at this stage (not sure if this is needed for Landsat images)
    sat = 'S2'
    filepath = os.path.join(inputs['filepath'], inputs['sitename'])
    # find the images that are overlapping (same date in the S2 filenames)
    filenames = metadata[sat]['filenames']
    filenames_copy = filenames.copy()
    # loop through all the filenames and find the pairs of overlapping images
    # (same date and time of acquisition)
    pairs = []
    for i, fn in enumerate(filenames):
        filenames_copy[i] = []
        # find the duplicate (the first 22 characters of the filename contain
        # the date and time of acquisition)
        boolvec = [fn[:22] == _[:22] for _ in filenames_copy]
        if np.any(boolvec):
            idx_dup = np.where(boolvec)[0][0]
            # put the image with the shorter filename first,
            # as that is the one that is kept after merging
            if len(filenames[i]) > len(filenames[idx_dup]):
                pairs.append([idx_dup, i])
            else:
                pairs.append([i, idx_dup])

    # for each pair of images, merge them into one complete image
    for i, pair in enumerate(pairs):
        fn_im = []
        for index in range(len(pair)):
            # get the filenames for each image in the pair and read the image
            fn_im.append([os.path.join(filepath, 'S2', '10m', filenames[pair[index]]),
                          os.path.join(filepath, 'S2', '20m', filenames[pair[index]].replace('10m', '20m')),
                          os.path.join(filepath, 'S2', '60m', filenames[pair[index]].replace('10m', '60m')),
                          os.path.join(filepath, 'S2', 'meta', filenames[pair[index]].replace('_10m', '').replace('.tif', '.txt'))])
            im_ms, georef, cloud_mask, im_extra, im_QA, im_nodata = SDS_preprocess.preprocess_single(fn_im[index], sat, False)

            # in Sentinel-2 images close to the edge of the scene there are artefacts:
            # squares with constant pixel intensities. They need to be masked in the
            # raster (GEOTIFF). This can be done using the image standard deviation,
            # which is close to 0 over the artefacts.

            # first mask the 10m bands
            if len(im_ms) > 0:
                im_std = SDS_tools.image_std(im_ms[:, :, 0], 1)
                im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
                mask = morphology.dilation(im_binary, morphology.square(3))
                for k in range(im_ms.shape[2]):
                    im_ms[mask, k] = np.nan
                SDS_tools.mask_raster(fn_im[index][0], mask)
                # then mask the 20m band
                im_std = SDS_tools.image_std(im_extra, 1)
                im_binary = np.logical_or(im_std < 1e-6, np.isnan(im_std))
                mask = morphology.dilation(im_binary, morphology.square(3))
                im_extra[mask] = np.nan
                SDS_tools.mask_raster(fn_im[index][1], mask)
            else:
                continue

            # make a figure for quality control
            # plt.figure()
            # plt.subplot(221)
            # plt.imshow(im_ms[:,:,[2,1,0]])
            # plt.title('imRGB')
            # plt.subplot(222)
            # plt.imshow(im20, cmap='gray')
            # plt.title('im20')
            # plt.subplot(223)
            # plt.imshow(imQA, cmap='gray')
            # plt.title('imQA')
            # plt.subplot(224)
            # plt.title(fn_im[index][0][-30:])

        # merge the masked 10m bands
        fn_merged = os.path.join(os.getcwd(), 'merged.tif')
        gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][0], fn_im[1][0]])
        os.chmod(fn_im[0][0], 0o777)
        os.remove(fn_im[0][0])
        os.chmod(fn_im[1][0], 0o777)
        os.remove(fn_im[1][0])
        os.rename(fn_merged, fn_im[0][0])

        # merge the masked 20m band (SWIR band)
        fn_merged = os.path.join(os.getcwd(), 'merged.tif')
        gdal_merge.main(['', '-o', fn_merged, '-n', '0', fn_im[0][1], fn_im[1][1]])
        os.chmod(fn_im[0][1], 0o777)
        os.remove(fn_im[0][1])
        os.chmod(fn_im[1][1], 0o777)
        os.remove(fn_im[1][1])
        os.rename(fn_merged, fn_im[0][1])

        # merge the QA band (60m band)
        fn_merged = os.path.join(os.getcwd(), 'merged.tif')
        gdal_merge.main(['', '-o', fn_merged, '-n', 'nan', fn_im[0][2], fn_im[1][2]])
        os.chmod(fn_im[0][2], 0o777)
        os.remove(fn_im[0][2])
        os.chmod(fn_im[1][2], 0o777)
        os.remove(fn_im[1][2])
        os.rename(fn_merged, fn_im[0][2])

        # remove the metadata .txt file of the duplicate image
        os.chmod(fn_im[1][3], 0o777)
        os.remove(fn_im[1][3])

    print('%d pairs of overlapping Sentinel-2 images were merged' % len(pairs))

    # update the metadata dict (delete all the duplicates)
    metadata_updated = copy.deepcopy(metadata)
    filenames_copy = metadata_updated[sat]['filenames']
    index_list = []
    for i in range(len(filenames_copy)):
        if filenames_copy[i].find('dup') == -1:
            index_list.append(i)
    for key in metadata_updated[sat].keys():
        metadata_updated[sat][key] = [metadata_updated[sat][key][_] for _ in index_list]

    return metadata_updated
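# gdal_merge.main() takes an argv-style list whose first element stands in for
# the program name, so the calls above are equivalent to running the command
# line tool:
#
#   gdal_merge.py -o merged.tif -n 0 <image_A> <image_B>
#
# where -n declares the input pixel value to ignore while compositing, letting
# the valid pixels of one scene fill the masked no_data edges of the other.
# A small helper capturing that pattern (a sketch, not part of CoastSat):
def gdal_merge_pair_sketch(fn_out, fn_a, fn_b, nodata='0'):
    # the leading empty string is the dummy argv[0] that gdal_merge skips
    gdal_merge.main(['', '-o', fn_out, '-n', nodata, fn_a, fn_b])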