def _create_pair(path, augmentation_config_encoded, data_type, model_type):
    """
    Create a contrastive learning pair for nt-xent loss.
    """
    augmentation_config = json.loads(augmentation_config_encoded.numpy())
    path = path.numpy()
    data_type = data_type.numpy().decode()
    model_type = model_type.numpy().decode()
    y = read_audio(path, data_type)

    y1 = randomAudioAugmentation(y, augmentation_config)
    y1 = fp_to_pcm(enhance(y1))
    y2 = randomAudioAugmentation(y, augmentation_config)
    y2 = fp_to_pcm(enhance(y2))

    if model_type == "vggvox":
        y1_stft = np.expand_dims(create_spectrogram(y1)[:, :300], axis=-1)
        y2_stft = np.expand_dims(create_spectrogram(y2)[:, :300], axis=-1)
    elif model_type == "lstm":
        y1_stft = create_spectrogram(y1)[:, :300].T
        y2_stft = create_spectrogram(y2)[:, :300].T
    elif model_type == "transformer":
        y1_stft = np.expand_dims(create_spectrogram(y1)[:, :300].T, axis=-1)
        y2_stft = np.expand_dims(create_spectrogram(y2)[:, :300].T, axis=-1)

    return np.array([y1_stft, y2_stft]).astype(np.float32)
def _create_triplet(filenames, data_type, model_type):
    """
    Create a triplet for the triplet loss.
    """
    filenames = filenames.numpy()
    data_type = data_type.numpy().decode()
    model_type = model_type.numpy().decode()

    filename_p = filenames[0]
    filename_n = filenames[1]

    y = read_audio(filename_p, data_type)
    a = crop(y, 4 * 16_000)
    a = fp_to_pcm(enhance(a))
    p = crop(y, 4 * 16_000)
    p = fp_to_pcm(enhance(p))

    y = read_audio(filename_n, data_type)
    n = crop(y, 4 * 16_000)
    n = fp_to_pcm(enhance(n))

    if model_type == "vggvox":
        a_stft = np.expand_dims(create_spectrogram(a)[:, :300], axis=-1)
        p_stft = np.expand_dims(create_spectrogram(p)[:, :300], axis=-1)
        n_stft = np.expand_dims(create_spectrogram(n)[:, :300], axis=-1)
    elif model_type == "lstm":
        a_stft = create_spectrogram(a)[:, :300].T
        p_stft = create_spectrogram(p)[:, :300].T
        n_stft = create_spectrogram(n)[:, :300].T
    elif model_type == "transformer":
        a_stft = np.expand_dims(create_spectrogram(a)[:, :300].T, axis=-1)
        p_stft = np.expand_dims(create_spectrogram(p)[:, :300].T, axis=-1)
        n_stft = np.expand_dims(create_spectrogram(n)[:, :300].T, axis=-1)

    return np.array([a_stft, p_stft, n_stft]).astype(np.float32)
def _create_example(filename, num_labels, split, data_type, model_type,
                    dataset):
    """
    Create a single example and label for classification using CCE loss.
    """
    filename = filename.numpy().decode()
    data_type = data_type.numpy().decode()
    num_labels = num_labels.numpy()
    model_type = model_type.numpy().decode()
    split = split.numpy().decode()
    datset = dataset.numpy().decode()

    if dataset == "voxceleb":
        speaker = filename.split("/")[-3]
        label = int(speaker[3:]) - 1  # there is no label 0
    elif dataset == "music":
        label = int(filename.split("/").split("_")[1])
    elif dataset == "birdsong":
        label = int(filename.split("/")[-3])

    one_hot_label = np.zeros(num_labels)
    one_hot_label[label] = 1

    y = read_audio(filename, data_type)
    # comment this if you do not want to learn with random crops
    if len(y) < 4 * 16_000:
        y = np.tile(y, 2)
    y = crop(y, 4 * 16_000)
    y = fp_to_pcm(enhance(y))
    stft = create_spectrogram(y)

    if split == "train" or split == "validation":
        if model_type == "vggvox":
            stft = np.expand_dims(stft[:, :300], axis=-1)
        elif model_type == "lstm":
            stft = stft[:, :300].T
        elif model_type == "transformer":
            stft = np.expand_dims(stft[:, :300].T, axis=-1)
    elif split == "test":
        if model_type == "vggvox":
            stft = np.expand_dims(stft, axis=-1)
        elif model_type == "lstm":
            stft = stft.T
            stft = [
                stft[i * 300:(i + 1) * 300]
                for i in range(stft.shape[0] // 300)
            ]
            stft = np.array(stft)
        elif model_type == "transformer":
            stft = stft.T
            stft = [
                stft[i * 300:(i + 1) * 300]
                for i in range(stft.shape[0] // 300)
            ]
            stft = np.expand_dims(np.array(stft), axis=-1)
    return stft, one_hot_label
示例#4
0
# Python 2 script fragment (print statements): compare the first image of the
# database directory against every image in it.
# Collect all file names in the directory so they can be looked up by index.
path = '/home/zhangzimou/Desktop/code_lnk/database/FVC2002/DB1_B'
# Number of directory entries (np.shape of the listing is a 1-tuple).
size, = np.shape(os.listdir(path))
filename = [1]*size # placeholder list; original note: ten different fingerprints, eight images per fingerprint
count = 0
# NOTE(review): os.listdir returns entries in arbitrary order, while the
# index-based grouping further down (i/8 == j/8) presumably relies on the
# eight images of one finger being adjacent - confirm or sort the listing.
for file in os.listdir(path):
	filename[count] = file
	count += 1
# Accumulators for the search over all file names (matching images and the
# rate of correct decisions); none of them is updated in the visible part.
matchedPoints_max = np.zeros([size,1])
score = np.zeros([size,1])
correct = 0
wrong = 0
# Reference image: index 0, read with flag 0 (grayscale), then enhanced,
# binarized and thinned.
i = 0
img1 = cv2.imread(path+'/'+filename[i],0)
imgE1,imgfore1=enhance(img1)
imgB1=basic.binarize(imgE1)
imgT1=pre.thinning(imgB1)
for j in range(size):
	print "Image %d & Image %d" % (i,j)
	# Candidate image to compare against the reference.
	
	img2 = cv2.imread(path+'/'+filename[j],0)
	# Decide from the file indices whether both images belong to the same
	# fingerprint: integer division groups the indices in blocks of eight.
	if i/8 == j/8:
		print "They are from same fingerprint"
	else:
		print "They are from different fingerprint"
	# Preprocess the candidate image (the rest of the loop body is not
	# visible in this excerpt).
	
	imgE2,imgfore2=enhance(img2)
示例#5
0
def data_generator_multi(images,
                         masks,
                         config,
                         shuffle=True,
                         augment=False,
                         batch_size=None,
                         tp_value=8):
    """Generate ``[image_batch, mask_batch]`` pairs, optionally augmented.

    Batch indices are pulled with ``next`` from
    ``num_iterator(masks.shape[0], batch_size, shuffle=shuffle)`` inside an
    endless loop. With ``augment`` enabled one transformation is chosen per
    batch by drawing ``np.random.randint(tp_value)``:

        1 - flip along axis 2 or axis 1 (chosen by a second random draw)
        2 - ``affine_transform_batch`` with the ranges taken from ``config``
        3 - ``elastic_transform_batch`` with ``config.ALPHA`` / ``config.SIGMA``
        4 - ``insert``
        5 - ``enhance`` (images only)
        6 - ``level_noise`` (images only)
        7 - ``stretch``

    Any other drawn value leaves the batch unchanged.

    Args:
        images: array indexed by the batch indices; copied and cast to float32.
        masks: array indexed by the batch indices; ``masks.shape[0]`` is the
            number of samples handed to ``num_iterator``.
        config: object providing ``BATCH_SIZE`` and the augmentation settings.
        shuffle: forwarded to ``num_iterator``.
        augment: whether to apply a random transformation to each batch.
        batch_size: batch size; ``config.BATCH_SIZE`` when ``None``.
        tp_value: exclusive upper bound of the transformation draw.

    Yields:
        A two-element list: the image batch as float32 and the mask batch.
    """
    if batch_size is None:
        batch_size = config.BATCH_SIZE
    index_source = num_iterator(masks.shape[0], batch_size, shuffle=shuffle)

    while True:
        picked = next(index_source)
        image_batch = images[picked].copy().astype('float32')
        mask_batch = masks[picked].copy()

        # None never matches a branch below, so nothing happens without augment.
        choice = np.random.randint(tp_value) if augment else None

        if choice == 1:
            # Reverse axis 2 or axis 1, identically for images and masks.
            if np.random.rand() > 0.5:
                flip = (slice(None), slice(None), slice(None, None, -1))
            else:
                flip = (slice(None), slice(None, None, -1))
            image_batch = image_batch[flip]
            mask_batch = mask_batch[flip]
        elif choice == 2:
            image_batch, mask_batch = affine_transform_batch(
                image_batch,
                mask_batch,
                rotation_range=config.ROTATION_RANGE,
                width_shift_range=config.WIDTH_SHIFT_RANGE,
                height_shift_range=config.HEIGHT_SHIFT_RANGE,
                shear_range=config.SHEAR_RANGE,
                zoom_range=config.ZOOM_RANGE)
        elif choice == 3:
            image_batch, mask_batch = elastic_transform_batch(
                image_batch, mask_batch, alpha=config.ALPHA, sigma=config.SIGMA)
        elif choice == 4:
            tries = np.random.randint(20, 100)
            max_fails = 5
            image_batch, mask_batch = insert(image_batch, mask_batch, tries,
                                             max_fails)
        elif choice == 5:
            image_batch = enhance(
                image_batch, mask_batch, max_tries=10, max_enhance_ratio=0.8)
        elif choice == 6:
            image_batch = level_noise(
                image_batch, mask_batch, max_level_ratio=4, max_noise_ratio=0.05)
        elif choice == 7:
            image_batch, mask_batch = stretch(
                image_batch, mask_batch, max_ratio=2)

        # Disabled intensity normalisation, kept as a no-op string literal.
        '''
        image_max = np.max(image_batch, axis=(1,2,3), keepdims=True)
        image_max[image_max==0]=1
        amax = np.random.randint(200, 256) if augment else 255
        image_batch = image_batch/image_max*amax
        '''
        yield [image_batch.astype('float32'), mask_batch]
示例#6
0
from _match import minutiaeMatch

# Root directories of the FVC2000 / FVC2002 / FVC2004 databases.
FVC0='/home/zhangzimou/Desktop/code_lnk/database/FVC2000/'
FVC2='/home/zhangzimou/Desktop/code_lnk/database/FVC2002/'
FVC4='/home/zhangzimou/Desktop/code_lnk/database/FVC2004/'
# Database subset used by this script.
path=FVC2+'DB1_B/'



# Not referenced in the visible part of this script.
blockSize=8

# Flag 0 loads the image as grayscale.
img=cv2.imread(path+'108_3.tif',0)



# Preprocessing chain: enhance -> binarize -> thin. enhance() also returns a
# second value (imgfore) that is handed to minutiaeExtract below.
imgE,imgfore=enhance(img)
imgB=basic.binarize(imgE)
imgT=pre.thinning(imgB)

#plt.figure()
#imshow(img)
# One figure for the enhanced image, one for the thinned image.
plt.figure()
plt.imshow(imgE,cmap='gray')
plt.figure()
plt.imshow(imgT,cmap='gray')


# Extract the two point sets and their angles, then draw them on the current
# figure (the thinned image): `ending` in blue, `bifur` in red.
ending,bifur,theta1,theta2=minutiaeExtract(imgT,imgfore)
# Column 1 is plotted on the x axis and column 0 on the y axis
# (presumably the points are stored as (row, col) - confirm).
plt.plot(ending.T[1],ending.T[0],'b.',bifur.T[1],bifur.T[0],'r.')
# Arrows showing the angle of each point; the sine is taken of the negated
# angle, presumably because the image y axis points downwards - confirm.
plt.quiver(ending.T[1],ending.T[0],np.cos(theta1),np.sin(-theta1),color='b',width=0.003)
plt.quiver(bifur.T[1],bifur.T[0],np.cos(theta2),np.sin(-theta2),color='r',width=0.003)
示例#7
0
import preprocess as pre
from minutiaeExtract import minutiaeExtract
from basic import block_view

# Root directories of the FVC2000 / FVC2002 / FVC2004 databases.
FVC0 = '/home/zhangzimou/Desktop/code_lnk/database/FVC2000/'
FVC2 = '/home/zhangzimou/Desktop/code_lnk/database/FVC2002/'
FVC4 = '/home/zhangzimou/Desktop/code_lnk/database/FVC2004/'
# Database subset used by this script.
path = FVC4 + 'DB4_B/'

# Not referenced in the visible part of this script.
blockSize = 8

# Flag 0 loads the image as grayscale.
img = cv2.imread(path + '102_1.tif', 0)

# Segmentation returns the segmented image and a second value (imgfore) that
# is handed to minutiaeExtract below.
img_seg, imgfore = pre.segmentation(img)

# Preprocessing chain on the original image: enhance -> binarize -> thin.
# Here enhance() is used as returning a single value.
img_en = enhance(img)
imgB = basic.binarize(img_en)
imgT = pre.thinning(imgB)

# One figure each for the segmented, binarized and thinned image.
plt.figure()
imshow(img_seg)
plt.figure()
plt.imshow(imgB, cmap='gray')
plt.figure()
plt.imshow(imgT, cmap='gray')

# Extract the two point sets and their angles, then draw them on the current
# figure (the thinned image): `ending` as blue dots, `bifur` as red dots.
# Column 1 is plotted on the x axis and column 0 on the y axis
# (presumably the points are stored as (row, col) - confirm).
ending, bifur, theta1, theta2 = minutiaeExtract(imgT, imgfore)
plt.plot(ending.T[1], ending.T[0], 'b.', bifur.T[1], bifur.T[0], 'r.')
plt.quiver(ending.T[1],
           ending.T[0],
           np.cos(theta1),
示例#8
0
from preprocess import enhance
import preprocess as pre
from minutiaeExtract import minutiaeExtract
import minutiaeExtract as extract

# Root directories of the FVC2000 / FVC2002 / FVC2004 databases.
FVC0 = '/home/zhangzimou/Desktop/code_lnk/database/FVC2000/'
FVC2 = '/home/zhangzimou/Desktop/code_lnk/database/FVC2002/'
FVC4 = '/home/zhangzimou/Desktop/code_lnk/database/FVC2004/'
# Database subset used by this script.
path = FVC4 + 'DB1_B/'

# Start of the timing measurement.
# NOTE(review): time.clock() was removed in Python 3.8; if this script is to
# run on Python 3, time.perf_counter() is the replacement - confirm the
# target interpreter before changing it.
start = time.clock()

# Flag 0 loads the image as grayscale.
img = cv2.imread(path + '105_1.tif', 0)
# Only imgfore from the segmentation is used afterwards: `image` is
# overwritten by the enhance() result two statements below.
image, imgfore = pre.segmentation(img)

# Enhance the original image and locate the singular points, returned as a
# (core_index, delta_index) pair.
image = enhance(img)
core_index, delta_index = extract.singular(image, imgfore)

# Only referenced by the commented-out code that follows.
blockSize = 8
#theta=pre.calcDirection(image,blockSize,method='block-wise')
#
#P =[ theta[2:,1:-1], theta[2:,2:], theta[1:-1,2:], theta[:-2,2:], theta[:-2,1:-1],theta[:-2,:-2], theta[1:-1,:-2], theta[2:,:-2]]
#
#
#
#N,M=image.shape
#N1,M1=theta.shape
#delta=np.zeros((N1-2,M1-2))
#for i in range(8):
#    if i==7:
#        de=P[0]-P[7]