Exemplo n.º 1
0
 def _load_utterances(self):
     """Load this speaker's utterances from <root>/_sources.txt.

     Each non-empty line is "<frames_fname>,<wave_fpath>". Lines are
     stripped before splitting: iterating a file keeps the trailing
     newline, which would otherwise end up inside the wave file path.
     """
     with self.root.joinpath("_sources.txt").open("r") as sources_file:
         # strip() removes the trailing "\n"; the filter skips blank lines
         # that would raise ValueError when unpacked into (f, w).
         sources = [l.strip().split(",") for l in sources_file if l.strip()]
     self.utterances = [
         Utterance(self.root.joinpath(f), w) for f, w in sources
     ]
     self.utterance_cycler = RandomCycler(self.utterances)
Exemplo n.º 2
0
 def _load_utterances(self):
     """Load this speaker's utterances from <root>/_sources.txt.

     Each non-empty line is "<frames_fname>,<wave_fpath>". Duplicate frame
     filenames collapse to the last wave path via the dict. On any failure
     building the cycler, the offending sources file is printed and the
     original exception is re-raised.
     """
     with self.root.joinpath("_sources.txt").open("r") as sources_file:
         # strip() removes the trailing "\n" that would otherwise end up in
         # the wave path; the filter skips blank lines.
         sources = [l.strip().split(",") for l in sources_file if l.strip()]
     sources = {frames_fname: wave_fpath for frames_fname, wave_fpath in sources}
     self.utterances = [Utterance(self.root.joinpath(f), w) for f, w in sources.items()]
     try:
         self.utterance_cycler = RandomCycler(self.utterances)
     except Exception:
         # Report which speaker's source list caused the failure, then
         # re-raise with the original traceback (bare raise, not raise(e)).
         print(self.root.joinpath("_sources.txt"))
         raise
Exemplo n.º 3
0
 def _load_utterances(self):
     """Load this speaker's utterances from <root>/_sources.txt.

     Each non-empty line is "<frames_fname>,<wave_fpath>". Duplicate frame
     filenames collapse to the last wave path via the dict.
     """
     with self.root.joinpath("_sources.txt").open("r") as sources_file:
         # strip() removes the trailing "\n" that would otherwise end up in
         # the wave path; the filter skips blank lines.
         sources = [l.strip().split(",") for l in sources_file if l.strip()]
     sources = {
         frames_fname: wave_fpath
         for frames_fname, wave_fpath in sources
     }
     self.utterances = [
         Utterance(self.root.joinpath(f), w) for f, w in sources.items()
     ]
     self.utterance_cycler = RandomCycler(self.utterances)
Exemplo n.º 4
0
class Speaker:
    """A single speaker's directory of preprocessed utterances.

    Utterances are loaded lazily on the first call to random_partial(),
    so speakers that are never sampled cost no disk I/O.
    """

    def __init__(self, root: Path):
        self.root = root
        self.name = root.name
        # None until _load_utterances() runs (lazy loading).
        self.utterances = None
        self.utterance_cycler = None

    def _load_utterances(self):
        """Parse <root>/_sources.txt into Utterance objects and a cycler.

        Each non-empty line is "<frames_fname>,<wave_fpath>". Lines are
        stripped before splitting so the trailing newline does not end up
        inside the wave path; blank lines are skipped.
        """
        with self.root.joinpath("_sources.txt").open("r") as sources_file:
            sources = [l.strip().split(",") for l in sources_file if l.strip()]
        sources = {frames_fname: wave_fpath for frames_fname, wave_fpath in sources}
        self.utterances = [Utterance(self.root.joinpath(f), w) for f, w in sources.items()]
        self.utterance_cycler = RandomCycler(self.utterances)

    def random_partial(self, count, n_frames):
        """
        Samples a batch of <count> unique partial utterances from the disk in a way that all 
        utterances come up at least once every two cycles and in a random order every time.
        
        :param count: The number of partial utterances to sample from the set of utterances from 
        that speaker. Utterances are guaranteed not to be repeated if <count> is not larger than 
        the number of utterances available.
        :param n_frames: The number of frames in the partial utterance.
        :return: A list of tuples (utterance, frames, range) where utterance is an Utterance, 
        frames are the frames of the partial utterances and range is the range of the partial 
        utterance with regard to the complete utterance.
        """
        if self.utterances is None:
            self._load_utterances()

        utterances = self.utterance_cycler.sample(count)
        return [(u,) + u.random_partial(n_frames) for u in utterances]
    def __init__(self, datasets_root, dataset_len, train_frac):
        """
        :param datasets_root: directory containing one subdirectory per
                              preprocessed speaker.
        :param dataset_len: the number of pseudo-speakers in the dataset. Speakers are sampled
                            with replacement. Each time a speaker is returned, a random set of
                            utterances and random segment from each utterance is selected.
        :param train_frac: the fraction of training set to use.
        """
        self.root = datasets_root
        speaker_dirs = [entry for entry in self.root.glob("*") if entry.is_dir()]
        if not speaker_dirs:
            raise Exception(
                "No speakers found. Make sure you are pointing to the directory "
                "containing all preprocessed speaker directories.")

        # Keep only the first train_frac of the discovered speakers
        # (truncated toward zero, as in int()).
        n_keep = int(train_frac * len(speaker_dirs))
        self.speakers = [Speaker(d) for d in speaker_dirs[:n_keep]]

        self.speaker_cycler = RandomCycler(self.speakers)
        self.dataset_len = dataset_len

        print("Training set - number of speakers is {} ({}% of total)".format(
            len(self.speakers), train_frac * 100))
Exemplo n.º 6
0
 def __init__(self, datasets_root: Path, user_folder: Path):
     """Collect every preprocessed speaker directory under datasets_root,
     plus the user's own folder, and build a random cycler over them."""
     self.root = datasets_root
     speaker_dirs = [entry for entry in self.root.glob("*") if entry.is_dir()]
     if not speaker_dirs:
         raise Exception("No speakers found. Make sure you are pointing to the directory "
                         "containing all preprocessed speaker directories.")
     self.speakers = [Speaker(d) for d in speaker_dirs]
     # The user's folder is treated as one additional speaker.
     self.speakers.append(Speaker(user_folder))
     self.speaker_cycler = RandomCycler(self.speakers)
    def __init__(self, datasets_root: Path, img_per_cls: int, train: bool):
        """Discover one Cls per subdirectory of datasets_root and build a
        random cycler over them.

        NOTE(review): img_per_cls is accepted but not used in this method —
        presumably consumed elsewhere; confirm before removing it.
        """
        self.root = datasets_root
        cls_dirs = [entry for entry in self.root.glob("*") if entry.is_dir()]

        if not cls_dirs:
            raise Exception(
                "No image class found. Make sure you are pointing to the directory "
                "containing all preprocessed image class directories.")

        self.classes = [Cls(d, train) for d in cls_dirs]
        self.cls_cycler = RandomCycler(self.classes)
Exemplo n.º 8
0
 def __init__(self, root: Path, train: bool):
     """Index every file in the class directory and build a random cycler.

     :param root: directory holding this class's image files.
     :param train: whether augmentation should run in training mode.
     """
     self.is_training = train
     self.root = root
     self.name = root.name
     # list() instead of a copying comprehension (ruff PERF402).
     self.images = list(self.root.iterdir())
     self.image_cycler = RandomCycler(self.images)
Exemplo n.º 9
0
class Cls:
    """One image class: indexes the files in a directory and serves randomly
    sampled, augmented, square-padded images in channels-first layout."""

    def __init__(self, root: Path, train: bool):
        self.is_training = train
        self.root = root
        self.name = root.name
        # list() instead of a copying comprehension (ruff PERF402).
        self.images = list(self.root.iterdir())
        self.image_cycler = RandomCycler(self.images)

    def random_sample(self, count):
        """Sample <count> images via the cycler, process each to 224x224 and
        scale pixel values to [0, 1]."""
        images = self.image_cycler.sample(count)
        return [self.process_img(cv2.imread(str(img)), 224)/255. for img in images]

    def random_gaussian_noise(self, img, mean=0, std=5.0):
        """Add Gaussian noise to img, clipped back to [0, 255].

        Note: the result is a float array regardless of the input dtype.
        """
        noisy_img = img + np.random.normal(mean, std, img.shape)
        return np.clip(noisy_img, 0, 255)

    def random_brightness(self, img, max_change=60):
        """Shift the V channel of a BGR image by a random amount in
        [-max_change, max_change], saturating at 0 and 255."""
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)

        # random.randint is inclusive on BOTH ends, so no "+1" on the upper
        # bound — the original max_change+1 made the range asymmetric.
        value = random.randint(-max_change, max_change)

        # Clamp the shifted result to 0 - 255 without uint8 wrap-around.
        if value >= 0:
            lim = 255 - value
            v[v > lim] = 255
            v[v <= lim] += value
        else:
            value = int(-value)
            lim = 0 + value
            v[v < lim] = 0
            v[v >= lim] -= value

        final_hsv = cv2.merge((h, s, v))
        return cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)

    def process_img(self, img, size):
        """Resize img to size x size preserving aspect ratio (padding the
        shorter side with black), apply random augmentation (stronger when
        training), and return the image channels-first.

        :param img: BGR image array of shape (height, width, channels).
        :param size: target square side length in pixels.
        :return: array of shape (channels, size, size).
        """
        height, width, channels = img.shape
        if width == height:
            new_img = cv2.resize(img, (size, size))
            top, bottom, left, right = 0, 0, 0, 0
        else:
            # Scale the longer side down to `size`, then compute symmetric
            # padding for the shorter side.
            ratio = float(size)/max(height, width)
            new_height = int(height*ratio)
            new_width = int(width*ratio)

            new_img = cv2.resize(img, (new_width, new_height))

            delta_w = size - new_width
            delta_h = size - new_height
            top, bottom = delta_h//2, delta_h-(delta_h//2)
            left, right = delta_w//2, delta_w-(delta_w//2)

        if self.is_training:
            # Full augmentation for training data.
            p_flip = 0.5
            if random.uniform(0, 1) > 1 - p_flip:
                new_img = cv2.flip(new_img, 1)

            p_brightness = 0.8
            if random.uniform(0, 1) > 1 - p_brightness:
                new_img = self.random_brightness(new_img)

            p_noise = 0.5
            if random.uniform(0, 1) > 1 - p_noise:
                new_img = self.random_gaussian_noise(new_img)
        else:
            # Small variation applied to validation data.
            p_flip = 0.5
            if random.uniform(0, 1) > 1 - p_flip:
                new_img = cv2.flip(new_img, 1)

            p_brightness = 0.75
            if random.uniform(0, 1) > 1 - p_brightness:
                new_img = self.random_brightness(new_img, max_change=40)

        new_img = cv2.copyMakeBorder(new_img, top, bottom, left, right,
                                     cv2.BORDER_CONSTANT, value=[0, 0, 0])

        # Channels-first (C, H, W) output — presumably for a CHW model
        # input pipeline; confirm against the consumer.
        return np.transpose(new_img, (2, 0, 1))