Пример #1
0
    def run(self):
        """Build joint-decision histograms over the labels named in ``self.inputs``.

        For every element of ``self.vadpy.pipeline`` the label objects listed
        in ``self.inputs`` are read and, frame by frame, the tuple of their
        binary decisions (third item of each frame) is used as a histogram
        key.  ``element.gt_labels`` decides whether the frame is counted as
        speech or as noise.

        Four tables keyed by the decision tuple are published through
        ``self.add_result``:

        * ``'speech'`` -- occurrences in speech frames / number of speech frames
        * ``'noise'``  -- occurrences in noise frames / number of noise frames
        * ``'lr'``     -- ratio of the two normalized values above
        * ``'p'``      -- occurrences / total number of frames

        Every bin starts at ``COMB_MIN_VALUE`` rather than at zero.

        Raises:
            ZeroDivisionError: with plain Python numbers, when no speech
                frame or no noise frame was counted (the counters are floats
                and are used as divisors).
        """
        super(ModFusionHistogram, self).run()
        speech_frames_count = 0.0
        noise_frames_count = 0.0

        # One bin per possible tuple of binary decisions, e.g. (0, 1, 1).
        combinations = list(product([0, 1], repeat=len(self.inputs)))
        speech_histogram = dict.fromkeys(combinations, COMB_MIN_VALUE)
        noise_histogram = dict.fromkeys(combinations, COMB_MIN_VALUE)
        lr_histogram = dict.fromkeys(combinations, COMB_MIN_VALUE)
        probabilities = dict.fromkeys(combinations, COMB_MIN_VALUE)

        for element in self.vadpy.pipeline:
            # labels objects, one per configured input
            lo_list = [getattr(element, attr) for attr in self.inputs]

            equalize_framelen(*(lo_list + [element.gt_labels]))
            frames_count = min(len(labels) for labels in lo_list)

            for i in range(frames_count):
                # i-th frame is a (start, end, speech) tuple; keep only the
                # speech decision of every input.
                comb_vec = tuple(int(lo[i][2]) for lo in lo_list)

                probabilities[comb_vec] += 1
                if element.gt_labels[i][2]:
                    speech_histogram[comb_vec] += 1
                    speech_frames_count += 1
                else:
                    noise_histogram[comb_vec] += 1
                    noise_frames_count += 1

        # Normalize the histograms and derive the speech/noise ratio.
        total_frames_count = speech_frames_count + noise_frames_count
        for key in probabilities:
            speech_histogram[key] /= speech_frames_count
            noise_histogram[key] /= noise_frames_count
            probabilities[key] /= total_frames_count

            speech_val = speech_histogram[key]
            noise_val = noise_histogram[key]

            # The original code had an extra ``elif noise_val == 0`` branch
            # here that could never execute (the first test already covers
            # it); the effective behaviour is kept: an empty bin on either
            # side yields a ratio of 0.0.
            if speech_val == 0 or noise_val == 0:
                lr_histogram[key] = 0.0
            else:
                lr_histogram[key] = speech_val / noise_val

        # update pipeline with histogram data
        self.add_result('speech', speech_histogram)
        self.add_result('noise', noise_histogram)
        self.add_result('lr', lr_histogram)
        self.add_result('p', probabilities)
Пример #2
0
    def run(self):
        """Collect histograms of combined binary decisions of several inputs.

        Each pipeline element provides one labels object per name in
        ``self.inputs``.  For every frame, the speech flags (third item of
        the frame tuple) of all inputs form a key such as ``(1, 0)``.  The
        key's bin is incremented in the overall table and in either the
        speech or the noise table, depending on ``element.gt_labels``.

        Results stored via ``self.add_result``:

        * ``'speech'`` -- speech-frame counts divided by the speech total
        * ``'noise'``  -- noise-frame counts divided by the noise total
        * ``'lr'``     -- normalized speech value / normalized noise value
        * ``'p'``      -- overall counts divided by the number of frames

        All bins are initialized to ``COMB_MIN_VALUE``.

        Raises:
            ZeroDivisionError: with plain Python numbers, if the pipeline
                produced no speech frames or no noise frames, because the
                float counters are used as divisors.
        """
        super(ModFusionHistogram, self).run()
        speech_frames_count = 0.0
        noise_frames_count = 0.0

        # initialize histograms: one bin for each 0/1 decision tuple
        keys = list(product([0, 1], repeat=len(self.inputs)))
        speech_histogram = {key: COMB_MIN_VALUE for key in keys}
        noise_histogram = {key: COMB_MIN_VALUE for key in keys}
        lr_histogram = {key: COMB_MIN_VALUE for key in keys}
        probabilities = {key: COMB_MIN_VALUE for key in keys}

        for element in self.vadpy.pipeline:
            # labels object aka "lo"
            lo_list = [getattr(element, attr) for attr in self.inputs]
            gt_labels = element.gt_labels

            equalize_framelen(*(lo_list + [gt_labels]))
            frames_count = min(len(labels) for labels in lo_list)

            for i in range(frames_count):
                # i-th frame, (start, end, --> speech <-- ) tuple
                comb_vec = tuple(int(lo[i][2]) for lo in lo_list)

                probabilities[comb_vec] += 1
                if gt_labels[i][2]:
                    speech_histogram[comb_vec] += 1
                    speech_frames_count += 1
                else:
                    noise_histogram[comb_vec] += 1
                    noise_frames_count += 1

        # normalize histograms
        all_frames_count = speech_frames_count + noise_frames_count
        for key in probabilities:
            speech_histogram[key] /= speech_frames_count
            noise_histogram[key] /= noise_frames_count
            probabilities[key] /= all_frames_count

            speech_val = speech_histogram[key]
            noise_val = noise_histogram[key]

            # A former ``elif noise_val == 0`` branch was unreachable (already
            # covered by this condition) and has been dropped; results are
            # unchanged: a zero on either side gives a ratio of 0.0.
            if noise_val == 0 or speech_val == 0:
                lr_histogram[key] = 0.0
            else:
                lr_histogram[key] = speech_val / noise_val

        # update pipeline with histogram data
        self.add_result('speech', speech_histogram)
        self.add_result('noise', noise_histogram)
        self.add_result('lr', lr_histogram)
        self.add_result('p', probabilities)
Пример #3
0
    def run(self):
        """Compute agreement statistics between two detectors and a reference.

        ``self.inputs`` must name three different label attributes of a
        pipeline element, in the order reference (GT), first detector (VAD1),
        second detector (VAD2).  Every frame is put into one of four classes
        according to which detector matches the reference decision:

        * ``a`` -- both VAD1 and VAD2 match
        * ``b`` -- VAD1 differs, VAD2 matches
        * ``c`` -- VAD1 matches, VAD2 differs
        * ``d`` -- both differ

        The counts are summed over the pipeline, turned into fractions of
        the total frame count and combined into two results stored via
        ``self.add_result``:

        * ``'corrQ'`` -- ``(a*d - b*c) / (a*d + b*c)``
        * ``'corrp'`` -- ``(a*d - b*c) / sqrt((a+b)*(c+d)*(a+c)*(b+d))``

        Raises:
            AssertionError: if ``self.inputs`` does not hold three distinct
                names.
            ZeroDivisionError: if no frames were processed, or if either
                denominator above evaluates to zero.
        """
        super(ModCorrelation, self).run()
        assert len(set(self.inputs)) == 3, 'Q-statistics module requires three different inputs (GT, VAD1, VAD2)'

        # Frame counts over the whole pipeline; floats, so the divisions
        # below never truncate.
        a_total = 0.0
        b_total = 0.0
        c_total = 0.0
        d_total = 0.0

        for element in self.vadpy.pipeline:
            gt_labels = getattr(element, self.inputs[0])
            vad1_labels = getattr(element, self.inputs[1])
            vad2_labels = getattr(element, self.inputs[2])

            if len(set([len(gt_labels), len(vad1_labels), len(vad2_labels)])) != 1:
                log.warning('Labels length mismatch: {0} / {1} / {2}, equlizing frame lengths.'.format(
                        len(gt_labels), len(vad1_labels), len(vad2_labels) ))
                equalize_framelen(gt_labels, vad1_labels, vad2_labels)

            # Iterate the zip directly: on Python 3 zip() is a lazy iterator
            # and supports neither len() nor indexing.
            speech_data = zip((int(speech) for start, stop, speech in gt_labels),
                              (int(speech) for start, stop, speech in vad1_labels),
                              (int(speech) for start, stop, speech in vad2_labels))

            for val_gt, val_v1, val_v2 in speech_data:
                v1_matches = val_v1 == val_gt
                v2_matches = val_v2 == val_gt

                if v1_matches and v2_matches:
                    a_total += 1
                elif v2_matches:        # only VAD1 differs
                    b_total += 1
                elif v1_matches:        # only VAD2 differs
                    c_total += 1
                else:
                    d_total += 1

        # Convert counts to fractions of all processed frames.
        length = a_total + b_total + c_total + d_total
        a = a_total / length
        b = b_total / length
        c = c_total / length
        d = d_total / length

        corrQ = (a*d - b*c) / (a*d + b*c)
        corrp = (a*d - b*c) / math.sqrt((a + b)*(c + d)*(a + c)*(b + d))

        self.add_result('corrQ', corrQ)
        self.add_result('corrp', corrp)
Пример #4
0
    def run(self):
        """Measure how two detectors' errors relate, relative to a reference.

        The three names in ``self.inputs`` select, per pipeline element, the
        reference labels (GT) and the labels of two detectors (VAD1, VAD2).
        Frames are counted in four classes:

        * ``a`` -- VAD1 and VAD2 both equal the reference
        * ``b`` -- only VAD2 equals the reference
        * ``c`` -- only VAD1 equals the reference
        * ``d`` -- neither equals the reference

        After summing over the pipeline the counts are divided by the total
        number of frames, and two values are stored via ``self.add_result``:

        * ``'corrQ'`` -- ``(a*d - b*c) / (a*d + b*c)``
        * ``'corrp'`` -- ``(a*d - b*c) / sqrt((a+b)*(c+d)*(a+c)*(b+d))``

        Raises:
            AssertionError: if ``self.inputs`` does not contain three
                different names.
            ZeroDivisionError: if the pipeline yields no frames, or if one of
                the two denominators is zero.
        """
        super(ModCorrelation, self).run()
        assert len(
            set(self.inputs)
        ) == 3, 'Q-statistics module requires three different inputs (GT, VAD1, VAD2)'

        # Pipeline-wide frame counts, held as floats for the divisions below.
        a_total = 0.0
        b_total = 0.0
        c_total = 0.0
        d_total = 0.0

        for element in self.vadpy.pipeline:
            gt_labels = getattr(element, self.inputs[0])
            vad1_labels = getattr(element, self.inputs[1])
            vad2_labels = getattr(element, self.inputs[2])

            if len(set([len(gt_labels),
                        len(vad1_labels),
                        len(vad2_labels)])) != 1:
                log.warning(
                    'Labels length mismatch: {0} / {1} / {2}, equlizing frame lengths.'
                    .format(len(gt_labels), len(vad1_labels),
                            len(vad2_labels)))
                equalize_framelen(gt_labels, vad1_labels, vad2_labels)

            # zip() returns a one-shot iterator on Python 3, so the triples
            # are consumed in a single pass instead of via len()/indexing.
            triples = zip(
                (int(speech) for start, stop, speech in gt_labels),
                (int(speech) for start, stop, speech in vad1_labels),
                (int(speech) for start, stop, speech in vad2_labels))

            a = 0
            b = 0
            c = 0
            d = 0

            for val_gt, val_v1, val_v2 in triples:
                if val_v1 == val_gt and val_v2 == val_gt:
                    a += 1
                elif val_v1 != val_gt and val_v2 == val_gt:
                    b += 1
                elif val_v1 == val_gt and val_v2 != val_gt:
                    c += 1
                else:
                    d += 1

            a_total += a
            b_total += b
            c_total += c
            d_total += d

        # Turn the counts into fractions of all frames seen.
        length = a_total + b_total + c_total + d_total
        a = a_total / length
        b = b_total / length
        c = c_total / length
        d = d_total / length

        corrQ = (a * d - b * c) / (a * d + b * c)
        corrp = (a * d - b * c) / math.sqrt(
            (a + b) * (c + d) * (a + c) * (b + d))

        self.add_result('corrQ', corrQ)
        self.add_result('corrp', corrp)
Пример #5
0
    def run(self):
        """Compute miss rate and false-alarm rate of one label set against another.

        ``self.inputs`` must name two different label attributes of a
        pipeline element: the reference labels first, the labels under test
        second.  For frames that have ``self.ctx_size`` neighbours available
        on both sides, the tested decision is replaced by the rounded average
        of the decisions in the surrounding window of ``2 * ctx_size + 1``
        frames; other frames use their own decision.

        Results stored via ``self.add_result``:

        * ``'mr'``  -- false negatives / (true positives + false negatives)
        * ``'far'`` -- false positives / (true negatives + false positives)

        Raises:
            AssertionError: if ``self.inputs`` does not hold two distinct
                names.
            ZeroDivisionError: if the reference contains no speech frames or
                no noise frames (the totals are floats used as divisors).
        """
        super(ModConfusion, self).run()
        assert len(set(self.inputs)) == 2, 'Confusion module requires two different inputs'

        # Confusion counts accumulated over the whole pipeline.
        tp_total = 0.0
        tn_total = 0.0
        fp_total = 0.0
        fn_total = 0.0

        ctx_size = self.ctx_size
        window_len = float(ctx_size * 2 + 1)

        for element in self.vadpy.pipeline:
            gt_labels = getattr(element, self.inputs[0])
            vad_labels = getattr(element, self.inputs[1])

            if len(gt_labels) != len(vad_labels):
                log.warning('Labels length mismatch: {0} / {1}, equlizing frame lengths.'.format(
                        len(gt_labels), len(vad_labels)))

                equalize_framelen(gt_labels, vad_labels)

            # zip stops at the shorter of the two sequences.  The result is
            # materialized because it is measured and sliced below, which a
            # Python 3 zip iterator does not support.
            speechAB = list(zip((int(speech) for start, stop, speech in gt_labels),
                                (int(speech) for start, stop, speech in vad_labels)))
            frames_count = len(speechAB)

            # Calculate False alarm and Miss rate
            for i, (valA, own_decision) in enumerate(speechAB):
                # NOTE(review): frame i == ctx_size also has a full window but
                # is excluded by the strict lower bound; kept as in the
                # original -- confirm whether that is intended.
                if ctx_size < i < frames_count - ctx_size:
                    window = speechAB[i - ctx_size : i + ctx_size + 1]
                    valB = int(round(sum(vAB[1] for vAB in window) / window_len))
                else:
                    valB = own_decision

                if valA:                # reference says speech
                    if valB:
                        tp_total += 1   # true positive
                    else:
                        fn_total += 1   # false negative, miss
                else:
                    if valB:
                        fp_total += 1   # false positive, false alarm
                    else:
                        tn_total += 1   # true negative

        mr = fn_total / (tp_total + fn_total)
        far = fp_total / (tn_total + fp_total)

        self.add_result('mr', mr)
        self.add_result('far', far)
Пример #6
0
    def run(self):
        """Evaluate tested labels against reference labels (miss / false alarm).

        The first name in ``self.inputs`` selects the reference labels of a
        pipeline element, the second the labels being evaluated.  When a
        frame has ``self.ctx_size`` frames available on each side, the
        evaluated decision is the rounded mean of the decisions inside that
        window of ``2 * ctx_size + 1`` frames; otherwise the frame's own
        decision is used.

        Results stored via ``self.add_result``:

        * ``'mr'``  -- false negatives / (true positives + false negatives)
        * ``'far'`` -- false positives / (true negatives + false positives)

        Raises:
            AssertionError: if ``self.inputs`` does not contain two different
                names.
            ZeroDivisionError: if no reference speech frame or no reference
                noise frame was seen (the float totals are the divisors).
        """
        super(ModConfusion, self).run()
        assert len(set(self.inputs)
                   ) == 2, 'Confusion module requires two different inputs'

        # Pipeline-wide confusion counts.
        tp_total = 0.0
        tn_total = 0.0
        fp_total = 0.0
        fn_total = 0.0

        for element in self.vadpy.pipeline:
            # Generate a list of decision (speech/noise) pairs for Labels objects
            gt_labels = getattr(element, self.inputs[0])
            vad_labels = getattr(element, self.inputs[1])

            if len(gt_labels) != len(vad_labels):
                log.warning(
                    'Labels length mismatch: {0} / {1}, equlizing frame lengths.'
                    .format(len(gt_labels), len(vad_labels)))

                equalize_framelen(gt_labels, vad_labels)

            # zip pairs frames up to the shorter length; list() is required
            # because the pairs are indexed and sliced below and zip() is a
            # plain iterator on Python 3.
            speechAB = list(
                zip((int(speech) for start, stop, speech in gt_labels),
                    (int(speech) for start, stop, speech in vad_labels)))
            pairs_count = len(speechAB)

            # Calculate False alarm and Miss rate
            tp = 0
            tn = 0
            fp = 0
            fn = 0

            for i in range(pairs_count):
                valA = speechAB[i][0]

                # NOTE(review): the strict lower bound skips i == ctx_size
                # although a full window exists there; left unchanged --
                # confirm whether that is intended.
                if self.ctx_size < i < pairs_count - self.ctx_size:
                    first = i - self.ctx_size
                    last = i + self.ctx_size + 1
                    decisions_sum = sum(vAB[1] for vAB in speechAB[first:last])
                    valB = int(
                        round(decisions_sum / float(self.ctx_size * 2 + 1)))
                else:
                    valB = speechAB[i][1]

                if valA:  # reference frame is speech
                    if valB:
                        tp += 1  # true positive
                    else:
                        fn += 1  # false negative, miss
                else:
                    if valB:
                        fp += 1  # false positive, false alarm
                    else:
                        tn += 1  # true negative

            tp_total += tp
            tn_total += tn
            fp_total += fp
            fn_total += fn

        mr = fn_total / (tp_total + fn_total)
        far = fp_total / (tn_total + fp_total)

        self.add_result('mr', mr)
        self.add_result('far', far)