def test_sample_exception_mode(self):
    """Check that sample raises ValueError for an unrecognized mode."""
    failure_msg = "sample doesn't raise exception for unknown mode!"
    with self.assertRaises(ValueError, msg=failure_msg):
        Segmenter.sample(
            self.entire_text_seg,
            sample_size=3,
            mode='unknown_mode',
        )
def test_sample_exception_mode(self):
    """Check that an unknown sampling mode makes sample raise ValueError."""
    expect_value_error = self.assertRaises(
        ValueError,
        msg="sample doesn't raise exception for unknown mode!",
    )
    with expect_value_error:
        Segmenter.sample(
            self.entire_text_seg,
            sample_size=3,
            mode='unknown_mode',
        )
def test_sample_progress(self):
    """Check that the progress callback fires once per input segment."""

    def bump_counter():
        """Stand-in progress callback that tallies its invocations."""
        self.count += 1

    Segmenter.sample(
        self.char_seg,
        sample_size=4,
        mode='random',
        progress_callback=bump_counter,
    )
    # The test expects as many callback calls as there are input segments.
    expected_calls = len(self.char_seg)
    self.assertEqual(
        self.count,
        expected_calls,
        msg="sample doesn't track progress!",
    )
def test_sample_systematic_sample(self):
    """Check which segments sample keeps in 'systematic' mode."""
    sampled, _ = Segmenter.sample(
        self.char_seg,
        sample_size=3,
        mode='systematic',
    )
    start_positions = [segment.start for segment in sampled]
    self.assertEqual(
        start_positions,
        [0, 2, 4],
        msg="sample doesn't systematically sample segments!",
    )
def test_sample_progress(self):
    """Check that sample calls the progress callback for every segment."""

    def count_call():
        """Fake progress callback: increments the test's call counter."""
        self.count += 1

    sample_options = dict(
        sample_size=4,
        mode='random',
        progress_callback=count_call,
    )
    Segmenter.sample(self.char_seg, **sample_options)
    # One callback invocation is expected per segment of the input.
    self.assertEqual(
        self.count,
        len(self.char_seg),
        msg="sample doesn't track progress!",
    )
def test_sample_autonumber(self):
    """Check that sample numbers the sampled segments under the given key."""
    sampled, _ = Segmenter.sample(
        self.char_seg,
        sample_size=4,
        mode='random',
        auto_number_as='num',
    )
    assigned_numbers = [segment.annotations['num'] for segment in sampled]
    self.assertEqual(
        assigned_numbers,
        [1, 2, 3, 4],
        msg="sample doesn't autonumber input segments!",
    )
def test_sample_import_annotations_false(self):
    """Check that sample leaves annotations out when copy_annotations=False."""
    segmentation, _ = Segmenter.sample(
        self.single_letter_seg,
        sample_size=1,
        copy_annotations=False,
    )
    # The failure message used to be copied from the positive test
    # ("doesn't import annotations"), which describes the opposite problem:
    # this test fails when the 'b' annotation WAS copied.
    self.assertNotIn(
        'b',
        segmentation[0].annotations,
        msg="sample doesn't skip importing annotations!",
    )
def test_sample_import_annotations(self):
    """Check that sample copies annotations when copy_annotations=True."""
    sampled, _ = Segmenter.sample(
        self.single_letter_seg,
        sample_size=1,
        copy_annotations=True,
    )
    first_segment = sampled[0]
    self.assertEqual(
        first_segment.annotations['b'],
        '1',
        msg="sample doesn't import annotations!",
    )
def test_sample_random_sample(self):
    """Check that 'random' mode returns the requested number of segments."""
    requested_size = 4
    sampled, _ = Segmenter.sample(
        self.char_seg,
        sample_size=requested_size,
        mode='random',
    )
    self.assertEqual(
        len(sampled),
        requested_size,
        msg="sample doesn't randomly sample segments!",
    )
def test_sample_neg(self):
    """Check the size of the second (complementary) output of sample."""
    _, complement = Segmenter.sample(
        self.char_seg,
        sample_size=4,
        mode='random',
    )
    complement_size = len(complement)
    self.assertEqual(
        complement_size,
        2,
        msg="sample doesn't output complementary segmentation!",
    )
def test_sample_import_annotations_false(self):
    """Check that sample leaves annotations out when copy_annotations=False."""
    segmentation, _ = Segmenter.sample(
        self.single_letter_seg,
        sample_size=1,
        copy_annotations=False,
    )
    # The failure message used to be copied from the positive test
    # ("doesn't import annotations"), which describes the opposite problem:
    # this test fails when the 'b' annotation WAS copied.
    self.assertNotIn(
        'b',
        segmentation[0].annotations,
        msg="sample doesn't skip importing annotations!",
    )
def test_sample_systematic_sample(self):
    """Check the start positions of segments kept in 'systematic' mode."""
    expected_starts = [0, 2, 4]
    sampled, _ = Segmenter.sample(
        self.char_seg,
        sample_size=3,
        mode='systematic',
    )
    actual_starts = [segment.start for segment in sampled]
    self.assertEqual(
        actual_starts,
        expected_starts,
        msg="sample doesn't systematically sample segments!",
    )
def test_sample_import_annotations(self):
    """Check that the 'b' annotation is carried over by sample."""
    sampled, _ = Segmenter.sample(
        self.single_letter_seg,
        sample_size=1,
        copy_annotations=True,
    )
    copied_value = sampled[0].annotations['b']
    self.assertEqual(
        copied_value,
        '1',
        msg="sample doesn't import annotations!",
    )
def test_sample_neg(self):
    """Check that sample also returns the segments left out of the sample."""
    outputs = Segmenter.sample(
        self.char_seg,
        sample_size=4,
        mode='random',
    )
    # Second element of the returned pair is the complementary segmentation.
    _, left_out = outputs
    self.assertEqual(
        len(left_out),
        2,
        msg="sample doesn't output complementary segmentation!",
    )
def test_sample_random_sample(self):
    """Check the number of segments returned by sample in 'random' mode."""
    sampled, _ = Segmenter.sample(
        self.char_seg,
        sample_size=4,
        mode='random',
    )
    sampled_count = len(sampled)
    self.assertEqual(
        sampled_count,
        4,
        msg="sample doesn't randomly sample segments!",
    )
def test_sample_autonumber(self):
    """Check the 'num' annotations that sample assigns when auto-numbering."""
    number_key = 'num'
    sampled, _ = Segmenter.sample(
        self.char_seg,
        sample_size=4,
        mode='random',
        auto_number_as=number_key,
    )
    numbers = [segment.annotations[number_key] for segment in sampled]
    self.assertEqual(
        numbers,
        [1, 2, 3, 4],
        msg="sample doesn't autonumber input segments!",
    )
def sendData(self):
    """(Have LTTL.Segmenter) perform the actual selection.

    Validates the current settings, runs the configured selection method
    (regex, sample or threshold when advanced settings are displayed, regex
    only otherwise) and sends the two resulting segmentations on the
    'Selected data' and 'Discarded data' channels.

    Whenever a check fails (no input, empty or invalid regex, non-positive
    sample size, missing auto-numbering key), a message is shown in the
    info box, None is sent on both channels and the method returns early.
    """

    def clear_outputs():
        """Send None on both output channels."""
        self.send('Selected data', None, self)
        self.send('Discarded data', None, self)

    def invalid_regex_message(re_error):
        """Build the info box text for a regex that failed to compile."""
        try:
            return u'Please enter a valid regex (error: %s).' % re_error.msg
        except AttributeError:
            # The exception carries no 'msg' attribute: use a generic text.
            return u'Please enter a valid regex.'

    # Check that there's something on input...
    if not self.segmentation:
        self.infoBox.setText(u'Widget needs input.', 'warning')
        clear_outputs()
        return

    # TODO: remove message 'No label was provided.' from docs

    # Advanced settings...
    if self.displayAdvancedSettings:

        # If mode is Regex...
        if self.method == u'Regex':

            # Check that regex is not empty...
            if not self.regex:
                self.infoBox.setText(u'Please enter a regex.', 'warning')
                clear_outputs()
                return

            # Prepare regex...
            # Flags are handed to re.compile() instead of being appended to
            # the pattern as an inline '(?...)' group: since Python 3.11,
            # global inline flags are only accepted at the very start of a
            # pattern, so a trailing group makes every regex fail to compile.
            flags = 0
            if self.ignoreCase:
                flags |= re.IGNORECASE
            if self.unicodeDependent:
                flags |= re.UNICODE
            if self.multiline:
                flags |= re.MULTILINE
            if self.dotAll:
                flags |= re.DOTALL
            try:
                regex = re.compile(self.regex, flags)
            except re.error as re_error:
                self.infoBox.setText(
                    invalid_regex_message(re_error), 'error'
                )
                clear_outputs()
                return

            # Get number of iterations...
            num_iterations = len(self.segmentation)

        # Else if mode is Sample...
        elif self.method == u'Sample':

            # Get sample size...
            if self.sampleSizeMode == u'Proportion':
                sampleSize = iround(
                    len(self.segmentation) * (self.samplingRate / 100)
                )
            else:
                sampleSize = self.sampleSize
            if sampleSize <= 0:
                self.infoBox.setText(
                    message='Please enter a larger sample size',
                    state="error",
                )
                clear_outputs()
                return

            # Get number of iterations...
            num_iterations = len(self.segmentation)

        # Else if mode is Threshold...
        elif self.method == u'Threshold':

            # Get min and max count...
            if self.thresholdMode == u'Proportion':
                minCount = iround(
                    math.ceil(
                        len(self.segmentation) * (self.minProportion / 100)
                    )
                )
                maxCount = iround(
                    math.floor(
                        len(self.segmentation) * (self.maxProportion / 100)
                    )
                )
            else:
                minCount = self.minCount
                maxCount = self.maxCount
            # A threshold that is not applied falls back to the widest bound.
            if not self.applyMinThreshold:
                minCount = 1
            if not self.applyMaxThreshold:
                maxCount = len(self.segmentation)

            # Get number of iterations...
            num_iterations = len(self.segmentation)

        # Check that autoNumberKey is not empty (if necessary)...
        if self.autoNumber:
            if self.autoNumberKey:
                autoNumberKey = self.autoNumberKey
            else:
                self.infoBox.setText(
                    u'Please enter an annotation key for auto-numbering.',
                    'warning'
                )
                clear_outputs()
                return
        else:
            autoNumberKey = None

        # Perform selection...
        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=num_iterations)
        if self.method == u'Regex':
            # The '(none)' entry stands for "no annotation key".
            regexAnnotationKeyParam = self.regexAnnotationKey
            if regexAnnotationKeyParam == u'(none)':
                regexAnnotationKeyParam = None
            (selected_data, discarded_data) = Segmenter.select(
                segmentation=self.segmentation,
                regex=regex,
                mode=self.regexMode.lower(),
                annotation_key=regexAnnotationKeyParam or None,
                label=self.captionTitle,
                copy_annotations=self.copyAnnotations,
                auto_number_as=autoNumberKey,
                progress_callback=progressBar.advance,
            )
        elif self.method == u'Sample':
            (selected_data, discarded_data) = Segmenter.sample(
                segmentation=self.segmentation,
                sample_size=sampleSize,
                mode='random',
                label=self.captionTitle,
                copy_annotations=self.copyAnnotations,
                auto_number_as=autoNumberKey,
                progress_callback=progressBar.advance,
            )
        elif self.method == u'Threshold':
            if ((minCount == 1 or not self.applyMinThreshold) and
                    (maxCount == len(self.segmentation) or
                     not self.applyMaxThreshold)):
                # Bounds at their widest values: bypass the input instead
                # of thresholding it, and send None as discarded data.
                selected_data = Segmenter.bypass(
                    segmentation=self.segmentation,
                    label=self.captionTitle,
                )
                discarded_data = None
            else:
                thresholdAnnotationKeyParam = self.thresholdAnnotationKey
                if thresholdAnnotationKeyParam == u'(none)':
                    thresholdAnnotationKeyParam = None
                (selected_data, discarded_data) = Segmenter.threshold(
                    segmentation=self.segmentation,
                    annotation_key=(thresholdAnnotationKeyParam or None),
                    min_count=minCount,
                    max_count=maxCount,
                    label=self.captionTitle,
                    copy_annotations=self.copyAnnotations,
                    auto_number_as=autoNumberKey,
                    progress_callback=progressBar.advance,
                )

    # Basic settings:
    else:

        # Check that regex is not empty...
        if not self.regex:
            self.infoBox.setText(u'Please enter a regex.', 'warning')
            clear_outputs()
            return

        # Get number of iterations...
        num_iterations = len(self.segmentation)

        # Perform selection...
        self.infoBox.setText(u"Processing, please wait...", "warning")
        self.controlArea.setDisabled(True)
        progressBar = ProgressBar(self, iterations=num_iterations)
        regexAnnotationKeyParam = self.regexAnnotationKey
        if regexAnnotationKeyParam == u'(none)':
            regexAnnotationKeyParam = None
        try:
            (selected_data, discarded_data) = Segmenter.select(
                segmentation=self.segmentation,
                # re.UNICODE is passed as a flag rather than appended as
                # '(?u)', which Python 3.11+ rejects at the end of a pattern.
                regex=re.compile(self.regex, re.UNICODE),
                mode=self.regexMode.lower(),
                annotation_key=regexAnnotationKeyParam or None,
                label=self.captionTitle,
                copy_annotations=True,
                auto_number_as=None,
                progress_callback=progressBar.advance,
            )
        except re.error as re_error:
            self.infoBox.setText(invalid_regex_message(re_error), 'error')
            clear_outputs()
            # The progress bar was already created and the control area
            # disabled: undo both before leaving.
            progressBar.finish()
            self.controlArea.setDisabled(False)
            return

    progressBar.finish()
    self.controlArea.setDisabled(False)

    message = u'%i segment@p sent to output.' % len(selected_data)
    message = pluralize(message, len(selected_data))
    self.infoBox.setText(message)

    self.send('Selected data', selected_data, self)
    self.send('Discarded data', discarded_data, self)
    self.sendButton.resetSettingsChangedFlag()