# Imports assumed from the PCT (Phonological CorpusTools) package layout.
from corpustools.corpus.classes import EnvironmentFilter
from corpustools.contextmanagers import (CanonicalVariantContext,
                                         MostFrequentVariantContext,
                                         SeparatedTokensVariantContext,
                                         WeightedVariantContext)
from corpustools.prod.pred_of_dist import calc_prod, calc_prod_all_envs


def test_prod_wordtokens_type(specified_discourse_corpus):
    seg1 = 's'
    seg2 = 'ʃ'
    # Expected predictability-of-distribution score per environment.
    expected = {"-voc": 0.0,
                "+voc,+high": 0.863120568566631,
                "+voc,-high": 0.9852281360342515,
                "#": 0.0}
    env_list = []
    expected_envs = {}
    for k, v in expected.items():
        if k != '#':
            segs = specified_discourse_corpus.lexicon.features_to_segments(k)
        else:
            segs = k  # word boundary: no feature lookup needed
        env = EnvironmentFilter([seg1, seg2], None, [segs])
        env_list.append(env)
        expected_envs[env] = v
    expected_envs["AVG"] = 0.9241743523004413
    type_or_token = 'type'
    tier = 'transcription'
    with MostFrequentVariantContext(specified_discourse_corpus.lexicon,
                                    tier, type_or_token) as c:
        result = calc_prod(c, env_list, all_info=False)
        for k, v in result.items():
            # abs() so the test also fails when the result overshoots.
            assert abs(expected_envs[k] - v) < 0.001
    with WeightedVariantContext(specified_discourse_corpus.lexicon,
                                tier, type_or_token) as c:
        result = calc_prod(c, env_list, all_info=False)
        for k, v in result.items():
            assert abs(expected_envs[k] - v) < 0.001
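# Reference sketch (not part of the original suite): calc_prod scores each
# environment with the binary Shannon entropy of the s/ʃ frequency split in
# that environment. Assuming type-frequency splits of 2-vs-5 and 3-vs-4
# (counts inferred from the expected constants above, not read from the
# corpus fixture), the values can be reproduced by hand:
import math


def binary_entropy(a, b):
    """H(p) = -p*log2(p) - (1-p)*log2(1-p) for p = a / (a + b)."""
    p = a / (a + b)
    if p in (0.0, 1.0):
        # A contrast that never varies is perfectly predictable: entropy 0.
        return 0.0
    return -(p * math.log2(p) + (1 - p) * math.log2(1 - p))


assert abs(binary_entropy(2, 5) - 0.863120568566631) < 1e-12   # "+voc,+high"
assert abs(binary_entropy(3, 4) - 0.9852281360342515) < 1e-12  # "+voc,-high"
assert binary_entropy(0, 7) == 0.0                             # "-voc" and "#"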
def test_prod_wordtokens_token(specified_discourse_corpus):
    seg1 = 's'
    seg2 = 'ʃ'
    expected = {"-voc": 0.0,
                # 0.9321115676166747 was expected here; flagged as a
                # possible error in the original ("Error!!!?!?!?").
                "+voc,+high": 0.8631205,
                "+voc,-high": 0.9660096062568557,
                "#": 0.0}
    env_list = []
    expected_envs = {}
    for k, v in expected.items():
        if k != '#':
            segs = specified_discourse_corpus.lexicon.features_to_segments(k)
        else:
            segs = k
        env = EnvironmentFilter([seg1, seg2], None, [segs])
        env_list.append(env)
        expected_envs[env] = v
    expected_envs["AVG"] = 0.9241743523004413
    type_or_token = 'token'
    tier = 'transcription'
    with MostFrequentVariantContext(specified_discourse_corpus.lexicon,
                                    tier, type_or_token) as c:
        result = calc_prod(c, env_list)
        for k, v in result.items():
            assert abs(expected_envs[k] - v) < 0.001
    with WeightedVariantContext(specified_discourse_corpus.lexicon,
                                tier, type_or_token) as c:
        result = calc_prod(c, env_list)
        for k, v in result.items():
            assert abs(expected_envs[k] - v) < 0.001
def test_prod_token(specified_test_corpus):
    seg1 = 's'
    seg2 = 'ʃ'
    expected = {"-voc": 0.0,
                "+voc,+high": 0.9321115676166747,
                "+voc,-high": 0.9660096062568557,
                "#": 0.0}
    env_list = []
    expected_envs = {}
    for k, v in expected.items():
        if k != '#':
            segs = specified_test_corpus.features_to_segments(k)
        else:
            segs = k
        env = EnvironmentFilter([seg1, seg2], None, [segs])
        env_list.append(env)
        expected_envs[env] = v
    expected_envs["AVG"] = 0.9241743523004413
    type_or_token = 'token'
    tier = 'transcription'
    with CanonicalVariantContext(specified_test_corpus, tier,
                                 type_or_token) as c:
        result = calc_prod(c, env_list)
        for k, v in result.items():
            assert abs(expected_envs[k] - v) < 0.001
def run(self):
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    # Map the GUI's context choice to the matching variant-context manager.
    if context == ContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == ContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    elif context == ContextWidget.separate_value:
        cm = SeparatedTokensVariantContext
    elif context == ContextWidget.relative_value:
        cm = WeightedVariantContext
    with cm(kwargs['corpus'], kwargs['sequence_type'],
            kwargs['type_token'],
            frequency_threshold=kwargs['frequency_cutoff']) as c:
        try:
            envs = kwargs.pop('envs', None)
            for pair in kwargs['segment_pairs']:
                ordered_pair = pair
                if envs is not None:
                    # Re-target each environment filter at the current pair.
                    for env in envs:
                        env.middle = set(pair)
                    res = calc_prod(c, envs, kwargs['strict'],
                                    ordered_pair=ordered_pair,
                                    all_info=True,
                                    stop_check=kwargs['stop_check'],
                                    call_back=kwargs['call_back'])
                else:
                    res = calc_prod_all_envs(c, pair[0], pair[1],
                                             all_info=True,
                                             stop_check=kwargs['stop_check'],
                                             call_back=kwargs['call_back'])
                if self.stopped:
                    break
                self.results.append(res)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
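# Design note (a hypothetical alternative, not part of the original code):
# the if/elif ladder in run() is a plain value-to-class dispatch, and it
# leaves `cm` unbound if an unrecognized context value ever arrives. A
# lookup table states the mapping once and fails loudly with a KeyError:
CONTEXT_CLASSES = {
    ContextWidget.canonical_value: CanonicalVariantContext,
    ContextWidget.frequent_value: MostFrequentVariantContext,
    ContextWidget.separate_value: SeparatedTokensVariantContext,
    ContextWidget.relative_value: WeightedVariantContext,
}
# Inside run() this would replace the ladder with:
#     cm = CONTEXT_CLASSES[context]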