def test_deepreload():
    "Test that dreload does deep reloads and skips excluded modules."
    # NOTE: exercises IPython's deep-reload machinery. The order of the
    # dreload() calls and the global sys.modules state are significant.
    with TemporaryDirectory() as tmpdir:
        with prepended_to_syspath(tmpdir):
            tmpdirpath = Path(tmpdir)
            # Two throwaway modules: B imports A, so deep-reloading B would
            # normally reload A as well.
            with open(tmpdirpath / "A.py", "w") as f:
                f.write("class Object:\n pass\nok = True\n")
            with open(tmpdirpath / "B.py", "w") as f:
                f.write("import A\nassert A.ok, 'we are fine'\n")
            import A
            import B

            # Test that A is not reloaded.
            obj = A.Object()
            dreload(B, exclude=["A"])
            assert isinstance(obj, A.Object) is True

            # Test that an import failure will not blow-up us.
            A.ok = False
            with pytest.raises(AssertionError, match="we are fine"):
                dreload(B, exclude=["A"])
            # The bookkeeping dict must be drained even after the failure.
            assert len(modules_reloading) == 0
            assert not A.ok

            # Test that A is reloaded.
            obj = A.Object()
            A.ok = False
            dreload(B)
            assert A.ok
            assert isinstance(obj, A.Object) is False
def run(train, test, language, answer):
    """Train/evaluate a classifier per lexelt and write results to `answer`.

    :param train: dict mapping lexelt -> training instances
    :param test: dict mapping lexelt -> test instances
    :param language: corpus language name (e.g. 'English')
    :param answer: output path handed to A.print_results
    """
    results = {}
    total = len(train)
    counter = 1
    s = build_s(train, language)
    # The POS tagger is currently disabled for all languages; only the
    # stemmer is used by extract_features.
    tagger = None
    stemmer = set_stemmer(language)
    for lexelt in train:
        train_features, y_train = extract_features(
            train[lexelt], language, tagger, stemmer, s[lexelt])
        test_features, _ = extract_features(
            test[lexelt], language, tagger, stemmer, s[lexelt])
        X_train, X_test = vectorize(train_features, test_features)
        X_train_new, X_test_new = feature_selection(
            X_train, X_test, y_train, language)
        results[lexelt] = classify(X_train_new, X_test_new, y_train)
        # print() call replaces the Python 2 print statement so the module
        # also runs under Python 3.
        print(str(counter) + ' out of ' + str(total) + ' completed')
        counter += 1
    A.print_results(results, answer)
def classify(X_train, X_test, y_train):
    '''
    Train the best classifier on (X_train, and y_train) then predict X_test labels

    :param X_train: A dictionary with the following structure
            { instance_id: [w_1 count, w_2 count, ...], ... }
    :param X_test: A dictionary with the following structure
            { instance_id: [w_1 count, w_2 count, ...], ... }
    :param y_train: A dictionary with the following structure
            { instance_id : sense_id }
    :return: results: a list of tuples (instance_id, label) where labels are
             predicted by the best classifier
    '''
    # Create x, y lists from the training data.
    x_train_list, y_train_list = A.x_y_lists_from_training(X_train, y_train)
    # Train a linear SVM. print() calls replace the Python 2 print
    # statements so the module also runs under Python 3.
    print('training svm...')
    svm_clf = svm.LinearSVC()
    svm_clf.fit(x_train_list, y_train_list)
    # Predict labels for the test instances.
    print('predicting svm...')
    svm_results = A.predictions_from_data(svm_clf, X_test)
    return svm_results
def main(aligned_sents):
    """Train a Berkeley aligner (10 iterations), dump its alignments to
    ba.txt and print the average AER over the first 50 sentence pairs."""
    aligner = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, aligner, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, aligner, 50)
    print('Berkeley Aligner')
    print('---------------------------')
    print('Average AER: {0:.3f}\n'.format(avg_aer))
def run(train, test, language, answer):
    """Per-lexelt classification with a language-specific POS tagger.

    English loads the maxent treebank tagger; Spanish/Catalan train a
    unigram tagger over the cess corpora; anything else uses no tagger.

    :param answer: output path handed to A.print_results
    """
    results = {}
    if language == 'English':
        _POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'
        tagger = load(_POS_TAGGER)
    elif language == 'Spanish':
        tagger = ut(cess_esp.tagged_sents())
    elif language == 'Catalan':
        tagger = ut(cess_cat.tagged_sents())
    else:
        # BUG FIX: `tagger` was previously left undefined for any other
        # language, which raised NameError below; fall back to no tagger.
        tagger = None
    for lexelt in train:
        train_features, y_train = extract_features(train[lexelt], language, tagger)
        test_features, _ = extract_features(test[lexelt], language, tagger)
        X_train, X_test = vectorize(train_features, test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test, y_train)
        results[lexelt] = classify(X_train_new, X_test_new, y_train)
    A.print_results(results, answer)
def main(aligned_sents):
    """Train the Berkeley aligner, save its output, and report average AER."""
    ba = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)
    # Header lines, then the formatted metric.
    for line in ('Berkeley Aligner', '---------------------------'):
        print(line)
    print('Average AER: {0:.3f}\n'.format(avg_aer))
def new(line):
    """Parse one instruction line, dispatching to whichever of modules A or C
    accepts it; raise SyntaxError for unrecognized instructions."""
    line = line.strip()
    for module in (A, C):
        if module.accept(line):
            return module.new(line)
    raise SyntaxError("Unknown instruction", (None, -1, 0, line))
def assertIO(self, input, output):
    """Feed `input` to stdin, run A.resolve(), and assert that what it wrote
    to stdout (minus the trailing newline) equals `output`."""
    stdout, stdin = sys.stdout, sys.stdin
    sys.stdout, sys.stdin = StringIO(), StringIO(input)
    try:
        A.resolve()
        sys.stdout.seek(0)
        out = sys.stdout.read()[:-1]  # drop the trailing newline
    finally:
        # BUG FIX: restore the real streams even if A.resolve() raises;
        # previously an exception left stdout/stdin hijacked for later tests.
        sys.stdout, sys.stdin = stdout, stdin
    self.assertEqual(out, output)
def main(aligned_sents):
    """Train/evaluate the Berkeley aligner and report Part B elapsed time."""
    # perf_counter() replaces time.clock(), which was removed in Python 3.8.
    t0 = time.perf_counter()
    ba = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)
    print('Berkeley Aligner')
    print('---------------------------')
    print('Average AER: {0:.3f}\n'.format(avg_aer))
    print("Part B time: " + str(time.perf_counter() - t0) + ' sec')
def main(aligned_sents):
    """Train the regular Berkeley model, log alignment errors, print AER."""
    # print() calls replace Python 2 print statements (Python 3 compatible).
    print('training regular berkeley model')
    iters = 10
    ba = BerkeleyAligner(aligned_sents, iters)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50, 'berk_errs.txt')
    print('Berkeley Aligner')
    print('iterations:' + str(iters))
    print('---------------------------')
    print('Average AER: {0:.3f}\n\n\n'.format(avg_aer))
def main(aligned_sents):
    """Train the Berkeley aligner; print per-sentence AER for the first 20
    pairs and the 50-pair average."""
    ba = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)
    # Report AER for each of the first 20 sentences.
    for i, aligned_sent in enumerate(aligned_sents[:20]):
        # print() replaces the Python 2 print statement.
        print("ba , aer of sentence " + str(i) + " " + str(A.compute_avg_aer([aligned_sent], ba, 1)))
    print('Berkeley Aligner')
    print('---------------------------')
    print('Average AER: {0:.3f}\n'.format(avg_aer))
def main(aligned_sents):
    """Train/evaluate the Berkeley aligner and print Part B wall time."""
    # perf_counter() replaces time.clock(), which was removed in Python 3.8.
    t0 = time.perf_counter()
    ba = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)
    print('Berkeley Aligner')
    print('---------------------------')
    print('Average AER: {0:.3f}\n'.format(avg_aer))
    print()  # blank line (was a bare Python 2 `print` statement)
    print("Part B time: " + str(time.perf_counter() - t0) + ' sec')
def main(aligned_sents):
    """Train/evaluate the Berkeley aligner, reporting total elapsed time."""
    t0 = time.time()
    # print() calls replace Python 2 print statements (Python 3 compatible).
    print('Starting Berkeley Aligner')
    ba = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)
    print('Berkeley Aligner')
    print('---------------------------')
    print('Average AER: {0:.3f}\n'.format(avg_aer))
    t1 = time.time()
    print('Total B Time: ' + str(t1 - t0))
def run(train, test, language, answer):
    """Run the classification pipeline for every lexelt and write the
    results via A.print_results."""
    def _solve(train_insts, test_insts):
        # One lexelt: featurize, vectorize, select features, classify.
        feats_train, labels = extract_features(train_insts)
        feats_test, _ = extract_features(test_insts)
        mat_train, mat_test = vectorize(feats_train, feats_test)
        sel_train, sel_test = feature_selection(mat_train, mat_test, labels)
        return classify(sel_train, sel_test, labels)

    results = {lexelt: _solve(train[lexelt], test[lexelt]) for lexelt in train}
    A.print_results(results, answer)
def run(train, test, language, answer):
    """Classify every lexelt with language-aware features and print results."""
    results = {}
    for lexelt in train:
        feats_tr, labels = extract_features(train[lexelt], language)
        feats_te, _ = extract_features(test[lexelt], language)
        vec_tr, vec_te = vectorize(feats_tr, feats_te)
        sel_tr, sel_te = feature_selection(vec_tr, vec_te, labels)
        results[lexelt] = classify(sel_tr, sel_te, labels)
    A.print_results(results, answer)
def testCode(self):
    """Exercise construction of A.a2 elements and re-assignment of a
    B.b1 container's a2elt attribute."""
    first = A.a2('a2m-value', 'legal')
    self.assertEqual('a2m-value', first.a2member)
    self.assertEqual(B.bst.legal, first.a2b)
    container = B.b1(first, 'legal')
    self.assertEqual(container.a2elt, first)
    # Replace the element and confirm the container reflects the new value.
    second = A.a2('anotherValue', 'legal')
    container.a2elt = second
    self.assertEqual('anotherValue', container.a2elt.a2member)
    self.assertEqual(B.bst.legal, container.a2elt.a2b)
def run(train, test, language, answer):
    """Per-lexelt classification; the answer filename gets a language
    suffix before results are printed."""
    results = {}
    for lexelt in train:
        feats_train, labels = extract_features(train[lexelt])
        feats_test, _ = extract_features(test[lexelt])
        mat_train, mat_test = vectorize(feats_train, feats_test)
        sel_train, sel_test = feature_selection(mat_train, mat_test, labels)
        results[lexelt] = classify(sel_train, sel_test, labels)
    answer += '-' + language  # Courtesy Pushpendra pratap
    A.print_results(results, answer)
def testCode (self):
    """Validate A.a2 element construction and swapping the element held by
    a B.b1 instance."""
    original = A.a2('a2m-value', 'legal')
    self.assertEqual('a2m-value', original.a2member)
    self.assertEqual(B.bst.legal, original.a2b)
    holder = B.b1(original, 'legal')
    self.assertEqual(holder.a2elt, original)
    # Swap in a replacement element and re-check both attributes.
    replacement = A.a2('anotherValue', 'legal')
    holder.a2elt = replacement
    self.assertEqual('anotherValue', holder.a2elt.a2member)
    self.assertEqual(B.bst.legal, holder.a2elt.a2b)
def test_alter_entire_A(self, A_mock):
    """With module A wholly mocked, its attributes can be freely replaced."""
    def _patched_hi():
        return AlterModule.ALTERED_MSG

    A_mock.say_hi = _patched_hi
    A_mock.say_goodbye.return_value = -1
    self.assertEqual(A.say_hi(), AlterModule.ALTERED_MSG)
    # The real module A has no 'say_goodbye', but adding one to the mock
    # makes it appear to exist.
    self.assertEqual(A.say_goodbye(), -1)
    # Since the entire module is patched, the original member class `Foo`
    # is replaced by a MagicMock too.
    self.assertIsInstance(A.Foo, mock.MagicMock)
def run(train, test, language, answer):
    """Pipeline using A.build_s for per-lexelt collocation features.

    :param answer: output path handed to A.print_results
    """
    results = {}
    # print() calls replace Python 2 print statements (Python 3 compatible).
    print('Calling A')
    s = A.build_s(train)
    for lexelt in train:
        train_features, y_train = extract_features(train[lexelt], language, lexelt, s[lexelt])
        test_features, _ = extract_features(test[lexelt], language, lexelt, s[lexelt])
        X_train, X_test = vectorize(train_features, test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test, y_train, language)
        results[lexelt] = classify(X_train_new, X_test_new, y_train)
    A.print_results(results, answer)
    print('ended')
def run(train, test, language, answer):
    """Classify every lexelt, showing a carriage-return progress meter on
    stdout while the pipeline runs."""
    results = {}
    n_items = len(train)
    for idx, lexelt in enumerate(train):
        percent = int(float(idx) / n_items * 100)
        sys.stdout.write('\r{} / {} ({}%)'.format(idx, n_items, percent))
        sys.stdout.flush()
        feats_train, labels = extract_features(train[lexelt], language)
        feats_test, _ = extract_features(test[lexelt], language)
        mat_train, mat_test = vectorize(feats_train, feats_test)
        sel_train, sel_test = feature_selection(mat_train, mat_test, labels)
        results[lexelt] = classify(sel_train, sel_test, labels)
    A.print_results(results, answer)
def test_impor(self):
    """Verify importlib.reload() picks up a module whose file changed on disk.

    A.py is swapped with B.py via os.rename, A is reloaded, and then the
    swap is undone; the rename order below is load-bearing.
    """
    import importlib
    import os
    import A
    A.a()
    # Swap A.py out for B.py so the next reload sees different source.
    os.rename('A.py', 'A_change.py')
    os.rename('B.py', 'A.py')
    importlib.reload(A)
    A.a()  # now runs B.py's version of a()
    # Restore the original files.
    os.rename('A.py', 'B.py')
    os.rename('A_change.py', 'A.py')
def add_k_word_features_count_to_vector(vector, left_tokens, right_tokens, window_size, head=None):
    """Count the k nearest context words into `vector`; when `head` is given,
    mark it with a count of 1.

    :param vector: mutable dict of feature name -> count (updated in place)
    """
    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window_size)
    for word in words:
        # dict.get replaces the conditional-expression double lookup.
        vector[word] = vector.get(word, 0) + 1
    if head:
        vector[head] = 1
def calcular(self, tipo_algoritmo):
    """Run the search algorithm selected by `tipo_algoritmo` (1-5) and
    return the start-to-goal path, or None for an unknown code."""
    algoritmos = {
        1: Preferente_amplitud,     # breadth-first
        2: Costo_uniforme,          # uniform cost
        3: Preferente_profundidad,  # depth-first
        4: Avara,                   # greedy
        5: A,                       # A*
    }
    clase = algoritmos.get(tipo_algoritmo)
    if clase is None:
        return None
    self.algoritmo = clase(self.entrada, self.nodo_inicial, self.nodo_meta)
    return list(reversed(self.algoritmo.camino_final))
def main():
    """Read the test case, run A.calc, group similar terms and write them
    to the result file."""
    input_path = './testcase.in'
    output_path = './result.out'
    N, K, c, v = _input(input_path)
    S, L, c, v = A.calc(N, K, c, v)
    groups = similar_terms(c, v)
    _output(S, len(groups), groups, output_path)
def G(text):
    """Prompt helper (minified click code): echo `text` without a newline,
    then read a line via the hidden or visible prompt function, raising
    Abort on Ctrl-C/EOF."""
    prompt_fn = hidden_prompt_func if F else visible_prompt_func
    try:
        echo(text, nl=_B, err=C)
        return prompt_fn('')
    except (KeyboardInterrupt, EOFError):
        # For hidden prompts, emit a newline so the cursor leaves the line.
        if F:
            echo(_A, err=C)
        raise Abort()
def run(train, test, language, answer):
    """Map the language name to its short code, then classify every lexelt
    with relevance-weighted features."""
    results = {}
    # Short codes expected by extract_features.
    codes = {'English': 'en', 'Spanish': 'spa', 'Catalan': 'cat'}
    language = codes.get(language, language)
    for lexelt in train:
        rel_dict = relevance(train[lexelt])
        feats_train, labels = extract_features(train[lexelt], language, rel_dict=rel_dict)
        feats_test, _ = extract_features(test[lexelt], language, rel_dict=rel_dict)
        mat_train, mat_test = vectorize(feats_train, feats_test)
        sel_train, sel_test = feature_selection(mat_train, mat_test, labels)
        results[lexelt] = classify(sel_train, sel_test, labels)
    A.print_results(results, answer)
def main():
    """CLI entry point: parse the train/test files and run parts A and B."""
    if len(sys.argv) != 7:
        # print() replaces the Python 2 print statement.
        print('Usage: python main.py <input_training file> <input test file> <output KNN file> <output SVM file> <output best file> <language>')
        sys.exit(0)
    train_file = sys.argv[1]
    test_file = sys.argv[2]
    knn_answer = sys.argv[3]
    svm_answer = sys.argv[4]
    best_answer = sys.argv[5]
    language = sys.argv[6]
    train_set = parse_data(train_file)
    test_set = parse_data(test_file)
    A.run(train_set, test_set, language, knn_answer, svm_answer)
    B.run(train_set, test_set, language, best_answer)
def classify(X_train, X_test, y_train):
    '''
    Train the best classifier on (X_train, and y_train) then predict X_test labels

    :param X_train: A dictionary with the following structure
            { instance_id: [w_1 count, w_2 count, ...], ... }
    :param X_test: A dictionary with the following structure
            { instance_id: [w_1 count, w_2 count, ...], ... }
    :param y_train: A dictionary with the following structure
            { instance_id : sense_id }
    :return: results: a list of tuples (instance_id, label) where labels are predicted
             by the best classifier
    '''
    results = []
    # Build feature matrices (plus outcome labels / instance keys) from the dicts.
    trainVectors, _, trainOutcomes = A.getFeatureVectors(X_train, y_train)
    testVectors, testKeys = A.getFeatureVectors(X_test)

    # Select Features: recursive feature elimination with a linear SVM,
    # dropping 10 features per step.
    svm_clf = svm.LinearSVC()
    selector = RFE(svm_clf, verbose=0, step=10)
    selector = selector.fit(trainVectors, trainOutcomes)
    featMask = selector.get_support()

    # Mask Features: keep only the selected columns of the test matrix.
    # NOTE(review): assumes testVectors is a dense ndarray (row slicing with
    # a boolean mask) — confirm A.getFeatureVectors never returns sparse.
    nItems = testVectors.shape[0]
    testVectorsNew = np.zeros((nItems, np.sum(featMask)))
    for k in range(nItems):
        testVectorsNew[k, :] = testVectors[k, :][featMask]

    # Predict with the estimator that RFE refit on the reduced feature set.
    model = selector.estimator_
    svm_predict = model.predict(testVectorsNew)
    #svm_clf.fit(trainVectorsNew, trainOutcomes)
    #svm_predict = svm_clf.predict(testVectors)
    results = [(testKeys[k], svm_predict[k]) for k in range(len(testKeys))]
    return results
def run(train, test, language, answer):
    """Full pipeline with tagger, stemmer, relevance counts and the shared
    per-lexelt `s` table; feature_selection here also remaps labels/ids."""
    results = {}
    tagger = get_tagger(language)
    stemmer = get_stemmer(language)
    s = build_s(train, stemmer)
    for lexelt in train:
        word_counts, sense_counts = get_relavence_info(train[lexelt], stemmer)
        feats_train, labels = extract_features(
            train[lexelt], tagger, word_counts, sense_counts, stemmer, s[lexelt])
        feats_test, _ = extract_features(
            test[lexelt], tagger, word_counts, sense_counts, stemmer, s[lexelt])
        mat_train, mat_test = vectorize(feats_train, feats_test)
        sel_train, sel_test, labels_new, test_ids = feature_selection(
            mat_train, mat_test, labels)
        results[lexelt] = classify(sel_train, sel_test, labels_new, test_ids)
    A.print_results(results, answer)
def run(train, test, language, answer):
    """Classify each lexelt using a per-lexelt feature set plus the shared
    `s` table; feature selection is intentionally skipped here."""
    results = {}
    s = build_s(train, language)
    for lexelt in train:
        feature_set = build_feas_set(train[lexelt], language)
        feats_train, labels = extract_features(
            train[lexelt], language, feature_set, s[lexelt])
        feats_test, _ = extract_features(
            test[lexelt], language, feature_set, s[lexelt])
        mat_train, mat_test = vectorize(feats_train, feats_test)
        results[lexelt] = classify(mat_train, mat_test, labels)
    A.print_results(results, answer)
def main(aligned_sents):
    """Run the 'better' Berkeley aligner if implemented and report its AER."""
    ba = BetterBerkeleyAligner(aligned_sents, 10)
    if ba.t is None:
        # print() replaces the Python 2 print statement.
        print("Better Berkeley Aligner Not Implemented")
    else:
        avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)
        print('Better Berkeley Aligner')
        print('---------------------------')
        print('Average AER: {0:.3f}\n'.format(avg_aer))
def echo_via_pager(text_or_generator, color=_A):
    """Page text through the system pager (minified click code).

    Accepts a string, an iterable of chunks, or a generator function; every
    chunk is coerced to str lazily before being handed to the pager.
    """
    color = resolve_color_default(color)
    if inspect.isgeneratorfunction(text_or_generator):
        items = text_or_generator()
    elif isinstance(text_or_generator, str):
        items = [text_or_generator]
    else:
        items = iter(text_or_generator)
    # Coerce each chunk to str lazily.
    text_gen = (chunk if isinstance(chunk, str) else str(chunk) for chunk in items)
    from ._termui_impl import pager
    return pager(itertools.chain(text_gen, '\n'), color)
def main(aligned_sents):
    """Run the 'better' Berkeley aligner if implemented and report its AER."""
    ba = BetterBerkeleyAligner(aligned_sents, 10)
    if ba.t is None:
        # print() replaces the Python 2 print statement.
        print("Better Berkeley Aligner Not Implemented")
    else:
        avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)
        print('Better Berkeley Aligner')
        print('---------------------------')
        print('Average AER: {0:.3f}\n'.format(avg_aer))
def run(train, test, language, answer):
    """Per-lexelt classification over a precomputed feature set and the
    shared `s` table; the feature-selection step is deliberately bypassed."""
    results = {}
    s = build_s(train, language)
    for lexelt in train:
        selected_feas = build_feas_set(train[lexelt], language)
        tr_feats, labels = extract_features(
            train[lexelt], language, selected_feas, s[lexelt])
        te_feats, _ = extract_features(
            test[lexelt], language, selected_feas, s[lexelt])
        vtrain, vtest = vectorize(tr_feats, te_feats)
        results[lexelt] = classify(vtrain, vtest, labels)
    A.print_results(results, answer)
def run(train, test, language, answer):
    """B pipeline with Snowball stemming plus feature selection for
    English/Spanish (Catalan has no Snowball stemmer, so it is skipped)."""
    # print() replaces the Python 2 print statement.
    print('running B for language:', language)
    results = {}
    if language.lower() in ['english', 'spanish']:
        extract_features.stemmer = nltk.SnowballStemmer(language.lower())
    for lexelt in train:
        train_features, y_train = extract_features(train[lexelt], language=language)
        test_features, _ = extract_features(test[lexelt], language=language)
        X_train, X_test = vectorize(train_features, test_features)
        if language.lower() in ['english', 'spanish']:
            X_train_new, X_test_new = feature_selection(X_train, X_test, y_train)
        else:
            X_train_new = X_train
            X_test_new = X_test
        results[lexelt] = classify(X_train_new, X_test_new, y_train)
    A.print_results(results, answer)
def main(aligned_sents):
    """Stem the input sentences, then train/evaluate the better aligner."""
    stemmed_sents = stem_input_sents(aligned_sents)
    ba = BetterBerkeleyAligner(stemmed_sents, 6)  # 6 EM iterations
    if ba.t is None:
        # print() replaces the Python 2 print statement.
        print("Better Berkeley Aligner Not Implemented")
    else:
        avg_aer = A.compute_avg_aer(stemmed_sents, ba, 50, 'ec_errs.txt')
        print('Better Berkeley Aligner')
        print('---------------------------')
        print('Average AER: {0:.3f}\n'.format(avg_aer))
def run(train, test, language, answer):
    """Stemmed/stop-worded pipeline; the answer filename gets a language
    suffix before results are printed."""
    stemmer, stop, tokenizer = stemming_and_stop_words(language)  # Courtesy Pushpendra pratap
    results = {}
    for lexelt in train:
        feats_train, labels = extract_features(
            train[lexelt], stemmer, stop, tokenizer)  # Courtesy Pushpendra pratap
        feats_test, _ = extract_features(
            test[lexelt], stemmer, stop, tokenizer)  # Courtesy Pushpendra pratap
        mat_train, mat_test = vectorize(feats_train, feats_test)
        sel_train, sel_test = feature_selection(mat_train, mat_test, labels)
        results[lexelt] = classify(sel_train, sel_test, labels)
    answer += '-' + language  # Courtesy Pushpendra pratap
    A.print_results(results, answer)
def test_deepreload():
    "Test that dreload does deep reloads and skips excluded modules."
    # Two throwaway modules are written into a temp dir prepended to
    # sys.path; B imports A, so deep-reloading B normally reloads A too.
    with TemporaryDirectory() as tmpdir:
        with prepended_to_syspath(tmpdir):
            with open(os.path.join(tmpdir, "A.py"), "w") as f:
                f.write("class Object(object):\n pass\n")
            with open(os.path.join(tmpdir, "B.py"), "w") as f:
                f.write("import A\n")
            import A
            import B

            # Test that A is not reloaded.
            obj = A.Object()
            dreload(B, exclude=["A"])
            # Same class object -> isinstance still holds after the reload.
            nt.assert_true(isinstance(obj, A.Object))

            # Test that A is reloaded.
            obj = A.Object()
            dreload(B)
            # A.Object was replaced by the reload, so the old instance no
            # longer matches the new class.
            nt.assert_false(isinstance(obj, A.Object))
def test_alter_A_Foo(self, Foo_mock): # Because `new` is not specified for `patch`, `target` is replaced with # a `MagicMock` object and is passed in as `Foo_mock` so the following # "isinstance" check can succeed. self.assertIsInstance(A.Foo, mock.MagicMock) # Always remember that `A.Foo` is a `MagicMock` now and we can't change # this fact. Therefore, instantiating it is the same as instantiating # `MagicMock`, and you will get another `MagicMock` object. self.assertIsInstance(A.Foo(), mock.MagicMock) # The modification to `Foo_mock` is applied to this `MagicMock` class # itself, not to the instances that are instantiated from it. Therefore, # the following `introduce` method is only accessible via `A.Foo` the # class, not via `A.Foo()`. Foo_mock.introduce.return_value = "Hello, my name is Alice." self.assertEqual(A.Foo.introduce(), "Hello, my name is Alice.") self.assertIsInstance(A.Foo().introduce(), mock.MagicMock) # Because we only patch `Foo`, `A.say_hi` is not affected. self.assertEqual(A.say_hi(), A.ORIGINAL_HI_MSG)
def main(aligned_sents):
    """Time and evaluate the 'better' Berkeley aligner."""
    # perf_counter() replaces time.clock(), which was removed in Python 3.8.
    t0 = time.perf_counter()
    ba = BetterBerkeleyAligner(aligned_sents, 10)
    if ba.t is None:
        # print() calls replace Python 2 print statements.
        print("Better Berkeley Aligner Not Implemented")
    else:
        avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)
        print('Better Berkeley Aligner')
        print('---------------------------')
        print('Average AER: {0:.3f}\n'.format(avg_aer))
    print("Part EC time: " + str(time.perf_counter() - t0) + ' sec')
def test_deepreload():
    "Test that dreload does deep reloads and skips excluded modules."
    # Two throwaway modules live in a temp dir prepended to sys.path;
    # B imports A, so deep-reloading B normally reloads A as well.
    with TemporaryDirectory() as tmpdir:
        with prepended_to_syspath(tmpdir):
            tmpdirpath = Path(tmpdir)
            with open(tmpdirpath / "A.py", "w") as f:
                f.write("class Object(object):\n pass\n")
            with open(tmpdirpath / "B.py", "w") as f:
                f.write("import A\n")
            import A
            import B

            # Test that A is not reloaded.
            obj = A.Object()
            dreload(B, exclude=["A"])
            # Class object unchanged -> isinstance still holds.
            assert isinstance(obj, A.Object) is True

            # Test that A is reloaded.
            obj = A.Object()
            dreload(B)
            # A.Object was replaced, so the stale instance no longer matches.
            assert isinstance(obj, A.Object) is False
def run(train, test, language, answer):
    """B pipeline with Snowball stemming; feature selection is applied only
    for English/Spanish (Catalan lacks a Snowball stemmer)."""
    # print() replaces the Python 2 print statement.
    print('running B for language:', language)
    results = {}
    if language.lower() in ['english', 'spanish']:
        extract_features.stemmer = nltk.SnowballStemmer(language.lower())
    for lexelt in train:
        train_features, y_train = extract_features(train[lexelt], language=language)
        test_features, _ = extract_features(test[lexelt], language=language)
        X_train, X_test = vectorize(train_features, test_features)
        if language.lower() in ['english', 'spanish']:
            X_train_new, X_test_new = feature_selection(
                X_train, X_test, y_train)
        else:
            X_train_new = X_train
            X_test_new = X_test
        results[lexelt] = classify(X_train_new, X_test_new, y_train)
    A.print_results(results, answer)
def run(train, test, language, answer):
    """B pipeline: optional POS tagging, stemming and relevancy filtering
    per lexelt, then vectorize / select features / classify."""
    results = {}
    tagger = None
    if POS_WINDOW > 0 or POS_HEAD or FORCE_TAGGER_USE:
        tagger = UniversalTagger.EnglishTagger()
        # BUG FIX: `language is 'Spanish'` compared object identity, which
        # is unreliable for strings; use equality instead.
        if language == 'Spanish':
            tagger = UniversalTagger.SpanishTagger()
        if language == 'Catalan':
            tagger = UniversalTagger.CatalanTagger()
    stemmer = None
    if STEM:
        stemmer = PorterStemmer()
    for lexelt in train:
        relevance_key = None
        if USE_RELEVANCY_SCORES:
            relevance_key = top_relevant_words_from_data(train[lexelt])
        train_features, y_train = extract_features(train[lexelt], tagger, stemmer, relevance_key)
        test_features, _ = extract_features(test[lexelt], tagger, stemmer, relevance_key)
        X_train, X_test = vectorize(train_features, test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test, y_train)
        results[lexelt] = classify(X_train_new, X_test_new, y_train)
    A.print_results(results, answer)
def coherencia_radio(cluster):
    """
    Function: coherencia_radio
    Descrp: Cluster coherence via the radius method: the largest Euclidean
            distance from any instance to the cluster centroid.
    Args: -> cluster: list of instances forming the cluster.
    Return: -> coherence value
    """
    centroide = A.get_centroide(cluster)
    # Maximum of the radii (every instance to the centroid).
    return max(distance.euclidean(instancia, centroide) for instancia in cluster)
def add_k_word_features_to_vector(vector, left_tokens, right_tokens, window_size, head=None):
    """Add the k nearest context words as positional w_b*/w_a* features on
    `vector`, plus w_head when `head` is given (updates `vector` in place)."""
    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window_size)
    # BUG FIX: floor division — `len(words) / 2` yields a float on Python 3
    # and cannot be used as a slice index.
    mid = len(words) // 2
    left = words[:mid]
    right = words[mid:]
    for idx, word in enumerate(left):
        # w_b1 is the word immediately before the head, counting outward.
        key = 'w_b' + str(len(left) - idx)
        vector[key] = word
    for idx, word in enumerate(right):
        key = 'w_a' + str(idx + 1)
        vector[key] = word
    if head:
        key = 'w_head'
        vector[key] = head
def test_regression(self):
    """Fuzz `hack1` against the reference A.solve on random int arrays.

    Runs SIZE random cases; on the first mismatch it prints the failing
    input and exits.
    """
    from random import random as rnd
    from time import time
    # (the old `from A import solve` was unused — the code calls A.solve)
    SIZE = 1000  # number of random regression iterations
    t1 = time()
    for _ in range(SIZE):  # range replaces Python-2-only xrange
        # random array of 1..100 ints, each drawn from [0, 100)
        array = [(int(rnd() * (100 - 0)) + 0) for _ in range(int(rnd() * (100 - 1) + 1))]
        true = A.solve(array)
        try:
            self.assertEqual(true, hack1(array))
        except Exception:
            # Narrowed from a bare `except:`; dump the failing input and bail.
            print(array)
            exit()
    t2 = time()
    print("regressionOK : " + str(t2 - t1))
def main(num):
    '''Fetch the full epidemic-stats blob (everything inside the [] of the
    getAreaStat script tag) and write per-province / per-city text files.

    :param num: integer used as the output file name (``<num>.txt``).
    '''
    url = 'https://3g.dxy.cn/newh5/view/pneumonia?scene=2&clicktime=1579578460&enterid=1579578460&from=groupmessage&isappinstalled=0'
    res = request.urlopen(url)
    soup = BeautifulSoup(res, "html.parser")
    # The page embeds the area statistics as a JS array in this script tag.
    js = soup.findAll('script', attrs={'id': 'getAreaStat'})
    ans = js[0].text
    # Slice out the outermost [...] of the embedded array.
    head = ans.index('[')
    tail = ans[::-1].index(']')
    tail = len(ans) - tail
    text = ans[head:tail]
    #text = text.replace(":"," : ")
    '''替换关键字'''
    # (above: "replace keywords") Column headers and their display names.
    title = ["cityName", "confirmedCount", "curedCount", "deadCount"]
    see = ["省级行政区", "发现病例", "已治愈", "已死亡", "市(县)"]
    dictnaru = dict(zip(title, see))
    # NOTE(review): `dictnaru` and this first `li` assignment are never used
    # afterwards (li is immediately overwritten) — confirm they can go.
    li = text.split('{')
    li = A.sholi(text)
    # Sort provinces by the pinyin of their first character.
    ll = sorted(
        li, key=lambda x: Pinyin().get_pinyin(x.provinceName).split('-')[0])
    '''获取大量数据以便实现数据可视化'''
    # (above: "collect lots of data for later visualization")
    Time = time.strftime("%Y-%m-%d-%H", time.localtime())
    print("已创建%s的文件" % (Time))
    path_p = 'C:\\Users\\HIKKI\\Desktop\\province\\'
    path_c = 'C:\\Users\\HIKKI\\Desktop\\city\\'
    name = str(num) + '.txt'
    f_p = open(path_p + name, 'w', encoding='utf-8')
    f_c = open(path_c + name, 'w', encoding='utf-8')
    f_p.write(Time + '\n')
    f_c.write(Time + '\n')
    # Header rows for the province and city files.
    f_p.write(see[0] + ' ' + see[1] + ' ' + see[2] + ' ' + see[3] + '\n')
    f_c.write(see[4] + ' ' + see[1] + ' ' + see[2] + ' ' + see[3] + '\n')
    for i in ll:
        f_p.write(i.provinceName + ' ' + i.conformCount + ' ' + i.curedCount + ' ' + i.deadCount + '\n')
    # NOTE(review): only the first province's city list is written — confirm
    # whether all provinces' cities were intended.
    for i in ll[0].city:
        for j in i:
            f_c.write(i[j] + ' ')
        f_c.write('\n')
    f_c.close()
    f_p.close()
    pass
def coherencia_promedio(clustering):
    """
    Function: coherencia_promedio
    Descrp: Coherence of a set of clusters via SUM(dist(c, i)^2) / N over
            every instance i, where c is its cluster's centroid.
    Args: -> clustering: dict/collection of clusters.
    Return: -> coherence value
    """
    cuadrados = []
    for clave in clustering:
        centroide = A.get_centroide(clustering[clave])
        # Squared centroid-to-instance distances for this cluster.
        cuadrados.extend(
            distance.euclidean(centroide, instancia) ** 2
            for instancia in clustering[clave])
    return sum(cuadrados) / (len(cuadrados) * 1.0)
def test_patch_B_say_hi(self): B_mock = mock.MagicMock(return_value="Hey, B here!") # NOTE(ywen): You can specify the full reference to the mocking target. with mock.patch(target="__main__.A.B.say_hi", new=B_mock) as m1: self.assertIs(m1, B_mock) self.assertEqual(A.say_hi(), A.ORIGINAL_HI_MSG) self.assertEqual(A.introduce_B(), "Hey, B here!") self.assertEqual(A.say_hi_for_C(), C.ORIGINAL_HI_MSG) # NOTE(ywen): You can specify the partial reference to the mocking # target. with mock.patch(target="B.say_hi", new=B_mock) as m2: self.assertIs(m2, B_mock) self.assertEqual(A.say_hi(), A.ORIGINAL_HI_MSG) self.assertEqual(A.introduce_B(), "Hey, B here!") self.assertEqual(A.say_hi_for_C(), C.ORIGINAL_HI_MSG)
def runTests():
    """Run every input puzzle through solvers A-E and collect stats.

    :return: list of rows [case_no, difficulty, A.time, A.count, B.time,
             B.count, C.time, C.count, D.time, D.count, E.time, E.count]
    """
    fileList = getInputFiles()
    listoflists = []
    # Cases 1-5 are easy, 6-10 moderate, everything afterwards hard.
    difficulties = ("easy", "moderate", "hard")
    for no_test_case, file_name in enumerate(fileList, start=1):
        tier = min((no_test_case - 1) // 5, 2)
        sublist = [no_test_case, difficulties[tier]]
        puzzle = extract_puzzle(file_name)
        # Same puzzle through each solver module; the five copy-pasted
        # solver stanzas are collapsed into one loop.
        for algo in (algoA, algoB, algoC, algoD, algoE):
            solver = algo.Sudoku(puzzle)
            solver.solve()
            sublist.extend([solver.time, solver.count])
        listoflists.append(sublist)
    return listoflists
def comp_move(self):
    """Pick the computer's move with the engine selected by self.comp:
    'A' -> minimax, 'B' -> alpha-beta over depth-3 game-tree nodes,
    anything else -> C.master. Returns an (x, y) move."""
    if self.comp == 'A':
        move = A.minimax(self)
    elif self.comp == 'B':
        vals = {}
        nodes = []
        highest = 0
        best = ()
        moveset = self.valid_moves('W')
        # Build one tree node per candidate White move.
        for move in moveset:
            n = tree.Node(self, 3, 'W', move[0], move[1], moveset)  # default depth level 3
            nodes.append(n)
        # Score each node with alpha-beta and keep the best-scoring move.
        for n in nodes:
            ret = B.alphabeta(n, 3, -10000, 10000, 'W')
            x, y = n.get_x(), n.get_y()
            vals[(x,y)] = ret
            if ret > highest:
                highest = ret
                best = (x,y)
        # NOTE(review): if every alpha-beta value is <= 0, `best` remains
        # the empty tuple — confirm callers tolerate that.
        return best
    else:
        move = C.master(self)
    return move
def add_k_word_POS_features_to_vector(vector, left_tokens, right_tokens, window_size, tagger, head_tag=None):
    """Add POS tags of the k nearest context words as pos_b*/pos_a* features
    on `vector`, plus pos_head when `head_tag` is given (in-place update)."""
    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window_size)
    # BUG FIX: floor division — `len(words) / 2` yields a float on Python 3
    # and cannot be used as a slice index.
    mid = len(words) // 2
    left = words[:mid]
    right = words[mid:]
    left_tagged = tagger.tag(left)
    right_tagged = tagger.tag(right)
    for idx, (word, tag) in enumerate(left_tagged):
        # pos_b1 is the tag immediately before the head, counting outward.
        key = 'pos_b' + str(len(left_tagged) - idx)
        vector[key] = tag
    for idx, (word, tag) in enumerate(right_tagged):
        key = 'pos_a' + str(idx + 1)
        vector[key] = tag
    # Add POS tag for the head word.
    if head_tag:
        key = 'pos_head'
        word, tag = head_tag[0]
        vector[key] = tag
def add_synonym_counts(tagger, left_tokens, right_tokens, vector, window):
    """Count WordNet synset names (and optionally hyponyms/hypernyms) for
    the k nearest context words into `vector` (updated in place)."""
    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window)

    def _bump(name):
        # dict.get replaces the conditional-expression double lookup.
        vector[name] = vector.get(name, 0) + 1

    for w in words:
        # Tag the single word and map the Penn tag to a WordNet POS.
        word, tag = tagger.tag([w])[0]
        tag = wordnet_tag_from_penn_tag(tag)
        for synset in wordnet.synsets(w, pos=tag):
            if ADD_SYNONYMS:
                _bump(synset.name())
            if ADD_HYPONYMS:
                for hypo in synset.hyponyms():
                    _bump(hypo.name())
            if ADD_HYPERNYMS:
                for hypper in synset.hypernyms():
                    _bump(hypper.name())
def test_alter_A_2(self):
    """mock.patch(create=True) can add an attribute the target lacks;
    create=False raises AttributeError for missing attributes."""
    def _say_goodbye():
        return "Goodbye!"

    # When patching the module, we can set 'create' to 'True' so it creates
    # the attribute that doesn't exist in the target module. In the example
    # below, an 'AttributeError' is thrown if 'create' is 'False'.
    with mock.patch(
        target='__main__.A.say_goodbye',
        new=_say_goodbye,
        create=True,
    ) as m:
        # The mocked target is returned as `m`. In this case, it is the
        # `_say_goodbye` function.
        self.assertEqual(m, _say_goodbye)
        self.assertEqual(A.say_goodbye(), "Goodbye!")

    # Because `create` is False, `mock.patch` will raise `AttributeError`
    # because `A` does not have `say_goodbye`. This helps prevent typos.
    attribute_error_raised = False
    try:
        # NOTE(ywen): `mock.patch` doesn't raise `AttributeError` unless
        # evaluated as a context manager. In other words, calling
        # `self.assertRaises(AttributeError, mock.patch, target=...)` or
        # `mock.patch(target=...)` won't raise `AttributeError`. That's why
        # I can't use `self.assertRaises` to test the behavior.
        with mock.patch(
            target='__main__.A.say_goodbye',
            new=_say_goodbye,
            create=False,
        ):
            self.fail("AttributeError is not raised.")
    except AttributeError:
        attribute_error_raised = True
    self.assertTrue(attribute_error_raised)
senseid = '' # if train then parse sense, if test then senseid = '' try: senseid = inst.getElementsByTagName('answer')[0].getAttribute('senseid') senseid = replace_accented(senseid).encode('ascii') except: senseid = '' data[lexelt].append((instance_id, left, head, right, senseid)) return data if __name__ == '__main__': if len(sys.argv) != 7: print 'Usage: python main.py <input_training file> <input test file> <output KNN file> <output SVM file> <output best file> <language>' sys.exit(0) train_file = sys.argv[1] test_file = sys.argv[2] knn_answer = sys.argv[3] svm_answer = sys.argv[4] best_answer = sys.argv[5] language = sys.argv[6] train_set = parse_data(train_file) test_set = parse_data(test_file) A.run(train_set, test_set, language, knn_answer, svm_answer) B.run(train_set, test_set, language, best_answer)