Example #1
def test_deepreload():
    "Test that dreload does deep reloads and skips excluded modules."
    with TemporaryDirectory() as tmpdir:
        with prepended_to_syspath(tmpdir):
            tmpdirpath = Path(tmpdir)
            with open(tmpdirpath / "A.py", "w") as f:
                f.write("class Object:\n    pass\nok = True\n")
            with open(tmpdirpath / "B.py", "w") as f:
                f.write("import A\nassert A.ok, 'we are fine'\n")
            import A
            import B

            # Test that A is not reloaded.
            obj = A.Object()
            dreload(B, exclude=["A"])
            assert isinstance(obj, A.Object) is True

            # Test that an import failure will not blow up on us.
            A.ok = False
            with pytest.raises(AssertionError, match="we are fine"):
                dreload(B, exclude=["A"])
            assert len(modules_reloading) == 0
            assert not A.ok

            # Test that A is reloaded.
            obj = A.Object()
            A.ok = False
            dreload(B)
            assert A.ok
            assert isinstance(obj, A.Object) is False
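
Note: dreload in these tests is IPython's deep-reload helper. A minimal sketch of calling it directly, assuming a placeholder module mymod (the import path is the real IPython one; modules_reloading used above is only exported by recent releases):

from IPython.lib.deepreload import reload as dreload

import mymod

dreload(mymod)                      # recursively reload mymod and the modules it imports
dreload(mymod, exclude=["numpy"])   # passing exclude replaces the default skip list
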
Example #2
def run(train, test, language, answer):
    results = {}

    total = len(train)
    counter = 1

    s = build_s(train, language)
    #s = {}

    # if language == 'English':
    #     tagger = set_tagger(language)
    # else:
    tagger = None
    #tagger = set_tagger(language)
    stemmer = set_stemmer(language)

    for lexelt in train:
        train_features, y_train = extract_features(train[lexelt], language, tagger, stemmer, s[lexelt])
        test_features, _ = extract_features(test[lexelt], language, tagger, stemmer, s[lexelt])

        X_train, X_test = vectorize(train_features,test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test,y_train, language)
        results[lexelt] = classify(X_train_new, X_test_new,y_train)

        print str(counter) + ' out of ' + str(total) + ' completed'
        counter += 1

    A.print_results(results, answer)
Example #3
def classify(X_train, X_test, y_train):
    '''
    Train the best classifier on (X_train, and y_train) then predict X_test labels

    :param X_train: A dictionary with the following structure
            { instance_id: [w_1 count, w_2 count, ...],
            ...
            }

    :param X_test: A dictionary with the following structure
            { instance_id: [w_1 count, w_2 count, ...],
            ...
            }

    :param y_train: A dictionary with the following structure
            { instance_id : sense_id }

    :return: results: a list of tuples (instance_id, label) where labels are predicted by the best classifier
    '''

    # create x, y lists from training data

    x_train_list, y_train_list = A.x_y_lists_from_training(X_train, y_train)

    # train svm
    print 'training svm...'
    svm_clf = svm.LinearSVC()
    svm_clf.fit(x_train_list, y_train_list)

    # predict svm results
    print 'predicting svm...'
    svm_results = A.predictions_from_data(svm_clf, X_test)

    return svm_results
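
As context for the dictionary shapes described in the docstring, here is a toy, self-contained sketch; the A.* helpers above are project code, so this fits the LinearSVC on the raw lists instead (all names and data below are invented for illustration):

from sklearn import svm

X_train = {'inst1': [1, 0, 2], 'inst2': [0, 3, 1]}   # instance_id -> word counts
y_train = {'inst1': 'sense_a', 'inst2': 'sense_b'}   # instance_id -> sense_id
X_test = {'inst3': [1, 1, 0]}

ids = list(X_train)
clf = svm.LinearSVC()
clf.fit([X_train[i] for i in ids], [y_train[i] for i in ids])
results = [(i, clf.predict([X_test[i]])[0]) for i in X_test]  # [(instance_id, label)]
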
Example #4
def main(aligned_sents):
    ba = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)
    print('Berkeley Aligner')
    print('---------------------------')
    print('Average AER: {0:.3f}\n'.format(avg_aer))
Example #5
def run(train, test, language, answer):
    results = {}
    if language == 'English':
        _POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'
        tagger = load(_POS_TAGGER)
    elif language == 'Spanish':
        tagger = ut(cess_esp.tagged_sents())
    elif language == 'Catalan':
        tagger  = ut(cess_cat.tagged_sents())

    for lexelt in train:

        train_features, y_train = extract_features(train[lexelt],language,tagger)
        test_features, _ = extract_features(test[lexelt],language,tagger)

        X_train, X_test = vectorize(train_features,test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test,y_train)
        results[lexelt] = classify(X_train_new, X_test_new,y_train)
    """
    B1.c
    for lexelt in train:
        features = getBestWords(train[lexelt], 30)
        train_features = countFeature(features, train[lexelt])
        _, y_train = extract_features(train[lexelt], language)
        test_features = countFeature(features, test[lexelt])

        X_train, X_test = vectorize(train_features, test_features)
        results[lexelt] = classify(X_train, X_test, y_train)
    B1.c
    """
    A.print_results(results, answer)
Example #6
def main(aligned_sents):
    ba = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)
    print ('Berkeley Aligner')
    print ('---------------------------')
    print('Average AER: {0:.3f}\n'.format(avg_aer))
Example #7
def new(line):
    line = line.strip()
    if A.accept(line):
        return A.new(line)
    elif C.accept(line):
        return C.new(line)
    else:
        raise SyntaxError("Unknown instruction", (None, -1, 0, line))
Example #8
    def assertIO(self, input, output):
        stdout, stdin = sys.stdout, sys.stdin
        sys.stdout, sys.stdin = StringIO(), StringIO(input)
        A.resolve()
        sys.stdout.seek(0)
        out = sys.stdout.read()[:-1]
        sys.stdout, sys.stdin = stdout, stdin
        self.assertEqual(out, output)
Example #9
def new(line):
    line = line.strip()
    if A.accept(line):
        return A.new(line)
    elif C.accept(line):
        return C.new(line)
    else:
        raise SyntaxError("Unknown instruction", (None, -1, 0, line))
Example #10
def main(aligned_sents):
    time.clock()
    ba = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)

    print ('Berkeley Aligner')
    print ('---------------------------')
    print('Average AER: {0:.3f}\n'.format(avg_aer))
    print "Part B time: " + str(time.clock()) + ' sec'
Example #11
def main(aligned_sents):
    print 'training regular berkeley model'
    iters = 10
    ba = BerkeleyAligner(aligned_sents, iters)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50, 'berk_errs.txt')

    print ('Berkeley Aligner')
    print ('iterations:' + str(iters))
    print ('---------------------------')
    print('Average AER: {0:.3f}\n\n\n'.format(avg_aer))
Example #12
File: B.py Project: actondong/NLP
def main(aligned_sents):
    ba = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)
    #Report aer for each sentence of first 20 sentences
    for i, aligned_sent in enumerate(aligned_sents[:20]):
        print "ba , aer of sentence " + str(i) + " " + str(A.compute_avg_aer([aligned_sent], ba, 1))

    print ('Berkeley Aligner')
    print ('---------------------------')
    print('Average AER: {0:.3f}\n'.format(avg_aer))
Example #13
def main(aligned_sents):
    time.clock()
    ba = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)

    print('Berkeley Aligner')
    print('---------------------------')
    print('Average AER: {0:.3f}\n'.format(avg_aer))
    print
    print "Part B time: " + str(time.clock()) + ' sec'
Example #14
def main(aligned_sents):
    t0 = time.time()
    print 'Starting Berkeley Aligner'
    ba = BerkeleyAligner(aligned_sents, 10)
    A.save_model_output(aligned_sents, ba, "ba.txt")
    avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)

    print ('Berkeley Aligner')
    print ('---------------------------')
    print('Average AER: {0:.3f}\n'.format(avg_aer))
    t1 = time.time()
    print 'Total B Time: ' + str(t1 - t0)
Example #15
File: B.py Project: jpgard/NLP
def run(train, test, language, answer):
    results = {}

    for lexelt in train:

        train_features, y_train = extract_features(train[lexelt])
        test_features, _ = extract_features(test[lexelt])

        X_train, X_test = vectorize(train_features,test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test,y_train)
        results[lexelt] = classify(X_train_new, X_test_new,y_train)

    A.print_results(results, answer)
Example #16
def run(train, test, language, answer):
    results = {}

    for lexelt in train:

        train_features, y_train = extract_features(train[lexelt], language)
        test_features, _ = extract_features(test[lexelt], language)

        X_train, X_test = vectorize(train_features, test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test, y_train)
        results[lexelt] = classify(X_train_new, X_test_new, y_train)

    A.print_results(results, answer)
Example #17
    def testCode(self):

        x = A.a2('a2m-value', 'legal')
        self.assertEqual('a2m-value', x.a2member)
        self.assertEqual(B.bst.legal, x.a2b)

        myobj = B.b1(x, 'legal')
        self.assertEqual(myobj.a2elt, x)

        x2 = A.a2('anotherValue', 'legal')
        myobj.a2elt = x2
        self.assertEqual('anotherValue', myobj.a2elt.a2member)
        self.assertEqual(B.bst.legal, myobj.a2elt.a2b)
Example #18
def run(train, test, language, answer):
    results = {}

    for lexelt in train:
        train_features, y_train = extract_features(train[lexelt])
        test_features, _ = extract_features(test[lexelt])

        X_train, X_test = vectorize(train_features, test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test, y_train)
        results[lexelt] = classify(X_train_new, X_test_new, y_train)

    answer = answer + '-' + language  # Courtesy Pushpendra pratap
    A.print_results(results, answer)
Example #19
    def testCode(self):

        x = A.a2('a2m-value', 'legal')
        self.assertEqual('a2m-value', x.a2member)
        self.assertEqual(B.bst.legal, x.a2b)

        myobj = B.b1(x, 'legal')
        self.assertEqual(myobj.a2elt, x)

        x2 = A.a2('anotherValue', 'legal')
        myobj.a2elt = x2
        self.assertEqual('anotherValue', myobj.a2elt.a2member)
        self.assertEqual(B.bst.legal, myobj.a2elt.a2b)
Example #20
    def test_alter_entire_A(self, A_mock):
        def _altered_say_hi():
            return AlterModule.ALTERED_MSG

        A_mock.say_hi = _altered_say_hi
        A_mock.say_goodbye.return_value = -1
        self.assertEqual(A.say_hi(), AlterModule.ALTERED_MSG)
        # Module A does not have 'say_goodbye' but we add one to its mock which
        # makes it seem to have this method.
        self.assertEqual(A.say_goodbye(), -1)

        # Because the entire module `A` is patched, its original member class
        # `Foo` is replaced by the MagicMock class, too.
        self.assertIsInstance(A.Foo, mock.MagicMock)
Example #21
def run(train, test, language, answer):
    results = {}
    #calc_high_frequency_words(train)
    print 'Calling A'
    s = A.build_s(train)
    for lexelt in train:

        train_features, y_train = extract_features(train[lexelt],language,lexelt,s[lexelt])
        test_features, _ = extract_features(test[lexelt],language,lexelt,s[lexelt])

        X_train, X_test = vectorize(train_features,test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test,y_train,language)
        results[lexelt] = classify(X_train_new, X_test_new,y_train)
    A.print_results(results, answer)
    print 'ended'
Example #22
def run(train, test, language, answer):
    results = {}
    l = len(train)
    for i, lexelt in enumerate(train):
        sys.stdout.write('\r{} / {} ({}%)'.format(i, l, int(float(i) / l * 100)))
        sys.stdout.flush()

        train_features, y_train = extract_features(train[lexelt], language)
        test_features, _ = extract_features(test[lexelt], language)

        X_train, X_test = vectorize(train_features,test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test,y_train)
        results[lexelt] = classify(X_train_new, X_test_new,y_train)

    A.print_results(results, answer)
Example #23
def run(train, test, language, answer):
    results = {}
    l = len(train)
    for i, lexelt in enumerate(train):
        sys.stdout.write('\r{} / {} ({}%)'.format(i, l, int(float(i) / l * 100)))
        sys.stdout.flush()

        train_features, y_train = extract_features(train[lexelt], language)
        test_features, _ = extract_features(test[lexelt], language)

        X_train, X_test = vectorize(train_features,test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test,y_train)
        results[lexelt] = classify(X_train_new, X_test_new,y_train)

    A.print_results(results, answer)
Example #24
    def test_impor(self):
        import importlib
        import os
        import A

        A.a()

        os.rename('A.py', 'A_change.py')
        os.rename('B.py', 'A.py')

        importlib.reload(A)

        A.a()

        os.rename('A.py', 'B.py')
        os.rename('A_change.py', 'A.py')
Example #25
def add_k_word_features_count_to_vector(vector, left_tokens, right_tokens, window_size, head=None):
    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window_size)
    for word in words:
        vector[word] = vector[word] + 1 if word in vector else 1

    if head:
        vector[head] = 1
Example #26
    def calcular(self, tipo_algoritmo):

        # breadth-first search
        if tipo_algoritmo == 1:
            self.algoritmo = Preferente_amplitud(self.entrada,
                                                 self.nodo_inicial,
                                                 self.nodo_meta)
            camino = list(reversed(self.algoritmo.camino_final))
            return camino
        elif tipo_algoritmo == 2:
            self.algoritmo = Costo_uniforme(self.entrada, self.nodo_inicial,
                                            self.nodo_meta)
            camino = list(reversed(self.algoritmo.camino_final))
            return camino

        elif tipo_algoritmo == 3:
            self.algoritmo = Preferente_profundidad(self.entrada,
                                                    self.nodo_inicial,
                                                    self.nodo_meta)
            camino = list(reversed(self.algoritmo.camino_final))
            return camino
        elif tipo_algoritmo == 4:
            self.algoritmo = Avara(self.entrada, self.nodo_inicial,
                                   self.nodo_meta)
            camino = list(reversed(self.algoritmo.camino_final))
            return camino
        elif tipo_algoritmo == 5:
            self.algoritmo = A(self.entrada, self.nodo_inicial, self.nodo_meta)
            camino = list(reversed(self.algoritmo.camino_final))
            return camino
Example #27
def main():
    path_in = './testcase.in'
    path_out = './result.out'

    N, K, c, v = _input(path_in)
    S, L, c, v = A.calc(N, K, c, v)
    di = similar_terms(c,v)
    _output(S, len(di), di, path_out)
Example #28
def G(text):
    A = hidden_prompt_func if F else visible_prompt_func
    try:
        echo(text, nl=_B, err=C)
        return A('')
    except (KeyboardInterrupt, EOFError):
        if F:
            echo(_A, err=C)
        raise Abort()
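
The function above is minified click internals (its interactive prompt loop; F selects the hidden-input path). For orientation only, the equivalent user-facing calls in click's public API look like this:

import click

name = click.prompt('Your name')                    # visible input
secret = click.prompt('Password', hide_input=True)  # hidden input
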
Example #29
def run(train, test, language, answer):
    results = {}

    if language == 'English': language = 'en'
    if language == 'Spanish': language = 'spa'
    if language == 'Catalan': language = 'cat'

    for lexelt in train:
        rel_dict = relevance(train[lexelt])
        train_features, y_train = extract_features(train[lexelt], language, rel_dict=rel_dict)
        test_features, _ = extract_features(test[lexelt], language, rel_dict=rel_dict)

        X_train, X_test = vectorize(train_features,test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test,y_train)
        results[lexelt] = classify(X_train_new, X_test_new,y_train)

    A.print_results(results, answer)
Example #30
def main():
    if len(sys.argv) != 7:
        print 'Usage: python main.py <input_training file> <input test file> <output KNN file> <output SVM file> <output best file> <language>'
        sys.exit(0)

    train_file = sys.argv[1]
    test_file = sys.argv[2]
    knn_answer = sys.argv[3]
    svm_answer = sys.argv[4]
    best_answer = sys.argv[5]
    language = sys.argv[6]

    train_set = parse_data(train_file)
    test_set = parse_data(test_file)

    A.run(train_set, test_set, language, knn_answer, svm_answer)
    B.run(train_set, test_set, language, best_answer)
Example #31
def classify(X_train, X_test, y_train):
    '''
    Train the best classifier on (X_train, and y_train) then predict X_test labels

    :param X_train: A dictionary with the following structure
            { instance_id: [w_1 count, w_2 count, ...],
            ...
            }

    :param X_test: A dictionary with the following structure
            { instance_id: [w_1 count, w_2 count, ...],
            ...
            }

    :param y_train: A dictionary with the following structure
            { instance_id : sense_id }

    :return: results: a list of tuples (instance_id, label) where labels are predicted by the best classifier
    '''

    results = []

    trainVectors, _, trainOutcomes = A.getFeatureVectors(X_train, y_train)
    testVectors, testKeys = A.getFeatureVectors(X_test)

    # Select Features
    svm_clf = svm.LinearSVC()
    selector = RFE(svm_clf, verbose=0, step=10)
    selector = selector.fit(trainVectors, trainOutcomes)
    featMask = selector.get_support()

    # Mask Features
    nItems = testVectors.shape[0]
    testVectorsNew = np.zeros((nItems, np.sum(featMask)))
    for k in range(nItems):
        testVectorsNew[k, :] = testVectors[k, :][featMask]

    model = selector.estimator_
    svm_predict = model.predict(testVectorsNew)
    #svm_clf.fit(trainVectorsNew, trainOutcomes)
    #svm_predict = svm_clf.predict(testVectors)

    results = [(testKeys[k], svm_predict[k]) for k in range(len(testKeys))]

    return results
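
The same RFE pattern can be reproduced standalone; below is a sketch on synthetic data (the real matrices come from A.getFeatureVectors, which is project code). Note that selector.transform applies the same column mask the snippet builds by hand from get_support():

import numpy as np
from sklearn import svm
from sklearn.feature_selection import RFE

rng = np.random.default_rng(0)
X_train = rng.random((40, 30))
y_train = np.arange(40) % 2          # two alternating classes
X_test = rng.random((10, 30))

selector = RFE(svm.LinearSVC(), step=10).fit(X_train, y_train)
X_test_new = selector.transform(X_test)           # same as X_test[:, selector.get_support()]
labels = selector.estimator_.predict(X_test_new)  # estimator_ was refit on the kept features
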
Example #32
File: B.py Project: keyu-lai/NLP
def run(train, test, language, answer):
    results = {}

    tagger = get_tagger(language)
    stemmer = get_stemmer(language)
    s = build_s(train, stemmer)

    for lexelt in train:

        words_count, senses_count = get_relavence_info(train[lexelt], stemmer)
        train_features, y_train = extract_features(train[lexelt], tagger, words_count, senses_count, stemmer, s[lexelt])
        test_features, _ = extract_features(test[lexelt], tagger, words_count, senses_count, stemmer, s[lexelt])

        X_train, X_test = vectorize(train_features, test_features)
        X_train_new, X_test_new, y_train_new, ids_test = feature_selection(X_train, X_test, y_train)
        results[lexelt] = classify(X_train_new, X_test_new, y_train_new, ids_test)

    A.print_results(results, answer)
Example #33
def run(train, test, language, answer):
    results = {}
    s = build_s(train, language)

    for lexelt in train:

        feas_set = build_feas_set(train[lexelt], language)
        #feas_set = None
        train_features, y_train = extract_features(train[lexelt], language, feas_set, s[lexelt])
        test_features, _ = extract_features(test[lexelt], language, feas_set, s[lexelt])
    #    print train_features
        X_train, X_test = vectorize(train_features,test_features)
    #    print X_train
    #    X_train_new, X_test_new = feature_selection(X_train, X_test,y_train)
    #    results[lexelt] = classify(X_train_new, X_test_new,y_train)
        results[lexelt] = classify(X_train, X_test,y_train)

    A.print_results(results, answer)
Example #34
File: EC.py Project: jxWho/h4
def main(aligned_sents):
    ba = BetterBerkeleyAligner(aligned_sents, 10)
    if ba.t is None:
        print "Better Berkeley Aligner Not Implemented"
    else:
        avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)

        print ('Better Berkeley Aligner')
        print ('---------------------------')
        print('Average AER: {0:.3f}\n'.format(avg_aer))
Example #35
def echo_via_pager(text_or_generator, color=_A):
    B = color
    A = text_or_generator
    B = resolve_color_default(B)
    if inspect.isgeneratorfunction(A): C = A()
    elif isinstance(A, str): C = [A]
    else: C = iter(A)
    D = (A if isinstance(A, str) else str(A) for A in C)
    from ._termui_impl import pager
    return pager(itertools.chain(D, '\n'), B)
Example #36
File: EC.py Project: bdqnghi/NLP
def main(aligned_sents):
    ba = BetterBerkeleyAligner(aligned_sents, 10)
    if ba.t is None:
        print "Better Berkeley Aligner Not Implemented"
    else:
        avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)

        print('Better Berkeley Aligner')
        print('---------------------------')
        print('Average AER: {0:.3f}\n'.format(avg_aer))
Example #37
def run(train, test, language, answer):
    results = {}
    s = build_s(train, language)

    for lexelt in train:

        feas_set = build_feas_set(train[lexelt], language)
        #feas_set = None
        train_features, y_train = extract_features(train[lexelt], language,
                                                   feas_set, s[lexelt])
        test_features, _ = extract_features(test[lexelt], language, feas_set,
                                            s[lexelt])
        #    print train_features
        X_train, X_test = vectorize(train_features, test_features)
        #    print X_train
        #    X_train_new, X_test_new = feature_selection(X_train, X_test,y_train)
        #    results[lexelt] = classify(X_train_new, X_test_new,y_train)
        results[lexelt] = classify(X_train, X_test, y_train)

    A.print_results(results, answer)
Example #38
File: B.py Project: Alexoner/mooc
def run(train, test, language, answer):
    print 'running B for language:', language
    results = {}
    if language.lower() in ['english', 'spanish']:
        extract_features.stemmer = nltk.SnowballStemmer(language.lower())

    for lexelt in train:

        train_features, y_train = extract_features(train[lexelt], language=language)
        test_features, _ = extract_features(test[lexelt], language=language)

        X_train, X_test = vectorize(train_features,test_features)
        if language.lower() in ['english', 'spanish']:
            X_train_new, X_test_new = feature_selection(X_train, X_test,y_train)
        else:
            X_train_new = X_train
            X_test_new = X_test
        results[lexelt] = classify(X_train_new, X_test_new,y_train)

    A.print_results(results, answer)
Example #39
def main(aligned_sents):
    stemmed_sents = stem_input_sents(aligned_sents)
    ba = BetterBerkeleyAligner(stemmed_sents, 6) #6
    if ba.t is None:
        print "Better Berkeley Aligner Not Implemented"
    else:
        avg_aer = A.compute_avg_aer(stemmed_sents, ba, 50, 'ec_errs.txt')

        print ('Better Berkeley Aligner')
        print ('---------------------------')
        print('Average AER: {0:.3f}\n'.format(avg_aer))
Example #40
def run(train, test, language, answer):
    results = {}
    stemmer, stop, tokenizer = stemming_and_stop_words(
        language)  # Courtesy Pushpendra pratap

    for lexelt in train:
        train_features, y_train = extract_features(
            train[lexelt], stemmer, stop,
            tokenizer)  # Courtesy Pushpendra pratap
        test_features, _ = extract_features(
            test[lexelt], stemmer, stop,
            tokenizer)  # Courtesy Pushpendra pratap

        X_train, X_test = vectorize(train_features, test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test, y_train)
        results[lexelt] = classify(X_train_new, X_test_new, y_train)

    answer = answer + '-' + language  # Courtesy Pushpendra pratap

    A.print_results(results, answer)
Example #41
def test_deepreload():
    "Test that dreload does deep reloads and skips excluded modules."
    with TemporaryDirectory() as tmpdir:
        with prepended_to_syspath(tmpdir):
            with open(os.path.join(tmpdir, "A.py"), "w") as f:
                f.write("class Object(object):\n    pass\n")
            with open(os.path.join(tmpdir, "B.py"), "w") as f:
                f.write("import A\n")
            import A
            import B

            # Test that A is not reloaded.
            obj = A.Object()
            dreload(B, exclude=["A"])
            nt.assert_true(isinstance(obj, A.Object))

            # Test that A is reloaded.
            obj = A.Object()
            dreload(B)
            nt.assert_false(isinstance(obj, A.Object))
Example #42
    def test_alter_A_Foo(self, Foo_mock):
        # Because `new` is not specified for `patch`, `target` is replaced with
        # a `MagicMock` object and is passed in as `Foo_mock` so the following
        # "isinstance" check can succeed.
        self.assertIsInstance(A.Foo, mock.MagicMock)

        # Always remember that `A.Foo` is a `MagicMock` now and we can't change
        # this fact. Therefore, instantiating it is the same as instantiating
        # `MagicMock`, and you will get another `MagicMock` object.
        self.assertIsInstance(A.Foo(), mock.MagicMock)

        # The modification to `Foo_mock` is applied to this `MagicMock` class
        # itself, not to the instances that are instantiated from it. Therefore,
        # the following `introduce` method is only accessible via `A.Foo` the
        # class, not via `A.Foo()`.
        Foo_mock.introduce.return_value = "Hello, my name is Alice."
        self.assertEqual(A.Foo.introduce(), "Hello, my name is Alice.")
        self.assertIsInstance(A.Foo().introduce(), mock.MagicMock)

        # Because we only patch `Foo`, `A.say_hi` is not affected.
        self.assertEqual(A.say_hi(), A.ORIGINAL_HI_MSG)
Example #43
def main(aligned_sents):
    time.clock()
    ba = BetterBerkeleyAligner(aligned_sents, 10)
    if ba.t is None:
        print "Better Berkeley Aligner Not Implemented"
    else:
        avg_aer = A.compute_avg_aer(aligned_sents, ba, 50)

        print ('Better Berkeley Aligner')
        print ('---------------------------')
        print('Average AER: {0:.3f}\n'.format(avg_aer))
        print "Part EC time: " + str(time.clock()) + ' sec'
Example #44
def test_deepreload():
    "Test that dreload does deep reloads and skips excluded modules."
    with TemporaryDirectory() as tmpdir:
        with prepended_to_syspath(tmpdir):
            tmpdirpath = Path(tmpdir)
            with open(tmpdirpath / "A.py", "w") as f:
                f.write("class Object(object):\n    pass\n")
            with open(tmpdirpath / "B.py", "w") as f:
                f.write("import A\n")
            import A
            import B

            # Test that A is not reloaded.
            obj = A.Object()
            dreload(B, exclude=["A"])
            assert isinstance(obj, A.Object) is True

            # Test that A is reloaded.
            obj = A.Object()
            dreload(B)
            assert isinstance(obj, A.Object) is False
Example #45
File: B.py Project: Alexoner/mooc
def run(train, test, language, answer):
    print 'running B for language:', language
    results = {}
    if language.lower() in ['english', 'spanish']:
        extract_features.stemmer = nltk.SnowballStemmer(language.lower())

    for lexelt in train:

        train_features, y_train = extract_features(train[lexelt],
                                                   language=language)
        test_features, _ = extract_features(test[lexelt], language=language)

        X_train, X_test = vectorize(train_features, test_features)
        if language.lower() in ['english', 'spanish']:
            X_train_new, X_test_new = feature_selection(
                X_train, X_test, y_train)
        else:
            X_train_new = X_train
            X_test_new = X_test
        results[lexelt] = classify(X_train_new, X_test_new, y_train)

    A.print_results(results, answer)
Example #46
def run(train, test, language, answer):
    results = {}

    # tag_and_save(train, test, language)

    # load cached POS tags
    # print 'loading cached pos tags...'
    # train_name = language + '-train.p'
    # test_name = language + '-test.p'
    # train_pos_tags = pickle.load(open(train_name, 'rb'))
    # test_pos_tags = pickle.load(open(test_name, 'rb'))

    tagger = None
    if POS_WINDOW > 0 or POS_HEAD or FORCE_TAGGER_USE:
        tagger = UniversalTagger.EnglishTagger()
        if language == 'Spanish':
            tagger = UniversalTagger.SpanishTagger()

        if language == 'Catalan':
            tagger = UniversalTagger.CatalanTagger()

    stemmer = None
    if STEM:
        stemmer = PorterStemmer()

    for lexelt in train:
        relevance_key = None
        if USE_RELEVANCY_SCORES:
            relevance_key = top_relevant_words_from_data(train[lexelt])

        train_features, y_train = extract_features(train[lexelt], tagger, stemmer, relevance_key)
        test_features, _ = extract_features(test[lexelt], tagger, stemmer, relevance_key)

        X_train, X_test = vectorize(train_features,test_features)
        X_train_new, X_test_new = feature_selection(X_train, X_test,y_train)
        results[lexelt] = classify(X_train_new, X_test_new,y_train)

    A.print_results(results, answer)
Example #47
def coherencia_radio(cluster):
    """
    Function: coherencia_radio
    Descrp: Calcula la coherencia de un cluster por el
    metodo del radio.
    Args:
    -> cluster: Lista de instancias que forman el cluster.
    Return:
    -> Valor de coherencia
    """

    c = A.get_centroide(cluster)
    # maximum of the radii (every instance to the centroid)
    return max([distance.euclidean(i,c) for i in cluster])
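
A minimal numeric sketch of this radius computation; A.get_centroide is a project helper, so the centroid is assumed here to be the coordinate-wise mean:

import numpy as np
from scipy.spatial import distance

cluster = [np.array([0.0, 0.0]), np.array([2.0, 0.0]), np.array([0.0, 2.0])]
c = np.mean(cluster, axis=0)                           # assumed centroid
print(max(distance.euclidean(i, c) for i in cluster))  # the cluster's radius
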
Example #48
def add_k_word_features_to_vector(vector, left_tokens, right_tokens, window_size, head=None):
    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window_size)
    mid = len(words)/2
    left = words[:mid]
    right = words[mid:]
    for idx, word in enumerate(left):
        key = 'w_b' + str(len(left) - idx)
        vector[key] = word

    for idx, word in enumerate(right):
        key = 'w_a' + str(idx+1)
        vector[key] = word

    if head:
        key = 'w_head'
        vector[key] = head
Example #49
    def test_regression(self):
        from random import random as rnd
        from A import solve
        from time import time
        SIZE = 1000  # size of the regression (iterations)
        t1 = time()
        for _ in xrange(SIZE):
            # int(rnd() * (upper_bound - lower_bound) + lower_bound)
            array = [(int(rnd() * (100 - 0)) + 0) for _ in xrange(int(rnd() * (100 - 1) + 1))]
            true = A.solve(array)
            try:
                self.assertEqual(true, hack1(array))
            except:
                print array
                exit()
        t2 = time()
        print "regressionOK : " + str(t2 - t1)
Example #50
def main(num):
    '''Fetch the whole payload (everything between the [ ], in its complete form)'''
    url = 'https://3g.dxy.cn/newh5/view/pneumonia?scene=2&clicktime=1579578460&enterid=1579578460&from=groupmessage&isappinstalled=0'
    res = request.urlopen(url)
    soup = BeautifulSoup(res, "html.parser")
    js = soup.findAll('script', attrs={'id': 'getAreaStat'})
    ans = js[0].text
    head = ans.index('[')
    tail = ans[::-1].index(']')
    tail = len(ans) - tail
    text = ans[head:tail]
    #text = text.replace(":"," : ")
    '''Keyword replacement'''
    title = ["cityName", "confirmedCount", "curedCount", "deadCount"]
    see = ["省级行政区", "发现病例", "已治愈", "已死亡", "市(县)"]
    dictnaru = dict(zip(title, see))

    li = text.split('{')

    li = A.sholi(text)
    ll = sorted(
        li, key=lambda x: Pinyin().get_pinyin(x.provinceName).split('-')[0])
    '''Collect bulk data for data visualization'''

    Time = time.strftime("%Y-%m-%d-%H", time.localtime())
    print("已创建%s的文件" % (Time))
    path_p = 'C:\\Users\\HIKKI\\Desktop\\province\\'
    path_c = 'C:\\Users\\HIKKI\\Desktop\\city\\'
    name = str(num) + '.txt'
    f_p = open(path_p + name, 'w', encoding='utf-8')
    f_c = open(path_c + name, 'w', encoding='utf-8')
    f_p.write(Time + '\n')
    f_c.write(Time + '\n')
    f_p.write(see[0] + ' ' + see[1] + ' ' + see[2] + ' ' + see[3] + '\n')
    f_c.write(see[4] + ' ' + see[1] + ' ' + see[2] + ' ' + see[3] + '\n')
    for i in ll:
        f_p.write(i.provinceName + ' ' + i.conformCount + ' ' + i.curedCount +
                  ' ' + i.deadCount + '\n')
    for i in ll[0].city:
        for j in i:
            f_c.write(i[j] + ' ')
        f_c.write('\n')

    f_c.close()
    f_p.close()
    pass
Example #51
def coherencia_promedio(clustering):
    """
    Function: coherencia_promedio
    Descrp: Calcula la coherencia de un conjunto de clusters por la
    formula SUM(dist(c,i)^2)/N
    Args:
    -> clustering: Lista de clusters.
    Return:
    -> Valor de coherencia
    """


    suma = 0
    num = 0
    for clu in clustering:
        c = A.get_centroide(clustering[clu])
        for i in clustering[clu]:
            d = distance.euclidean(c,i)
            suma += d*d
            num +=1
    return suma/(num*1.0)
Example #52
    def test_patch_B_say_hi(self):
        B_mock = mock.MagicMock(return_value="Hey, B here!")

        # NOTE(ywen): You can specify the full reference to the mocking target.
        with mock.patch(target="__main__.A.B.say_hi", new=B_mock) as m1:
            self.assertIs(m1, B_mock)
            self.assertEqual(A.say_hi(), A.ORIGINAL_HI_MSG)
            self.assertEqual(A.introduce_B(), "Hey, B here!")
            self.assertEqual(A.say_hi_for_C(), C.ORIGINAL_HI_MSG)

        # NOTE(ywen): You can specify the partial reference to the mocking
        # target.
        with mock.patch(target="B.say_hi", new=B_mock) as m2:
            self.assertIs(m2, B_mock)
            self.assertEqual(A.say_hi(), A.ORIGINAL_HI_MSG)
            self.assertEqual(A.introduce_B(), "Hey, B here!")
            self.assertEqual(A.say_hi_for_C(), C.ORIGINAL_HI_MSG)
Example #53
def runTests():
    fileList = getInputFiles()
    listoflists = []
    no_test_case = 1
    for file_name in fileList:
        sublist = [no_test_case]
        if (no_test_case - 1) // 5 == 0:
            sublist.append("easy")
        elif (no_test_case - 1) // 5 == 1:
            sublist.append("moderate")
        else:
            sublist.append("hard")

        puzzle = extract_puzzle(file_name)

        a = algoA.Sudoku(puzzle)
        a.solve()
        sublist.extend([a.time, a.count])

        b = algoB.Sudoku(puzzle)
        b.solve()
        sublist.extend([b.time, b.count])

        c = algoC.Sudoku(puzzle)
        c.solve()
        sublist.extend([c.time, c.count])

        d = algoD.Sudoku(puzzle)
        d.solve()
        sublist.extend([d.time, d.count])

        e = algoE.Sudoku(puzzle)
        e.solve()
        sublist.extend([e.time, e.count])

        listoflists.append(sublist)
        no_test_case += 1
    return listoflists
Example #54
    def comp_move(self):
        if self.comp == 'A':
            move = A.minimax(self)
        elif self.comp == 'B':
            vals = {}
            nodes = []
            highest = 0
            best = ()
            moveset = self.valid_moves('W')
            for move in moveset:
                n = tree.Node(self, 3, 'W', move[0], move[1], moveset)  # default depth level 3
                nodes.append(n)
            for n in nodes:
                ret = B.alphabeta(n, 3, -10000, 10000, 'W')
                x, y = n.get_x(), n.get_y()
                vals[(x, y)] = ret
                if ret > highest:
                    highest = ret
                    best = (x, y)
            return best
        else:
            move = C.master(self)
        return move
Example #55
def add_k_word_POS_features_to_vector(vector, left_tokens, right_tokens, window_size, tagger, head_tag=None):

    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window_size)
    mid = len(words)/2
    left = words[:mid]
    right = words[mid:]

    left_tagged = tagger.tag(left)
    right_tagged = tagger.tag(right)

    for idx, (word, tag) in enumerate(left_tagged):
        key = 'pos_b' + str(len(left_tagged) - idx)
        vector[key] = tag

    for idx, (word, tag) in enumerate(right_tagged):
        key = 'pos_a' + str(idx+1)
        vector[key] = tag

    # add POS tag for head
    if head_tag:
        key = 'pos_head'
        word, tag = head_tag[0]
        vector[key] = tag
Example #56
def add_synonym_counts(tagger, left_tokens, right_tokens, vector, window):
    words = A.k_nearest_words_vector_from_tokens(left_tokens, right_tokens, window)

    for w in words:
        tagged = tagger.tag([w])
        word, tag = tagged[0]
        tag = wordnet_tag_from_penn_tag(tag)
        synonyms = wordnet.synsets(w, pos=tag)
        for synset in synonyms:

            if ADD_SYNONYMS:
                name = synset.name()
                vector[name] = vector[name]+1 if name in vector else 1

            if ADD_HYPONYMS:
                for idx, hypo in enumerate(synset.hyponyms()):
                    name = hypo.name()
                    vector[name] = vector[name]+1 if name in vector else 1

            if ADD_HYPERNYMS:
                for idx, hypper in enumerate(synset.hypernyms()):
                    name = hypper.name()
                    vector[name] = vector[name]+1 if name in vector else 1
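
wordnet_tag_from_penn_tag is not shown in any of these snippets; a common way to write such a helper is sketched below (an assumption, not the project's actual code):

from nltk.corpus import wordnet


def wordnet_tag_from_penn_tag(penn_tag):
    """Map a Penn Treebank tag to a WordNet POS constant (None when unmapped)."""
    if penn_tag.startswith('J'):
        return wordnet.ADJ
    if penn_tag.startswith('V'):
        return wordnet.VERB
    if penn_tag.startswith('N'):
        return wordnet.NOUN
    if penn_tag.startswith('R'):
        return wordnet.ADV
    return None
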
Example #57
    def test_alter_A_2(self):
        def _say_goodbye():
            return "Goodbye!"

        # When patching the module, we can set 'create' to 'True' so it creates
        # the attribute that doesn't exist in the target module. In the example
        # below, an 'AttributeError' is thrown if 'create' is 'False'.
        with mock.patch(
                target='__main__.A.say_goodbye',
                new=_say_goodbye,
                create=True,
        ) as m:
            # The mocked target is returned as `m`. In this case, it is the
            # `_say_goodbye` function.
            self.assertEqual(m, _say_goodbye)
            self.assertEqual(A.say_goodbye(), "Goodbye!")

        # Because `create` is False, `mock.patch` will raise `AttributeError`
        # because `A` does not have `say_goodbye`. This helps prevent typos.
        attribute_error_raised = False
        try:
            # NOTE(ywen): `mock.patch` doesn't raise `AttributeError` unless
            # evaluated as a context manager. In other words, calling
            # `self.assertRaises(AttributeError, mock.patch, target=...)` or
            # `mock.patch(target=...)` won't raise `AttributeError`. That's why
            # I can't use `self.assertRaises` to test the behavior.
            with mock.patch(
                    target='__main__.A.say_goodbye',
                    new=_say_goodbye,
                    create=False,
            ):
                self.fail("AttributeError is not raised.")
        except AttributeError:
            attribute_error_raised = True

        self.assertTrue(attribute_error_raised)
Example #58
            senseid = ''

            # if train then parse sense, if test then senseid = ''
            try:
                senseid = inst.getElementsByTagName('answer')[0].getAttribute('senseid')
                senseid = replace_accented(senseid).encode('ascii')
            except:
                senseid = ''
            data[lexelt].append((instance_id, left, head, right, senseid))

    return data


if __name__ == '__main__':
    if len(sys.argv) != 7:
        print 'Usage: python main.py <input_training file> <input test file> <output KNN file> <output SVM file> <output best file> <language>'
        sys.exit(0)

    train_file = sys.argv[1]
    test_file = sys.argv[2]
    knn_answer = sys.argv[3]
    svm_answer = sys.argv[4]
    best_answer = sys.argv[5]
    language = sys.argv[6]

    train_set = parse_data(train_file)
    test_set = parse_data(test_file)

    A.run(train_set, test_set, language, knn_answer, svm_answer)
    B.run(train_set, test_set, language, best_answer)