Example #1
    return new


if __name__ == "__main__":
    import argparse
    import json
    import csv
    import sys
    import time

    from corpus import iter_corpus, iter_test_corpus, readDataFile
    from predictor import PhraseSentimentPredictor

    # Optionally load predictor keyword arguments from a JSON config file,
    # as Example #2 does:
    # parser = argparse.ArgumentParser(description=__doc__)
    # parser.add_argument("filename")
    # config = parser.parse_args()
    # config = json.load(open(config.filename))

    start = time.time()
    predictor = PhraseSentimentPredictor()
    # print(iter_corpus())
    x_train, x_test, y_train, y_test = readDataFile()
    print("data reading finished")
    # print(x_test)
    predictor.fit(x_train, y_train)
    print("fitting takes " + str(time.time() - start))
    test = x_test
    # prediction = predictor.predict(test)
    score = predictor.score(test, y_test, 'test')
    print("test score {}%".format(score * 100))
    print('program finished!')
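
Example #1 unpacks four values from corpus.readDataFile(), in the same order that scikit-learn's train_test_split returns them. The loader itself is not shown in these excerpts; the sketch below is a purely hypothetical stand-in (file name and column names are assumptions) just to illustrate the shape of data the script expects.

import csv

from sklearn.model_selection import train_test_split

def readDataFile(path="train.tsv"):
    # Hypothetical stand-in for corpus.readDataFile(); the real loader is not shown above.
    phrases, labels = [], []
    with open(path, newline='') as f:
        for row in csv.DictReader(f, delimiter="\t"):
            phrases.append(row["Phrase"])
            labels.append(int(row["Sentiment"]))
    # Returns x_train, x_test, y_train, y_test, the order Example #1 unpacks.
    return train_test_split(phrases, labels, test_size=0.2, random_state=0)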
Example #2
                    value = float(value)
                except ValueError:
                    pass
        new[key] = value
    return new


if __name__ == "__main__":
    import argparse
    import json
    import csv
    import sys
    import time

    from corpus import iter_corpus, iter_test_corpus
    from predictor import PhraseSentimentPredictor

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    args = parser.parse_args()
    with open(args.filename) as f:
        config = json.load(f)

    start = time.time()
    predictor = PhraseSentimentPredictor(**config)
    predictor.fit(list(iter_corpus()))
    print("fitting takes " + str(time.time() - start))
    test = list(iter_test_corpus())
    # prediction = predictor.predict(test)
    score = predictor.score(test, 'test')
    print("test score {}%".format(score * 100))
    print('program finished!')
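
Example #2 takes all of PhraseSentimentPredictor's keyword arguments from a JSON file and expands them with **config; the truncated helper at the top of the example looks like it normalizes such a dict, converting string values to numbers where possible. As a rough sketch of that indirection, with purely illustrative key names (the real constructor arguments are not shown in these excerpts):

import json

# Illustrative config only; use whatever keyword arguments PhraseSentimentPredictor actually accepts.
config = {"classifier": "logreg", "min_df": "2"}   # values may arrive as strings
with open("config.json", "w") as f:
    json.dump(config, f)

# The script then does the equivalent of
#     PhraseSentimentPredictor(classifier="logreg", min_df=2)
# once the numeric-coercion helper has turned "2" into a number.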
Example #3
if __name__ == "__main__":
    import argparse
    import json
    import csv
    import os

    from corpus import iter_corpus
    from predictor import PhraseSentimentPredictor
    from transformations import ExtractText
    # analyse and PrintPartialCV come from elsewhere in the project;
    # their module is not shown in this excerpt.

    # Build a one-word-per-row vocabulary file from the corpus if it is missing.
    if not os.path.exists('./data/vocabulary'):
        datapoints = list(iter_corpus())
        vocabulary = set()
        et = ExtractText()
        X = et.transform(datapoints)
        for datap in X:
            for w in datap.split():
                vocabulary.add(w.lower())
        vocabulary = sorted(vocabulary)
        with open('./data/vocabulary', 'w', newline='') as f:
            wr = csv.writer(f)
            for voc in vocabulary:
                wr.writerow([voc])

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filename")
    args = parser.parse_args()
    with open(args.filename) as f:
        config = json.load(f)

    factory = lambda: PhraseSentimentPredictor(**config)
    factory()  # Run once to check config is ok

    report = PrintPartialCV()
    analyse(factory)

    print "Analysis finished!"