def create_base(path, host, port, db_name):
    """Build a ``TextBase`` from a two-level directory tree of documents.

    Every entry of ``path`` is treated as a directory (it is listed with
    ``os.listdir``), and every entry inside it is treated as a document.
    Each document path is first passed to ``remove_transfer`` and then to
    ``formalize``; the result of ``formalize`` is appended to the base
    under the document's file name.

    :param path: root directory whose entries are per-theme directories.
    :param host: forwarded to ``TextBase`` as its first argument.
    :param port: forwarded to ``TextBase`` as its second argument.
    :param db_name: forwarded to ``TextBase`` as its third argument.
    :returns: the populated ``TextBase`` after ``_normalize()`` was called.
    """
    text_base = TextBase(host, port, db_name)

    for theme_name in os.listdir(path):
        theme_dir = os.path.join(path, theme_name)
        for doc_name in os.listdir(theme_dir):
            doc_path = os.path.join(theme_dir, doc_name)
            # The return value is discarded, so remove_transfer presumably
            # rewrites the file in place before it is read -- TODO confirm.
            remove_transfer(doc_path)
            text_base.append(doc_name, formalize(doc_path))

    # Normalise once, after every document has been appended.
    text_base._normalize()
    return text_base
def check_svm(self):
    """Train an SVM on the text base and print its accuracy in percent.

    Connection settings are taken from ``self.pargs`` (``host``, ``port``,
    ``name``); any falsy value falls back to the matching entry of
    ``MONGODB_BACKEND_SETTINGS``.  The classifier is built from
    ``TextBase.to_lists()`` and evaluated against every document of the
    ``test_collection2`` collection: a prediction counts as correct when
    ``base.classes[int(prediction)]`` equals the document's
    ``_class_name`` field.

    Prints the percentage of correct predictions, or a short notice when
    the test collection holds no documents (the previous implementation
    raised ``ZeroDivisionError`` in that case).
    """
    host = self.pargs.host or MONGODB_BACKEND_SETTINGS['host']
    port = self.pargs.port or MONGODB_BACKEND_SETTINGS['port']
    name = self.pargs.name or MONGODB_BACKEND_SETTINGS['database']

    # Imported lazily, as in the original, so the module can be loaded
    # without these packages installed.
    from mysvm import SVM
    from pymongo import Connection

    base = TextBase(host, port, name)
    # 'samples' rather than 'input' to avoid shadowing the builtin.
    samples, target = base.to_lists()
    classifier = SVM(samples, target)

    # Testing: score the classifier against the held-out collection.
    connection = Connection(host=host, port=port)
    try:
        collection = connection[name]['test_collection2']
        total = 0
        correct = 0
        for doc in collection.find():
            total += 1
            predicted = classifier.predict(base.to_list(doc))
            if doc['_class_name'] == base.classes[int(predicted)]:
                correct += 1
    finally:
        # Release the socket even if prediction fails part-way through.
        connection.disconnect()

    if not total:
        print('no documents found in test_collection2')
        return

    # float() keeps true division on Python 2; single-argument print(...)
    # produces the same output on Python 2 and Python 3.
    print(float(correct) / total * 100)