from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

if __name__ == "__main__":
    # Set database up.
    # NOTE: the SQLite path is relative, so it is resolved against the
    # directory the script is started from.
    engine = create_engine('sqlite:///../../database/hegemone_dev.sqlite3', echo=False)
    db.setup_db(engine)  # presumably creates the schema -- helper not visible here
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        # Set repositories up (both share the same session/transaction).
        website_repository = WebsiteRepository(session)
        word_count_repository = WordCountRepository(session)

        # Get websites
        websites = website_repository.get_all_websites()

        # Crawl websites
        for website_tuple in websites:
            # Index 0 is used as the website id; index 1 is what gets fetched
            # (presumably the URL -- confirm against WebsiteRepository).
            website_id = website_tuple[0]
            # NOTE(review): the meaning of the second argument (0) is defined
            # by converter.get_text and is not visible from this script.
            raw_text = converter.get_text(website_tuple[1], 0)
            word_counts = WordFrequency.count_words(raw_text)

            # Store word counts in database
            for word, count in word_counts.items():
                word_count_repository.create_word_count(word, count, website_id)

            # Commit once per website so each site's counts are persisted
            # together before the next site is crawled.
            session.commit()
    finally:
        # Always release the session, even when a crawl step raises.
        session.close()
class TestWebsiteRepository(unittest.TestCase):
    """Tests for WebsiteRepository, run against an in-memory SQLite database."""

    def setUp(self):
        """Create a new in-memory database, a session and the repositories."""
        # A new engine per test means each test starts from an empty
        # database; the id assertions below rely on that.
        engine = create_engine('sqlite:///:memory:', echo=False)
        db.setup_db(engine)
        Session = sessionmaker(bind=engine)
        self.session = Session()
        self.category_repository = CategoryRepository(self.session)
        self.website_repository = WebsiteRepository(self.session)

    def tearDown(self):
        """Close the session opened in setUp."""
        self.session.close()

    def test_create_website(self):
        """A single created website is returned by get_all_websites()."""
        self.website_repository.create_website('www.wikipedia.com', 'english', 'test')

        all_result_websites = self.website_repository.get_all_websites()

        self.assertEqual(len(all_result_websites), 1)
        self.assertEqual(all_result_websites[0][0], 1)
        self.assertEqual(all_result_websites[0][1], 'www.wikipedia.com')
        self.assertEqual(all_result_websites[0][2], 'english')

    def test_create_three_websites(self):
        """Several websites come back in creation order with ids 1..n."""
        test_websites = [
            ('www.wikipedia.com', 'english', 'english test'),
            ('www.wikipedia.it', 'italian', 'italiano'),
            ('www.simple.wikipedia.com', 'english', 'simple english'),
        ]
        for test_website in test_websites:
            self.website_repository.create_website(*test_website)

        all_result_websites = self.website_repository.get_all_websites()

        self.assertEqual(len(all_result_websites), len(test_websites))
        # Result rows are compared positionally: row[0] is the id, row[1] and
        # row[2] mirror the first two arguments passed to create_website.
        pairs = zip(test_websites, all_result_websites)
        for expected_id, (expected, actual) in enumerate(pairs, 1):
            self.assertEqual(actual[0], expected_id)
            self.assertEqual(actual[1], expected[0])
            self.assertEqual(actual[2], expected[1])

    def test_get_website_by_category(self):
        """A website added to a category is found via that category's id."""
        website_id = self.website_repository.create_website(
            'www.wikipedia.it', 'italian', 'italiano')
        category_id = self.category_repository.create_category('wiki')
        self.category_repository.add_website(category_id, website_id)

        all_result_websites = self.website_repository.get_websites_by_category(category_id)

        self.assertEqual(all_result_websites[0][0], 1)
        self.assertEqual(all_result_websites[0][1], 'www.wikipedia.it')
        self.assertEqual(all_result_websites[0][2], 'italian')

    def test_get_website_id_by_search_term(self):
        """Searching by a title fragment returns the created website's id."""
        website_id = self.website_repository.create_website(
            'https://it.wikipedia.org/wiki/Lingua_italiana',
            'italian',
            'lingua italiana')
        self.session.commit()

        website_ids = self.website_repository.get_website_ids_by_title('lingua')

        self.assertEqual(len(website_ids), 1)
        self.assertEqual(website_ids[0], website_id)
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from dataAccessLayer.helper import db
from dataAccessLayer.repositories.websiteRepository import WebsiteRepository

# Seed rows; each tuple is unpacked as the positional arguments of
# WebsiteRepository.create_website (here: URL, language, title).
new_entries = [
    ('https://de.wikipedia.org/wiki/Deutsche_Sprache', 'German', 'Deutsche Sprache'),
    ('https://it.wikipedia.org/wiki/Lingua_italiana', 'Italian', 'Lingua italiana'),
]

if __name__ == "__main__":
    # Set database up.
    # NOTE: the SQLite path is relative, so it is resolved against the
    # directory the script is started from.
    engine = create_engine('sqlite:///../database/hegemone_dev.sqlite3', echo=False)
    db.setup_db(engine)  # presumably creates the schema -- helper not visible here
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        # Set website repository up
        website_repository = WebsiteRepository(session)

        # Create websites.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Creating website entries...')
        for new_entry in new_entries:
            website_repository.create_website(*new_entry)
        session.commit()

        print(website_repository.get_all_websites())
    finally:
        # Always release the session, even if seeding fails part-way.
        session.close()