class TestNewTableCreation:
    """Check that new tables can be created after receiving new table names."""

    def __init__(self):
        # Guards against setUp/tearDown being called out of order.
        self.cls_initialized = False

    def setUp(self):
        """Mark the fixture as live and build a CaptureFeeds for the tests dir."""
        assert not self.cls_initialized
        self.cls_initialized = True
        # Earlier variant of the path, kept for reference:
        # os.path.abspath("") + "/GatherNews/gathernews/tests/"
        working_dir = os.path.abspath("")
        self.path = working_dir + "/gathernews/tests/"
        self.capture_feeds = CaptureFeeds(self.path)

    def tearDown(self):
        """Mark the fixture as released; fails if setUp never ran."""
        assert self.cls_initialized
        self.cls_initialized = False

    def test_make_table_names(self):
        """An RSS link is parsed into the expected table name."""
        # Inputs
        feed_url = "http://feeds.reuters.com/Reuters/worldNews"
        pending_tables = []

        # Exercise and verify
        table_names = self.capture_feeds.make_table_names(feed_url,
                                                          pending_tables)
        assert_true(table_names[0] == "ReutersWorldNews")

    def test_match_names(self):
        """A SQL database table name can be matched to the table name used
        in an insert query.
        """
        # Input
        table_name = "ReutersWorldNews"

        # Exercise and verify
        matched = self.capture_feeds.match_names(table_name)
        assert_true(matched == True)

    # Note that the following method is actually part of the
    # PopulateExistingTables() test class. However, the file path
    # used for testing is shared with the TestNewTablesCreated2()
    # class so here we've arrived.
    def test_do_tables_exist(self):
        """Tables which should be created are not found when they do not
        exist.

        The error should be raised here from read_file()
        """
        assert_false(self.capture_feeds.do_tables_exist())

    def test_create_these_tables(self):
        """An error is raised if no RSS links have been added to
        'feeds_list.txt'.
        """
        # Inputs
        no_current_feeds = []
        no_previous_feeds = []  # empty list should never be returned here

        # Exercise and verify
        assert_raises(UserWarning,
                      self.capture_feeds.create_these_tables,
                      no_current_feeds,
                      no_previous_feeds)
def do_everything2(path):
    """Time one ``CaptureFeeds(path).load_db()`` run.

    Per the original author's note, load_db() in 0.2.0 achieves the same
    thing as the benchmark function populate_and_rm_dups().

    Args:
        path: passed unchanged to the ``CaptureFeeds`` constructor.

    Returns:
        Elapsed wall-clock time in seconds (float). The measurement starts
        before the ``CaptureFeeds`` object is constructed, so construction
        time is included.
    """
    start = time.time()
    capture_feeds = CaptureFeeds(path)
    capture_feeds.load_db()
    total = time.time() - start
    # One pre-formatted argument keeps the output identical to the old
    # Python 2 statement `print "Elapsed time: ", total` (note the two
    # spaces) while also being valid Python 3 syntax.
    print("Elapsed time:  %s" % total)
    return total
def do_everything(path):
    """Time the three-step pipeline: create tables, populate, de-duplicate.

    Calls ``create_tables()``, ``populate_db()`` and ``rm_duplicates()`` in
    that order on a fresh ``CaptureFeeds(path)``.

    Args:
        path: passed unchanged to the ``CaptureFeeds`` constructor.

    Returns:
        Elapsed wall-clock time in seconds (float), including construction
        of the ``CaptureFeeds`` object.
    """
    start = time.time()
    capture_feeds = CaptureFeeds(path)
    capture_feeds.create_tables()
    capture_feeds.populate_db()
    capture_feeds.rm_duplicates()
    total = time.time() - start
    # One pre-formatted argument keeps the output identical to the old
    # Python 2 statement `print "Elapsed time: ", total` (note the two
    # spaces) while also being valid Python 3 syntax.
    print("Elapsed time:  %s" % total)
    return total

# database must be removed each time
class BenchmarkgRSS:
    """Timing harness around a single ``CaptureFeeds`` instance."""

    def __init__(self,
                 file_path="/home/tyler/code/GatherNews/gathernews/tests/"):
        """Create the ``CaptureFeeds`` object to benchmark.

        Args:
            file_path: file path to where "feeds_list.txt" is located. The
                default is the original hard-coded location, so existing
                zero-argument callers behave as before; pass another
                directory to run the benchmark on a different machine.
        """
        self.capture_feeds = CaptureFeeds(file_path)

    def benchmark_create_tables(self):
        """Time one ``load_db()`` call on the wrapped ``CaptureFeeds``.

        The previous docstring described this as a test of create_tables()
        and its supporting methods get_RSS_link() and get_tablenames(), and
        noted that do_tables_exist() "is not working"; the code itself only
        calls load_db().
        NOTE(review): confirm load_db() covers the intended methods.

        Returns:
            Elapsed wall-clock time in seconds (float).
        """
        start = time.time()
        self.capture_feeds.load_db()
        total = time.time() - start
        # One pre-formatted argument keeps the output identical to the old
        # Python 2 statement `print "Elapsed time: ", total` (note the two
        # spaces) while also being valid Python 3 syntax.
        print("Elapsed time:  %s" % total)
        return total
def populate_and_rm_dups(path):
    """Time populating the database and removing duplicates.

    Calls ``populate_db()`` then ``rm_duplicates()`` on a fresh
    ``CaptureFeeds(path)``.

    Args:
        path: passed unchanged to the ``CaptureFeeds`` constructor.

    Returns:
        Elapsed wall-clock time in seconds (float), including construction
        of the ``CaptureFeeds`` object.
    """
    start = time.time()
    capture_feeds = CaptureFeeds(path)
    capture_feeds.populate_db()
    capture_feeds.rm_duplicates()
    total = time.time() - start
    # One pre-formatted argument keeps the output identical to the old
    # Python 2 statement `print "Elapsed time: ", total` (note the two
    # spaces) while also being valid Python 3 syntax.
    print("Elapsed time:  %s" % total)
    return total
def setUp(self):
    """Mark the fixture as live and build the objects under test.

    Creates a ``CaptureFeeds`` for the tests directory (resolved against the
    current working directory) and a ``FilterGarbage`` instance.
    """
    assert not self.cls_initialized
    self.cls_initialized = True

    working_dir = os.path.abspath("")
    self.path = working_dir + "/GatherNews/gathernews/tests/"

    self.capture_feeds = CaptureFeeds(self.path)
    self.rm_garbage = FilterGarbage()
"""
========================================
Loading multiple RSS feeds into SQLite3
========================================

An example showing how GatherNews can be used to load news articles from RSS
feeds into a database. This example allows you to create new tables and load
a SQLite3 database with News from multiple RSS feeds.

Feel free to contact me if you run into any problems.
"""
print(__doc__)

# Author: Tyler Brown <*****@*****.**>

import sys

# Import RSS feed capture class
from gathernews.gRSS import CaptureFeeds

# File path to where "feeds_list.txt" is located. An optional first
# command-line argument overrides the default, so the example can run on
# machines other than the author's without editing the script.
# NOTE(review): the default ends with a path separator; presumably
# CaptureFeeds appends the file name directly - keep the trailing separator
# on any override until confirmed.
if len(sys.argv) > 1:
    file_path = sys.argv[1]
else:
    file_path = "/home/tyler/code/GatherNews/examples/"

# Instantiate the class
capture_feeds = CaptureFeeds(file_path)

# Create tables, load database, remove duplicates
capture_feeds.load_db()
def setUp(self):
    """Mark the fixture as live and build a CaptureFeeds for the tests dir.

    The tests directory is resolved against the current working directory.
    """
    assert not self.cls_initialized
    self.cls_initialized = True

    # Earlier variant of the path, kept for reference:
    # os.path.abspath("") + "/GatherNews/gathernews/tests/"
    working_dir = os.path.abspath("")
    self.path = working_dir + "/gathernews/tests/"

    self.capture_feeds = CaptureFeeds(self.path)
def __init__(self, file_path="/home/tyler/code/GatherNews/gathernews/tests/"):
    """Create the ``CaptureFeeds`` object used by this instance.

    Args:
        file_path: file path to where "feeds_list.txt" is located. The
            default is the original hard-coded location, so existing
            zero-argument callers behave as before; pass another directory
            to run on a different machine.
    """
    self.capture_feeds = CaptureFeeds(file_path)