def get_entries(self, n=None):
    '''
    Scrape the phone site, store the scraped entries and return entries.

    Each entry consists of a phone number, the number of entries with
    this number, and a comment.

    Depending on n:
      * n is None -- self._get_db_entries() is called without a limit
        argument (the helper is said to default to 60) and self.entries
        is returned.
      * n <= number of scraped entries -- the first n scraped entries
        are returned; the database is not queried.
      * n > number of scraped entries -- a warning is logged,
        self._get_db_entries(n) is called and self.entries is returned.
    '''
    # Download the page behind scraper.PHONE_SITE and parse it.
    with urllib.request.urlopen(scraper.PHONE_SITE) as page:
        raw_html = page.read()
    scraped = scraper.Parser(raw_html).parse()

    # Persist the freshly scraped entries before answering.
    self._insert_db_entries(scraped)

    if n is None:
        # No explicit limit: rely on the default of _get_db_entries().
        self._get_db_entries()
        return self.entries

    scraped_count = len(scraped)
    if n <= scraped_count:
        return scraped[:n]

    # For ordinary numbers exactly one of the two comparisons holds, so
    # this is the "page did not have enough results" case.
    if n > scraped_count:
        template = (
            'Asking to display %d results, '
            'but only %d results found on the page. '
            'Looking for more results in the database.'
        )
        logging.warning(template % (n, scraped_count))
        self._get_db_entries(n)
        return self.entries
def get_all_entries(self):
    '''
    Scrape the phone site, store the results and return every stored entry.

    The page at scraper.PHONE_SITE is fetched and parsed, the parsed
    entries are handed to self.insert_entries(), and then all rows of the
    Numbers table in numbers.db are read back ordered by date.

    Returns a list with one scraper.PhoneNumberEntry per row, built from
    the first three columns of the row.

    Any exception raised while opening or reading the database is printed
    to stdout and re-raised unchanged. The connection is closed in every
    case, including when an error occurs.
    '''
    html = scraper.ValidUAOpener().open(scraper.PHONE_SITE).read()
    self.insert_entries(scraper.Parser(html).parse())

    con = None
    try:
        con = lite.connect('numbers.db')
        # NOTE(review): `lite` looks like sqlite3, whose connection context
        # manager only scopes the transaction and does not close the
        # connection -- hence the explicit close() in `finally`.
        with con:
            cur = con.cursor()
            cur.execute('SELECT * FROM Numbers ORDER BY date;')
            rows = cur.fetchall()
        return [
            scraper.PhoneNumberEntry(row[0], row[1], row[2])
            for row in rows
        ]
    except Exception as e:
        print('Failed to connect to numbers.db', e)
        raise
    finally:
        # Runs on success and on failure, so the connection is not leaked
        # when the query or the row conversion raises.
        if con is not None:
            con.close()
def __init__(self, names, target_dir, template_name, max_page=2):
    '''
    Store the constructor arguments on the instance and create a parser.

    names, target_dir and template_name are kept unchanged under
    attributes of the same name; max_page (default 2) likewise. A new
    scraper.Parser() is created and kept in the private _parser attribute.
    '''
    # Plain configuration, stored as given.
    self.names, self.target_dir = names, target_dir
    self.template_name, self.max_page = template_name, max_page

    # Parser instance owned by this object.
    self._parser = scraper.Parser()
def get_entries(self, n=None):
    '''
    Scrape the phone site, store the scraped entries and return entries.

    The page at scraper.PHONE_SITE is fetched and parsed and the parsed
    entries are handed to self.insert_entries().

    Depending on n:
      * n is None -- all scraped entries are returned.
      * n <= number of scraped entries -- the first n scraped entries
        are returned.
      * n > number of scraped entries -- the result of
        self.get_db_entries(n) is returned.
    '''
    html = scraper.ValidUAOpener().open(scraper.PHONE_SITE).read()
    entries = scraper.Parser(html).parse()
    self.insert_entries(entries)

    if n is None:
        return entries

    # `<=` rather than `<`: when the page holds exactly n entries the
    # request is already satisfied, so no database lookup is needed.
    if n <= len(entries):
        return entries[:n]

    return self.get_db_entries(n)