def _insert_show(self):
    # Create the Show row for this object; it must not exist beforehand.
    assert not self.exists()

    record = Show(
        name=self.title,
        id=self.number,
        last_updated=datetime.now(),
    )
    # The name is passed through strip_4_bytes before the row is written.
    cleaned_name = strip_4_bytes(record.name)
    record.name = cleaned_name
    record.save()

    # Sanity check: the row must be visible once it has been saved.
    assert self.exists()
def save(self):
    """Save this show and the topics, notes and entries found in ``self.list_div``.

    A ``Show`` row is created first from ``self.number`` and ``self.title``.
    Each direct child of ``self.list_div`` is then treated as a topic: its
    first ``li`` whose class matches ``outline`` holds the topic name and,
    in its direct ``ul`` children, the notes. For every note a ``Note`` row
    is saved, followed by one ``UrlEntry`` per content item that contains a
    link and a single ``TextEntry`` holding the remaining item texts joined
    with ``<br>`` (saved even when there are no plain-text items).

    Children without an outline ``li``, topics without notes and notes
    without any child element are skipped.

    Raises:
        AssertionError: if a show with ``self.number`` already exists.
    """
    assert not Show.exists(self.number)
    show = Show(id=self.number, name=self.title, last_updated=datetime.now())
    show.save()

    # Compiled once and reused for every class lookup below.
    re_outline = re.compile('outline')

    for t in self.list_div.findChildren(recursive=False):
        topic_item = t.findChild('li', {'class': re_outline})
        if topic_item is None:
            # No outline item in this child; skip it instead of failing
            # with an AttributeError after the Show row is already saved.
            continue

        notes = topic_item.findChildren(
            'ul', {'class': re_outline}, recursive=False)
        if not notes:
            continue

        # The topic name is the item's text up to the point where the text
        # of its first child element begins.
        topic_text = topic_item.text
        topic_name = topic_text[:topic_text.index(topic_item.findChild().text)]
        topic = ensure_topic_exists(topic_name)

        for n in notes:
            note_item = n.findChild('li', {'class': re_outline})
            if note_item is None or note_item.findChild() is None:
                continue

            # Same convention as for topics: title = text before first child.
            note_text = note_item.text
            note_title = note_text[:note_text.index(note_item.findChild().text)]
            note = Note(show=show, topic=topic, title=note_title)
            note.save()

            contents = note_item.findChildren('ul', {'class': re_outline})
            text = []
            for content in contents:
                link = content.findChild('a')
                if link:
                    UrlEntry(note=note, text=link.text,
                             url=link['href']).save()
                else:
                    text.append(content.text)
            TextEntry(note=note,
                      text='<br>'.join(text).replace('\n', '')).save()
def save(self):
    """Save this show and the topics, notes and entries found in ``self.list_div``.

    A ``Show`` row is created first from ``self.number`` and ``self.title``.
    Each direct child of ``self.list_div`` is then treated as a topic: its
    first ``li`` whose class matches ``outline`` holds the topic name and,
    in its direct ``ul`` children, the notes. For every note a ``Note`` row
    is saved, followed by one ``UrlEntry`` per content item that contains a
    link and a single ``TextEntry`` holding the remaining item texts joined
    with ``<br>`` (saved even when there are no plain-text items).

    Children without an outline ``li``, topics without notes and notes
    without any child element are skipped.

    Raises:
        AssertionError: if a show with ``self.number`` already exists.
    """
    assert not Show.exists(self.number)
    show = Show(id=self.number, name=self.title, last_updated=datetime.now())
    show.save()

    # Compiled once and reused for every class lookup below.
    re_outline = re.compile('outline')

    for t in self.list_div.findChildren(recursive=False):
        topic_item = t.findChild('li', {'class': re_outline})
        if topic_item is None:
            # No outline item in this child; skip it instead of failing
            # with an AttributeError after the Show row is already saved.
            continue

        notes = topic_item.findChildren(
            'ul', {'class': re_outline}, recursive=False)
        if not notes:
            continue

        # The topic name is the item's text up to the point where the text
        # of its first child element begins.
        topic_text = topic_item.text
        topic_name = topic_text[:topic_text.index(topic_item.findChild().text)]
        topic = ensure_topic_exists(topic_name)

        for n in notes:
            note_item = n.findChild('li', {'class': re_outline})
            if note_item is None or note_item.findChild() is None:
                continue

            # Same convention as for topics: title = text before first child.
            note_text = note_item.text
            note_title = note_text[:note_text.index(note_item.findChild().text)]
            note = Note(show=show, topic=topic, title=note_title)
            note.save()

            contents = note_item.findChildren('ul', {'class': re_outline})
            text = []
            for content in contents:
                link = content.findChild('a')
                if link:
                    UrlEntry(note=note, text=link.text,
                             url=link['href']).save()
                else:
                    text.append(content.text)
            TextEntry(note=note,
                      text='<br>'.join(text).replace('\n', '')).save()
def save(self):
    """Save this show and the topics, notes and entries found in ``self.list_div``.

    A ``Show`` row is created first from ``self.number`` and ``self.title``.
    The children of ``self.list_div`` are then scanned as (heading, body)
    pairs: when the body element contains more than one
    ``div.divOutlineList``, the heading's text becomes the topic name and
    each direct ``p`` of the first outline list that is immediately followed
    by a ``div`` becomes a ``Note``. For every note, one ``UrlEntry`` is
    saved per ``divOutlineItem`` containing a link, and a single
    ``TextEntry`` holds the remaining item texts joined with ``<br>``
    (saved even when there are no plain-text items).

    Raises:
        AssertionError: if a show with ``self.number`` already exists.
    """
    assert not Show.exists(self.number)
    show = Show(id=self.number, name=self.title, last_updated=datetime.now())
    show.save()

    # Keep only children exposing ``findChild``; presumably this drops bare
    # text nodes between the tags.
    topics = [
        e for e in self.list_div.childGenerator() if hasattr(e, 'findChild')
    ]

    i = 0
    # Every step reads topics[i + 1], so the loop must stop while that index
    # is still valid; otherwise a trailing unpaired element raises IndexError.
    while i + 1 < len(topics):
        outline_list = topics[i + 1].findAll(
            'div', {'class': 'divOutlineList'})
        if outline_list is None or len(outline_list) <= 1:
            # Not a (heading, body) pair: slide the window by one element.
            i += 1
            continue

        topic_name = topics[i].text
        topic = ensure_topic_exists(topic_name)

        notes = outline_list[0].findChildren('p', recursive=False)
        for note_div in notes:
            next_sibling = note_div.findNextSibling()
            if next_sibling is None or next_sibling.name != u'div':
                continue

            note = Note(show=show, topic=topic, title=note_div.text)
            note.save()

            # The sibling checked above is the div that holds the note body.
            contents = next_sibling.findChildren(
                '', {'class': 'divOutlineItem'})
            text = []
            for content in contents:
                link = content.findChild('a')
                if link:
                    UrlEntry(note=note, text=link.text,
                             url=link['href']).save()
                else:
                    text.append(content.text)
            TextEntry(note=note,
                      text='<br>'.join(text).replace('\n', '')).save()

        # Heading and body consumed together.
        i += 2
def save(self):
    """Save this show and the topics, notes and entries found in ``self.list_div``.

    A ``Show`` row is created first from ``self.number`` and ``self.title``.
    The children of ``self.list_div`` are then scanned as (heading, body)
    pairs: when the body element contains more than one
    ``div.divOutlineList``, the heading's text becomes the topic name and
    each direct ``p`` of the first outline list that is immediately followed
    by a ``div`` becomes a ``Note``. For every note, one ``UrlEntry`` is
    saved per ``divOutlineItem`` containing a link, and a single
    ``TextEntry`` holds the remaining item texts joined with ``<br>``
    (saved even when there are no plain-text items).

    Raises:
        AssertionError: if a show with ``self.number`` already exists.
    """
    assert not Show.exists(self.number)
    show = Show(id=self.number, name=self.title, last_updated=datetime.now())
    show.save()

    # Keep only children exposing ``findChild``; presumably this drops bare
    # text nodes between the tags.
    topics = [
        e for e in self.list_div.childGenerator() if hasattr(e, 'findChild')
    ]

    i = 0
    # Every step reads topics[i + 1], so the loop must stop while that index
    # is still valid; otherwise a trailing unpaired element raises IndexError.
    while i + 1 < len(topics):
        outline_list = topics[i + 1].findAll(
            'div', {'class': 'divOutlineList'})
        if outline_list is None or len(outline_list) <= 1:
            # Not a (heading, body) pair: slide the window by one element.
            i += 1
            continue

        topic_name = topics[i].text
        topic = ensure_topic_exists(topic_name)

        notes = outline_list[0].findChildren('p', recursive=False)
        for note_div in notes:
            next_sibling = note_div.findNextSibling()
            if next_sibling is None or next_sibling.name != u'div':
                continue

            note = Note(show=show, topic=topic, title=note_div.text)
            note.save()

            # The sibling checked above is the div that holds the note body.
            contents = next_sibling.findChildren(
                '', {'class': 'divOutlineItem'})
            text = []
            for content in contents:
                link = content.findChild('a')
                if link:
                    UrlEntry(note=note, text=link.text,
                             url=link['href']).save()
                else:
                    text.append(content.text)
            TextEntry(note=note,
                      text='<br>'.join(text).replace('\n', '')).save()

        # Heading and body consumed together.
        i += 2
def set_up_database():
    """Populate the database with one show, three topics and three notes.

    Notes 1 and 2 carry text and URL entries; note 3 has none. All notes
    belong to the single show and to the topic with id 1.
    """
    the_show = Show(id=1, name='show name', last_updated=datetime.now())
    the_show.save()

    for topic_id in (1, 2, 3):
        Topic(id=topic_id, name='topic name').save()
    first_topic = Topic.objects.get(id=1)

    # (note id, title, text entries, (url id, url text, url) entries)
    note_specs = (
        (1, 'note 1',
         ('text entry 1', 'text entry 2'),
         ((1, 'url text 1', 'http://foo.com'),
          (2, 'url text 2', 'http://bar.com'),
          (3, 'url text 3', 'http://baz.com'))),
        (2, 'note 2',
         ('text entry 3',),
         ((4, 'url text 4', 'http://foo.com'),)),
        (3, 'note 3', (), ()),
    )

    for note_id, title, texts, urls in note_specs:
        current = Note(id=note_id, show=the_show, topic=first_topic,
                       title=title)
        current.save()
        # Text entries are written before URL entries for each note.
        for body in texts:
            TextEntry(note=current, text=body).save()
        for url_id, label, address in urls:
            UrlEntry(id=url_id, note=current, text=label,
                     url=address).save()
def html_getter(*show_number):
    """Download and store the raw source of every show not yet imported.

    Only the first positional argument is used; it is the highest show
    number to consider. Shows are visited from that number down to
    ``SHOWNOTE_CUTOFF``. Numbers in ``BAD_LIST`` are skipped. For a show
    with no ``Show`` row:

    * if a ``ShowSource`` already exists, its number is appended to
      ``HTML_LIST`` and nothing is downloaded;
    * otherwise the show page is fetched (the ``/shownotes`` URL is used for
      numbers below ``SHOWNOTE_MID_CUTOFF``); if the page links to an
      ``.opml`` file, that file is fetched and stored as an OPML source,
      else the page itself is stored as an HTML source. The number is then
      appended to ``HTML_LIST``.

    Download failures are reported on stdout and the show is skipped.

    Raises:
        AssertionError: if a page links to more than one ``.opml`` file.
    """
    show_number = show_number[0]
    direct_cutoff = SHOWNOTE_MID_CUTOFF
    main_url = 'http://{}.nashownotes.com'
    shownote_url = 'http://{}.nashownotes.com/shownotes'

    for number in reversed(xrange(SHOWNOTE_CUTOFF, show_number + 1)):
        if number in BAD_LIST:
            continue

        if not Show.exists(number):
            if ShowSource.exists(number):
                # Source already downloaded earlier; just queue it.
                HTML_LIST.append(number)
                continue

            if number < direct_cutoff:
                url = shownote_url.format(number)
            else:
                url = main_url.format(number)

            try:
                text = netutils.get_html(url)
            except Exception:
                # Best effort: report and move on. ``Exception`` (not a bare
                # except) so Ctrl-C and interpreter exit still propagate.
                print('Error loading html from: {}'.format(url))
                continue

            # check for opml links
            opml_links = netutils.get_links_to(r'^http://.*\.opml$', text)
            if len(opml_links) > 0:
                assert(len(opml_links) == 1)
                try:
                    opml = netutils.get_html(opml_links[0])
                except Exception:
                    print('Error loading opml from: {}'.format(
                        opml_links[0]))
                    continue
                print(' -> {} opml'.format(number))
                ShowSource(filetype=ShowSource.OPML, text=opml,
                           show_number=number).save()
                HTML_LIST.append(number)
            else:
                print(' -> {} html'.format(number))
                ShowSource(filetype=ShowSource.HTML, text=text,
                           show_number=number).save()
                HTML_LIST.append(number)

            # Short pause between downloads.
            time.sleep(0.01)
        else:
            print('Show {} already imported'.format(number))
def test_show_to_unicode(self):
    # An unsaved Show converted to unicode must read "<id>: <name>".
    unsaved = Show(
        id=7,
        name='showname',
        last_updated=datetime.now(),
    )
    rendered = unicode(unsaved)
    self.assertEqual(rendered, u'7: showname')
def test_no_show_exists(self):
    # With no Show saved by this test, the lookup must be exactly False.
    found = Show.exists(1)
    self.assertIs(found, False)
def test_show_exists(self):
    # After saving a Show with id 7, the lookup must be exactly True.
    stored = Show(id=7, name='showname', last_updated=datetime.now())
    stored.save()
    found = Show.exists(7)
    self.assertIs(found, True)