def fds(self):
    """Fill in subject, title, author and text for an 'fds' article page.

    The subject is fixed to 'feminisme' and the existing title is prefixed
    with 'fds '. The text is narrowed to the result of the 'article' lookup,
    the author is taken from an 'li' lookup on that narrowed text, and the
    text is then cut so that it starts at the first '<' found after the
    '<article>' marker. Every '<div>' and '</div>' is finally passed to
    ``self.replace`` with no replacement argument.

    NOTE(review): the exact return value of ``findTextBetweenTag`` (with or
    without the enclosing tags) is not visible here - confirm.
    """
    self.subject = 'feminisme'
    # The current title is reused and only prefixed; an earlier variant
    # derived it from the lower-cased 'title' tag of the page instead.
    self.title = 'fds ' + self.title
    self.text = findTextBetweenTag(self.text, 'article')
    self.author = findTextBetweenTag(self.text, 'li')

    # Jump past the '<article>' marker, then on to the next tag opening.
    opening = '<article>'
    cursor = self.index(opening) + len(opening)
    cursor = self.index('<', cursor)
    self.text = self.text[cursor:]

    for tag in ('<div>', '</div>'):
        self.replace(tag)
def unisciel(self, subject):
    """Fill in the article fields for a 'unisciel' course page.

    Sets the subject to 'cours' and the author to 'unisciel', takes the
    lower-cased content of the first '<td>' cell as title, keeps the text
    from the first '<p>' to the last '</p>', normalises paragraph and image
    markup through an ordered series of replacements, rewrites '../res/'
    paths to 'bv-<subject>/', swaps two known images for inline formulas
    and registers the 'unisciel.css' stylesheet.

    Args:
        subject: name concatenated into the 'bv-<subject>/' path that
            replaces '../res/'.
    """
    self.subject = 'cours'
    self.author = 'unisciel'
    self.text = findTextBetweenTag(self.text, 'body')
    self.cleanWeb()
    self.delScript()

    # Title: content of the first table cell.
    cell_open = '<td>'
    start = self.index(cell_open) + len(cell_open)
    end = self.index('</td>', start)
    self.title = self.text[start:end].lower()

    # Body: from the first paragraph opening to the last paragraph closing.
    para_close = '</p>'
    start = self.index('<p>')
    end = self.rindex(para_close) + len(para_close)
    self.text = self.text[start:end]

    # The order of these replacements matters: each one works on the
    # output of the previous one.
    markup_fixes = (
        ('<div>',),
        ('</div>',),
        ('<img', '</p><img'),
        ("png'>", "png'><p>"),
        ('<p><p>', '<p>'),
        ('</p></p>', '</p>'),
        ('<p>', '</p><p>'),
        ('</p>', '</p><p>'),
        ('<p></p>',),
        ('<p><p>', '<p>'),
        ('<p><img', '<img'),
        ("png'></p>", "png'>"),
    )
    for fix in markup_fixes:
        self.replace(*fix)

    # Drop the first 4 and last 3 characters - presumably the stray
    # '</p>' and '<p>' produced by the replacements above (confirm).
    self.text = self.text[4:-3]

    folder = 'bv-' + subject + '/'
    self.replace('../res/', folder)
    # Two specific images are replaced by their textual formula.
    formula_images = (
        ('apprendre_ch2_01.png', ' <b>ADP +P --> ATP</b> '),
        ('apprendre_ch2_01_1.png', ' <b>ATP --> ADP +P</b> '),
    )
    for image, formula in formula_images:
        self.replace("</p><img src='" + folder + image + "'><p>", formula)

    self.styles.append('unisciel.css')
def aooo(self, subject=None):
    """Extract title, author, subject and story text from a work page.

    The page layout handled here ('/users/' and '/works/' links, 'Work
    Text:' and 'Actions' headings) is presumably that of Archive of Our
    Own - confirm. If the page only contains the adult-content notice, a
    message is printed and nothing else is done.

    Args:
        subject: forwarded unchanged to ``self.findSubject``.
    """
    adult_notice = 'This work could have adult content. If you proceed you have agreed that you are willing to see such content'
    if adult_notice in self.text:
        print('fichier protégé', self.title)
        return

    self.cleanWeb()
    self.replace('<br>', '</p><p>')
    self.text = findTextBetweenTag(self.text, 'body')

    # Title: content of the first <h2>, stripped of whitespace, then of dots.
    start = self.index('<h2>') + len('<h2>')
    end = self.index('</h2>', start)
    self.title = self.text[start:end].strip().strip('.')

    # Author and link to the author's page: the first '/users/' anchor
    # located after the title.
    start = self.index("<a href='/users/", end) + len("<a href='")
    end = self.index("'", start)
    self.autlink = self.text[start:end]
    start = self.index('>', start) + 1
    end = self.index('</a>', start)
    self.author = self.text[start:end]

    # Subject: when findSubject settles on 'histoire', use the text of the
    # first additional tag instead.
    self.findSubject(subject)
    if self.subject == 'histoire':
        # 24 = the 20-character marker plus 4 more characters
        # (presumably an '<li>' - confirm).
        start = self.index('Additional Tags:<ul>') + 24
        start = self.index('>', start) + 1
        end = self.index('<', start)
        self.subject = self.text[start:end]

    # Story body. A single-chapter work has a 'Work Text:' heading; when
    # that heading is missing (index gives -1) the work has several
    # chapters and the body starts at the first chapter link.
    single_heading = '<h3>Work Text:</h3>'
    chapter_link = "<h3><a href='/works/"
    found = self.index(single_heading)
    if found == -1:
        start = self.index(chapter_link)
    else:
        start = found + len(single_heading)
    end = self.rindex('<h3>Actions</h3>')
    self.text = self.text[start:end]

    for leftover in ('<div>', '</div>', '<h3>Chapter Text</h3>'):
        self.replace(leftover)

    # Turn chapter links into <h2> headings by dropping the anchor opening.
    if self.contain(chapter_link):
        chapters = List()
        chapters.fromText(chapter_link, self.text)
        for c in chapters.range(1):
            piece = chapters[c]
            chapters[c] = piece[piece.find('>') + 1:]
        self.text = '<h2>'.join(chapters)
        self.replace('</a></h3>', '</h2>')

    # Headings that carry a chapter name ('Chapter N</a>: name</h3>') are
    # still unclosed at this point: keep only the name and close the <h2>.
    if self.contain('<h2>Chapter ') and not self.contain('</h2>'):
        chapters = List()
        chapters.fromText('<h2>Chapter ', self.text)
        name_sep = '</a>: '
        for c in chapters.range(1):
            piece = chapters[c]
            piece = piece[piece.find(name_sep) + len(name_sep):]
            chapters[c] = piece.replace('</h3>', '</h2>', 1)
        self.text = '<h2>'.join(chapters)

    # Clean-up: cut a notes section only when it starts in the second half
    # of the text.
    notes_heading = '<h3>Notes:</h3>'
    if self.contain(notes_heading):
        half = self.length() / 2
        notes_at = self.index(notes_heading)
        if notes_at > half:
            self.text = self.text[:notes_at]

    self.usePlaceholders()
def ficAooo(self, subject=None):
    """Read the title of a work page, then delegate to ``ficAoooCommon``.

    If the page only contains the adult-content notice, a message is
    printed and nothing else is done. Otherwise the page is cleaned,
    '<br>' is turned into a paragraph break, the text is narrowed to the
    'body' lookup and the title is read from the first <h2>.

    Args:
        subject: accepted for signature compatibility; it is not used in
            this method.
    """
    adult_notice = 'This work could have adult content. If you proceed you have agreed that you are willing to see such content'
    if adult_notice in self.text:
        print('fichier protégé', self.title)
        return

    self.cleanWeb()
    self.replace('<br>', '</p><p>')
    self.text = findTextBetweenTag(self.text, 'body')

    # Title: content of the first <h2>, stripped of whitespace, then of dots.
    heading_open = '<h2>'
    start = self.index(heading_open) + len(heading_open)
    end = self.index('</h2>', start)
    self.title = self.text[start:end].strip().strip('.')

    self.ficAoooCommon()