def fds(self):
    """Reformat a page coming from the 'fds' source.

    Sets the subject to 'feminisme', prefixes the existing title with
    'fds ', reduces the text to what findTextBetweenTag returns for the
    'article' tag, reads the author from the 'li' tag of that text, trims
    the text so that it starts at the first '<' located after the
    '<article>' marker, and finally calls self.replace on the '<div>' and
    '</div>' markers.
    """
    self.subject = 'feminisme'
    # The title is kept as already set on the object; only the prefix is added.
    self.title = 'fds ' + self.title
    self.text = findTextBetweenTag(self.text, 'article')
    self.author = findTextBetweenTag(self.text, 'li')
    # Position just past the '<article>' marker, then the next '<' after it.
    after_marker = self.index('<article>') + len('<article>')
    first_tag = self.index('<', after_marker)
    self.text = self.text[first_tag:]
    # Single-argument replace: presumably deletes the marker -- confirm
    # against the definition of self.replace.
    for div_tag in ('<div>', '</div>'):
        self.replace(div_tag)
def unisciel(self, subject):
    """Reformat a page coming from the 'unisciel' source.

    Sets the subject to 'cours' and the author to 'unisciel', keeps the
    'body' part of the text, takes the lower-cased content of the first
    '<td>' cell as the title, narrows the text to the span running from the
    first '<p>' to the last '</p>', normalises the paragraph / image markup
    through an ordered series of self.replace calls, rewrites the
    '../res/' image prefix to 'bv-<subject>/' and registers the
    'unisciel.css' style sheet.

    subject -- string inserted into the image path prefix 'bv-<subject>/'.
    """
    self.subject = 'cours'
    self.author = 'unisciel'
    self.text = findTextBetweenTag(self.text, 'body')
    self.cleanWeb()
    self.delScript()

    # Title: content of the first table cell.
    title_start = self.index('<td>') + len('<td>')
    title_end = self.index('</td>', title_start)
    self.title = self.text[title_start:title_end].lower()

    # Body: from the first opening paragraph to the last closing one.
    body_start = self.index('<p>')
    body_end = self.rindex('</p>') + len('</p>')
    self.text = self.text[body_start:body_end]

    # The order of these substitutions matters: each one works on the
    # output of the previous one. One-element entries call self.replace
    # with a single argument, exactly as the two-element ones pass both.
    markup_fixes = (
        ('<div>',),
        ('</div>',),
        ('<img', '</p><img'),
        ("png'>", "png'><p>"),
        ('<p><p>', '<p>'),
        ('</p></p>', '</p>'),
        ('<p>', '</p><p>'),
        ('</p>', '</p><p>'),
        ('<p></p>',),
        ('<p><p>', '<p>'),
        ('<p><img', '<img'),
        ("png'></p>", "png'>"),
    )
    for fix in markup_fixes:
        self.replace(*fix)

    # Drop the first 4 and the last 3 characters -- presumably the stray
    # '</p>' and '<p>' left at the edges by the substitutions above; confirm.
    self.text = self.text[4:-3]

    image_dir = 'bv-' + subject + '/'
    self.replace('../res/', image_dir)
    # These two pictures are swapped for a textual formula.
    self.replace(
        "</p><img src='" + image_dir + "apprendre_ch2_01.png'><p>",
        ' <b>ADP +P --> ATP</b> ')
    self.replace(
        "</p><img src='" + image_dir + "apprendre_ch2_01_1.png'><p>",
        ' <b>ATP --> ADP +P</b> ')
    self.styles.append('unisciel.css')
def aooo(self, subject=None):
    """Extract title, author, subject and story text from a downloaded page.

    If the page contains the adult-content warning, a message is printed
    and nothing else is changed. Otherwise the method fills self.title,
    self.autlink, self.author and self.subject, narrows self.text to the
    story itself, turns chapter headings into '<h2>' headings, cuts a
    '<h3>Notes:</h3>' section that starts in the second half of the text,
    and finishes with self.usePlaceholders().

    subject -- forwarded unchanged to self.findSubject.
    """
    adult_warning = 'This work could have adult content. If you proceed you have agreed that you are willing to see such content'
    if adult_warning in self.text:
        print('fichier protégé', self.title)
        return

    self.cleanWeb()
    self.replace('<br>', '</p><p>')
    self.text = findTextBetweenTag(self.text, 'body')

    # Title: content of the first <h2>, without surrounding blanks or dots.
    title_start = self.index('<h2>') + 4
    title_end = self.index('</h2>', title_start)
    self.title = self.text[title_start:title_end].strip().strip('.')

    # Author and link to the author's page, searched after the title.
    # + 9 skips "<a href='" so the link starts at '/users/...'.
    link_start = self.index("<a href='/users/", title_end) + 9
    link_end = self.index("'", link_start)
    self.autlink = self.text[link_start:link_end]
    name_start = self.index('>', link_start) + 1
    name_end = self.index('</a>', name_start)
    self.author = self.text[name_start:name_end]

    # Subject: when findSubject settles on 'histoire', use the text of the
    # first tag found after the 'Additional Tags:<ul>' marker instead.
    self.findSubject(subject)
    if self.subject == 'histoire':
        tags_pos = self.index('Additional Tags:<ul>') + 24
        tag_start = self.index('>', tags_pos) + 1
        tag_end = self.index('<', tag_start)
        self.subject = self.text[tag_start:tag_end]

    # Single-chapter work: the story follows '<h3>Work Text:</h3>'.
    story_start = self.index('<h3>Work Text:</h3>') + 19
    # Multi-chapter work: 18 is -1 + 19, i.e. the marker was not found
    # (this relies on self.index returning -1 on a miss).
    if story_start == 18:
        story_start = self.index("<h3><a href='/works/")
    story_end = self.rindex('<h3>Actions</h3>')
    self.text = self.text[story_start:story_end]

    for leftover in ('<div>', '</div>', '<h3>Chapter Text</h3>'):
        self.replace(leftover)

    # Chapter headings that are links to '/works/...': drop the opening
    # of the link and turn the heading into an <h2>.
    works_heading = "<h3><a href='/works/"
    if self.contain(works_heading):
        chapters = List()
        chapters.fromText(works_heading, self.text)
        for pos in chapters.range(1):
            piece = chapters[pos]
            chapters[pos] = piece[piece.find('>') + 1:]
        self.text = '<h2>'.join(chapters)
        self.replace('</a></h3>', '</h2>')

    # Headings of the form '<h2>Chapter ...' that have no closing </h2>:
    # keep what follows '</a>: ' and close the heading with </h2>.
    if self.contain('<h2>Chapter ') and not self.contain('</h2>'):
        chapters = List()
        chapters.fromText('<h2>Chapter ', self.text)
        for pos in chapters.range(1):
            piece = chapters[pos]
            cut = piece.find('</a>: ') + 6
            chapters[pos] = piece[cut:].replace('</h3>', '</h2>', 1)
        self.text = '<h2>'.join(chapters)

    # Clean the text: a notes section located in the second half is cut off.
    notes_heading = '<h3>Notes:</h3>'
    if self.contain(notes_heading):
        middle = self.length() / 2
        notes_pos = self.index(notes_heading)
        if notes_pos > middle:
            self.text = self.text[:notes_pos]

    self.usePlaceholders()
Пример #4
0
 def ficAooo(self, subject=None):
     """Read the title of a downloaded page, then delegate to ficAoooCommon.

     If the page contains the adult-content warning, a message is printed
     and nothing else is changed. Otherwise the text is cleaned, reduced to
     its 'body' part, the content of the first <h2> becomes the title
     (without surrounding blanks or dots) and self.ficAoooCommon() is
     called.

     NOTE(review): `subject` is accepted but not used in the visible body;
     confirm whether ficAoooCommon is expected to receive it.
     """
     adult_warning = 'This work could have adult content. If you proceed you have agreed that you are willing to see such content'
     if adult_warning in self.text:
         print('fichier protégé', self.title)
         return

     self.cleanWeb()
     self.replace('<br>', '</p><p>')
     self.text = findTextBetweenTag(self.text, 'body')

     # Title: content of the first <h2>.
     title_start = self.index('<h2>') + 4
     title_end = self.index('</h2>', title_start)
     self.title = self.text[title_start:title_end].strip().strip('.')

     self.ficAoooCommon()