Пример #1
0
    def _descr_html2ubb(string: str) -> str:
        """Convert an HTML fragment into BBCode markup.

        :param string: HTML source text.
        :return: the BBCode rendering of *string*.
        """
        parser = HTML2BBCode()
        return str(parser.feed(string))
Пример #2
0
 def format(cls, fanfic: Story) -> None:
     """Convert every chapter body of *fanfic* from HTML to BBCode in place."""
     newline_pattern = re.compile("\n")
     for part in fanfic.chapters:
         # Pretty-printed HTML carries cosmetic newlines; strip them before
         # parsing so they do not leak into the BBCode output.
         flattened = newline_pattern.sub('', part.processed_body)
         part.processed_body = HTML2BBCode().feed(flattened)
 def forwards(self, orm):
     """Backfill ``bbcode_content`` from HTML ``content`` for all comments.

     South data migration: models are accessed through the orm registry
     (``orm['appname.ModelName']``) rather than direct imports, so the
     migration stays valid as the models evolve.
     """
     for record in orm['offers.Comment'].objects.all():
         converter = HTML2BBCode()
         record.bbcode_content = unicode(converter.feed(record.content))
         record.save()
Пример #4
0
def parse_content(forum_url, sleep_time, autoposting):
    """Scrape every thread from a XenForo forum section and either re-post
    it through the API or save it to a per-thread text file.

    :param forum_url: URL of the forum section to scrape.
    :param sleep_time: delay passed to ``get_html`` between requests.
    :param autoposting: when truthy (and ``API_SETTINGS`` is configured),
        threads are re-posted via ``api_create_thread``; otherwise each
        thread is written to ``<base_folder>/<thread_id>.txt``.
    """
    bbcode_parser = HTML2BBCode()
    base_html = get_html(forum_url, sleep_time)
    base_url = re.search(r'.+\.[a-zA-Z]+/', forum_url)[0]
    base_folder = re.search(r'://(.+)/', base_url)[0][3:-1]

    if not autoposting:
        try:
            os.mkdir(base_folder)
            print(f'Folder with name {base_folder} successfully created')
        except FileExistsError:
            print(f'Folder with name "{base_folder}" already exists')

    bs = BeautifulSoup(base_html, 'html5lib')
    forum_structItemContainer = bs.find(class_='structItemContainer')
    threads = forum_structItemContainer.find_all(
        'a', {'data-xf-init': 'preview-tooltip'})
    threads_links = [base_url + thread.get('href') for thread in threads]

    for thread_link in threads_links:
        thread_tags = []
        # NOTE(review): sleep_time is not forwarded here, unlike the call
        # above -- confirm get_html has a suitable default delay.
        thread_html = get_html(thread_link)
        thread = BeautifulSoup(thread_html, 'html5lib')
        thread_title = thread.find('h1', {'class': 'p-title-value'}).text
        print('Parsing:' + thread_title)
        thread_tagList = thread.find('span', {'class': 'js-tagList'})
        if thread_tagList is not None:
            for tag_text in thread_tagList.find_all('a'):
                thread_tags.append(tag_text.text)
        thread_creator = thread.find('h4', {'class': 'message-name'}).text

        thread_content = thread.find('div', {'class': 'bbWrapper'})
        # Strip the opening <div ...> and closing </div> tags before
        # feeding the inner HTML to the BBCode converter.
        thread_content = str(bbcode_parser.feed(str(thread_content)[23:-6]))
        if API_SETTINGS is not None and autoposting:
            api_create_thread(thread_title, thread_content, thread_tags)
        else:
            try:
                thread_file = thread_link.replace(base_url, "").replace(
                    "/", "").replace("threads", "")
                # 'with' guarantees the file is closed even on write errors.
                with codecs.open(base_folder + '/' + thread_file + '.txt',
                                 'w+', 'utf-8') as out:
                    # Bug fix: the original wrote thread_title again under
                    # "Thread text" instead of the converted thread_content.
                    out.write(f'Thread title:\n{thread_title}\n\n'
                              f'Thread tags:\n{thread_tags}\n\n'
                              f'Thread creator:\n{thread_creator}\n\n'
                              f'Thread text:\n{thread_content}')
                # Also fixes the "Threade" typo in the status message.
                print(f'Thread {thread_file} successfully saved\n')
            except Exception as exc:
                print(f'An error occurred while saving the theme: {exc}')
Пример #5
0
def html2ubb(html: str) -> str:
    """Return the BBCode equivalent of an HTML fragment."""
    parser = HTML2BBCode()
    return str(parser.feed(html))
Пример #6
0
def html2ubb(html: str) -> str:
    """Convert HTML to BBCode, collapsing double newlines.

    Each non-overlapping "\\n\\n" pair in the converter output is replaced
    by a single newline.
    """
    converted = str(HTML2BBCode().feed(html))
    return re.sub("\n\n", "\n", converted)
    def export(self):
        """Render this release's notes into forum-ready BBCode.

        Reads ``docs/releases/<version>.txt`` (reStructuredText), prepends
        announcement boilerplate, rewrites ``:gitlab-issue:`` directives into
        explicit issue links, renders the result to HTML with docutils,
        filters the element tree, and finally converts the HTML to BBCode
        with a set of tag substitutions.

        :return: the BBCode text of the announcement.
        """
        path_documentation = Path(
            BASE_DIR) / '..' / 'docs' / 'releases' / '{}.txt'.format(
                self.version)

        with path_documentation.open(mode='r') as file_object:
            content = []

            # Fixed announcement header.
            content.append('New version of Mayan EDMS available\n')
            content.append('===================================\n\n\n\n\n\n')

            content.append(
                'Please read the release notes before upgrading: '
                'https://docs.mayan-edms.com/releases/{}.html\n\n'.format(
                    self.version))

            content.append('Package locations\n')
            content.append('=================\n\n\n\n\n\n\n\n')

            content.append('Docker image available at: '
                           'https://hub.docker.com/r/mayanedms/mayanedms\n\n')
            content.append('Python packages available at: '
                           'https://pypi.org/project/mayan-edms/{}/ and '
                           'installed via:\n\n'.format(self.version))

            content.append('``pip install mayan-edms=={}``\n\n'.format(
                self.version))
            # Skip the first four lines of the notes file — presumably its
            # own title block; TODO confirm the file layout.
            file_object.readline()
            file_object.readline()
            file_object.readline()
            file_object.readline()
            for line in file_object:
                if ':gitlab-issue:' in line:
                    # Rewrite ':gitlab-issue:`NNN` text' into an explicit
                    # reST hyperlink to the GitLab issue tracker.
                    line_parts = line.split('`')

                    result = (
                        '- `GitLab issue #{} '
                        '<https://gitlab.com/mayan-edms/mayan-edms/issues/{}>`_ {}'
                        .format(line_parts[1], line_parts[1], line_parts[2]))

                    content.append(result)
                else:
                    content.append(line)

        # Render the assembled reST to an HTML fragment via docutils.
        parts = core.publish_parts(source=''.join(content), writer_name='html')
        html_fragment = '{}{}'.format(parts['body_pre_docinfo'],
                                      parts['fragment'])

        result = ReleaseNoteExporter.filter_elements(
            tree=html.fromstring(html_fragment))

        # Drop the first filtered element — presumably the document title;
        # confirm against filter_elements.
        result = result[1:]
        # NOTE(review): the filtered elements appear to be bytes; str() on
        # the joined bytes yields the "b'...'" repr rather than a decoded
        # string — confirm this is intended before changing it.
        html_output = str(b''.join(result))

        # Normalize docutils' <tt> literals to <code> before BBCode parsing.
        html_replace_list = (
            ('<tt', '<code'),
            ('</tt>', '</code>'),
        )

        for html_replace_item in html_replace_list:
            html_output = html_output.replace(*html_replace_item)

        parser = HTML2BBCode()

        result = str(parser.feed(html_output))

        # Map generic BBCode tags onto the target forum's supported markup.
        bbcode_replace_list = (
            ('[h1]', '\n[size=150]'),
            ('[/h1]', '[/size]\n'),
            ('[h2]', '\n[size=150]'),
            ('[/h2]', '[/size]\n'),
            ('[h3]', '\n[b]'),
            ('[/h3]', '[/b]\n'),
            ('[li]', '\n[*]'),
            ('[/li]', ''),
            ('[code]', '[b][i]'),
            ('[/code]', '[/i][/b]'),
        )

        for bbcode_replace_item in bbcode_replace_list:
            result = result.replace(*bbcode_replace_item)

        return result
Пример #8
0
def to_bbcode(descr):
    """Return the BBCode rendering of the HTML description *descr*."""
    return HTML2BBCode().feed(descr)
Пример #9
0
import json
import logging as log
import os
import pickle
import shutil
import subprocess
import sys

from fuzzywuzzy import fuzz
from html2bbcode.parser import HTML2BBCode
from settings import (ANNOUNCE, FUZZ_RATIO, PASSWORD, USERNAME, WM2_MEDIA,
                      WM2_ROOT, WORKING_ROOT)
from whatapi import WhatAPI, ext_matcher, locate
from wmapi import artistInfo, releaseInfo, torrentGroup

html_to_bbcode = HTML2BBCode()

VERSION = "0.7b"
gazelle_url = 'https://passtheheadphones.me/'
resumeList = set([])
potential_uploads = 0
headers = {
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3)'\
        'AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.79'\
        'Safari/535.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9'\
        ',*/*;q=0.8',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
Пример #10
0
def leech_one_entry (entry_url):
    html = get_html (entry_url)
    soup = BeautifulSoup(html)
    class_entry_name = 'div.'+'entry-content'
    category = soup.select('section.entry-category')[0].find('a').string
    category =  u'{0}'.format(category)
    print category
    print 'type(category)',type(category)
    quotes_wrappers = soup.select(class_entry_name)[0]
    noidung =  u'{0}'.format(quotes_wrappers)
    print 'type of entry-title' ,type(quotes_wrappers)
    titlea = quotes_wrappers.find('h1', attrs={'class': 'entry-title'}).string
    print 'type of titlea',type(titlea)
    titlea =  u'{0}'.format(titlea)
    #titlea = titlea.replace(u'–', u'-')
    print 'title' , titlea

    print '#########'

    parser = HTML2BBCode()
    bbcode = parser.feed(noidung)
    bbcode = bbcode.encode('utf-8')
    p = re.compile( '\s*Posted on.*?(?P<caiquaigi>\[img\])',re.DOTALL)
    bbcode = p.sub( '\g<caiquaigi>', bbcode)
    prefix_links = ['http://rapidgator.net/file/','http://uploaded.net/file/','http://www.uploadable.ch/file/']
    stuff = map(lambda w: bbcode.find(w) , prefix_links)
    print stuff
    min_index_bbcode_host =  min(i for i in stuff if i > 0)
    print 'min_index_bbcode_host',min_index_bbcode_host

    code_part = bbcode[min_index_bbcode_host:]
    n_last_index = bbcode.rfind('\n',min_index_bbcode_host-10,min_index_bbcode_host)
    description =  bbcode[:n_last_index]
    description = description.replace('[b] [/b]',u'').replace(u'[/img][/url]',u'[/img]')
    print 'description',description
    
    print '## Find links'
    prefix_links = {'rg':'http://rapidgator.net/file/','ul':'http://uploaded.net/file/','up':'http://www.uploadable.ch/file/'}
    links_dict ={}
    for key,prefix_link in prefix_links.iteritems():
        links = re.findall('('+prefix_link+'.*?)[\[\]\n\r]', code_part, re.DOTALL)
        links =  unique_list(links)
        linktxt = ''
        for link in links:
            linktxt = linktxt + urllib.unquote(link).decode('utf-8') +'\n'
        print linktxt
        links_dict[key] = linktxt
    print links_dict
    
    new_instance = Ulnew.objects.get_or_create (
                                         title= titlea
              

                                        )[0]
    new_instance.category = category                                    
    new_instance.description = description
    new_instance.rg= links_dict['rg']
    new_instance.ul = links_dict['ul']
    new_instance.up = links_dict['up']
    new_instance.save()
    print 'ok'   
Пример #11
0
def leech_one_entry_freedl2u(ahref_title):
    """Scrape one entry from a freedl2u-style (DLE) site and save it.

    Skips entries already imported, extracts title/category/body, converts
    the body HTML to BBCode, splits description from download links, and
    stores the result as a new ``Ulnew`` record.
    (Python 2 code: print statements, dict.iteritems, urllib.unquote.
    The triple-quoted blocks below are commented-out code kept verbatim.)

    :param ahref_title: two-item sequence ``(entry_url, title)``.
    :return: '' when the entry already exists or no known host link is
        found; otherwise None after saving.
    """
    print 'ahref_title', ahref_title
    entry_url = ahref_title[0]
    title = ahref_title[1]
    # Skip entries that were already imported.
    if Ulnew.objects.filter(title=title).exists():
        print 'entry nay da ton tai %s' % title
        return ''
    html = get_html(entry_url)
    #print 'full entry html',html
    soup = BeautifulSoup(html)
    title = u'{0}'.format(soup.find(id="news-title").string)
    print 'title', title
    category = soup.find_all('span', attrs={'itemprop': 'title'})[-1].string
    print 'category', category
    print 'type of category', type(category)
    '''
    dlecontent = soup.find(id="dle-content")
    print dlecontent
    '''
    class_entry_name = 'div.' + 'base.fullstory'
    fullstory = soup.select(class_entry_name)[0]
    #print fullstory
    subfull = fullstory.select('div.maincont')[0]
    print 'subfull', subfull
    #category = soup.select('section.entry-category')[0].find('a').string
    #category =  u'{0}'.format(category)
    #print category
    # Normalize <br> tags to newlines and drop the host label before
    # converting the HTML body to BBCode.
    noidung = u'{0}'.format(subfull).replace('<br/>', '\n').replace(
        '<br>', '\n').replace('Rapidgator.net:', '')
    parser = HTML2BBCode()
    bbcode = parser.feed(noidung).replace(
        'Buy Premium To Support Me  Get Resumable Support  Max Speed', '')

    #print 'type of enoidung' ,type(noidung)
    print 'noidung', noidung, bbcode
    '''
    titlea = subfull.find('h1', attrs={'class': 'entry-title'}).string
    print 'type of titlea',type(titlea)
    titlea =  u'{0}'.format(titlea)
    print 'type of titlea',type(titlea)
    #titlea =  titlea.string
    #titlea = titlea.replace(u'–', u'-')
    print 'title' , titlea
    '''
    print '#########'
    '''
    #bbcode = bbcode.encode('utf-8')
    p = re.compile( '\s*Posted on.*?(?P<caiquaigi>\[img\])',re.DOTALL)
    bbcode = p.sub( '\g<caiquaigi>', bbcode)
    p = re.compile( '\[url=.*?\]',re.DOTALL)
    bbcode = p.sub( '', bbcode)
    '''

    # Known file-host URL prefixes; the earliest match marks where the
    # download-links section begins.
    prefix_links = [
        'http://rapidgator.net/file/', 'http://uploaded.net/file/',
        'http://www.uploadable.ch/file/', 'http://www.nitroflare.com/view/',
        'http://nitroflare.com/view'
    ]
    stuff = map(lambda w: bbcode.find(w), prefix_links)
    print 'stuff', stuff
    # NOTE(review): the bare except hides real errors; it appears intended
    # only for the ValueError min() raises when no host link is found.
    # Also note 'i > 0' skips a link found at index 0 -- confirm.
    try:
        min_index_bbcode_host = min(i for i in stuff if i > 0)
    except:
        return ''
    print 'min_index_bbcode_host', min_index_bbcode_host

    # Text before the newline preceding the first host link is the
    # description; everything from the link on is the links section.
    code_part = bbcode[min_index_bbcode_host:]
    n_last_index = bbcode.rfind('\n', min_index_bbcode_host - 10,
                                min_index_bbcode_host)
    description = bbcode[:n_last_index]
    description = description.replace('[b] [/b]', u'').replace(
        u'[/img][/url]', u'[/img]'
    ).replace(
        '[img]http://sharenxs.com/photos/2014/10/24/54498d402d290/tn-f0912e470f81f8acf2c127d9a94b5983.jpg[/img]',
        '').replace('nitroflare', '')
    print 'description', description

    print '## Find links'
    prefix_links = {
        'rg': 'http://rapidgator.net/file/',
        'ul': 'http://uploaded.net/file/',
        'up': 'http://www.uploadable.ch/file/'
    }
    links_dict = {}
    for key, prefix_link in prefix_links.iteritems():
        # A link runs from its prefix to the next bracket or line break.
        links = re.findall('(' + prefix_link + '.*?)[\[\]\n\r]', code_part,
                           re.DOTALL)
        links = unique_list(links)
        linktxt = ''
        for link in links:
            linktxt = linktxt + urllib.unquote(link).decode('utf-8') + '\n'
        print linktxt
        links_dict[key] = linktxt
    print links_dict

    new_instance = Ulnew.objects.get_or_create(title=title)[0]
    new_instance.category = category
    new_instance.description = description
    new_instance.rg = links_dict['rg']
    new_instance.ul = links_dict['ul']
    new_instance.up = links_dict['up']
    new_instance.save()
    print 'ok'
Пример #12
0
    return Plugin(f'HandyNotes_{name[3:]}', version, tag, dir)


def get_changelog(plugin, bbcode=False, full=False):
    """Return the plugin's changelog text.

    Reads CHANGELOG.md from the plugin directory and returns either the
    whole file (``full=True``) or just the newest release section. With
    ``bbcode=True`` the text is rendered to HTML and then to BBCode.
    """
    with open(path.join(plugin.dir, 'CHANGELOG.md')) as handle:
        text = handle.read()

    if full:
        changelog = text
    else:
        # Newest section: everything between the first two "# vNNN" headers.
        section = re.match(r'^# v\d+\s+(.+?)# v\d+', text, re.M | re.S)
        if section:
            changelog = section.group(1).strip() + '\n'
        else:
            changelog = 'No changelog entries for this release.\n'

    if bbcode:
        changelog = markdown(changelog)  # to html
        changelog = HTML2BBCode('bbcode.ini').feed(changelog)  # to bbcode

    return changelog


def get_wow_version(plugin):
    """Derive the WoW version string (e.g. '11.0.2') from the interface
    number declared in the plugin's .toc file."""
    with open(path.join(plugin.dir, f'{plugin.name}.toc')) as handle:
        interface = re.search(
            r'## Interface: (\d+)', handle.read(), re.M).group(1)
    # A 5-digit interface number lacks the leading zero on the major
    # version; pad it so the 2-digit slicing below lines up.
    if len(interface) == 5:
        interface = f'0{interface}'
    pairs = [interface[pos:pos + 2] for pos in range(0, len(interface), 2)]
    return '.'.join(str(int(pair)) for pair in pairs)


# -----------------------------------------------------------------------------

Пример #13
0
 def convert(self, text):
     """Convert *text* from HTML to BBCode.

     The markup is normalized first, and the document's default font size
     is detected and forwarded to the BBCode converter.
     """
     soup = BeautifulSoup(text, 'lxml')
     self.__formalize_html(soup)
     default_size = self._get_default_fontsize(text)
     bbcode = HTML2BBCode().feed(str(soup), default_fontsize=default_size)
     return self.__postprocess_text(str(bbcode))