Пример #1
0
    def test_page_parser(self):
        """Check that util.parse_page splits header properties from the text.

        The sample page has two properties, one commented-out property and a
        body after the ``---`` separator, so the parsed result is expected to
        hold exactly three entries: ``key``, ``keys`` and ``text``.  The same
        count is expected when the lines around the separator end with
        Windows (CRLF) or old Mac (CR) line endings.
        """
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        args = util.parse_page('key: value\nkeys: one, two\n#ignore: me\n---\nhello, world.')
        self.assertEqual(3, len(args))
        self.assertEqual(args.get('key'), 'value')
        self.assertEqual(args.get('keys'), ['one', 'two'])
        self.assertEqual(args.get('text'), 'hello, world.')

        # windows line endings
        args = util.parse_page('key: value\nkeys: one, two\n#ignore: me\r\n---\r\nhello, world.')
        self.assertEqual(3, len(args))

        # old mac line endings
        args = util.parse_page('key: value\nkeys: one, two\n#ignore: me\r---\rhello, world.')
        self.assertEqual(3, len(args))
Пример #2
0
def can_read_page(title, user, is_admin):
    """Tell whether a user may read the wiki page with the given title.

    Access is decided in this order:

    1. Admins can read everything.
    2. Users named in the wiki-wide ``readers`` or ``editors`` settings can
       read everything.
    3. With ``open-reading`` set to ``yes`` (the default), a page is readable
       by anybody unless it is marked ``private: yes``; private pages are
       limited to users named in the page's own ``readers``/``editors``.
    4. With ``open-reading`` set to ``login``, pages marked ``public: yes``
       are readable by anybody and every other page by any signed-in user.
    5. With any other value only pages marked ``public: yes`` are readable.

    Args:
        title: Title of the page being requested.
        user: The current user object (must provide ``email()``), or a falsy
            value for an anonymous visitor.
        is_admin: True if the current user is an administrator.

    Returns:
        True if reading is allowed, False otherwise (always a real boolean).
    """
    if is_admin:
        return True

    def is_listed(source):
        """Return True if the user's email is a reader/editor in `source`."""
        email = user.email()
        for key in ('readers', 'editors'):
            names = source.get(key, [])
            # A property with a single value is parsed into a plain string,
            # not a list.  Wrap it so that ``in`` compares whole addresses
            # instead of doing a substring search on that string.
            if not isinstance(names, (list, tuple)):
                names = [names]
            if email in names:
                return True
        return False

    if user and is_listed(settings):
        return True

    page = model.WikiContent.get_by_title(title)
    options = util.parse_page(page.body or '')

    open_reading = settings.get('open-reading', 'yes')
    if open_reading == 'yes':
        if options.get('private') != 'yes':
            return True
        return bool(user) and is_listed(options)

    if options.get('public') == 'yes':
        return True
    if open_reading == 'login':
        # Any signed-in user may read non-public pages in this mode.
        return bool(user)
    return False
Пример #3
0
def can_read_page(title, user, is_admin):
    """Decide whether the given user is allowed to read a page.

    Admins, and users named in the global ``readers``/``editors`` settings,
    may read every page.  For everybody else the result depends on the
    ``open-reading`` setting:

    * ``yes`` (the default): every page is readable unless it is marked
      ``private: yes``, in which case only users named in the page's own
      ``readers``/``editors`` properties may read it.
    * ``login``: pages marked ``public: yes`` are readable by anybody, all
      other pages by any signed-in user.
    * anything else: only pages marked ``public: yes`` are readable.

    Args:
        title: Title of the page being requested.
        user: The current user object (must provide ``email()``), or a falsy
            value for an anonymous visitor.
        is_admin: True if the current user is an administrator.

    Returns:
        True if reading is allowed, False otherwise (always a real boolean).
    """
    if is_admin:
        return True

    def names_user(properties):
        """Return True if `properties` names the user as reader or editor."""
        address = user.email()
        for prop in ('readers', 'editors'):
            allowed = properties.get(prop, [])
            # Single-valued properties are parsed into a bare string rather
            # than a list; without this wrapping ``in`` would perform a
            # substring match and could admit a different address.
            if not isinstance(allowed, (list, tuple)):
                allowed = [allowed]
            if address in allowed:
                return True
        return False

    if user and names_user(settings):
        return True

    page = model.WikiContent.get_by_title(title)
    options = util.parse_page(page.body or '')
    is_public_page = options.get('public') == 'yes'

    mode = settings.get('open-reading', 'yes')
    if mode == 'yes':
        if options.get('private') != 'yes':
            return True
        return bool(user) and names_user(options)
    elif mode == 'login':
        # Non-public pages only require that somebody is signed in.
        return is_public_page or bool(user)
    else:
        return is_public_page
Пример #4
0
    def put(self):
        """Recompute the fields derived from the page body and store the page.

        When the page has a body, its header options are parsed and used to
        set ``redirect``, ``pread`` (``public: yes`` without ``private: yes``)
        and ``labels``.  A ``date`` option in ``YYYY-MM-DD HH:MM:SS`` form
        overrides ``created``; a value that fails with ValueError is ignored.
        A ``name`` option that differs from the current title renames the
        page.  Afterwards the links and implicit labels are refreshed, the
        entity is written through ``db.Model.put`` and
        ``settings.check_and_flush`` is called with the page.

        Raises:
            ValueError: if the ``name`` option names a page that already
                exists.
        """
        if self.body is not None:
            header = util.parse_page(self.body)

            self.redirect = header.get('redirect')
            marked_public = header.get('public') == 'yes'
            marked_private = header.get('private') == 'yes'
            self.pread = marked_public and not marked_private
            self.labels = header.get('labels', [])

            if 'date' in header:
                timestamp_format = '%Y-%m-%d %H:%M:%S'
                try:
                    self.created = datetime.datetime.strptime(
                        header['date'], timestamp_format)
                except ValueError:
                    # Unparseable dates are deliberately left alone.
                    pass

            if 'name' in header:
                new_title = header['name']
                if new_title != self.title:
                    # Refuse to rename onto an existing page.
                    existing = self.get_by_title(new_title,
                                                 create_if_none=False)
                    if existing is not None:
                        raise ValueError(
                            'A page named "%s" already exists.' % new_title)
                    self.title = new_title

            self.__update_geopt()

        self.links = util.extract_links(self.body)
        self.add_implicit_labels()
        db.Model.put(self)
        settings.check_and_flush(self)
Пример #5
0
def fetch_offers_details():
    """Fetch details for every stored offer link not yet marked as fetched.

    Reads the link records from ``output_filename``, walks the records whose
    ``'Fetched'`` value is ``False`` in chunks of five, merges the extracted
    details into each record, and writes the whole list back to
    ``output_filename`` after every chunk and once more at the end.  A record
    whose extraction fails is logged with its traceback and left unchanged.
    """
    links = util.read_json_from_file(output_filename)
    # NOTE(review): this logs the total number of records, not only the ones
    # still waiting to be fetched -- confirm that is the intended figure.
    log.info('\nstarting offer fetch, %s to go', len(links))

    successful = 0
    pending = [link for link in links if link['Fetched'] is False]
    for item_chunk in util.chunks(pending, 5):
        for item in item_chunk:
            link = item['Link']
            page = util.parse_page(link)
            try:
                data = extract_offer_details(page)
                item.update(data)
                successful += 1
            except Exception:
                # Keep going on a bad page, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare ``except:`` would.
                log.error('extracting details from page faild url=%s',
                          link,
                          exc_info=True)

        # Report and save progress once per chunk.
        log.info('pages parsed succesfully %s, unsuccesfull %s', successful,
                 len(links) - successful)
        util.write_json_to_file(output_filename, links)

    log.info('pages parsed succesfully %s, unsuccesfull %s', successful,
             len(links) - successful)
    util.write_json_to_file(output_filename, links)
Пример #6
0
def get_chapters():
  """Yield ``(url, title, content)`` for each URL listed in ``util.links``.

  For every URL the matching row is read from the ``dump`` table, its second
  column is passed to ``util.parse_page`` and the resulting title/content
  pair is yielded together with the URL.  The connection obtained from
  ``util.connect()`` stays open while the generator is being consumed.
  """
  query = 'SELECT * FROM dump WHERE url = ?'
  with util.connect() as conn:
    cursor = conn.cursor()
    for url in util.links:
      row = cursor.execute(query, (url,)).fetchone()
      # Rows are expected to have two columns; only the second is used.
      _, raw_page = row
      title, content = util.parse_page(raw_page)
      yield url, title, content
Пример #7
0
    def test_page_parser(self):
        """Verify that util.parse_page handles properties, comments and text.

        The sample page carries two properties, one commented-out property
        and a body after the ``---`` separator; parsing it must yield exactly
        three entries (``key``, ``keys`` and ``text``).  The entry count must
        stay the same when the lines around the separator use Windows (CRLF)
        or old Mac (CR) line endings.
        """
        # Uses assertEqual: the assertEquals alias is deprecated and no
        # longer exists in unittest as of Python 3.12.
        args = util.parse_page(
            'key: value\nkeys: one, two\n#ignore: me\n---\nhello, world.')
        self.assertEqual(3, len(args))
        self.assertEqual(args.get('key'), 'value')
        self.assertEqual(args.get('keys'), ['one', 'two'])
        self.assertEqual(args.get('text'), 'hello, world.')

        # windows line endings
        args = util.parse_page(
            'key: value\nkeys: one, two\n#ignore: me\r\n---\r\nhello, world.')
        self.assertEqual(3, len(args))

        # old mac line endings
        args = util.parse_page(
            'key: value\nkeys: one, two\n#ignore: me\r---\rhello, world.')
        self.assertEqual(3, len(args))
Пример #8
0
def get_all():
    """Return the wiki settings, loading them from the host page on a miss.

    The settings are looked up in memcache under ``gaewiki:settings``.  On a
    cache miss they are parsed from the body of the page returned by
    ``get_host_page()``, the ``timezone`` entry is validated with pytz and
    replaced by ``'UTC'`` when it is missing or unknown, and the result is
    stored in memcache.

    Returns:
        The settings mapping produced by ``util.parse_page``.
    """
    settings = memcache.get('gaewiki:settings')
    if settings is None:
        settings = util.parse_page(get_host_page().body)
        timezone = settings.get('timezone')
        if timezone is None:
            # No timezone configured: default to UTC instead of failing with
            # KeyError on the lookup.
            settings['timezone'] = 'UTC'
        else:
            try:
                pytz.timezone(timezone)
            except pytz.UnknownTimeZoneError:
                logging.warning('Unknown timezone: %s, reset to UTC',
                                timezone)
                settings['timezone'] = 'UTC'
        memcache.set('gaewiki:settings', settings)
    return settings
Пример #9
0
    def put(self):
        """Refresh the body-derived fields of the page and save it.

        When the page has a body, its header options are parsed and used to
        set ``redirect``, ``pread`` (``public: yes`` without ``private: yes``)
        and ``labels``.  A ``date`` option in ``YYYY-MM-DD HH:MM:SS`` form
        overrides ``created`` (a value that fails with ValueError is
        ignored), and a ``name`` option overrides ``title``.  Afterwards the
        implicit labels are added, the entity is written through
        ``db.Model.put`` and ``settings.check_and_flush`` is called with the
        page.
        """
        if self.body is not None:
            props = util.parse_page(self.body)

            self.redirect = props.get('redirect')
            wants_public = props.get('public') == 'yes'
            wants_private = props.get('private') == 'yes'
            self.pread = wants_public and not wants_private
            self.labels = props.get('labels', [])

            if 'date' in props:
                date_format = '%Y-%m-%d %H:%M:%S'
                try:
                    self.created = datetime.datetime.strptime(
                        props['date'], date_format)
                except ValueError:
                    # Unparseable dates are deliberately left alone.
                    pass

            if 'name' in props:
                self.title = props['name']

            self.__update_geopt()

        self.add_implicit_labels()
        db.Model.put(self)
        settings.check_and_flush(self)
Пример #10
0
    def put(self):
        """Update the fields derived from the page body, then save the page.

        With a body present, the parsed header options drive ``redirect``,
        ``pread`` (true only for ``public: yes`` without ``private: yes``)
        and ``labels``.  A ``date`` option formatted as
        ``YYYY-MM-DD HH:MM:SS`` replaces ``created``; a value that fails with
        ValueError is skipped.  A ``name`` option different from the current
        title renames the page.  The links and implicit labels are then
        refreshed, the entity is written through ``db.Model.put`` and
        ``settings.check_and_flush`` is called with the page.

        Raises:
            ValueError: if the ``name`` option names a page that already
                exists.
        """
        if self.body is not None:
            parsed = util.parse_page(self.body)

            self.redirect = parsed.get('redirect')
            self.pread = not (parsed.get('public') != 'yes'
                              or parsed.get('private') == 'yes')
            self.labels = parsed.get('labels', [])

            if 'date' in parsed:
                try:
                    self.created = datetime.datetime.strptime(
                        parsed['date'], '%Y-%m-%d %H:%M:%S')
                except ValueError:
                    # Keep the current creation date when parsing fails.
                    pass

            if 'name' in parsed:
                requested = parsed['name']
                if requested != self.title:
                    # A rename must not collide with an existing page.
                    clash = self.get_by_title(requested, create_if_none=False)
                    if clash is not None:
                        raise ValueError(
                            'A page named "%s" already exists.' % requested)
                    self.title = requested

            self.__update_geopt()

        self.links = util.extract_links(self.body)
        self.add_implicit_labels()
        db.Model.put(self)
        settings.check_and_flush(self)
Пример #11
0
def extract_offers_links(url=None, page=None, links=None):
    """Collect offer links from a listing page and all pages that follow it.

    Starting from ``page`` (or the page fetched from ``url`` when no page is
    given), every ``<a class="offer-title__link">`` whose ``href`` is not
    already among the collected links becomes a record with the keys
    ``'Id'``, ``'Fetched'`` (always ``False``) and ``'Link'``.  The
    ``<li class="next abs">`` element, when present, supplies the URL of the
    next page to process.

    Args:
        url: URL of the first listing page; used only when ``page`` is falsy.
        page: Already parsed first listing page (optional).
        links: Previously collected link records (optional); not modified.

    Returns:
        A new list with the newly found records, newest page first, followed
        by the records passed in through ``links``.

    Raises:
        AssertionError: if neither ``url`` nor ``page`` is given.
    """
    assert not (url is None and page is None), 'page or url required'

    if not links:
        links = []

    # Pages are walked in a loop rather than recursively so that long
    # listings cannot exhaust the recursion limit.
    while True:
        if not page:
            page = util.parse_page(url)

        links_from_page = [
            {
                'Id': selection["data-ad-id"],
                'Fetched': False,
                'Link': selection["href"]
            } for selection in page.find_all("a", {"class": "offer-title__link"})
            if not util.find(links, lambda x: x['Link'] == selection["href"])
        ]
        log.info('links extracted %s', len(links_from_page))
        links = links_from_page + links

        next_item = page.find('li', {"class": "next abs"})
        if not next_item:
            # Last page reached: hand the accumulated records to the caller
            # (previously the function fell through and returned None).
            return links

        url = next_item.find('a')['href']
        log.info('next_page_url %s', url)
        page = None  # force a fetch of the next page on the next iteration
Пример #12
0
def get_all():
    """Return the ``text`` entry of the parsed syntax help page.

    The parsed page is cached in memcache under ``gaewiki:syntax``; on a
    cache miss it is parsed from the body of the page returned by
    ``get_page()`` and stored before its text is returned.
    """
    cached = memcache.get('gaewiki:syntax')
    if cached is not None:
        return cached['text']

    parsed = util.parse_page(get_page().body)
    memcache.set('gaewiki:syntax', parsed)
    return parsed['text']
Пример #13
0
def get_all():
    """Return the wiki settings parsed from the host page.

    The settings are cached in memcache under ``gaewiki:settings``; on a
    cache miss they are parsed from the body of the page returned by
    ``get_host_page()`` and stored before being returned.
    """
    cached = memcache.get('gaewiki:settings')
    if cached is not None:
        return cached

    parsed = util.parse_page(get_host_page().body)
    memcache.set('gaewiki:settings', parsed)
    return parsed
Пример #14
0
def get_all():
    """Fetch the wiki settings, using memcache as a read-through cache.

    Looks up ``gaewiki:settings`` in memcache.  When nothing is cached, the
    body of the page returned by ``get_host_page()`` is parsed with
    ``util.parse_page`` and the result is cached under the same key.
    """
    cache_key = 'gaewiki:settings'
    result = memcache.get(cache_key)
    if result is not None:
        return result

    host_page = get_host_page()
    result = util.parse_page(host_page.body)
    memcache.set(cache_key, result)
    return result
Пример #15
0
def pretty():
  """Render the stored page for the ``url`` request argument.

  The ``data`` column of the matching ``dump`` row is parsed into a title
  and content, the content is split on newlines, and the lines that are not
  blank are passed to ``pretty_tmpl`` as paragraphs.
  """
  url = request.args['url']
  query = 'SELECT data FROM dump WHERE url = ?'
  row = get_db().execute(query, (url,)).fetchone()
  (raw_page,) = row
  title, content = util.parse_page(raw_page)
  # Lazily drop lines that are empty or whitespace-only.
  paragraphs = (line for line in content.split('\n') if line.strip())
  return pretty_tmpl.render(title=title, paragraphs=paragraphs)