def create(path, **kwargs):
    """Write a minimal YAML-style front matter file to *path*.

    Each keyword argument becomes one ``key: value`` line between the
    ``---`` delimiters."""
    lines = ['---\n']
    lines.extend('%s: %s\n' % pair for pair in iteritems(kwargs))
    lines.append('---\n')
    with open(path, 'w') as fp:
        fp.writelines(lines)
def transform(self, text, entry, *args):
    """Wrap known acronyms in *text* with ``<abbr title="...">`` tags.

    Positional *args* optionally restrict substitution to the named
    acronyms.  On a broken pattern or a parse failure the text is
    returned unmodified."""
    acros = self.acronyms
    if len(args) > 0:
        acros = dict(filter(lambda k: any(k[0] == v for v in args), iteritems(acros)))

    try:
        abbr = re.compile(r'\b(%s)\b' % '|'.join((pat.pattern for pat in acros)))
    except re.error as e:
        log.warn("acronyms: %s", e.args[0])
        # FIX: bail out now -- `abbr` is unbound here and using it below
        # raised a NameError that the former bare except silently swallowed.
        return text

    def repl(match):
        abbr = match.group(0)
        desc = acros.get(abbr, None)
        if desc is None:
            # keys may be compiled patterns rather than literal strings
            for pat in acros:
                if pat.match(abbr):
                    desc = acros.get(pat)
                    break
        return '<abbr title="%s">%s</abbr>' % (desc, abbr)

    try:
        return ''.join(Acrynomify(text, abbr, repl).result)
    except Exception:  # FIX: narrowed from a bare except that hid real bugs
        log.exception('could not acronymize ' + entry.filename)
        return text
def recurse(category, tree):
    """Walk the category tree depth-first, yielding ``(path, entries)``
    pairs with entries sorted newest first."""
    entries = sorted(fetch(tree), key=lambda entry: entry.date, reverse=True)
    yield category, entries
    for name, child in iteritems(tree[0]):
        for pair in recurse(category + "/" + safeslug(name), child):
            yield pair
def generate(item):
    """Map one RSS ``<item>`` element onto the importer's entry dict.

    Raises :class:`AcrylamidException` unless title, link, content and
    pubDate are all present."""
    mapping = {'title': 'title', 'date': 'pubDate',
               'link': 'link', 'content': 'description'}
    entry = {}
    for key, tag in iteritems(mapping):
        try:
            text = item.find(tag).text
            entry[key] = unescape(text) if key == 'content' else text
        except (AttributeError, TypeError):
            pass  # element missing or empty -- validated below

    required = ['title', 'date', 'link', 'content']
    if any(filter(lambda k: k not in entry, required)):
        raise AcrylamidException('invalid RSS 2.0 feed: provide at least title, '
                                 + 'link, content and pubDate!')

    return {'title': entry['title'],
            'content': entry['content'],
            'date': parse_date_time(entry['date']),
            'link': entry['link'],
            'tags': [cat.text for cat in item.findall('category')]}
def transform(self, text, entry, *args):
    """Wrap known acronyms in *text* with ``<abbr title="...">`` tags.

    Positional *args* optionally restrict substitution to the named
    acronyms.  On a broken pattern or an HTML parse failure the text is
    returned unmodified."""
    acros = self.acronyms
    if len(args) > 0:
        acros = dict(
            filter(lambda k: any(k[0] == v for v in args), iteritems(acros)))

    try:
        abbr = re.compile(r'\b(%s)\b' % '|'.join(
            (pat.pattern for pat in acros)))
    except re.error as e:
        log.warn("acronyms: %s", e.args[0])
        # FIX: return early -- `abbr` is unbound after a compile failure and
        # would raise NameError in the Acrynomify call below.
        return text

    def repl(match):
        abbr = match.group(0)
        desc = acros.get(abbr, None)
        if desc is None:
            # keys may be compiled patterns rather than literal strings
            for pat in acros:
                if pat.match(abbr):
                    desc = acros.get(pat)
                    break
        return '<abbr title="%s">%s</abbr>' % (desc, abbr)

    try:
        return ''.join(Acrynomify(text, abbr, repl).result)
    except HTMLParseError:
        log.exception('could not acronymize ' + entry.filename)
        return text
def compile(conf, env):
    """Copy/Compile assets to output directory.

    All assets from the theme directory (except for templates) and static
    directories can be compiled or just copied using several built-in
    writers."""
    global __writers, __defaultwriter
    __writers = {}
    __defaultwriter = Writer(conf, env)

    files = defaultdict(set)

    # register every configured writer under each extension it handles
    for writer in (globals()[name](conf, env) for name in conf.static_filter):
        exts = writer.ext if isinstance(writer.ext, (list, tuple)) else [writer.ext]
        for ext in exts:
            __writers[ext] = writer

    # theme assets, skipping templates and webassets-managed files
    excludes = list(env.engine.loader.modified.keys()) + env.webassets.excludes(conf['theme'])
    for path, directory in relfilelist(conf['theme'], conf['theme_ignore'], excludes):
        files[(splitext(path)[1], directory)].add(path)

    # static assets
    excludes = env.webassets.excludes(conf['static'] or '')
    for path, directory in relfilelist(conf['static'], conf['static_ignore'], excludes):
        files[(splitext(path)[1], directory)].add(path)

    list(map(partial(worker, conf, env), iteritems(files)))
def generate(self, conf, env, data):
    """Yield rendered pages for every category found in the tree."""
    pairs = chain.from_iterable(
        recurse(name, subtree) for name, subtree in iteritems(self.tree[0]))
    for category, entrylist in pairs:
        data["entrylist"] = entrylist
        for res in Paginator.generate(self, conf, env, data,
                                      category=category, name=category):
            yield res
def recurse(category, tree):
    """Depth-first traversal of the category tree; yields the category path
    together with its entries, newest first."""
    ordered = sorted(fetch(tree), key=lambda e: e.date, reverse=True)
    yield category, ordered
    for key, value in iteritems(tree[0]):
        slug = safeslug(key)
        for item in recurse('/'.join([category, slug]), value):
            yield item
def do_tags(conf, env, options):
    """Print tag statistics for all entries, or -- with ``--coverage`` --
    list sparsely used tags together with the files that use them."""
    limit = options.max if options.max > 0 else 100
    entrylist = readers.load(conf)[0]
    tagmap = fetch(entrylist)

    if options.coverage:
        for tag, entries in sorted(iteritems(tagmap)):
            if len(entries) <= options.coverage:
                print(blue(tag).encode('utf-8'), end=' ')
                print(', '.join(e.filename.encode('utf-8') for e in entries))
        return

    ranked = sorted(iteritems(tagmap), key=lambda kv: len(kv[1]), reverse=True)
    tags = ['%i %s' % (len(entries), tag) for tag, entries in ranked]

    # render in four columns, fitted to the current terminal width
    columns = batch(tags[:limit], ceil(len(tags) / 4.0))
    width = os.popen('stty size', 'r').read().split()[1]
    colprint(list(izip(*list(columns), fillvalue='')), width)
def do_tags(conf, env, options):
    """Print tag statistics for all entries, or -- with ``--coverage`` --
    list sparsely used tags together with the files that use them."""
    limit = 100 if options.max <= 0 else options.max
    entrylist = readers.load(conf)[0]

    if options.coverage:
        for tag, entries in sorted(iteritems(fetch(entrylist))):
            if len(entries) > options.coverage:
                continue
            print(blue(tag).encode('utf-8'), end=' ')
            print(', '.join(e.filename.encode('utf-8') for e in entries))
        return

    ranked = sorted(iteritems(fetch(entrylist)),
                    key=lambda pair: len(pair[1]), reverse=True)
    tags = ['%i %s' % (len(entries), tag) for tag, entries in ranked]

    # four columns, fitted to the current terminal width
    rows = list(batch(tags[:limit], ceil(len(tags) / 4.0)))
    width = os.popen('stty size', 'r').read().split()[1]
    colprint(list(izip(*rows, fillvalue='')), width)
def generate(self, conf, env, data):
    """Yield rendered pages for every category found in the tree."""
    # flattening chain(*map(recurse, ...)) is just this nested loop
    for args in iteritems(self.tree[0]):
        for category, entrylist in recurse(*args):
            data['entrylist'] = entrylist
            for res in Paginator.generate(self, conf, env, data,
                                          category=category, name=category):
                yield res
def __init__(self, tags, steps=4, max_items=100, start=0, shuffle=False):
    """Build tag cloud data: ``self.lst`` holds ``(tag, size-step)`` pairs
    where the step is a logarithmic scale over the tag's usage count."""
    counted = sorted(((tag, len(entries)) for tag, entries in iteritems(tags)),
                     key=lambda pair: pair[0])[:max_items]

    # stolen from pelican/generators.py:286
    max_count = max(counted, key=lambda pair: pair[1])[1] if counted else None

    def step(count):
        scaled = math.log(count) / (math.log(max_count) or 1)
        return int(math.floor(steps - (steps - 1) * scaled)) + start - 1

    self.lst = [(tag, step(count)) for tag, count in counted]
    if shuffle:
        random.shuffle(self.lst)
    self.tags = tags
def __init__(self, conf, meta):
    """Initialize entry properties from the global conf plus per-entry
    metadata; singular keys are redirected to their plural forms."""
    whitelist = ['author', 'lang', 'email', 'date_format',
                 'entry_permalink', 'page_permalink']
    self.props = Metadata((k, v) for k, v in iteritems(conf) if k in whitelist)
    self.props.update(meta)
    self.type = meta.get('type', 'entry')

    # redirect singular -> plural
    for key, to in [('tag', 'tags'), ('filter', 'filters'), ('template', 'layout')]:
        if key in self.props:
            self.props.redirect(key, to)

    self.filters = self.props.get('filters', [])
    # NOTE(review): `hash` takes three arguments here, so it is presumably a
    # project helper shadowing the builtin -- confirm module-level import.
    self.hashvalue = hash(self.filename, self.title, self.date.ctime())
def __init__(self, tags, steps=4, max_items=100, start=0, shuffle=False):
    """Build tag cloud data: ``self.lst`` holds ``(tag, count, size-step)``
    triples where the step is a logarithmic scale over the usage count."""
    pairs = [(tag, len(entries)) for tag, entries in iteritems(tags)]
    pairs.sort(key=lambda pair: pair[0])
    pairs = pairs[:max_items]

    # stolen from pelican/generators.py:286
    top = max(pairs, key=lambda pair: pair[1])[1] if pairs else None
    scale = lambda n: int(math.floor(
        steps - (steps - 1) * math.log(n) / (math.log(top) or 1))) + start - 1

    self.lst = [(tag, count, scale(count)) for tag, count in pairs]
    if shuffle:
        random.shuffle(self.lst)
    self.tags = tags
def load(path):
    """Load default configuration, prepare namespace and update configuration
    with `conf.py`'s uppercase values and normalizes ambiguous values.
    """
    conf = Configuration(defaults.conf)
    ns = dict((key.upper(), value) for key, value in iteritems(defaults.conf))

    # work relative to the directory containing conf.py
    os.chdir(dirname(find(basename(path), u(dirname(path) or os.getcwd()))))

    if PY2K:
        execfile(path, ns)
    else:
        exec(compile(open(path).read(), path, 'exec'), ns)

    # only ALL-UPPERCASE names from conf.py are picked up, lowercased
    conf.update(dict((k.lower(), ns[k]) for k in ns if k.upper() == k))

    # append trailing slash to *_dir and place certain values into an array
    return defaults.normalize(conf)
def entry(**kw):
    """Build a minimal Markdown test entry; *kw* overrides or extends the
    default front matter."""
    fallbacks = [('title', 'Hänsel and Gretel!'), ('date', '12.02.2012 15:46')]

    lines = ['---']
    lines.extend('%s: %s' % (k, v) for k, v in fallbacks if k not in kw)
    lines.extend('%s: %s' % (k, v) for k, v in iteritems(kw))
    lines.extend(['---', '', '# Test', '',
                  'This is supercalifragilisticexpialidocious.'])
    return '\n'.join(lines)
def initialize(conf, env):
    """Set up the hooks thread pool and register each configured hook on
    create/update events (on all events when --force or for advanced hooks)."""
    global pool

    hooks = conf.get('hooks', {})
    blocks = not conf.get('hooks_mt', True)
    pool = Threadpool(1 if blocks else multiprocessing.cpu_count(), wait=blocks)

    force = env.options.force
    normalize = lambda path: path.replace(conf['output_dir'], '')

    for pattern, action in iteritems(hooks):
        simple_action = isinstance(action, (types.FunctionType, string_types))
        if simple_action:
            event.register(
                callback=partial(simple, pool, pattern, normalize, action),
                to=event.events if force else ['create', 'update'])
        else:
            # iterable action unpacks into the advanced-hook arguments
            event.register(
                callback=partial(advanced, pool, pattern, force, normalize, *action),
                to=event.events)
def generate(item):
    """Map one RSS ``<item>`` element onto the importer's entry dict,
    raising :class:`AcrylamidException` when required fields are missing."""
    entry = {}
    for k, v in iteritems({'title': 'title', 'date': 'pubDate',
                           'link': 'link', 'content': 'description'}):
        try:
            raw = item.find(v).text
        except (AttributeError, TypeError):
            continue  # element missing -- validated below
        entry[k] = unescape(raw) if k == 'content' else raw

    if any(filter(lambda k: k not in entry,
                  ['title', 'date', 'link', 'content'])):
        raise AcrylamidException('invalid RSS 2.0 feed: provide at least title, '
                                 + 'link, content and pubDate!')

    return {'title': entry['title'],
            'content': entry['content'],
            'date': parse_date_time(entry['date']),
            'link': entry['link'],
            'tags': [cat.text for cat in item.findall('category')]}
def markdownstyle(fileobj):
    """Parse Markdown Metadata without converting the source code. Mostly
    copy&paste from the 'meta' extension but slighty modified to fit to
    Acrylamid: we try to parse a value into a python value (via
    :func:`distinguish`)."""
    # -- from markdown.extensions.meta
    meta_re = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9._-]+):\s*(?P<value>.*)')
    meta_more_re = re.compile(r'^[ ]{4,}(?P<value>.*)')

    lineno = 0
    meta, key = {}, None

    while True:
        line = fileobj.readline()
        lineno += 1

        if line.strip() == '':
            break  # blank line - done

        m1 = meta_re.match(line)
        if m1:
            key = m1.group('key').lower().strip()
            meta.setdefault(key, []).append(distinguish(m1.group('value').strip()))
            continue

        m2 = meta_more_re.match(line)
        if m2 and key:
            # indented continuation line for the previous key
            meta[key].append(m2.group('value').strip())
        else:
            break  # no meta data - done

    if not meta:
        raise AcrylamidException("no meta information in %r found" % fileobj.name)

    # collapse single-element lists to their sole value
    for key, values in iteritems(meta):
        if len(values) == 1:
            meta[key] = values[0]

    return lineno, meta
def img(header, body=None):
    """Alternate to Markdown's image tag. See
    http://octopress.org/docs/plugins/image-tag/ for usage."""
    attrs = re.match(__img_re, header).groupdict()
    m = re.match(__img_re_title, attrs['title'])

    if m:
        attrs['title'] = m.groupdict()['title']
        attrs['alt'] = m.groupdict()['alt']
    elif 'title' in attrs:
        # FIX: was .replace('"', '"') -- a no-op.  The intent (as in the
        # Octopress plugin this mirrors) is to HTML-escape quotes so the
        # value survives inside the alt="..." attribute below.
        attrs['alt'] = attrs['title'].replace('"', '&#34;')

    if 'class' in attrs:
        attrs['class'] = attrs['class'].replace('"', '')

    if attrs:
        return '<img ' + ' '.join('%s="%s"' % (k, v)
                                  for k, v in iteritems(attrs) if v) + ' />'

    return ("Error processing input, expected syntax: "
            "{% img [class name(s)] [http[s]:/]/path/to/image [width [height]] "
            "[title text | \"title text\" [\"alt text\"]] %}")
def initialize(directories, conf, env):
    """Build the url -> view mapping from ``conf.views``, discover view
    implementations and warn about any view that could not be located."""
    global __views_list
    __views_list, urlmap = [], []

    for rule, view in iteritems(conf.views):
        # normalize the single-view shorthand to the 'views' list form
        if 'views' not in view:
            view['views'] = [view.pop('view'), ]
        for name in view['views']:
            item = view.copy()
            item.pop('views')
            item['name'] = name
            urlmap.append((rule, item))

    directories += [os.path.dirname(__file__)]
    helpers.discover(
        directories, partial(index_views, conf, env, urlmap),
        lambda path: path.rpartition('.')[0] != __file__.rpartition('.')[0])

    # presumably index_views removes located entries from urlmap --
    # anything remaining could not be resolved
    for rule, item in urlmap:
        log.warn("unable to locate '%s' view", item['name'])
def initialize(conf, env):
    """Set up the hooks thread pool, register configured hook callbacks and
    discover user-provided hook modules from HOOKS_DIR."""
    global pool

    hooks = conf.get('hooks', {})
    blocking = not conf.get('hooks_mt', True)
    workers = 1 if blocking else multiprocessing.cpu_count()
    pool = Threadpool(workers, wait=blocking)

    force = env.options.force
    normalize = lambda path: path.replace(conf['output_dir'], '')

    for pattern, action in iteritems(hooks):
        if not isinstance(action, (types.FunctionType, string_types)):
            # non-callable/non-string actions unpack into advanced-hook args
            event.register(
                callback=partial(advanced, pool, pattern, force, normalize, *action),
                to=event.events)
        else:
            event.register(
                callback=partial(simple, pool, pattern, normalize, action),
                to=['create', 'update'] if not force else event.events)

    discover([conf.get('HOOKS_DIR', 'hooks/')], lambda x: x)
def index(entrylist):
    """Build compressed suffix tree in something around O(n * log(n)), but
    with huge time constants. It is *really* slow but more space efficient,
    hopefully."""
    tree, meta = {}, []
    words = defaultdict(set)

    for num, entry in enumerate(entrylist):
        meta.append((entry.permalink, entry.title))
        for word in re.split(r"[.:,\s!?=\(\)]+", entry.content):
            if len(word) < 3:
                continue
            # index every lowercase suffix of at least four characters
            for offset in range(len(word) - 3):
                words[word[offset:].lower()].add(num)

    for key, value in iteritems(words):
        insert(tree, key, list(value))

    del words  # free the intermediate mapping eagerly
    return tree, meta
def run(conf, env, options):
    """Subcommand: deploy -- run the shell command specified in
    DEPLOYMENT[task] using Popen. Each string value from :doc:`conf.py` is
    added to the execution environment. Every argument after ``acrylamid
    deploy task ARG1 ARG2`` is appended to cmd."""
    if options.list:
        for task in iterkeys(conf.get('deployment', {})):
            print(task)
        sys.exit(0)

    task, args = options.task or 'default', options.args
    cmd = conf.get('deployment', {}).get(task, None)

    if not cmd:
        raise AcrylamidException('no tasks named %r in conf.py' % task)

    # apply ARG1 ARG2 ... and -v --long-args to the command, e.g.:
    # $> acrylamid deploy task arg1 -b --foo
    cmd += ' ' + ' '.join(args)

    enc = sys.getfilesystemencoding()
    env = os.environ
    env.update(
        dict([(k.upper(), v.encode(enc, 'replace') if PY2K else v)
              for k, v in iteritems(conf) if isinstance(v, string_types)]))

    log.info('execute %s', cmd)
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)

    while True:
        output = p.stdout.read(1)
        # FIX: identity comparison (`is not None`) instead of `!= None`
        # for the poll() sentinel (PEP 8)
        if output == b'' and p.poll() is not None:
            break
        if output != b'':
            sys.stdout.write(output.decode(enc))
            sys.stdout.flush()
def markdownstyle(fileobj):
    """Parse Markdown metadata without converting the source code.  Mostly
    copy&paste from the 'meta' extension but slightly modified to fit
    Acrylamid: values are coerced into Python values via
    :func:`distinguish`."""
    # -- from markdown.extensions.meta
    key_re = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9._-]+):\s*(?P<value>.*)')
    cont_re = re.compile(r'^[ ]{4,}(?P<value>.*)')

    consumed = 0
    meta, current = {}, None

    while True:
        line = fileobj.readline()
        consumed += 1

        if line.strip() == '':
            break  # a blank line terminates the metadata block

        keymatch = key_re.match(line)
        if keymatch is not None:
            current = keymatch.group('key').lower().strip()
            value = distinguish(keymatch.group('value').strip())
            meta.setdefault(current, []).append(value)
        else:
            contmatch = cont_re.match(line)
            if contmatch is None or current is None:
                break  # neither key nor continuation - done
            # indented continuation line for the previous key
            meta[current].append(contmatch.group('value').strip())

    if not meta:
        raise AcrylamidException("no meta information in %r found" % fileobj.name)

    # collapse single-element lists to their sole value
    for key, values in iteritems(meta):
        if len(values) == 1:
            meta[key] = values[0]

    return consumed, meta
def __new__(cls, name, bases, attrs):
    # Metaclass constructor: every public callable in the class body becomes
    # a classmethod, and those listed in attrs['events'] are additionally
    # wrapped so registered callbacks fire and per-event counters update.

    def intercept(func):
        """decorator which calls callback registered to this method."""
        name = func.func_name if compat.PY2K else func.__name__

        def dec(cls, ns, path, *args, **kwargs):
            # notify every callback registered for this event first
            for callback in cls.callbacks[name]:
                callback(ns, path)
            if name in cls.events:
                attrs['counter'][name] += 1
            # note: `ns` is consumed here and not forwarded to the handler
            return func(cls, path, *args, **kwargs)

        dec.__doc__ = func.__doc__  # sphinx
        return dec

    for name, func in iteritems(attrs):
        if not name.startswith('_') and callable(func):
            if name in attrs['events']:
                func = intercept(func)
            attrs[name] = classmethod(func)

    return type.__new__(cls, name, bases, attrs)
def img(header, body=None):
    """Alternate to Markdown's image tag. See
    http://octopress.org/docs/plugins/image-tag/ for usage."""
    attrs = re.match(__img_re, header).groupdict()
    m = re.match(__img_re_title, attrs['title'])

    if m:
        attrs['title'] = m.groupdict()['title']
        attrs['alt'] = m.groupdict()['alt']
    elif 'title' in attrs:
        # FIX: was .replace('"', '"') -- a no-op.  The intent (as in the
        # Octopress plugin this mirrors) is to HTML-escape quotes so the
        # value survives inside the alt="..." attribute below.
        attrs['alt'] = attrs['title'].replace('"', '&#34;')

    if 'class' in attrs:
        attrs['class'] = attrs['class'].replace('"', '')

    if attrs:
        return '<img ' + ' '.join('%s="%s"' % (k, v)
                                  for k, v in iteritems(attrs) if v) + ' />'

    return (
        "Error processing input, expected syntax: "
        "{% img [class name(s)] [http[s]:/]/path/to/image [width [height]] "
        "[title text | \"title text\" [\"alt text\"]] %}")
def run(conf, env, options):
    """Subcommand: deploy -- run the shell command specified in
    DEPLOYMENT[task] using Popen. Each string value from :doc:`conf.py` is
    added to the execution environment. Every argument after ``acrylamid
    deploy task ARG1 ARG2`` is appended to cmd."""
    if options.list:
        for task in iterkeys(conf.get('deployment', {})):
            print(task)
        sys.exit(0)

    task, args = options.task or 'default', options.args
    cmd = conf.get('deployment', {}).get(task, None)

    if not cmd:
        raise AcrylamidException('no tasks named %r in conf.py' % task)

    # apply ARG1 ARG2 ... and -v --long-args to the command, e.g.:
    # $> acrylamid deploy task arg1 -b --foo
    cmd += ' ' + ' '.join(args)

    enc = sys.getfilesystemencoding()
    env = os.environ
    env.update(dict([(k.upper(), v.encode(enc, 'replace') if PY2K else v)
                     for k, v in iteritems(conf) if isinstance(v, string_types)]))

    log.info('execute %s', cmd)
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)

    while True:
        output = p.stdout.read(1)
        # FIX: identity comparison (`is not None`) instead of `!= None`
        # for the poll() sentinel (PEP 8)
        if output == b'' and p.poll() is not None:
            break
        if output != b'':
            sys.stdout.write(output.decode(enc))
            sys.stdout.flush()
def compile(conf, env):
    """The compilation process."""
    # run pre-compile hooks before anything else
    hooks.initialize(conf, env)
    hooks.run(conf, env, 'pre')

    if env.options.force:
        cache.clear(conf.get('cache_dir'))

    # time measurement
    ctime = time.time()

    # populate env and corrects some conf things
    data = initialize(conf, env)

    # load pages/entries and store them in env
    rv = dict(
        zip(['entrylist', 'pages', 'translations', 'drafts'],
            map(HashableList, readers.load(conf))))

    entrylist, pages = rv['entrylist'], rv['pages']
    translations, drafts = rv['translations'], rv['drafts']

    # load references
    refs.load(entrylist, pages, translations, drafts)

    data.update(rv)
    env.globals.update(rv)

    # here we store all found filter and their aliases
    ns = defaultdict(set)

    # [<class head_offset.Headoffset at 0x1014882c0>, <class html.HTML at 0x101488328>,...]
    aflist = filters.get_filters()

    # ... and get all configured views
    _views = views.get_views()

    # filters found in all entries, views and conf.py (skip translations, has no items)
    found = sum((x.filters for x in chain(entrylist, pages, drafts, _views, [conf])), [])

    for val in found:
        # first we for `no` and get the function name and arguments
        f = val[2:] if val.startswith('no') else val
        fname, fargs = f.split('+')[:1][0], f.split('+')[1:]

        try:
            # initialize the filter with its function name and arguments
            fx = aflist[fname](conf, env, val, *fargs)
            if val.startswith('no'):
                fx = filters.disable(fx)
        except ValueError:
            # fall back to the full alias (including the `no` prefix)
            try:
                fx = aflist[val.split('+')[:1][0]](conf, env, val, *fargs)
            except ValueError:
                raise AcrylamidException('no such filter: %s' % val)

        ns[fx].add(val)

    # include actual used filters to trigger modified state
    env.filters = HashableList(iterkeys(ns))

    for entry in chain(entrylist, pages, drafts):
        for v in _views:
            # a list that sorts out conflicting and duplicated filters
            flst = filters.FilterList()

            # filters found in this specific entry plus views and conf.py
            found = entry.filters + v.filters + data['conf']['filters']

            for fn in found:
                # map each alias back to its (single) filter instance
                fx, _ = next((k for k in iteritems(ns) if fn in k[1]))
                if fx not in flst:
                    flst.append(fx)

            # sort them ascending because we will pop within filters.add
            entry.filters.add(sorted(flst, key=lambda k: (-k.priority, k.name)),
                              context=v)

    # lets offer a last break to populate tags and such
    for v in _views:
        env = v.context(conf, env, data)

    # now teh real thing!
    for v in _views:
        for entry in chain(entrylist, pages, translations, drafts):
            entry.context = v

        # narrow each collection down to what this view's condition accepts
        for var in 'entrylist', 'pages', 'translations', 'drafts':
            data[var] = HashableList(filter(v.condition, locals()[var])) \
                if v.condition else locals()[var]

        tt = time.time()

        for buf, path in v.generate(conf, env, data):
            try:
                helpers.mkfile(buf, path, time.time() - tt, ns=v.name,
                               force=env.options.force, dryrun=env.options.dryrun)
            except UnicodeError:
                log.exception(path)
            finally:
                buf.close()
            tt = time.time()

    # copy modified/missing assets to output
    assets.compile(conf, env)

    # wait for unfinished hooks
    hooks.shutdown()

    # run post hooks (blocks)
    hooks.run(conf, env, 'post')

    # save conf/environment hash and new/changed/unchanged references
    helpers.memoize('Configuration', hash(conf))
    helpers.memoize('Environment', hash(env))
    refs.save()

    # remove abandoned cache files
    cache.shutdown()

    # print a short summary
    log.info('%i new, %i updated, %i skipped [%.2fs]', event.count('create'),
             event.count('update'),
             event.count('identical') + event.count('skip'),
             time.time() - ctime)
def fetch(self, ns):
    """Return a sub-configuration of every key below namespace *ns*, with
    the namespace prefix chopped off the keys."""
    pairs = ((lchop(key, ns), value)
             for key, value in iteritems(self) if key.startswith(ns))
    return Configuration(pairs)
def populate_tags(self, request):
    """Collect tags from the entry list, store a slug -> entries mapping on
    ``self.tags`` and return the raw tag mapping."""
    tags = fetch(request['entrylist'])
    self.tags = dict((safeslug(tag), entries)
                     for tag, entries in iteritems(tags))
    return tags
def transform(self, text, entry, *args):
    """Expand every registered directive block found in *text* by calling
    its handler with the block's captured groups."""
    for name, handler in iteritems(self.directives):
        pattern = self.block(name)
        text = re.sub(pattern, lambda m, fn=handler: fn(*m.groups()), text)
    return text
def fetch(self, ns):
    """Return a sub-configuration of every key below namespace *ns*, with
    the namespace prefix chopped off the keys."""
    matches = [(k, v) for k, v in iteritems(self) if k.startswith(ns)]
    return Configuration((lchop(k, ns), v) for k, v in matches)
def update(self, dikt):
    """Merge *dikt* into this mapping via item assignment, so any
    ``__setitem__`` hook of the subclass runs for every key."""
    for pair in iteritems(dikt):
        self[pair[0]] = pair[1]
def pandocstyle(fileobj):
    """A function to parse the so called 'Title block' out of Pandoc-formatted
    documents. Provides very simple parsing so that Acrylamid won't choke on
    plain Pandoc documents.

    See http://johnmacfarlane.net/pandoc/README.html#title-block

    Currently not implemented:
     - Formatting within title blocks
     - Man-page writer title block extensions
    """
    meta_pan_re = re.compile(r'^[ ]{0,3}%+\s*(?P<value>.*)')
    meta_pan_more_re = re.compile(r'^\s*(?P<value>.*)')
    meta_pan_authsplit = re.compile(r';+\s*')

    # i counts consumed lines, j indexes the positional title-block keys
    i, j = 0, 0
    meta, key = {}, None
    poss_keys = ['title', 'author', 'date']

    while True:
        line = fileobj.readline()
        i += 1

        if line.strip() == '':
            break  # blank line - done

        if j + 1 > len(poss_keys):
            raise AcrylamidException(
                "%r has too many items in the Pandoc title block." % fileobj.name)

        m1 = meta_pan_re.match(line)
        if m1:
            # a '%' line: values map positionally onto title, author, date
            key = poss_keys[j]
            j += 1
            valstrip = m1.group('value').strip()
            if not valstrip:
                continue  # empty slot -- skip but keep positional order
            value = distinguish(m1.group('value').strip())
            if key == 'author':
                # authors are ';'-separated on a single line
                value = value.strip(';')
                value = meta_pan_authsplit.split(value)
            meta.setdefault(key, []).append(value)
        else:
            m2 = meta_pan_more_re.match(line)
            if m2 and key:
                # Add another line to existing key
                value = m2.group('value').strip()
                if key == 'author':
                    value = value.strip(';')
                    value = meta_pan_authsplit.split(value)
                meta[key].append(value)
            else:
                break  # no meta data - done

    if 'title' not in meta:
        raise AcrylamidException('No title given in %r' % fileobj.name)

    if len(meta['title']) > 1:
        meta['title'] = ' '.join(meta['title'])

    if 'author' in meta:
        # flatten the per-line author lists into one list
        meta['author'] = sum(meta['author'], [])
    else:
        log.warn('%s does not have an Author in the Pandoc title block.'
                 % fileobj.name)

    # collapse single-element lists to their sole value
    for key, values in iteritems(meta):
        if len(values) == 1:
            meta[key] = values[0]

    return i, meta
def init(self, conf, env, **kwargs):
    """Store every keyword argument directly in the instance ``__dict__``
    (bypassing any ``__setattr__`` hook); conf and env are unused here."""
    for attr, value in iteritems(kwargs):
        self.__dict__[attr] = value
def compile(conf, env):
    """The compilation process."""
    # run pre-compile hooks before anything else
    hooks.initialize(conf, env)
    hooks.run(conf, env, 'pre')

    if env.options.force:
        cache.clear(conf.get('cache_dir'))

    # time measurement
    ctime = time.time()

    # populate env and corrects some conf things
    data = initialize(conf, env)

    # load pages/entries and store them in env
    rv = dict(zip(['entrylist', 'pages', 'translations', 'drafts'],
                  map(HashableList, readers.load(conf))))

    entrylist, pages = rv['entrylist'], rv['pages']
    translations, drafts = rv['translations'], rv['drafts']

    # load references
    refs.load(entrylist, pages, translations, drafts)

    data.update(rv)
    env.globals.update(rv)

    # here we store all found filter and their aliases
    ns = defaultdict(set)

    # [<class head_offset.Headoffset at 0x1014882c0>, <class html.HTML at 0x101488328>,...]
    aflist = filters.get_filters()

    # ... and get all configured views
    _views = views.get_views()

    # filters found in all entries, views and conf.py (skip translations, has no items)
    found = sum((x.filters for x in chain(entrylist, pages, drafts, _views, [conf])), [])

    for val in found:
        # first we for `no` and get the function name and arguments
        f = val[2:] if val.startswith('no') else val
        fname, fargs = f.split('+')[:1][0], f.split('+')[1:]

        try:
            # initialize the filter with its function name and arguments
            fx = aflist[fname](conf, env, val, *fargs)
            if val.startswith('no'):
                fx = filters.disable(fx)
        except ValueError:
            # fall back to the full alias (including the `no` prefix)
            try:
                fx = aflist[val.split('+')[:1][0]](conf, env, val, *fargs)
            except ValueError:
                raise AcrylamidException('no such filter: %s' % val)

        ns[fx].add(val)

    # include actual used filters to trigger modified state
    env.filters = HashableList(iterkeys(ns))

    for entry in chain(entrylist, pages, drafts):
        for v in _views:
            # a list that sorts out conflicting and duplicated filters
            flst = filters.FilterList()

            # filters found in this specific entry plus views and conf.py
            found = entry.filters + v.filters + data['conf']['filters']

            for fn in found:
                # map each alias back to its (single) filter instance
                fx, _ = next((k for k in iteritems(ns) if fn in k[1]))
                if fx not in flst:
                    flst.append(fx)

            # sort them ascending because we will pop within filters.add
            entry.filters.add(sorted(flst, key=lambda k: (-k.priority, k.name)),
                              context=v)

    # lets offer a last break to populate tags and such
    for v in _views:
        env = v.context(conf, env, data)

    # now teh real thing!
    for v in _views:
        for entry in chain(entrylist, pages, translations, drafts):
            entry.context = v

        # narrow each collection down to what this view's condition accepts
        for var in 'entrylist', 'pages', 'translations', 'drafts':
            data[var] = HashableList(filter(v.condition, locals()[var])) \
                if v.condition else locals()[var]

        tt = time.time()

        for buf, path in v.generate(conf, env, data):
            try:
                helpers.mkfile(buf, path, time.time()-tt, ns=v.name,
                               force=env.options.force, dryrun=env.options.dryrun)
            except UnicodeError:
                log.exception(path)
            finally:
                buf.close()
            tt = time.time()

    # copy modified/missing assets to output
    assets.compile(conf, env)

    # wait for unfinished hooks
    hooks.shutdown()

    # run post hooks (blocks)
    hooks.run(conf, env, 'post')

    # save conf/environment hash and new/changed/unchanged references
    helpers.memoize('Configuration', hash(conf))
    helpers.memoize('Environment', hash(env))
    refs.save()

    # remove abandoned cache files
    cache.shutdown()

    # print a short summary
    log.info('%i new, %i updated, %i skipped [%.2fs]', event.count('create'),
             event.count('update'),
             event.count('identical') + event.count('skip'),
             time.time() - ctime)
def __iter__(self):
    """Yield a Subcategory wrapper for each child node, in key order."""
    children = sorted(iteritems(self.tree[0]), key=lambda pair: pair[0])
    for name, subtree in children:
        yield Subcategory(self.parent + [name], name, subtree, self.route)
def rss(xml):
    """Import an RSS 2.0 feed: return ``(defaults, items)`` where defaults
    carries sitename/www_root/lang/author and items are entry dicts.

    Raises InputError for WordPress dumps, malformed XML or non-RSS-2.0
    feeds."""
    if 'xmlns:wp' in xml:
        raise InputError('WordPress dump')

    def parse_date_time(stamp):
        # RFC 822 date (optionally with timezone) -> naive local datetime
        ts = parsedate_tz(stamp)
        ts = mktime_tz(ts)
        return datetime.fromtimestamp(ts)

    def generate(item):
        entry = {}
        for k, v in iteritems({'title': 'title', 'date': 'pubDate',
                               'link': 'link', 'content': 'description'}):
            try:
                entry[k] = item.find(v).text if k != 'content' \
                    else unescape(item.find(v).text)
            except (AttributeError, TypeError):
                pass

        if any(filter(lambda k: k not in entry,
                      ['title', 'date', 'link', 'content'])):
            raise AcrylamidException('invalid RSS 2.0 feed: provide at least title, '
                                     + 'link, content and pubDate!')

        return {'title': entry['title'],
                'content': entry['content'],
                'date': parse_date_time(entry['date']),
                'link': entry['link'],
                'tags': [cat.text for cat in item.findall('category')]}

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')
    if tree.tag != 'rss' or tree.attrib.get('version') != '2.0':
        raise InputError('no RSS 2.0 feed')

    defaults = {'author': None}
    # NOTE(review): getchildren() was removed in Python 3.9; list(tree)[0]
    # is the modern equivalent -- confirm supported interpreter range.
    channel = tree.getchildren()[0]

    for k, v in iteritems({'title': 'sitename', 'link': 'www_root',
                           'language': 'lang', 'author': 'author'}):
        try:
            defaults[v] = channel.find(k).text
        except AttributeError:
            pass

    # FIX: removed an unreachable duplicated copy of the whole parsing
    # section that followed this return statement.
    return defaults, list(map(generate, channel.findall('item')))
def rss(xml):
    """Import an RSS 2.0 feed: return ``(defaults, items)`` where defaults
    carries sitename/www_root/lang/author and items are entry dicts.

    Raises InputError for WordPress dumps, malformed XML or non-RSS-2.0
    feeds."""
    if 'xmlns:wp' in xml:
        raise InputError('WordPress dump')

    def parse_date_time(stamp):
        # RFC 822 date (optionally with timezone) -> naive local datetime
        ts = parsedate_tz(stamp)
        ts = mktime_tz(ts)
        return datetime.fromtimestamp(ts)

    def generate(item):
        entry = {}
        for k, v in iteritems({'title': 'title', 'date': 'pubDate',
                               'link': 'link', 'content': 'description'}):
            try:
                entry[k] = item.find(v).text if k != 'content' \
                    else unescape(item.find(v).text)
            except (AttributeError, TypeError):
                pass

        if any(filter(lambda k: k not in entry,
                      ['title', 'date', 'link', 'content'])):
            raise AcrylamidException('invalid RSS 2.0 feed: provide at least title, '
                                     + 'link, content and pubDate!')

        return {'title': entry['title'],
                'content': entry['content'],
                'date': parse_date_time(entry['date']),
                'link': entry['link'],
                'tags': [cat.text for cat in item.findall('category')]}

    try:
        tree = ElementTree.fromstring(xml.encode('utf-8'))
    except ElementTree.ParseError:
        raise InputError('no well-formed XML')
    if tree.tag != 'rss' or tree.attrib.get('version') != '2.0':
        raise InputError('no RSS 2.0 feed')

    defaults = {'author': None}
    # NOTE(review): getchildren() was removed in Python 3.9; list(tree)[0]
    # is the modern equivalent -- confirm supported interpreter range.
    channel = tree.getchildren()[0]

    for k, v in iteritems({'title': 'sitename', 'link': 'www_root',
                           'language': 'lang', 'author': 'author'}):
        try:
            defaults[v] = channel.find(k).text
        except AttributeError:
            pass

    # FIX: removed an unreachable duplicated copy of the whole parsing
    # section that followed this return statement.
    return defaults, list(map(generate, channel.findall('item')))
def build(conf, env, defaults, items, options):
    """Write every imported item as a new entry into ``content_dir``.

    :param conf: Acrylamid configuration (date_format, content_dir, ...).
    :param env: environment object (unused here, kept for the importer API).
    :param defaults: per-feed defaults (e.g. author) from the feed parser.
    :param items: list of entry dicts (title, date, content, link, ...).
    :param options: CLI options (keep, fmt, pandoc, force, args).
    :raises AcrylamidException: when a target file already exists and
                                ``--force`` was not given.
    """

    def create(defaults, item):
        """Render one item into a YAML-fronted text file and move it in place."""
        global USED_WORDPRESS
        fd, tmp = tempfile.mkstemp(suffix='.txt')

        with io.open(fd, 'w', encoding='utf-8') as f:
            f.write(u'---\n')
            f.write(u'title: %s\n' % safe(item['title']))
            # only write an author when it differs from the feed default
            if item.get('author') != defaults.get('author'):
                f.write(u'author: %s\n' % (item.get('author') or defaults.get('author')))
            f.write(u'date: %s\n' % item['date'].strftime(conf['date_format']))
            #f.write(u'filter: %s\n' % item['filter'])
            if 'draft' in item:
                f.write(u'draft: %s\n' % item['draft'])
            if 'tags' in item:
                f.write(u'tags: [%s]\n' % ', '.join(item['tags']))
            if item.get('description'):
                f.write(u'description: %s\n' % item['description'])
            if 'permalink' in item:
                f.write(u'permalink: %s\n' % item['permalink'])
            if item.get('type', 'entry') != 'entry':
                f.write(u'type: %s\n' % item['type'])
            # extra key/value pairs passed on the command line
            for arg in options.args:
                f.write(arg.strip() + u'\n')
            f.write(u'---\n\n')

            # these are fixes for WordPress because it doesn't save HTML but a
            # mixed-in form of HTML, making it very difficult to get either
            # HTML or reStructuredText/Markdown
            if USED_WORDPRESS and item['filter'] == 'markdown':
                item['content'] = item['content'].replace("\n ", " \n")
            elif USED_WORDPRESS and item['filter'] == 'rst':
                item['content'] = item['content'].replace('\n ', '\n\n')
            f.write(item['content'] + u'\n')

        entry = Entry(tmp, conf)
        p = join(conf['content_dir'], dirname(entry.permalink)[1:])
        try:
            os.makedirs(p.rsplit('/', 1)[0])
        except OSError:
            # directory already exists -- that's fine
            pass

        filepath = p + '.txt'
        if isfile(filepath) and not options.force:
            raise AcrylamidException('Entry already exists %r' % filepath)
        shutil.move(tmp, filepath)
        event.create('import', filepath)

    for item in items:
        if options.keep:
            # preserve the original URL path as the new permalink
            m = urlsplit(item['link'])
            if m.path != '/':
                item['permalink'] = m.path

        item['content'], item['filter'] = convert(item.get('content', ''),
                                                  options.fmt, options.pandoc)
        create(defaults, item)

    print("\nImport was successful. Edit your conf.py with these new settings:")
    for key, value in iteritems(defaults):
        if value is None:
            continue
        print("    %s = '%s'" % (key.upper(), value))
def pandocstyle(fileobj):
    """A function to parse the so called 'Title block' out of Pandoc-formatted
    documents.  Provides very simple parsing so that Acrylamid won't choke on
    plain Pandoc documents.

    Returns a tuple of (number of lines consumed, meta dict).  The meta dict
    contains at least 'title', and 'author'/'date' when present.

    See http://johnmacfarlane.net/pandoc/README.html#title-block

    Currently not implemented:
     - Formatting within title blocks
     - Man-page writer title block extensions
    """
    # a '%'-prefixed line starts the next title-block field
    meta_pan_re = re.compile(r'^[ ]{0,3}%+\s*(?P<value>.*)')
    # any indented/plain line continues the current field
    meta_pan_more_re = re.compile(r'^\s*(?P<value>.*)')
    # multiple authors are separated by semicolons
    meta_pan_authsplit = re.compile(r';+\s*')

    # i counts consumed lines; j indexes the next expected field in poss_keys
    i, j = 0, 0
    meta, key = {}, None
    poss_keys = ['title', 'author', 'date']

    while True:
        line = fileobj.readline(); i += 1

        if line.strip() == '':
            break  # blank line - done

        # the title block allows at most title, author and date
        if j + 1 > len(poss_keys):
            raise AcrylamidException(
                "%r has too many items in the Pandoc title block." % fileobj.name)

        m1 = meta_pan_re.match(line)
        if m1:
            # fields are positional: 1st '%'-line is title, 2nd author, 3rd date
            key = poss_keys[j]; j += 1
            valstrip = m1.group('value').strip()
            if not valstrip:
                # empty '%'-line: field intentionally left blank, skip value
                continue
            value = distinguish(m1.group('value').strip())
            if key == 'author':
                value = value.strip(';')
                value = meta_pan_authsplit.split(value)
            # each field collects a list of values (one per physical line)
            meta.setdefault(key, []).append(value)
        else:
            m2 = meta_pan_more_re.match(line)
            if m2 and key:
                # Add another line to existing key
                value = m2.group('value').strip()
                if key == 'author':
                    value = value.strip(';')
                    value = meta_pan_authsplit.split(value)
                meta[key].append(value)
            else:
                break  # no meta data - done

    if 'title' not in meta:
        raise AcrylamidException('No title given in %r' % fileobj.name)

    # a title spread over several lines is joined back into one string
    if len(meta['title']) > 1:
        meta['title'] = ' '.join(meta['title'])

    if 'author' in meta:
        # flatten the list of per-line author lists into a single list
        meta['author'] = sum(meta['author'], [])
    else:
        log.warn('%s does not have an Author in the Pandoc title block.'
                 % fileobj.name)

    # unwrap single-element value lists to plain values
    for key, values in iteritems(meta):
        if len(values) == 1:
            meta[key] = values[0]

    return i, meta