def value_from_raw(self, raw):
    """Turn a raw flow field value into a FlowDescriptor.

    Returns a missing-value marker when the raw value or its pad is
    absent, and a bad-value marker when the flow data is malformed.
    """
    if raw.value is None:
        return raw.missing_value('Missing flow')
    if raw.pad is None:
        return raw.missing_value('Flow value was technically present '
                                 'but used in a place where it cannot '
                                 'be used.')
    database = raw.pad.db
    descriptors = []
    try:
        for block_name, block_lines in process_flowblock_data(raw.value):
            # Unknown flow blocks are skipped for the moment
            if self.flow_blocks is not None \
                    and block_name not in self.flow_blocks:
                continue
            model = database.flowblocks.get(block_name)
            if model is None:
                continue
            # Join each tokenized section back into a single string.
            field_data = {key: u''.join(lines)
                          for key, lines in tokenize(block_lines)}
            descriptors.append(model.process_raw_data(field_data,
                                                      pad=raw.pad))
    except BadFlowBlock as e:
        return raw.bad_value(str(e))
    return FlowDescriptor(descriptors, raw.pad)
def process_node(self, fields, sections, source, zone, root_path):
    """For a given node, identify all fields to translate, and add
    new entries to the translations memory.  Flow blocks are handled
    recursively.

    :param fields: datamodel fields of the node.
    :param sections: mapping of section name to list of section lines.
    :param source: the source object the sections came from.
    :param zone: label used in the message location (datamodel/block id).
    :param root_path: project root, for relative source paths.
    """
    for field in fields:
        if ('translate' in field.options) \
                and (source.alt in (PRIMARY_ALT, self.content_language)) \
                and (field.options['translate'] in ('True', 'true', '1', 1)):
            # Membership test directly on the dict (the original mixed
            # `in sections.keys()` here with `in sections` below).
            if field.name in sections:
                section = sections[field.name]
                # if blockwise, each paragraph is one translatable message,
                # otherwise each line
                chunks = (split_paragraphs(section)
                          if self.trans_parwise
                          else [x.strip() for x in section if x.strip()])
                for chunk in chunks:
                    translations.add(
                        chunk.strip('\r\n'),
                        "%s (%s:%s.%s)" % (
                            urljoin(self.url_prefix, source.url_path),
                            relpath(source.source_filename, root_path),
                            zone, field.name)
                    )
        if isinstance(field.type, FlowType):
            if field.name in sections:
                section = sections[field.name]
                # Recurse into every block of the flow.
                for blockname, blockvalue in \
                        process_flowblock_data("".join(section)):
                    flowblockmodel = source.pad.db.flowblocks[blockname]
                    blockcontent = dict(tokenize(blockvalue))
                    self.process_node(flowblockmodel.fields, blockcontent,
                                      source, blockname, root_path)
def load_raw_data(self, path, alt=PRIMARY_ALT, cls=None):
    """Internal helper that loads the raw record data.  This performs
    very little data processing on the data.

    :param path: database path of the record (cleaned up first).
    :param alt: which alternative (translation) to load.
    :param cls: mapping type used to collect the fields; ``dict``
                when not given.
    :returns: the populated mapping for the first readable filename
              choice, or ``None`` when no choice could be read.
    """
    path = cleanup_path(path)
    if cls is None:
        cls = dict
    fn_base = self.to_fs_path(path)
    rv = cls()
    # Candidate (filesystem path, source alt, is_attachment) triples
    # for this record/alt combination.
    choiceiter = _iter_filename_choices(fn_base, [alt], self.config)
    for fs_path, source_alt, is_attachment in choiceiter:
        try:
            with open(fs_path, 'rb') as f:
                for key, lines in metaformat.tokenize(f, encoding='utf-8'):
                    rv[key] = u''.join(lines)
        except IOError as e:
            # Only "does not exist"-style errors are tolerated.
            if e.errno not in (errno.ENOTDIR, errno.ENOENT):
                raise
            if not is_attachment or not os.path.isfile(fs_path[:-3]):
                continue
            # Attachment whose metadata file is missing: continue with
            # an empty record so the attachment is still registered.
            rv = {}
        # Bookkeeping fields for the choice that was read; the method
        # returns right afterwards.
        rv['_path'] = path
        rv['_id'] = posixpath.basename(path)
        rv['_gid'] = hashlib.md5(path.encode('utf-8')).hexdigest()
        rv['_alt'] = alt
        rv['_source_alt'] = source_alt
        if is_attachment:
            rv['_attachment_for'] = posixpath.dirname(path)
        return rv
def value_from_raw(self, raw):
    """Parse a raw flow field value into a FlowDescriptor.

    Returns a missing-value marker when the raw value or its pad is
    absent, and a bad-value marker when the flow data is malformed.
    """
    if raw.value is None:
        return raw.missing_value('Missing flow')
    if raw.pad is None:
        return raw.missing_value('Flow value was technically present '
                                 'but used in a place where it cannot '
                                 'be used.')
    db = raw.pad.db
    rv = []
    try:
        for block, block_lines in process_flowblock_data(raw.value):
            # Unknown flow blocks are skipped for the moment
            if self.flow_blocks is not None and \
                    block not in self.flow_blocks:
                continue
            flowblock = db.flowblocks.get(block)
            if flowblock is None:
                continue
            d = {}
            for key, lines in tokenize(block_lines):
                d[key] = u''.join(lines)
            rv.append(flowblock.process_raw_data(d, pad=raw.pad))
    except BadFlowBlock as e:
        # BUG FIX: `e.message` does not exist on Python 3 exceptions
        # (PEP 352 removed it); use str(e) as the sibling
        # implementation of this method already does.
        return raw.bad_value(str(e))
    return FlowDescriptor(rv, raw.pad)
def process_node(self, fields, sections, source, zone, root_path):
    """For a given node, identify all fields to translate, and add
    new entries to the translations memory.  Flow blocks are handled
    recursively.

    :param fields: datamodel fields of the node.
    :param sections: mapping of section name to list of section lines.
    :param source: the source object the sections came from.
    :param zone: label used in the message location (datamodel/block id).
    :param root_path: project root, for relative source paths.
    """
    for field in fields:
        if ('translate' in field.options) \
                and (source.alt in (PRIMARY_ALT, self.content_language)) \
                and (field.options['translate'] in ('True', 'true', '1', 1)):
            if field.name in sections:
                section = sections[field.name]
                # if blockwise, each paragraph is one translatable message,
                # otherwise each line
                chunks = (split_paragraphs(section)
                          if self.trans_parwise
                          else [x.strip() for x in section if x.strip()])
                for chunk in chunks:
                    translations.add(
                        chunk.strip('\r\n'),
                        "%s (%s:%s.%s)" % (
                            urljoin(self.url_prefix, source.url_path),
                            relpath(source.source_filename, root_path),
                            zone, field.name)
                    )
        if isinstance(field.type, FlowType):
            # BUG FIX: `dict.has_key` was removed in Python 3; use the
            # `in` operator instead.
            if field.name in sections:
                section = sections[field.name]
                # Recurse into every block of the flow.
                for blockname, blockvalue in \
                        process_flowblock_data("".join(section)):
                    flowblockmodel = source.pad.db.flowblocks[blockname]
                    blockcontent = dict(tokenize(blockvalue))
                    self.process_node(flowblockmodel.fields, blockcontent,
                                      source, blockname, root_path)
def process_node(self, fields, sections, source, zone, root_path):
    """For a given node, identify all fields to translate, and add
    new fields to translations memory. Flow blocks are handled
    recursively.

    :param fields: datamodel fields of the node.
    :param sections: mapping of section name to list of section lines.
    :param source: the source object the sections came from.
    :param zone: label used in the message location (datamodel/block id).
    :param root_path: project root, for relative source paths.
    """
    for field in fields:
        if ('translate' in field.options) \
                and (source.alt in (PRIMARY_ALT, self.content_language)) \
                and (field.options['translate'] in ('True', 'true', '1', 1)):
            if field.name in sections.keys():
                section = sections[field.name]
                # Every non-empty line becomes one translatable message,
                # tagged with its URL and source location.
                for line in [x.strip() for x in section if x.strip()]:
                    translations.add(
                        line,
                        "%s (%s:%s.%s)" %
                        (urlparse.urljoin(self.url_prefix,
                                          source.url_path),
                         relpath(source.source_filename, root_path),
                         zone, field.name))
        if isinstance(field.type, FlowType):
            if field.name in sections.keys():
                section = sections[field.name]
                # Recurse into every block of the flow.
                for blockname, blockvalue in process_flowblock_data(
                        "".join(section)):
                    flowblockmodel = source.pad.db.flowblocks[blockname]
                    blockcontent = dict(tokenize(blockvalue))
                    self.process_node(flowblockmodel.fields, blockcontent,
                                      source, blockname, root_path)
def load_raw_data(self, path, cls=None):
    """Internal helper that loads the raw record data.  This performs
    very little data processing on the data.

    :param path: database path of the record (cleaned up first).
    :param cls: mapping type used to collect the fields; ``dict``
                when not given.
    :returns: the populated mapping for the first readable filename
              choice, or ``None`` when no choice could be read.
    """
    path = cleanup_path(path)
    if cls is None:
        cls = dict
    fn_base = self.to_fs_path(path)
    rv = cls()
    # Candidate (filesystem path, is_attachment) pairs for this record.
    for fs_path, is_attachment in _iter_filename_choices(fn_base):
        try:
            with open(fs_path, 'rb') as f:
                for key, lines in metaformat.tokenize(f, encoding='utf-8'):
                    rv[key] = u''.join(lines)
        except IOError as e:
            # Only "does not exist"-style errors are tolerated.
            if e.errno not in (errno.ENOTDIR, errno.ENOENT):
                raise
            if not is_attachment or not os.path.isfile(fs_path[:-3]):
                continue
            # Attachment whose metadata file is missing: continue with
            # an empty record so the attachment is still registered.
            rv = {}
        # Bookkeeping fields for the choice that was read; the method
        # returns right afterwards.
        rv['_path'] = path
        rv['_id'] = posixpath.basename(path)
        rv['_gid'] = hashlib.md5(path.encode('utf-8')).hexdigest()
        if is_attachment:
            rv['_attachment_for'] = posixpath.dirname(path)
        return rv
def on_after_build(self, builder, build_state, source, prog, **extra):
    """After a page is built, feed its translatable sections into the
    translations memory."""
    if not self.enabled or not isinstance(source, Page):
        return
    try:
        text = source.contents.as_text()
    except IOError:
        return
    # {'sectionname': [list of section texts]}
    section_map = dict(tokenize(text.splitlines()))
    self.process_node(source.datamodel.fields, section_map, source,
                      source.datamodel.id, builder.env.root_path)
def on_after_build(self, builder, build_state, source, prog):
    """Collect translatable strings from a freshly built page into the
    translations memory."""
    if not (self.enabled and isinstance(source, Page)):
        return
    try:
        raw_text = source.contents.as_text()
    except IOError:
        return
    # {'sectionname': [list of section texts]}
    parsed_sections = dict(tokenize(raw_text.splitlines()))
    self.process_node(source.datamodel.fields, parsed_sections,
                      source, source.datamodel.id,
                      builder.env.root_path)
def process_node(self, fields, sections, source, zone, root_path):
    """Identify all translatable fields of a node and record them in
    the translations memory; flow blocks are handled recursively.

    :param fields: datamodel fields of the node.
    :param sections: mapping of section name to list of section lines.
    :param source: the source object the sections came from.
    :param zone: label used in the message location (datamodel/block id).
    :param root_path: project root, for relative source paths.
    """
    for field in fields:
        if ('translate' in field.options) \
                and (field.options['translate'] in ('True', 'true', '1', 1)):
            if field.name in sections:
                section = sections[field.name]
                # Every non-empty line becomes one translatable message.
                for line in [x.strip() for x in section if x.strip()]:
                    translations.add(
                        line,
                        "%s:%s.%s" % (
                            relpath(source.source_filename, root_path),
                            zone, field.name)
                    )
        if isinstance(field.type, FlowType):
            # BUG FIX: `dict.has_key` was removed in Python 3; use the
            # `in` operator instead.
            if field.name in sections:
                section = sections[field.name]
                # Recurse into every block of the flow.
                for blockname, blockvalue in \
                        process_flowblock_data("".join(section)):
                    flowblockmodel = source.pad.db.flowblocks[blockname]
                    blockcontent = dict(tokenize(blockvalue))
                    self.process_node(flowblockmodel.fields, blockcontent,
                                      source, blockname, root_path)
def on_before_build(self, builder, build_state, source, prog):
    """Before building a page, write its translated alternatives
    ("contents+<lang>.lr") using the compiled gettext catalogs.

    Only primary-alternative pages are processed; one translated
    contents file is written per configured language.
    """
    if isinstance(source, Page) and source.alt == PRIMARY_ALT:
        text = source.contents.as_text()
        fields = source.datamodel.fields
        # ('sectionname', [list of section texts])
        sections = list(tokenize(text.splitlines()))
        flowblocks = source.pad.db.flowblocks
        for language in self.translations_languages:
            translator = gettext.translation(
                "contents", join(self.i18npath, '_compiled'),
                languages=[language], fallback=True)
            translated_filename = join(dirname(source.source_filename),
                                       "contents+%s.lr" % language)
            with open(translated_filename, "w") as f:
                # counting the number of lines of the current block
                count_lines_block = 0
                is_content = False
                # BUG FIX: the source file handle was previously leaked;
                # close it with a context manager.
                with source.contents.open(encoding='utf-8') as contents_file:
                    for line in contents_file.readlines():
                        stripped_line = line.strip()
                        if not stripped_line:  # empty line
                            f.write('\n')
                            continue
                        if _line_is_dashes(stripped_line) \
                                or _block_re.match(stripped_line):
                            # block separator ("---") or a new block tag:
                            # copy through untranslated
                            count_lines_block = 0
                            is_content = False
                            f.write("%s" % line)
                        else:
                            count_lines_block += 1
                            if count_lines_block == 1 and not is_content:
                                # first line of a block, not yet in content
                                if _command_re.match(stripped_line):
                                    key, value = stripped_line.split(':', 1)
                                    value = value.strip()
                                    if value:
                                        # BUG FIX: a debugger breakpoint
                                        # (import ipdb; ipdb.set_trace())
                                        # was left here swallowing
                                        # UnicodeError; let the error
                                        # propagate instead.
                                        f.write("%s: %s\n" % (
                                            key.encode('utf-8'),
                                            translator.ugettext(value)
                                            .encode('utf-8')))
                                    else:
                                        f.write("%s:\n"
                                                % key.encode('utf-8'))
                                else:
                                    is_content = True
                            if is_content:
                                f.write("%s\n" % translator.gettext(
                                    stripped_line).encode('utf-8'))
def on_before_build(self, builder, build_state, source, prog):
    """Before building a page, possibly produce all its alternatives
    (=translated pages) using the gettext translations available."""
    # if isinstance(source,Page) and source.alt==PRIMARY_ALT:
    if self.enabled and self.flag_is_present(builder.extra_flags) \
            and isinstance(source, Page) \
            and source.alt in (PRIMARY_ALT, self.content_language):
        contents = None
        for fn in source.iter_source_filenames():
            try:
                contents = FileContents(fn)
            except IOError:
                pass  # next
        # NOTE(review): no `break` above — the last readable source
        # filename wins; and if every FileContents() raises, `contents`
        # stays None and `.as_text()` below fails — confirm intended.
        text = contents.as_text()
        fields = source.datamodel.fields
        # ('sectionname', [list of section texts])
        sections = list(tokenize(text.splitlines()))
        flowblocks = source.pad.db.flowblocks
        for language in self.translations_languages:
            translator = gettext.translation(
                "contents", join(self.i18npath, '_compiled'),
                languages=[language], fallback=True)
            translated_filename = join(dirname(source.source_filename),
                                       "contents+%s.lr" % language)
            #if language == 'de':
            #    continue
            # Output is written as bytes so py2 and py3 behave alike.
            with open(translated_filename, "wb") as f:
                # counting the number of lines of the current block
                count_lines_block = 0
                is_content = False
                with contents.open(encoding='utf-8') as contents_file:
                    for line in contents_file.readlines():  #text.splitlines():
                        stripped_line = line.strip()
                        if not stripped_line:  # empty line
                            f.write(b'\n')
                            continue
                        if _line_is_dashes(stripped_line) \
                                or _block2_re.match(stripped_line):
                            # line like "---*" or a new block tag:
                            # copied through untranslated
                            count_lines_block = 0
                            is_content = False
                            f.write(("%s" % line).encode('utf-8'))
                        else:
                            count_lines_block += 1
                            if count_lines_block == 1 and not is_content:
                                # handle first line, while not in content
                                if _command_re.match(stripped_line):
                                    # "key: value" field line; translate
                                    # only the value part
                                    key, value = stripped_line.split(':', 1)
                                    value = value.strip()
                                    if value:
                                        if six.PY3:
                                            f.write((
                                                "%s: %s\n"
                                                % (key,
                                                   translator.gettext(value))
                                            ).encode('utf-8'))
                                        else:
                                            f.write(
                                                "%s: %s\n"
                                                % (key.encode('utf-8'),
                                                   translator.ugettext(value)
                                                   .encode('utf-8')))
                                    else:
                                        # field with an empty value
                                        if six.PY3:
                                            f.write(("%s:\n"
                                                     % key).encode('utf-8'))
                                        else:
                                            f.write("%s:\n"
                                                    % key.encode('utf-8'))
                                else:
                                    is_content = True
                            if is_content:
                                if six.PY2:
                                    # translate the stripped version
                                    translated_stripline = \
                                        translator.ugettext(stripped_line)
                                else:
                                    # translate the stripped version
                                    translated_stripline = \
                                        translator.gettext(stripped_line)
                                # and re-inject the stripped translation
                                # into original line (not stripped)
                                translation = line.replace(
                                    stripped_line, translated_stripline, 1)
                                f.write(translation.encode('utf-8'))