Exemplo n.º 1
0
 def handle_declare(self, declare):
     """Register one coding declaration into `self.clusters'.

     `declare' is either a plain string (an official coding name without
     aliases) or a sequence whose first element is the official name and
     whose remaining elements are aliases.

     `self.clusters' maps each cleaned name to a shared list (a cluster).
     The first element of a cluster is the official name as declared; all
     other elements are cleaned aliases.  When an alias already belongs to
     a different cluster, the current cluster is folded into that one.
     """
     # Split out the official coding name and its aliases.
     if isinstance(declare, str):
         official = declare
         aliases = ()
     else:
         official = declare[0]
         aliases = [recode.cleaned_alias(alias) for alias in declare[1:]]
     coding = recode.cleaned_alias(official)
     # Use coding's cluster, create it if necessary.  The first element
     # of a cluster is always the official name, other elements are clean.
     if coding not in self.clusters:
         self.clusters[coding] = [official]
     cluster = self.clusters[coding]
     # Add aliases or clusters to the current cluster.
     for alias in aliases:
         if alias not in self.clusters:
             # Brand new alias: it simply joins the current cluster.
             cluster.append(alias)
             self.clusters[alias] = cluster
             continue
         existing = self.clusters[alias]
         if existing is cluster:
             # The alias is already a member of the current cluster (for
             # example it cleans to the same key as the official name).
             # Merging a cluster into itself would only duplicate every
             # one of its entries, so there is nothing to do.
             continue
         # Fold the current cluster into the pre-existing alias' cluster.
         # The first element is cleaned so it becomes a mere alias there.
         members = [recode.cleaned_alias(cluster[0])] + cluster[1:]
         for element in members:
             existing.append(element)
             self.clusters[element] = existing
         cluster = existing
Exemplo n.º 2
0
 def produce_counts(self):
     """Count the encoding arcs for every ordered pair of distinct charsets.

     Resets `self.recode_calls', `self.original' and `self.shrunk', then
     builds a `before..after' request for each pair and feeds the arcs
     returned by `recode.Recodec(request).encoding_arcs()' to both Stats
     objects.  Progress is reported on standard error.

     The charsets come from `self.charset_options' when it is non-empty;
     otherwise they are collected from `recode.registry.methods', skipping
     any pair that involves `recode.TRIVIAL_SURFACE'.
     """
     self.recode_calls = 0
     self.original = Stats()
     self.shrunk = Stats()
     # Get the list of charsets, as lists of (sort key, name) pairs.
     if self.charset_options:
         befores = [(charset, charset) for charset in self.charset_options]
         afters = befores
     else:
         before_names = {}
         after_names = {}
         for before, after in recode.registry.methods:
             if recode.TRIVIAL_SURFACE not in (before, after):
                 before_names[recode.cleaned_alias(before)] = before
                 after_names[recode.cleaned_alias(after)] = after
         # sorted() yields a real list on every Python version, whereas
         # `dict.items()' is a view without a `sort' method on Python 3.
         befores = sorted(before_names.items())
         afters = sorted(after_names.items())
     # Recode in all combinations.
     sys.stderr.write("Attempting %d (%d x %d) recodings.\n"
                      % (len(befores)*len(afters),
                         len(befores), len(afters)))
     count = 0
     for _, before in befores:
         count += 1
         sys.stderr.write("  %d/%d. %s..*\n"
                          % (count, len(befores), before))
         for _, after in afters:
             if after != before:
                 request = '%s..%s' % (before, after)
                 arcs = recode.Recodec(request).encoding_arcs()
                 self.recode_calls += 1
                 self.original.count_request(arcs)
                 self.shrunk.count_request(arcs)
Exemplo n.º 3
0
 def handle_declare(self, declare):
     """Register one coding declaration into `self.clusters'.

     `declare' is either a plain string (an official coding name without
     aliases) or a sequence whose first element is the official name and
     whose remaining elements are aliases.

     `self.clusters' maps each cleaned name to a shared list (a cluster).
     The first element of a cluster is the official name as declared; all
     other elements are cleaned aliases.  When an alias already belongs to
     a different cluster, the current cluster is folded into that one.
     """
     # Split out the official coding name and its aliases.
     if isinstance(declare, str):
         official = declare
         aliases = ()
     else:
         official = declare[0]
         aliases = [recode.cleaned_alias(alias) for alias in declare[1:]]
     coding = recode.cleaned_alias(official)
     # Use coding's cluster, create it if necessary.  The first element
     # of a cluster is always the official name, other elements are clean.
     if coding not in self.clusters:
         self.clusters[coding] = [official]
     cluster = self.clusters[coding]
     # Add aliases or clusters to the current cluster.
     for alias in aliases:
         if alias not in self.clusters:
             # Brand new alias: it simply joins the current cluster.
             cluster.append(alias)
             self.clusters[alias] = cluster
             continue
         existing = self.clusters[alias]
         if existing is cluster:
             # The alias is already a member of the current cluster (for
             # example it cleans to the same key as the official name).
             # Merging a cluster into itself would only duplicate every
             # one of its entries, so there is nothing to do.
             continue
         # Fold the current cluster into the pre-existing alias' cluster.
         # The first element is cleaned so it becomes a mere alias there.
         members = [recode.cleaned_alias(cluster[0])] + cluster[1:]
         for element in members:
             existing.append(element)
             self.clusters[element] = existing
         cluster = existing
Exemplo n.º 4
0
 def produce_counts(self):
     """Count the encoding arcs for every ordered pair of distinct charsets.

     Resets `self.recode_calls', `self.original' and `self.shrunk', then
     builds a `before..after' request for each pair and feeds the arcs
     returned by `recode.Recodec(request).encoding_arcs()' to both Stats
     objects.  Progress is reported on standard error.

     The charsets come from `self.charset_options' when it is non-empty;
     otherwise they are collected from `recode.registry.methods', skipping
     any pair that involves `recode.TRIVIAL_SURFACE'.
     """
     self.recode_calls = 0
     self.original = Stats()
     self.shrunk = Stats()
     # Get the list of charsets, as lists of (sort key, name) pairs.
     if self.charset_options:
         befores = [(charset, charset) for charset in self.charset_options]
         afters = befores
     else:
         before_names = {}
         after_names = {}
         for before, after in recode.registry.methods:
             if recode.TRIVIAL_SURFACE not in (before, after):
                 before_names[recode.cleaned_alias(before)] = before
                 after_names[recode.cleaned_alias(after)] = after
         # sorted() yields a real list on every Python version, whereas
         # `dict.items()' is a view without a `sort' method on Python 3.
         befores = sorted(before_names.items())
         afters = sorted(after_names.items())
     # Recode in all combinations.
     sys.stderr.write(
         "Attempting %d (%d x %d) recodings.\n" %
         (len(befores) * len(afters), len(befores), len(afters)))
     count = 0
     for _, before in befores:
         count += 1
         sys.stderr.write("  %d/%d. %s..*\n" %
                          (count, len(befores), before))
         for _, after in afters:
             if after != before:
                 request = '%s..%s' % (before, after)
                 arcs = recode.Recodec(request).encoding_arcs()
                 self.recode_calls += 1
                 self.original.count_request(arcs)
                 self.shrunk.count_request(arcs)
Exemplo n.º 5
0
 def main(self, *arguments):
     """Generate `preset.py' from the Recode modules named in `arguments'.

     Accepts a `-v' option, which sets `self.verbose'.  Each remaining
     argument is a module name imported as `Recode.<name>'.  The modules'
     `declares', `implied_surfaces' and codec objects (anything having both
     `internal_coding' and `external_coding' attributes) are collected into
     `self.clusters', `self.implied' and `self.methods', which are then
     written out, sorted, as the `aliases' and `methods' dictionaries.
     """
     # Decode options.
     import getopt
     options, arguments = getopt.getopt(arguments, 'v')
     for option, _ in options:
         if option == '-v':
             self.verbose = True
     # Import all modules.  `__import__' returns the top-level package, so
     # the submodule has to be fetched from it by name.
     modules = [getattr(__import__('Recode.' + module_name), module_name)
                for module_name in arguments]
     # Register aliases into clusters.
     self.clusters = {}
     self.handle_declare(recode.UNICODE_STRING)
     self.handle_declare((recode.TRIVIAL_SURFACE, 'Data'))
     for module in modules:
         try:
             declares = module.declares
         except AttributeError:
             sys.stderr.write("No `declares' in `%s'\n" % module.__file__)
         else:
             for declare in declares:
                 self.handle_declare(declare)
     # Register implied surfaces.
     self.implied = {}
     for module in modules:
         if hasattr(module, 'implied_surfaces'):
             for alias, surface in module.implied_surfaces:
                 self.implied[recode.cleaned_alias(alias)] = (
                     recode.cleaned_alias(surface))
     # Register recode methods.
     self.methods = {}
     for module, module_name in zip(modules, arguments):
         for name in dir(module):
             codec = getattr(module, name)
             if (hasattr(codec, 'internal_coding')
                 and hasattr(codec, 'external_coding')):
                 self.handle_codec(module_name, name, codec)
     # Write out the Python source.  sorted() is used rather than sorting
     # the result of `items()' in place, since that result is a view
     # without a `sort' method on Python 3.
     write = common.Output('preset.py', 'Python').write
     write('\n'
           'aliases = {\n')
     for alias, cluster in sorted(self.clusters.items()):
         write('    %r: (%r, %r),\n' % (alias, cluster[0],
                                        self.implied.get(alias)))
     write('    }\n'
           '\n'
           'methods = {\n')
     for (before, after), (module_name, codec_name, use_encode) in sorted(
             self.methods.items()):
         write('    (%r, %r): (%r, %r, %r),\n' %
               (before, after, module_name, codec_name, use_encode))
     write('    }\n')
Exemplo n.º 6
0
 def main(self, *arguments):
     """Generate `preset.py' from the Recode modules named in `arguments'.

     Accepts a `-v' option, which sets `self.verbose'.  Each remaining
     argument is a module name imported as `Recode.<name>'.  The modules'
     `declares', `implied_surfaces' and codec objects (anything having both
     `internal_coding' and `external_coding' attributes) are collected into
     `self.clusters', `self.implied' and `self.methods', which are then
     written out, sorted, as the `aliases' and `methods' dictionaries.
     """
     # Decode options.
     import getopt
     options, arguments = getopt.getopt(arguments, 'v')
     for option, _ in options:
         if option == '-v':
             self.verbose = True
     # Import all modules.  `__import__' returns the top-level package, so
     # the submodule has to be fetched from it by name.
     modules = [getattr(__import__('Recode.' + module_name), module_name)
                for module_name in arguments]
     # Register aliases into clusters.
     self.clusters = {}
     self.handle_declare(recode.UNICODE_STRING)
     self.handle_declare((recode.TRIVIAL_SURFACE, 'Data'))
     for module in modules:
         try:
             declares = module.declares
         except AttributeError:
             sys.stderr.write("No `declares' in `%s'\n" % module.__file__)
         else:
             for declare in declares:
                 self.handle_declare(declare)
     # Register implied surfaces.
     self.implied = {}
     for module in modules:
         if hasattr(module, 'implied_surfaces'):
             for alias, surface in module.implied_surfaces:
                 self.implied[recode.cleaned_alias(alias)] = (
                     recode.cleaned_alias(surface))
     # Register recode methods.
     self.methods = {}
     for module, module_name in zip(modules, arguments):
         for name in dir(module):
             codec = getattr(module, name)
             if (hasattr(codec, 'internal_coding')
                 and hasattr(codec, 'external_coding')):
                 self.handle_codec(module_name, name, codec)
     # Write out the Python source.  sorted() is used rather than sorting
     # the result of `items()' in place, since that result is a view
     # without a `sort' method on Python 3.
     write = common.Output('preset.py', 'Python').write
     write('\n'
           'aliases = {\n')
     for alias, cluster in sorted(self.clusters.items()):
         write('    %r: (%r, %r),\n' % (alias, cluster[0],
                                        self.implied.get(alias)))
     write('    }\n'
           '\n'
           'methods = {\n')
     for (before, after), (module_name, codec_name, use_encode) in sorted(
             self.methods.items()):
         write('    (%r, %r): (%r, %r, %r),\n' %
               (before, after, module_name, codec_name, use_encode))
     write('    }\n')
Exemplo n.º 7
0
 def handle_codec(self, module_name, codec_name, codec):
     """Record the recoding directions offered by `codec' in `self.methods'.

     The codec's internal and external codings are resolved to the first
     element of their clusters.  For each of `codec.encode' (internal to
     external) and `codec.decode' (external to internal) that is not None,
     an entry `(before, after) -> (module_name, codec_name, direction)' is
     stored.  When the pair is already registered, only the `builtin'
     module replaces the existing entry; any other module leaves it alone.
     Conflicts are reported on standard error when `self.verbose' is set.
     """
     internal = self.clusters[recode.cleaned_alias(codec.internal_coding)][0]
     external = self.clusters[recode.cleaned_alias(codec.external_coding)][0]
     candidates = (
         (codec.encode, internal, external, True),
         (codec.decode, external, internal, False),
     )
     for method, source, target, use_encode in candidates:
         if method is None:
             continue
         key = (source, target)
         if key in self.methods:
             replaces = module_name == 'builtin'
             if self.verbose:
                 previous = self.methods[key][0]
                 # The message always names the loser first, the winner next.
                 if replaces:
                     names = (previous, module_name)
                 else:
                     names = (module_name, previous)
                 sys.stderr.write(
                     "Overriding `%s' by `%s' for `%s..%s'.\n"
                     % (names + key))
             if not replaces:
                 # Keep the entry that was registered first.
                 continue
         self.methods[key] = (module_name, codec_name, use_encode)
Exemplo n.º 8
0
 def handle_codec(self, module_name, codec_name, codec):
     """Record the recoding directions offered by `codec' in `self.methods'.

     The codec's internal and external codings are resolved to the first
     element of their clusters.  For each of `codec.encode' (internal to
     external) and `codec.decode' (external to internal) that is not None,
     an entry `(before, after) -> (module_name, codec_name, direction)' is
     stored.  When the pair is already registered, only the `builtin'
     module replaces the existing entry; any other module leaves it alone.
     Conflicts are reported on standard error when `self.verbose' is set.
     """
     internal = self.clusters[recode.cleaned_alias(codec.internal_coding)][0]
     external = self.clusters[recode.cleaned_alias(codec.external_coding)][0]
     candidates = (
         (codec.encode, internal, external, True),
         (codec.decode, external, internal, False),
     )
     for method, source, target, use_encode in candidates:
         if method is None:
             continue
         key = (source, target)
         if key in self.methods:
             replaces = module_name == "builtin"
             if self.verbose:
                 previous = self.methods[key][0]
                 # The message always names the loser first, the winner next.
                 if replaces:
                     names = (previous, module_name)
                 else:
                     names = (module_name, previous)
                 sys.stderr.write(
                     "Overriding `%s' by `%s' for `%s..%s'.\n" % (names + key)
                 )
             if not replaces:
                 # Keep the entry that was registered first.
                 continue
         self.methods[key] = (module_name, codec_name, use_encode)
Exemplo n.º 9
0
 def save_alias(self, base, alias):
     """Remember `alias' as another name for `base' in `self.aliases'.

     An entry for `base' is always created.  The alias itself is recorded
     only when its cleaned form differs from the cleaned form of `base'.
     """
     known = self.aliases.setdefault(base, [])
     same_name = recode.cleaned_alias(alias) == recode.cleaned_alias(base)
     if not same_name:
         known.append(alias)
Exemplo n.º 10
0
    def main(self, *arguments):
        """Generate `strip.c' from the strip data supplied by `common'.

        Takes no arguments.  Every strip yielded by
        `common.all_strip_data()' goes through `self.add_strip' so that
        identical strips are shared.  The generated C file holds the shared
        `ucs2_data_pool' array, one `struct strip_data' per charset, and
        the `module_strips' function, which declares each charset together
        with its aliases and implied surfaces.
        """
        assert not arguments, arguments
        # Rewrite strip data, merging common strips as we go.
        self.strips = []
        self.strip_index = {}
        # A strip made only of U+FFFF is registered before any other one.
        self.add_strip(u'\uFFFF' * recode.STRIP_SIZE)
        strip_data = []
        for charset, data, indices in common.all_strip_data():
            # NOTE(review): add_strip presumably returns the offset of the
            # strip within the pool (it is later printed with `%4d') --
            # confirm against its definition.
            strip_data.append(
                (recode.cleaned_alias(charset), charset,
                 [self.add_strip(data[index:index+recode.STRIP_SIZE])
                  for index in indices]))
        # Write the strip pool, eight values per line, each line preceded
        # by a comment giving the position of its first value.
        write = common.Output('strip.c', 'C').write
        write('\n'
              '#include "common.h"\n'
              '\n'
              'const recode_ucs2 ucs2_data_pool[%d] =\n'
              '  {'
              % (len(self.strips) * recode.STRIP_SIZE))
        count = 0
        for strip in self.strips:
            for character in strip:
                if count % 8 == 0:
                    if count != 0:
                        write(',')
                    write('\n    /* %4d */ ' % count)
                else:
                    write(', ')
                write('0x%0.4X' % ord(character))
                count += 1
        write('\n'
              '  };\n')
        # Write out all strip codecs, sorted by cleaned charset name.
        strip_data.sort()
        for ordinal, (_, charset, indices) in enumerate(strip_data):
            write('\n'
                  '/* %s */\n'
                  '\n'
                  'static struct strip_data data_%d =\n'
                  '  {\n'
                  '    ucs2_data_pool,\n'
                  '    {\n'
                  % (charset, ordinal))
            # Twelve strip indices per line.
            for count, index in enumerate(indices):
                if count % 12 == 0:
                    if count != 0:
                        write(',\n')
                    write('      ')
                else:
                    write(', ')
                write('%4d' % index)
            write('\n'
                  '    }\n'
                  '  };\n')
        # Print the collectable initialisation function.
        write('\n'
              'bool\n'
              'module_strips (struct recode_outer *outer)\n'
              '{\n'
              '  RECODE_ALIAS alias;\n')
        # Group the registered aliases by the strip charset they refer to.
        charsets = {}
        for _, charset, indices in strip_data:
            charsets[charset] = []
        # `items()' exists on every Python version; `iteritems()' does not
        # exist on Python 3.
        for alias, (charset, surface) in recode.registry.aliases.items():
            if charset in charsets:
                charsets[charset].append((alias, surface))
        for ordinal, (_, charset, indices) in enumerate(strip_data):
            write('\n'
                  '  if (!declare_strip_data (outer, &data_%d, "%s"))\n'
                  '    return false;\n'
                  % (ordinal, charset))
            for alias, surface in charsets[charset]:
                if surface is None:
                    write('  if (!declare_alias (outer, "%s", "%s"))\n'
                          '    return false;\n'
                          % (alias, charset))
                else:
                    # The C side keeps the declared alias so an implied
                    # surface can be attached to it.
                    write('  if (alias = declare_alias (outer, "%s", "%s"),'
                          ' !alias)\n'
                          '    return false;\n'
                          % (alias, charset))
                    write('  if (!declare_implied_surface (outer, alias,'
                          ' outer->%s_surface))\n'
                          '    return false;\n'
                          % surface)
        write('\n'
              '  return true;\n'
              '}\n')
Exemplo n.º 11
0
 def save_alias(self, base, alias):
     """Remember `alias' as another name for `base' in `self.aliases'.

     An entry for `base' is always created.  The alias itself is recorded
     only when its cleaned form differs from the cleaned form of `base'.
     """
     known = self.aliases.setdefault(base, [])
     same_name = recode.cleaned_alias(alias) == recode.cleaned_alias(base)
     if not same_name:
         known.append(alias)