def _make_encoder_builders_file((schema_in, rows_in)):
    assert os.path.isfile(rows_in)
    schema = json_load(schema_in)
    with csv_reader(rows_in) as reader:
        header = reader.next()
        builders = []
        seen = set()
        for name in header:
            if name in schema:
                if name in seen:
                    raise LoomError('Repeated column {} in csv file {}'.format(
                        name, rows_in))
                seen.add(name)
                model = schema[name]
                Builder = ENCODER_BUILDERS[model]
                builder = Builder(name, model)
            else:
                builder = None
            builders.append(builder)
        if all(builder is None for builder in builders):
            raise LoomError(
                'Csv file has no known features; '
                'try adding a header to {}'.format(rows_in))
        missing_features = sorted(set(schema) - seen)
        if missing_features:
            raise LoomError('\n '.join(
                ['Csv file is missing features:'] + missing_features))
        for row in reader:
            for value, builder in izip(row, builders):
                if builder is not None:
                    value = value.strip()
                    if value:
                        builder.add_value(value)
        return [b for b in builders if b is not None]

def _import_rows_file(args):
    encoding_in, rows_csv_in, rows_out, id_offset, id_stride = args
    assert os.path.isfile(rows_csv_in)
    encoders = json_load(encoding_in)
    message = loom.cFormat.Row()
    add_field = {
        'booleans': message.add_booleans,
        'counts': message.add_counts,
        'reals': message.add_reals,
    }
    with open_compressed(rows_csv_in, 'rb') as f:
        reader = csv.reader(f)
        feature_names = list(reader.next())
        name_to_pos = {name: i for i, name in enumerate(feature_names)}
        schema = []
        for encoder in encoders:
            pos = name_to_pos.get(encoder['name'])
            add = add_field[loom.schema.MODEL_TO_DATATYPE[encoder['model']]]
            encode = load_encoder(encoder)
            schema.append((pos, add, encode))

        def rows():
            for i, row in enumerate(reader):
                message.id = id_offset + id_stride * i
                for pos, add, encode in schema:
                    value = None if pos is None else row[pos].strip()
                    observed = bool(value)
                    message.add_observed(observed)
                    if observed:
                        add(encode(value))
                yield message
                message.Clear()

        loom.cFormat.row_stream_dump(rows(), rows_out)

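# Hypothetical driver for _import_rows_file, sketched only to illustrate the
# packed-args protocol above: when N csv shards are imported in parallel,
# shard k can use id_offset=k and id_stride=N so that row ids from different
# shards interleave without collision. The output filenames here are
# assumptions, not part of the library.
def _example_import_shards(encoding_in, shard_files):
    stride = len(shard_files)
    for k, shard in enumerate(shard_files):
        _import_rows_file(
            (encoding_in, shard, 'rows.{}.pbs.gz'.format(k), k, stride))
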
def related(
        name=None,
        sample_count=loom.preql.SAMPLE_COUNT,
        debug=False,
        profile='time'):
    '''
    Run related query.
    '''
    loom.store.require(name, [
        'ingest.schema',
        'ingest.encoding',
        'samples.0.config',
        'samples.0.model',
        'samples.0.groups',
    ])
    inputs, results = get_paths(name, 'related')
    loom.config.config_dump({}, inputs['query']['config'])
    root = inputs['root']
    encoding = inputs['ingest']['encoding']
    features = sorted(json_load(inputs['ingest']['schema']).keys())
    print 'starting server'
    with loom.preql.get_server(root, encoding, debug, profile) as preql:
        print 'querying {} features'.format(len(features))
        preql.relate(features, sample_count=sample_count)

def test_metis():
    if os.path.exists(METIS_ARGS_TEMPFILE):
        print 'Loading metis args from %s' % METIS_ARGS_TEMPFILE
        args = json_load(METIS_ARGS_TEMPFILE)
    else:
        print 'Using simple metis args'
        args = {
            'nparts': 2,
            'adjacency': [[0, 2, 3], [1, 2], [0, 1, 2], [0, 3]],
            'eweights': [
                1073741824, 429496736, 357913952, 1073741824, 536870912,
                429496736, 536870912, 1073741824, 357913952, 1073741824,
            ],
        }
    assert len(args['eweights']) == sum(map(len, args['adjacency']))
    print 'Running unweighted metis...'
    unweighted = dict(args)
    del unweighted['eweights']
    edge_cut, partition = pymetis.part_graph(**unweighted)
    print 'Finished unweighted metis'
    print 'Running metis...'
    edge_cut, partition = pymetis.part_graph(**args)
    print 'Finished metis'

def test_predict(root, rows_csv, encoding, **unused):
    COUNT = 10
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        with loom.query.get_server(root, debug=True) as query_server:
            result_out = 'predictions_out.csv'
            rows_in = os.listdir(rows_csv)[0]
            rows_in = os.path.join(rows_csv, rows_in)
            encoders = json_load(encoding)
            name_to_encoder = {e['name']: load_encoder(e) for e in encoders}
            preql = loom.preql.PreQL(query_server, encoding)
            preql.predict(rows_in, COUNT, result_out, id_offset=False)
            with open_compressed(rows_in, 'rb') as fin:
                with open(result_out, 'r') as fout:
                    in_reader = csv.reader(fin)
                    out_reader = csv.reader(fout)
                    fnames = in_reader.next()
                    out_reader.next()
                    for in_row in in_reader:
                        for i in range(COUNT):
                            out_row = out_reader.next()
                            bundle = zip(fnames, in_row, out_row)
                            for name, in_val, out_val in bundle:
                                encode = name_to_encoder[name]
                                observed = bool(in_val.strip())
                                if observed:
                                    assert_almost_equal(
                                        encode(in_val),
                                        encode(out_val))
                                else:
                                    assert_true(bool(out_val.strip()))

def load(name, schema, rows_csv):
    '''
    Load a csv dataset for testing and benchmarking.
    '''
    assert os.path.exists(schema)
    assert schema.endswith('.json')
    assert os.path.exists(rows_csv)
    if os.path.isfile(rows_csv):
        assert rows_csv.endswith('.csv') or rows_csv.endswith('.csv.gz')
    else:
        assert os.path.isdir(rows_csv)
    paths = loom.store.get_paths(name)
    assert not os.path.exists(paths['root']), 'dataset already loaded'
    json_dump(json_load(schema), paths['ingest']['schema'])
    loom.format.make_schema_row(
        schema_in=paths['ingest']['schema'],
        schema_row_out=paths['ingest']['schema_row'])
    if os.path.isdir(rows_csv):
        os.symlink(rows_csv, paths['ingest']['rows_csv'])
    else:
        os.makedirs(paths['ingest']['rows_csv'])
        os.symlink(
            rows_csv,
            os.path.join(
                paths['ingest']['rows_csv'],
                os.path.basename(rows_csv)))

def make_consensus(name, config=None, debug=False):
    '''
    Combine samples into a single consensus sample.

    Arguments:
        name            A unique identifier for consensus
        config          An optional json config file; it currently does
                        nothing, but will be used to support e.g. cluster
                        coarseness in the future
        debug           Whether to run debug versions of C++ code

    Environment variables:
        LOOM_VERBOSITY  Verbosity level
    '''
    paths = loom.store.get_paths(name)
    LOG('making config')
    if config is None:
        config = {}
    elif isinstance(config, basestring):
        if not os.path.exists(config):
            raise LoomError('Missing config file: {}'.format(config))
        config = json_load(config)
    else:
        config = copy.deepcopy(config)
    loom.config.config_dump(config, paths['samples'][0]['config'])
    LOG('finding consensus')
    loom.consensus.make_consensus(paths=paths, debug=debug)

def _check_predictions(rows_in, result_out, encoding):
    encoders = json_load(encoding)
    name_to_encoder = {e['name']: load_encoder(e) for e in encoders}
    with open_compressed(rows_in, 'rb') as fin:
        with open(result_out, 'r') as fout:
            in_reader = csv.reader(fin)
            out_reader = csv.reader(fout)
            fnames = in_reader.next()
            out_reader.next()
            for in_row in in_reader:
                for i in range(COUNT):
                    out_row = out_reader.next()
                    bundle = zip(fnames, in_row, out_row)
                    for name, in_val, out_val in bundle:
                        if name == '_id':
                            assert_equal(in_val, out_val)
                            continue
                        encode = name_to_encoder[name]
                        observed = bool(in_val.strip())
                        if observed:
                            assert_almost_equal(
                                encode(in_val),
                                encode(out_val))
                        else:
                            assert_true(bool(out_val.strip()))

def make_fake_encoding(schema_in, model_in, encoding_out):
    '''
    Make a fake encoding from json schema + model.
    Assumes that feature names in the schema correspond to featureids in
    the model, e.g. when the schema was generated by loom.format.make_schema.
    '''
    schema = json_load(schema_in)
    fields = []
    builders = []
    name_to_builder = {}
    for name, model in sorted(schema.iteritems()):
        fields.append(loom.schema.MODEL_TO_DATATYPE[model])
        Builder = FAKE_ENCODER_BUILDERS[model]
        builder = Builder(name, model)
        builders.append(builder)
        name_to_builder[name] = builder
    cross_cat = loom.schema_pb2.CrossCat()
    with open_compressed(model_in, 'rb') as f:
        cross_cat.ParseFromString(f.read())
    for kind in cross_cat.kinds:
        featureid = iter(kind.featureids)
        for model in loom.schema.MODELS.iterkeys():
            for shared in getattr(kind.product_model, model):
                feature_name = '{:06d}'.format(featureid.next())
                assert feature_name in schema
                if model == 'dd':
                    for i in range(len(shared.alphas)):
                        name_to_builder[feature_name].add_value(str(i))
                elif model == 'dpd':
                    for val in shared.values:
                        name_to_builder[feature_name].add_value(str(val))
    encoders = [b.build() for b in builders]
    ensure_fake_encoders_are_sorted(encoders)
    json_dump(encoders, encoding_out)

def pretty_print(filename, message_type='guess'):
    '''
    Print text/json/protobuf messages from a raw/gz/bz2 file.
    '''
    parts = os.path.basename(filename).split('.')
    if parts[-1] in ['gz', 'bz2']:
        parts.pop()
    protocol = parts[-1]
    if protocol == 'json':
        data = json_load(filename)
        print json.dumps(data, sort_keys=True, indent=4)
    elif protocol == 'pb':
        message = get_message(filename, message_type)
        with open_compressed(filename) as f:
            message.ParseFromString(f.read())
        print message
    elif protocol == 'pbs':
        message = get_message(filename, message_type)
        for string in protobuf_stream_load(filename):
            message.ParseFromString(string)
            print message
    elif protocol == 'pickle':
        data = pickle_load(filename)
        print repr(data)
    else:
        with open_compressed(filename) as f:
            for line in f:
                print line,

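# A minimal usage sketch for pretty_print, assuming these files exist; the
# branch taken is inferred from the (possibly gz/bz2-compressed) extension,
# so the hypothetical paths below would exercise the json, pbs, and raw-text
# branches respectively.
def _example_pretty_print():
    pretty_print('ingest/encoding.json.gz')
    pretty_print('samples/sample.0/model.pbs.gz')
    pretty_print('infer_log.txt')
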
def test_group_runs(root, schema, encoding, **unused):
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        with loom.preql.get_server(root, encoding, debug=True) as preql:
            test_columns = json_load(schema).keys()[:10]
            for column in test_columns:
                groupings_csv = 'group.{}.csv'.format(column)
                preql.group(column, result_out=groupings_csv)
                print open(groupings_csv).read()

def _test_modify_schema(modify, name, schema, rows_csv, **unused):
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR) as store:
        with mock.patch('loom.store.STORE', new=store):
            modified_schema = os.path.join(store, 'schema.json')
            data = json_load(schema)
            data = modify(data)
            json_dump(data, modified_schema)
            loom.tasks.ingest(name, modified_schema, rows_csv, debug=True)

def generate_init(encoding_in, model_out, seed=0):
    '''
    Generate an initial model for inference.
    '''
    numpy.random.seed(seed)
    encoders = json_load(encoding_in)
    features = import_features(encoders)
    cross_cat = generate_model(features)
    with open_compressed(model_out, 'wb') as f:
        f.write(cross_cat.SerializeToString())

def test_relate_pandas(root, rows_csv, schema, **unused):
    feature_count = len(json_load(schema))
    with loom.preql.get_server(root, debug=True) as preql:
        result_string = preql.relate(preql.feature_names)
        result_df = pandas.read_csv(StringIO(result_string), index_col=0)
        print 'result_df ='
        print result_df
        assert_equal(result_df.ndim, 2)
        assert_equal(result_df.shape[0], feature_count)
        assert_equal(result_df.shape[1], feature_count)

def infer_one(name, seed=0, config=None, debug=False):
    '''
    Infer a single sample.

    Arguments:
        name            A unique identifier for ingest + inference
        seed            The seed, i.e., sample number, typically 0-9
        config          An optional json config file, e.g.,
                        {"schedule": {"extra_passes": 500.0}}
        debug           Whether to run debug versions of C++ code

    Environment variables:
        LOOM_VERBOSITY  Verbosity level
    '''
    paths = loom.store.get_paths(name, sample_count=(1 + seed))
    sample = paths['samples'][seed]
    LOG('making config')
    if config is None:
        config = {}
    elif isinstance(config, basestring):
        if not os.path.exists(config):
            raise LoomError('Missing config file: {}'.format(config))
        config = json_load(config)
    else:
        config = copy.deepcopy(config)
    if 'seed' not in config:
        config['seed'] = seed
    loom.config.config_dump(config, sample['config'])
    LOG('generating init')
    loom.generate.generate_init(
        encoding_in=paths['ingest']['encoding'],
        model_out=sample['init'],
        seed=seed)
    LOG('shuffling rows')
    loom.runner.shuffle(
        rows_in=paths['ingest']['diffs'],
        rows_out=sample['shuffled'],
        seed=seed,
        debug=debug)
    LOG('inferring, watch {}'.format(sample['infer_log']))
    loom.runner.infer(
        config_in=sample['config'],
        rows_in=sample['shuffled'],
        tares_in=paths['ingest']['tares'],
        model_in=sample['init'],
        model_out=sample['model'],
        groups_out=sample['groups'],
        assign_out=sample['assign'],
        log_out=sample['infer_log'],
        debug=debug)

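# A minimal sketch of drawing several posterior samples via infer_one,
# assuming a dataset named 'my_dataset' has already been ingested; per the
# docstring above, the seed doubles as the sample number, typically 0-9.
def _example_infer_many(name='my_dataset', sample_count=10):
    for seed in range(sample_count):
        infer_one(name, seed=seed)
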
def _retrieve_featureid_to_cgpm(path):
    """Returns a dict mapping loom's 0-based featureid to cgpm.outputs."""
    # Loom orders features alphabetically based on statistical types:
    # i.e. 'bb' < 'dd' < 'nich'. The ordering is stored in
    # `ingest/encoding.json.gz`.
    encoding_in = os.path.join(path, 'ingest', 'encoding.json.gz')
    features = json_load(encoding_in)

    def colname_to_output(cname):
        # Convert dummy column name from 'c00012' to the integer 12.
        return int(cname.replace('c', ''))

    return {i: colname_to_output(f['name']) for i, f in enumerate(features)}

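# A self-contained illustration of the mapping built above, using a
# fabricated feature list in the same shape as the entries of
# `ingest/encoding.json.gz` (each entry carries at least a 'name' key).
def _example_featureid_map():
    features = [{'name': 'c00003'}, {'name': 'c00000'}, {'name': 'c00012'}]
    mapping = {
        i: int(f['name'].replace('c', ''))
        for i, f in enumerate(features)
    }
    assert mapping == {0: 3, 1: 0, 2: 12}
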
def make_schema_row(schema_in, schema_row_out):
    '''
    Convert json schema to protobuf schema row.
    '''
    schema = json_load(schema_in)
    value = loom.schema_pb2.ProductValue()
    value.observed.sparsity = loom.schema_pb2.ProductValue.Observed.DENSE
    for model in schema.itervalues():
        field = loom.schema.MODEL_TO_DATATYPE[model]
        value.observed.dense.append(True)
        getattr(value, field).append(EXAMPLE_VALUES[field])
    with open_compressed(schema_row_out, 'wb') as f:
        f.write(value.SerializeToString())

def infer_one(name, seed=0, config=None, debug=False):
    '''
    Infer a single sample.

    Arguments:
        name            A unique identifier for ingest + inference
        seed            The seed, i.e., sample number, typically 0-9
        config          An optional json config file, e.g.,
                        {"schedule": {"extra_passes": 500.0}}
        debug           Whether to run debug versions of C++ code

    Environment variables:
        LOOM_VERBOSITY  Verbosity level
    '''
    paths = loom.store.get_paths(name, sample_count=(1 + seed))
    sample = paths['samples'][seed]
    LOG('making config')
    if config is None:
        config = {}
    elif isinstance(config, basestring):
        if not os.path.exists(config):
            raise LoomError('Missing config file: {}'.format(config))
        config = json_load(config)
    else:
        config = copy.deepcopy(config)
    config['seed'] = seed
    loom.config.config_dump(config, sample['config'])
    LOG('generating init')
    loom.generate.generate_init(
        encoding_in=paths['ingest']['encoding'],
        model_out=sample['init'],
        seed=seed)
    LOG('shuffling rows')
    loom.runner.shuffle(
        rows_in=paths['ingest']['diffs'],
        rows_out=sample['shuffled'],
        seed=seed,
        debug=debug)
    LOG('inferring, watch {}'.format(sample['infer_log']))
    loom.runner.infer(
        config_in=sample['config'],
        rows_in=sample['shuffled'],
        tares_in=paths['ingest']['tares'],
        model_in=sample['init'],
        model_out=sample['model'],
        groups_out=sample['groups'],
        assign_out=sample['assign'],
        log_out=sample['infer_log'],
        debug=debug)

def test_tiled_entropy(root, schema, **unused):
    feature_count = len(json_load(schema))
    feature_sets = [frozenset([i]) for i in xrange(feature_count)]
    kwargs = {
        'row_sets': feature_sets,
        'col_sets': feature_sets,
        'sample_count': 10,
    }
    with loom.query.get_server(root, debug=True) as server:
        expected = set(server.entropy(**kwargs))
        for tile_size in xrange(1, 1 + feature_count):
            print 'tile_size = {}'.format(tile_size)
            actual = set(server.entropy(tile_size=tile_size, **kwargs))
            assert_set_equal(expected, actual)

def make_schema_row(schema_in, schema_row_out):
    '''
    Convert json schema to protobuf schema row.
    '''
    schema = json_load(schema_in)
    if not schema:
        raise LoomError('Schema is empty: {}'.format(schema_in))
    value = loom.schema_pb2.ProductValue()
    value.observed.sparsity = loom.schema_pb2.ProductValue.Observed.DENSE
    for model in schema.itervalues():
        try:
            field = loom.schema.MODEL_TO_DATATYPE[model]
        except KeyError:
            raise LoomError('Unknown model {} in schema {}'.format(
                model, schema_in))
        value.observed.dense.append(True)
        getattr(value, field).append(EXAMPLE_VALUES[field])
    with open_compressed(schema_row_out, 'wb') as f:
        f.write(value.SerializeToString())

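# A usage sketch for make_schema_row under assumed file names. The model
# strings follow the ones used elsewhere in this code ('bb', 'dd', 'nich');
# the feature names and output path are fabricated for illustration.
def _example_make_schema_row():
    schema = {'age': 'nich', 'color': 'dd', 'is_member': 'bb'}
    json_dump(schema, 'schema.json')
    make_schema_row(
        schema_in='schema.json',
        schema_row_out='schema_row.pb.gz')
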
def test_predict_pandas(root, rows_csv, schema, **unused):
    feature_count = len(json_load(schema))
    with loom.preql.get_server(root, debug=True) as preql:
        rows_filename = os.path.join(rows_csv, os.listdir(rows_csv)[0])
        with open_compressed(rows_filename) as f:
            rows_df = pandas.read_csv(
                f,
                converters=preql.converters,
                index_col='_id')
        print 'rows_df ='
        print rows_df
        row_count = rows_df.shape[0]
        assert_equal(rows_df.shape[1], feature_count)
        rows_io = StringIO(rows_df.to_csv())
        result_string = preql.predict(rows_io, COUNT, id_offset=True)
        result_df = pandas.read_csv(StringIO(result_string), index_col=False)
        print 'result_df ='
        print result_df
        assert_equal(result_df.ndim, 2)
        assert_equal(result_df.shape[0], row_count * COUNT)
        assert_equal(result_df.shape[1], 1 + feature_count)

def export_rows(encoding_in, rows_in, rows_csv_out, chunk_size=1000000):
    '''
    Export rows from gzipped-protobuf-stream to directory-of-gzipped-csv-files.
    '''
    rows_csv_out = os.path.abspath(rows_csv_out)
    if rows_csv_out == os.getcwd():
        raise LoomError('Cannot export_rows to working directory')
    for ext in ['.csv', '.gz', '.bz2']:
        if rows_csv_out.endswith(ext):
            raise LoomError(
                'Expected rows_csv_out to be a dirname, actual {}'.format(
                    rows_csv_out))
    if not (chunk_size > 0):
        raise LoomError('Invalid chunk_size {}, must be positive'.format(
            chunk_size))
    encoders = json_load(encoding_in)
    fields = [loom.schema.MODEL_TO_DATATYPE[e['model']] for e in encoders]
    decoders = [load_decoder(e) for e in encoders]
    header = ['_id'] + [e['name'] for e in encoders]
    if os.path.exists(rows_csv_out):
        shutil.rmtree(rows_csv_out)
    os.makedirs(rows_csv_out)
    row_count = sum(1 for _ in protobuf_stream_load(rows_in))
    rows = loom.cFormat.row_stream_load(rows_in)
    chunk_count = (row_count + chunk_size - 1) / chunk_size
    chunks = sorted(
        os.path.join(rows_csv_out, 'rows.{}.csv.gz'.format(i))
        for i in xrange(chunk_count)
    )
    with ExitStack() as stack:
        with_ = stack.enter_context
        writers = [with_(csv_writer(f)) for f in chunks]
        for writer in writers:
            writer.writerow(header)
        for row, writer in izip(rows, cycle(writers)):
            data = row.iter_data()
            schema = izip(data['observed'], fields, decoders)
            csv_row = [row.id]
            for observed, field, decode in schema:
                csv_row.append(decode(data[field].next()) if observed else '')
            writer.writerow(csv_row)

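# A usage sketch for export_rows with hypothetical paths: rows are dealt
# round-robin to gzipped chunk files named 'rows.{i}.csv.gz', each led by
# the '_id' + feature-name header written above.
def _example_export_rows():
    export_rows(
        encoding_in='ingest/encoding.json.gz',
        rows_in='ingest/rows.pbs.gz',
        rows_csv_out='rows_csv_out',
        chunk_size=100000)
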
def make_fake_encoding(schema_in, rows_in, encoding_out):
    '''
    Make a fake encoding from json schema + protobuf rows.
    '''
    schema = json_load(schema_in)
    fields = []
    builders = []
    for name, model in sorted(schema.iteritems()):
        fields.append(loom.schema.MODEL_TO_DATATYPE[model])
        Builder = FAKE_ENCODER_BUILDERS[model]
        builder = Builder(name, model)
        builders.append(builder)
    for row in loom.cFormat.row_stream_load(rows_in):
        data = row.iter_data()
        observeds = data['observed']
        for observed, field, builder in izip(observeds, fields, builders):
            if observed:
                builder.add_value(str(data[field].next()))
    encoders = [builder.build() for builder in builders]
    ensure_fake_encoders_are_sorted(encoders)
    json_dump(encoders, encoding_out)

def _make_encoder_builders_file((schema_in, rows_in)):
    assert os.path.isfile(rows_in)
    schema = json_load(schema_in)
    with open_compressed(rows_in, 'rb') as f:
        reader = csv.reader(f)
        header = reader.next()
        builders = []
        for name in header:
            if name in schema:
                model = schema[name]
                Builder = ENCODER_BUILDERS[model]
                builder = Builder(name, model)
            else:
                builder = None
            builders.append(builder)
        for row in reader:
            for value, builder in izip(row, builders):
                if builder is not None:
                    value = value.strip()
                    if value:
                        builder.add_value(value)
    return [b for b in builders if b is not None]

def export_rows(encoding_in, rows_in, rows_csv_out, chunk_size=1000000):
    '''
    Export rows from protobuf stream to csv.
    '''
    for ext in ['.csv', '.gz', '.bz2']:
        assert not rows_csv_out.endswith(ext), \
            'rows_csv_out should be a dirname'
    assert chunk_size > 0
    encoders = json_load(encoding_in)
    fields = [loom.schema.MODEL_TO_DATATYPE[e['model']] for e in encoders]
    decoders = [load_decoder(e) for e in encoders]
    header = [e['name'] for e in encoders]
    if os.path.exists(rows_csv_out):
        shutil.rmtree(rows_csv_out)
    os.makedirs(rows_csv_out)
    rows = loom.cFormat.row_stream_load(rows_in)
    try:
        empty = None
        for i in xrange(MAX_CHUNK_COUNT):
            file_out = os.path.join(
                rows_csv_out,
                'rows_{:06d}.csv.gz'.format(i))
            with open_compressed(file_out, 'wb') as f:
                writer = csv.writer(f)
                writer.writerow(header)
                empty = file_out
                for j in xrange(chunk_size):
                    data = rows.next().iter_data()
                    schema = izip(data['observed'], fields, decoders)
                    row = [
                        decode(data[field].next()) if observed else ''
                        for observed, field, decode in schema
                    ]
                    writer.writerow(row)
                    empty = None
    except StopIteration:
        if empty:
            os.remove(empty)

def transform_rows(schema_in, transforms_in, rows_in, rows_out, id_field=None):
    transforms = pickle_load(transforms_in)
    if not transforms:
        cp_ns(rows_in, rows_out)
    else:
        transform = TransformSequence(transforms)
        transformed_header = sorted(json_load(schema_in).iterkeys())
        if id_field is not None:
            assert id_field not in transformed_header
            transformed_header = [id_field] + transformed_header
        tasks = []
        if os.path.isdir(rows_in):
            loom.util.mkdir_p(rows_out)
            for f in os.listdir(rows_in):
                tasks.append((
                    transform,
                    transformed_header,
                    os.path.join(rows_in, f),
                    os.path.join(rows_out, f),
                ))
        else:
            tasks.append((transform, transformed_header, rows_in, rows_out))
        parallel_map(_transform_rows, tasks)

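# A single-file usage sketch for transform_rows; every path here is a
# hypothetical placeholder. Each queued task above is the 4-tuple
# (transform, transformed_header, rows_in, rows_out) consumed by the
# _transform_rows worker via parallel_map, one task per csv shard when
# rows_in is a directory.
def _example_transform_rows():
    transform_rows(
        schema_in='ingest/schema.json.gz',
        transforms_in='ingest/transforms.pickle.gz',
        rows_in='rows.csv.gz',
        rows_out='transformed.csv.gz',
        id_field='_id')
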
def _import_rows_file(args):
    rows_csv_in, rows_out, id_offset, id_stride, encoding_in = args
    assert os.path.isfile(rows_csv_in)
    encoders = json_load(encoding_in)
    message = loom.cFormat.Row()
    add_field = {
        'booleans': message.add_booleans,
        'counts': message.add_counts,
        'reals': message.add_reals,
    }
    with csv_reader(rows_csv_in) as reader:
        feature_names = list(reader.next())
        header_length = len(feature_names)
        name_to_pos = {name: i for i, name in enumerate(feature_names)}
        schema = []
        for encoder in encoders:
            pos = name_to_pos.get(encoder['name'])
            add = add_field[loom.schema.MODEL_TO_DATATYPE[encoder['model']]]
            encode = load_encoder(encoder)
            schema.append((pos, add, encode))

        def rows():
            for i, row in enumerate(reader):
                if len(row) != header_length:
                    raise LoomError('row {} has wrong length {}:\n{}'.format(
                        i, len(row), row))
                message.id = id_offset + id_stride * i
                for pos, add, encode in schema:
                    value = None if pos is None else row[pos].strip()
                    observed = bool(value)
                    message.add_observed(observed)
                    if observed:
                        add(encode(value))
                yield message
                message.Clear()

        loom.cFormat.row_stream_dump(rows(), rows_out)

def __init__(self, query_server, encoding=None, debug=False):
    self._paths = loom.store.get_paths(query_server.root)
    if encoding is None:
        encoding = self._paths['ingest']['encoding']
    self._query_server = query_server
    self._encoders = json_load(encoding)
    transforms = self._paths['ingest']['transforms']
    self._transform = loom.transforms.load_transforms(transforms)
    self._feature_names = [e['name'] for e in self._encoders]
    self._feature_set = frozenset(self._feature_names)
    self._name_to_pos = {
        name: i for i, name in enumerate(self._feature_names)
    }
    self._name_to_decode = {
        e['name']: load_decoder(e) for e in self._encoders
    }
    self._name_to_encode = {
        e['name']: load_encoder(e) for e in self._encoders
    }
    self._rowid_map = None
    self._debug = debug

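# A construction sketch matching the tests above (see test_predict): a PreQL
# instance wraps a query server, and in this newer __init__ the encoding
# argument may be omitted, defaulting to the store's ingest encoding. The
# dataset root is assumed to exist.
def _example_relate(root):
    with loom.query.get_server(root, debug=False) as query_server:
        preql = loom.preql.PreQL(query_server)
        return preql.relate(preql.feature_names)
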
def __init__(self, query_server, encoding, debug=False):
    self.query_server = query_server
    self.encoders = json_load(encoding)
    self.feature_names = [e['name'] for e in self.encoders]
    self.debug = debug