def _make_encoder_builders_file(args):
    '''Build one encoder builder per schema-known column of a csv file.

    Parameters: args is a (schema_in, rows_in) pair, where schema_in is a
    path to a json file mapping feature name -> model name, and rows_in is
    a path to a csv file whose first row is a header.

    Returns a list of builders, one per csv column whose header name
    appears in the schema; columns not in the schema are ignored.

    Raises LoomError if a schema column is repeated in the header, if no
    header column is known to the schema, or if any schema feature is
    missing from the header.
    '''
    # Unpack explicitly rather than via py2-only tuple parameters, matching
    # the style of _import_rowids_file / _import_rows_file.
    schema_in, rows_in = args
    assert os.path.isfile(rows_in)
    schema = json_load(schema_in)
    with csv_reader(rows_in) as reader:
        header = reader.next()
        builders = []
        seen = set()
        for name in header:
            if name in schema:
                if name in seen:
                    raise LoomError('Repeated column {} in csv file {}'.format(
                        name, rows_in))
                seen.add(name)
                model = schema[name]
                Builder = ENCODER_BUILDERS[model]
                builder = Builder(name, model)
            else:
                # Unknown column: keep a placeholder so positions line up
                # with csv fields when streaming rows below.
                builder = None
            builders.append(builder)
        if all(builder is None for builder in builders):
            # BUGFIX: the old literals concatenated to '...features;, try...'
            # (stray comma after the semicolon).
            raise LoomError(
                'Csv file has no known features;'
                ' try adding a header to {}'.format(rows_in))
        missing_features = sorted(set(schema) - seen)
        if missing_features:
            raise LoomError('\n '.join(
                ['Csv file is missing features:'] + missing_features))
        # Stream every data row into the surviving builders; blank cells
        # (after stripping) are treated as unobserved and skipped.
        for row in reader:
            for value, builder in izip(row, builders):
                if builder is not None:
                    value = value.strip()
                    if value:
                        builder.add_value(value)
    return [b for b in builders if b is not None]
def _make_encoder_builders_file(args):
    '''Build one encoder builder per schema-known column of a csv file.

    NOTE(review): this definition duplicates an earlier one in this file;
    whichever is defined last wins. Consider deleting one copy.

    Parameters: args is a (schema_in, rows_in) pair, where schema_in is a
    path to a json file mapping feature name -> model name, and rows_in is
    a path to a csv file whose first row is a header.

    Returns a list of builders, one per csv column whose header name
    appears in the schema; columns not in the schema are ignored.

    Raises LoomError if a schema column is repeated in the header, if no
    header column is known to the schema, or if any schema feature is
    missing from the header.
    '''
    # Unpack explicitly rather than via py2-only tuple parameters, matching
    # the style of _import_rowids_file / _import_rows_file.
    schema_in, rows_in = args
    assert os.path.isfile(rows_in)
    schema = json_load(schema_in)
    with csv_reader(rows_in) as reader:
        header = reader.next()
        builders = []
        seen = set()
        for name in header:
            if name in schema:
                if name in seen:
                    raise LoomError('Repeated column {} in csv file {}'.format(
                        name, rows_in))
                seen.add(name)
                model = schema[name]
                Builder = ENCODER_BUILDERS[model]
                builder = Builder(name, model)
            else:
                # Unknown column: keep a placeholder so positions line up
                # with csv fields when streaming rows below.
                builder = None
            builders.append(builder)
        if all(builder is None for builder in builders):
            # BUGFIX: the old literals concatenated to '...features;, try...'
            # (stray comma after the semicolon).
            raise LoomError(
                'Csv file has no known features;'
                ' try adding a header to {}'.format(rows_in))
        missing_features = sorted(set(schema) - seen)
        if missing_features:
            raise LoomError('\n '.join(
                ['Csv file is missing features:'] + missing_features))
        # Stream every data row into the surviving builders; blank cells
        # (after stripping) are treated as unobserved and skipped.
        for row in reader:
            for value, builder in izip(row, builders):
                if builder is not None:
                    value = value.strip()
                    if value:
                        builder.add_value(value)
    return [b for b in builders if b is not None]
def synthesize_clusters(name, sample_count, cluster_count, pixel_count):
    '''Cluster sampled image points and paint each cluster a distinct color.

    Reads (x, y) points from SAMPLES, asks the loom server under `name` to
    cluster `pixel_count` of them using `sample_count` seed rows, then
    returns an RGB copy of IMAGE with each clustered point colored by its
    cluster label.
    '''
    with csv_reader(SAMPLES) as reader:
        reader.next()  # discard the header row
        all_points = map(tuple, reader)
    # Draw seed rows first, then the rows to cluster (order matters for the
    # shared random state).
    seed_rows = random.sample(all_points, sample_count)
    cluster_rows = random.sample(all_points, pixel_count)
    root = loom.store.get_paths(name)['root']
    with loom.preql.get_server(root) as server:
        labeled = server.cluster(
            rows_to_cluster=cluster_rows,
            seed_rows=seed_rows,
            cluster_count=cluster_count)
    # Labels are small nonnegative ints; size the palette to the largest.
    label_count = max(set(zip(*labeled)[0])) + 1
    shape = IMAGE.shape
    canvas = IMAGE.reshape(shape[0], shape[1], 1).repeat(3, 2)
    palette = pyplot.cm.Set1(numpy.linspace(0, 1, label_count))
    palette = (255 * palette[:, :3]).astype(numpy.uint8)
    for label, point in labeled:
        row, col = to_image_coordinates(float(point[0]), float(point[1]))
        canvas[row, col] = palette[label]
    return canvas
def load_rows_csv(dirname):
    '''Load and concatenate every csv file in dirname.

    Returns a list whose first element is the shared header row followed by
    the data rows of all files, in sorted filename order.

    Raises LoomError if dirname contains no files or if two files disagree
    on their header row.
    '''
    # Sort for deterministic row order (os.listdir order is arbitrary).
    filenames = sorted(os.listdir(dirname))
    # BUGFIX: an empty directory previously raised NameError on the unbound
    # `header` variable; fail with a clear message instead.
    if not filenames:
        raise LoomError('Csv directory is empty: {}'.format(dirname))
    header = None
    rows = [None]  # placeholder; replaced by the header below
    for filename in filenames:
        path = os.path.join(dirname, filename)
        with csv_reader(path) as reader:
            file_header = reader.next()
            if header is None:
                header = file_header
            elif file_header != header:
                # Previously mismatched headers were silently discarded.
                raise LoomError(
                    'Csv file {} has inconsistent header'.format(path))
            rows += reader
    rows[0] = header
    return rows
def _import_rowids_file(args):
    '''Write one (internal rowid, external id) pair per data row of a csv.

    args is a (rows_csv_in, rowids_out, id_offset, id_stride, id_field)
    tuple. Internal rowids are id_offset + id_stride * i. The external id
    comes from the id_field column when given, otherwise it is synthesized
    as "<basename>:<row index>".
    '''
    rows_csv_in, rowids_out, id_offset, id_stride, id_field = args
    assert os.path.isfile(rows_csv_in)
    with csv_reader(rows_csv_in) as reader:
        header = reader.next()
        if id_field is None:
            basename = os.path.basename(rows_csv_in)

            def get_rowid(index, fields):
                return '{}:{}'.format(basename, index)
        else:
            pos = header.index(id_field)

            def get_rowid(index, fields):
                return fields[pos]
        with csv_writer(rowids_out) as writer:
            for index, fields in enumerate(reader):
                internal_id = id_offset + id_stride * index
                writer.writerow((internal_id, get_rowid(index, fields)))
def cluster(cluster_count=5, sample_count=1000, pixel_count=None):
    '''
    Draw a fox map

    Synthesizes a clustered image via synthesize_clusters and writes it to
    RESULTS/cluster.png. When pixel_count is None it defaults to the number
    of data rows in SAMPLES.

    Raises LoomError if the samples for NAME have not been built yet.
    '''
    cluster_count = int(cluster_count)
    sample_count = int(sample_count)
    if pixel_count is None:
        with csv_reader(SAMPLES) as reader:
            pixel_count = len(list(reader)) - 1  # subtract the header row
    else:
        pixel_count = int(pixel_count)
    # BUGFIX: this precondition was an `assert`, which is silently stripped
    # under `python -O`; validate explicitly instead.
    if not loom.store.get_paths(NAME)['samples']:
        raise LoomError('first compress image')
    image = synthesize_clusters(NAME, sample_count, cluster_count, pixel_count)
    scipy.misc.imsave(os.path.join(RESULTS, 'cluster.png'), image)
def create_dataset(row_count=ROW_COUNT): ''' Extract dataset from image. ''' scipy.misc.imsave(os.path.join(RESULTS, 'original.png'), IMAGE) print 'sampling {} points from image'.format(row_count) with open_compressed(SAMPLES, 'w') as f: writer = csv.writer(f) writer.writerow(['x', 'y']) for row in sample_from_image(IMAGE, row_count): writer.writerow(row) with csv_reader(SAMPLES) as reader: reader.next() image = visualize_dataset(map(float, row) for row in reader) scipy.misc.imsave(os.path.join(RESULTS, 'samples.png'), image)
def synthesize_search(name, image_pos):
    '''Shade sample points red by search relevance to the pixel at image_pos.

    Marks the query pixel green, asks the loom server under `name` to search
    for rows similar to the query point, and paints each returned sample
    redder the more relevant it is. Stops at the first result whose
    relevance (exp of the reported score) drops below 1.
    '''
    shape = IMAGE.shape
    canvas = IMAGE.reshape(shape[0], shape[1], 1).repeat(3, 2)
    canvas[image_pos] = [0, 255, 0]  # mark the query point green
    with csv_reader(SAMPLES) as reader:
        samples = [map(float, r) for r in list(reader)[1:]]
    root = loom.store.get_paths(name)['root']
    with loom.preql.get_server(root) as server:
        qx, qy = to_loom_coordinates(*image_pos)
        result = server.search((str(qx), str(qy)))
    hits = csv.reader(StringIO(result))
    hits.next()  # skip header
    for row_id, score in hits:
        relevance = numpy.exp(float(score))
        if relevance < 1.:
            return canvas
        index = int(row_id.split(':')[1])
        sample_x, sample_y = samples[index]
        px, py = to_image_coordinates(sample_x, sample_y)
        canvas[px, py] = [255 * (1 - 1 / relevance), 0, 0]
    return canvas
def synthesize_search(name, image_pos):
    '''Shade sample points red by search relevance to the pixel at image_pos.

    NOTE(review): this definition duplicates an earlier one in this file;
    whichever is defined last wins. Consider deleting one copy.
    '''
    # Build an RGB working copy of the grayscale IMAGE.
    shape = IMAGE.shape
    picture = IMAGE.reshape(shape[0], shape[1], 1).repeat(3, 2)
    picture[image_pos] = [0, 255, 0]  # query point in green
    with csv_reader(SAMPLES) as reader:
        data_rows = list(reader)[1:]  # drop the header row
    points = [map(float, fields) for fields in data_rows]
    root = loom.store.get_paths(name)['root']
    with loom.preql.get_server(root) as server:
        loom_x, loom_y = to_loom_coordinates(*image_pos)
        raw_results = server.search((str(loom_x), str(loom_y)))
    results = csv.reader(StringIO(raw_results))
    results.next()  # skip header
    for row_id, score in results:
        strength = numpy.exp(float(score))
        if strength < 1.:
            break  # remaining results are below the relevance cutoff
        row_index = int(row_id.split(':')[1])
        point_x, point_y = points[row_index]
        img_x, img_y = to_image_coordinates(point_x, point_y)
        picture[img_x, img_y] = [255 * (1 - 1 / strength), 0, 0]
    return picture
def _import_rows_file(args):
    '''Stream-encode one csv file of rows into loom's binary row format.

    args is a (rows_csv_in, rows_out, id_offset, id_stride, encoding_in)
    tuple: the input csv path, the output path for the binary row stream,
    the rowid offset and stride, and the path to the json encoders.

    Raises LoomError if any data row's length differs from the header's.
    '''
    rows_csv_in, rows_out, id_offset, id_stride, encoding_in = args
    assert os.path.isfile(rows_csv_in)
    encoders = json_load(encoding_in)
    # A single Row message is reused for every row (see Clear() below).
    message = loom.cFormat.Row()
    # Dispatch table from datatype name to the matching repeated-field adder.
    add_field = {
        'booleans': message.add_booleans,
        'counts': message.add_counts,
        'reals': message.add_reals,
    }
    with csv_reader(rows_csv_in) as reader:
        feature_names = list(reader.next())
        header_length = len(feature_names)
        name_to_pos = {name: i for i, name in enumerate(feature_names)}
        # Precompute, per encoder, the csv column position (None if the
        # column is absent), the adder, and the value-encoding function.
        schema = []
        for encoder in encoders:
            pos = name_to_pos.get(encoder['name'])
            add = add_field[loom.schema.MODEL_TO_DATATYPE[encoder['model']]]
            encode = load_encoder(encoder)
            schema.append((pos, add, encode))

        def rows():
            # Generator yielding the shared `message` once per csv row;
            # the message is cleared only AFTER the consumer has used it.
            for i, row in enumerate(reader):
                if len(row) != header_length:
                    raise LoomError('row {} has wrong length {}:\n{}'.format(
                        i, len(row), row))
                message.id = id_offset + id_stride * i
                for pos, add, encode in schema:
                    # Missing column or blank (stripped) cell -> unobserved.
                    value = None if pos is None else row[pos].strip()
                    observed = bool(value)
                    message.add_observed(observed)
                    if observed:
                        add(encode(value))
                yield message
                message.Clear()
        loom.cFormat.row_stream_dump(rows(), rows_out)