def test_read_csv_header_noheader():
    tmpfile = tempfile.NamedTemporaryFile(
        mode='w', suffix='.csv', prefix='test_read_csv_header_noheader')
    tmpfile.write(textwrap.dedent("""\
        0,1,2
        3,4,5
    """))
    tmpfile.flush()
    assert read_csv_header(tmpfile.name) is None

def test_read_csv_header_singlecol():
    tmpfile = tempfile.NamedTemporaryFile(
        mode='w', suffix='.csv', prefix='test_read_csv_header_singlecol')
    tmpfile.write(textwrap.dedent("""\
        a
        0
        3
    """))
    tmpfile.flush()
    assert read_csv_header(tmpfile.name) == ['a']

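# For reference, a minimal sketch of a read_csv_header() implementation that satisfies
# the two tests above.  This is an illustrative heuristic (treat the first row as a
# header only if none of its cells parse as numbers), not the library's actual code.
import csv

def _sketch_read_csv_header(csv_path):
    with open(csv_path, 'r', newline='') as f:
        first_row = next(csv.reader(f), None)
    if first_row is None:
        return None

    def _is_number(s):
        try:
            float(s)
            return True
        except ValueError:
            return False

    if any(_is_number(cell) for cell in first_row):
        return None  # First row looks like data, not a header
    return first_row
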
def _sanitize_config(self):
    """
    - Normalize/overwrite certain config values
    - Check for config mistakes
    - Simple sanity checks
    """
    options = self.config["samplepoints"]

    header = read_csv_header(options["input-table"])
    if header is None:
        raise RuntimeError(f"Input table does not have a header row: {options['input-table']}")

    if set('zyx') - set(header):
        raise RuntimeError(f"Input table does not have the expected column names: {options['input-table']}")

def _sanitize_config(self):
    """
    - Normalize/overwrite certain config values
    - Check for config mistakes
    - Simple sanity checks
    """
    # Convert input/output CSV to absolute paths
    options = self.config_data["options"]
    options["input-table"] = self.relpath_to_abspath(options["input-table"])
    options["output-table"] = self.relpath_to_abspath(options["output-table"])

    header = read_csv_header(options["input-table"])
    if header is None:
        raise RuntimeError(f"Input table does not have a header row: {options['input-table']}")

    if set('zyx') - set(header):
        raise RuntimeError(f"Input table does not have the expected column names: {options['input-table']}")

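# The column check above relies on `set('zyx') - set(header)` being non-empty (truthy)
# whenever any of the 'x', 'y', 'z' columns is missing from the header.
# Illustrative values (not from the original source):
assert set('zyx') - set(['x', 'y']) == {'z'}                 # missing 'z' -> truthy -> raises
assert set('zyx') - set(['x', 'y', 'z', 'label']) == set()   # all present -> falsy -> passes
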
def load_body_list(config_data, is_supervoxels):
    if isinstance(config_data, list):
        return np.array(config_data, dtype=np.uint64)

    bodies_csv = config_data
    del config_data

    assert os.path.exists(bodies_csv), \
        f"CSV file does not exist: {bodies_csv}"

    if is_supervoxels:
        col = 'sv'
    else:
        col = 'body'

    # read_csv_header() returns None for headerless CSVs, so fall back to an empty list
    # to avoid a TypeError and take the "first column" branch below.
    if col in (read_csv_header(bodies_csv) or []):
        bodies = pd.read_csv(bodies_csv)[col].drop_duplicates()
    else:
        # Just read the first column, no matter what it's named
        logger.warning(f"No column named {col}, so reading first column instead")
        bodies = read_csv_col(bodies_csv, 0, np.uint64).drop_duplicates()

    return bodies.values.astype(np.uint64)

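# Hypothetical usage of load_body_list() (the file name and values below are
# illustrative, not taken from the original source):
#
#   bodies = load_body_list([123, 456, 456], is_supervoxels=False)  # -> array([123, 456, 456], dtype=uint64)
#   bodies = load_body_list('bodies.csv', is_supervoxels=True)      # reads the 'sv' column (or the first column)
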
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--use-mapping', action='store_true',
                        help='Use in-memory map + /exists instead of /missing, '
                             'as described in the general help text above.')
    parser.add_argument('--output', '-o', default='missing-from-tsv.csv',
                        help='Where to write the output CSV (default: missing-from-tsv.csv)')
    parser.add_argument('--kafka-timestamp', '-k', type=str,
                        help='Alternative to providing your own bodies list.\n'
                             'Use the kafka log to automatically determine the list of bodies that have changed after the given timestamp.\n'
                             'Examples: -k="2018-11-22" -k="2018-11-22 17:34:00"')
    parser.add_argument('server', help='dvid server, e.g. emdata3:8900')
    parser.add_argument('uuid', help='dvid node to analyze or "master" for the latest master branch uuid')
    parser.add_argument('tsv_instance',
                        help="Name of a tarsupervoxels instance, e.g. segmentation_sv_meshes.\n"
                             "Must be sync'd to a labelmap (segmentation) instance.")
    parser.add_argument('bodies_csv', nargs='?',
                        help='CSV containing a column named "body", which will be read.\n'
                             'If no "body" column exists, the first column is used, regardless of the name.\n'
                             '(Omit this arg if you are using --kafka-timestamp)')
    args = parser.parse_args()

    if not (bool(args.kafka_timestamp) ^ bool(args.bodies_csv)):
        print("You must provide either --kafka-timestamp or a bodies list (not both)", file=sys.stderr)
        sys.exit(1)

    if args.uuid == "master":
        args.uuid = find_master(args.server)

    # Determine segmentation instance
    info = fetch_instance_info(args.server, args.uuid, args.tsv_instance)
    seg_instance = info["Base"]["Syncs"][0]

    kafka_msgs = None
    if args.bodies_csv:
        # read_csv_header() returns None for headerless CSVs; fall back to the first column.
        if 'body' in (read_csv_header(args.bodies_csv) or []):
            bodies = pd.read_csv(args.bodies_csv)['body'].drop_duplicates()
        else:
            # Just read the first column, no matter what it's named
            bodies = read_csv_col(args.bodies_csv, 0, np.uint64).drop_duplicates()
    elif args.kafka_timestamp:
        # Validate timestamp format before fetching kafka log, which takes a while.
        parse_timestamp(args.kafka_timestamp)

        kafka_msgs = read_kafka_messages(args.server, args.uuid, seg_instance)
        filtered_kafka_msgs = filter_kafka_msgs_by_timerange(kafka_msgs, min_timestamp=args.kafka_timestamp)

        new_bodies, changed_bodies, _removed_bodies, new_supervoxels, _deleted_svs = compute_affected_bodies(filtered_kafka_msgs)
        sv_split_bodies = set(fetch_mapping(args.server, args.uuid, seg_instance, new_supervoxels)) - set([0])

        bodies = set(chain(new_bodies, changed_bodies, sv_split_bodies))
        bodies = np.fromiter(bodies, np.uint64)
        bodies.sort()
    else:
        raise AssertionError("Shouldn't get here.")

    if args.use_mapping:
        missing_entries = check_tarsupervoxels_status_via_exists(args.server, args.uuid, args.tsv_instance,
                                                                 bodies, seg_instance, kafka_msgs=kafka_msgs)
    else:
        missing_entries = check_tarsupervoxels_status_via_missing(args.server, args.uuid, args.tsv_instance, bodies)

    logger.info(f"Writing to {args.output}")
    missing_entries.to_csv(args.output, index=True, header=True)
    logging.info("DONE")

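# Example invocations of the script above (hypothetical script name; the server,
# uuid, instance, and CSV names are illustrative):
#
#   python check_tarsupervoxels_status.py emdata3:8900 abc123 segmentation_sv_meshes bodies.csv
#   python check_tarsupervoxels_status.py -k="2018-11-22 17:34:00" emdata3:8900 master segmentation_sv_meshes
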
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--fraction', type=float,
                        help='Fraction of vertices to retain in the decimated mesh. Between 0.0 and 1.0')
    parser.add_argument('--max-vertices', type=float, default=1e9,
                        help='If necessary, decimate the mesh even further so that it has no more than this vertex count (approximately).')
    parser.add_argument('--format', required=True,
                        help='Either obj, drc, or ngmesh')
    parser.add_argument('--rescale', type=float,
                        help='Multiply all vertex coordinates by this factor before storing the mesh. Important for writing to ngmesh format.')
    parser.add_argument('--output-directory', '-d',
                        help='Directory to dump decimated meshes.')
    parser.add_argument('--output-url', '-u',
                        help='DVID keyvalue instance to write decimated mesh files to, '
                             'specified as a complete URL, e.g. http://emdata1:8000/api/node/123abc/my-meshes')
    parser.add_argument('server', help='dvid server, e.g. emdata3:8900')
    parser.add_argument('uuid', help='dvid node')
    parser.add_argument('tsv_instance', help='name of a tarsupervoxels instance, e.g. segmentation_sv_meshes')
    parser.add_argument('bodies', nargs='+',
                        help='A list of body IDs OR a path to a CSV containing a column named "body", which will be read.\n'
                             'If no "body" column exists, the first column is used, regardless of the name.')
    args = parser.parse_args()

    if args.fraction is None:
        raise RuntimeError("Please specify a decimation fraction.")

    if args.format is None:
        raise RuntimeError("Please specify an output format (either 'drc' or 'obj') via --format")

    if args.output_directory:
        os.makedirs(args.output_directory, exist_ok=True)

    if args.format == "ngmesh" and args.rescale is None:
        raise RuntimeError("When writing to ngmesh, please specify an explicit rescale factor.")

    args.rescale = args.rescale or 1.0

    output_dvid = None
    if args.output_url:
        if '/api/node' not in args.output_url:
            raise RuntimeError("Please specify the output instance as a complete URL, "
                               "e.g. http://emdata1:8000/api/node/123abc/my-meshes")

        # drop 'http://' (if present)
        url = args.output_url.split('://')[-1]
        parts = url.split('/')
        assert parts[1] == 'api'
        assert parts[2] == 'node'
        output_server = parts[0]
        output_uuid = parts[3]
        output_instance = parts[4]
        output_dvid = (output_server, output_uuid, output_instance)

    all_bodies = []
    for body in args.bodies:
        if body.endswith('.csv'):
            # read_csv_header() returns None for headerless CSVs; fall back to the first column.
            if 'body' in (read_csv_header(body) or []):
                bodies = pd.read_csv(body)['body'].drop_duplicates()
            else:
                # Just read the first column, no matter what it's named
                bodies = read_csv_col(body, 0, np.uint64).drop_duplicates()
        else:
            try:
                bodies = [int(body)]
            except ValueError:
                raise RuntimeError(f"Invalid body ID: '{body}'")

        all_bodies.extend(bodies)

    for body_id in tqdm_proxy(all_bodies):
        output_path = None
        if args.output_directory:
            output_path = f'{args.output_directory}/{body_id}.{args.format}'

        decimate_existing_mesh(args.server, args.uuid, args.tsv_instance, body_id,
                               args.fraction, args.max_vertices, args.rescale, args.format,
                               output_path, output_dvid)

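# Example invocation of the decimation script above (hypothetical script name;
# the server, uuid, instance, body ID, and paths are illustrative):
#
#   python decimate_existing_meshes.py --fraction=0.1 --format=obj -d /tmp/meshes \
#          emdata3:8900 abc123 segmentation_sv_meshes 1071121755 bodies.csv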