Exemplo n.º 1
0
Arquivo: split.py Projeto: zruan/topaz
def main(args):
    """Split one particle coordinates file into a file per micrograph.

    Attributes read from ``args``:
        _from: input format; ``'auto'`` means detect it from ``args.file``.
        file: path of the input coordinates file.
        suffix: text appended to each image name in the output file names.
        threshold: rows whose score is below this value are dropped (only
            applied when the table has a score column).
        output: directory that receives the per-micrograph files.

    Output files are named ``<output>/<image name><suffix><ext>`` where
    ``<ext>`` is the extension of the input file. If the format cannot be
    detected, an error is printed to stderr and the process exits with
    status 1.
    """
    source = args.file
    file_format = args._from
    if file_format == 'auto':
        # infer the input format from the file name
        try:
            file_format = file_utils.detect_format(source)
        except file_utils.UnknownFormatError as e:
            print('Error: unrecognized input coordinates file extension (' +
                  e.ext + ')',
                  file=sys.stderr)
            sys.exit(1)

    # every output file reuses the extension of the input file
    ext = os.path.splitext(source)[1]
    suffix = args.suffix
    threshold = args.threshold
    out_dir = args.output

    def destination(stem):
        # output location for the micrograph called ``stem``
        return out_dir + '/' + stem + suffix + ext

    if file_format != 'star':
        # anything that is not a star file is read as a tab-separated table
        particles = pd.read_csv(source, sep='\t')
        if 'score' in particles.columns:
            keep = particles['score'] >= threshold
            particles = particles.loc[keep]
        for image_name, rows in particles.groupby('image_name'):
            rows.to_csv(destination(image_name), sep='\t', index=False)
        return

    with open(source, 'r') as f:
        particles = star.parse(f)

    # drop low scoring particles when scores are available
    if star.SCORE_COLUMN_NAME in particles.columns:
        keep = particles[star.SCORE_COLUMN_NAME] >= threshold
        particles = particles.loc[keep]

    # one star file per micrograph, named after the micrograph minus its
    # own file extension
    for micrograph, rows in particles.groupby('MicrographName'):
        stem = os.path.splitext(micrograph)[0]
        with open(destination(stem), 'w') as f:
            star.write(rows, f)
Exemplo n.º 2
0
def read_coordinates(path, format='auto'):
    """Load particle coordinates from ``path`` and return them as a table.

    ``format`` selects the reader: ``'star'``, ``'box'``, ``'csv'`` (VIA CSV)
    or ``'auto'`` to detect the format from the path. Any other value is
    read as a tab-separated coordinates table.
    """
    if format == 'auto':
        format = detect_format(path)

    if format == 'box':
        # a box file carries no image name, so use the file's base name
        boxes = read_box(path)
        stem = os.path.basename(os.path.splitext(path)[0])
        return boxes_to_coordinates(boxes, image_name=stem)

    if format == 'csv':
        # this is VIA CSV format
        return read_via_csv(path)

    if format != 'star':
        # default to coordinates table format
        return pd.read_csv(path, sep='\t')

    with open(path, 'r') as f:
        table = star.parse(f)

    # map star column names onto the standard coordinate table names
    standard_names = {
        star.SCORE_COLUMN_NAME: 'score',
        'MicrographName': 'image_name',
        star.X_COLUMN_NAME: 'x_coord',
        star.Y_COLUMN_NAME: 'y_coord',
        star.VOLTAGE: 'voltage',
        star.DETECTOR_PIXEL_SIZE: 'detector_pixel_size',
        star.MAGNIFICATION: 'magnification',
        star.AMPLITUDE_CONTRAST: 'amplitude_contrast',
    }
    for star_name, standard_name in standard_names.items():
        if star_name not in table.columns:
            continue
        # copy under the new name, then remove the star-named column
        table[standard_name] = table[star_name]
        table = table.drop(star_name, axis=1)

    # image names are stored without their file extension
    table['image_name'] = table['image_name'].apply(strip_ext)
    return table
Exemplo n.º 3
0
def main(args):
    """Convert a star file into a tab-separated coordinates table.

    Reads the star file at ``args.file``, keeps the micrograph name, the x/y
    coordinates and (when present) the particle score, renames them to
    ``image_name``, ``x_coord``, ``y_coord`` and ``score``, and writes the
    result to ``args.output``, or to stdout when ``args.output`` is None.
    """
    with open(args.file, 'r') as f:
        table = star.parse(f)

    # columns of interest and the names they get in the output table
    star_columns = ['MicrographName', 'CoordinateX', 'CoordinateY']
    names = ['image_name', 'x_coord', 'y_coord']
    if 'ParticleScore' in table.columns:
        star_columns.append('ParticleScore')
        names.append('score')

    # Take an explicit copy: assigning into a bare column subset of the
    # parsed table makes pandas emit SettingWithCopyWarning.
    table = table[star_columns].copy()
    table.columns = names

    # convert the coordinates to integers (through float, so the fractional
    # part of a decimal value is discarded by the integer cast)
    table['x_coord'] = table['x_coord'].astype(float).astype(int)
    table['y_coord'] = table['y_coord'].astype(float).astype(int)
    # strip file extensions off the image names if present
    table['image_name'] = table['image_name'].apply(strip_ext)

    out = sys.stdout if args.output is None else args.output
    table.to_csv(out, sep='\t', index=False)
Exemplo n.º 4
0
def _exit_with_error(message):
    """Print ``message`` to stderr and terminate with exit status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def main(args):
    """Convert particle coordinate files from one format to another.

    Attributes read from ``args``:
        verbose: when > 0, progress notes are printed to stderr.
        _from: input format, or ``'auto'`` to detect it per input file.
        files: list of input coordinate files; their contents are
            concatenated.
        output: output path, or None to write to stdout.
        to: output format, or ``'auto'`` to derive it from the inputs (when
            writing to stdout) or from ``output``.
        image_ext, boxsize, suffix: forwarded to
            ``file_utils.write_coordinates``; ``boxsize`` also decides
            between BOX and JSON when ``output`` ends with ``'/'``.
        threshold: particles with a score below this value are dropped.
        up_scale, down_scale: coordinates are multiplied by
            ``up_scale / down_scale`` and rounded to integers.
        voltage, detector_pixel_size, magnification, amplitude_contrast:
            written as metadata columns when > 0.
        invert_y, imagedir: when ``invert_y`` is set, each image is loaded
            from ``imagedir`` and its height is used to mirror the y
            coordinates.

    All diagnostics go to stderr so they never mix with coordinates written
    to stdout. Unrecoverable argument problems exit with status 1.
    """
    verbose = args.verbose

    form = args._from
    from_forms = [form] * len(args.files)

    # detect the input file formats
    if form == 'auto':
        try:
            from_forms = [
                file_utils.detect_format(path) for path in args.files
            ]
        except file_utils.UnknownFormatError as e:
            _exit_with_error(
                'Error: unrecognized input coordinates file extension (' +
                e.ext + ')')
    formats_detected = list(set(from_forms))
    if verbose > 0:
        print('# INPUT formats detected: ' + str(formats_detected),
              file=sys.stderr)

    # determine the output file format
    output_path = args.output
    output = None
    to_form = args.to
    if output_path is None:
        output = sys.stdout
        # writing to stdout with no explicit format is only possible when
        # all of the inputs share a single format
        if to_form == 'auto':
            if len(formats_detected) == 1:
                # write the same output format as input format
                to_form = from_forms[0]
            else:
                _exit_with_error(
                    'Error: writing file to stdout and multiple input formats present with no output format (--to) set! Please tell me what format to write!'
                )
        if to_form == 'box' or to_form == 'json':
            _exit_with_error(
                'Error: writing BOX or JSON output files requires a destination directory. Please set the --output parameter!'
            )

    image_ext = args.image_ext
    boxsize = args.boxsize
    # to_form can only still be 'auto' here when output_path is not None
    if to_form == 'auto':
        # first check for directory; endswith is safe for an empty path
        if output_path.endswith('/'):
            # image-ext must be set for these file formats
            if image_ext is None:
                _exit_with_error(
                    'Error: writing BOX or JSON output files requires setting the image file extension!'
                )
            # format is either json or box, check for boxsize to decide
            if boxsize > 0:
                # write boxes!
                if verbose > 0:
                    print(
                        '# Detected output format is BOX, because OUTPUT is a directory and boxsize > 0.',
                        file=sys.stderr)
                to_form = 'box'
            else:
                if verbose > 0:
                    print(
                        '# Detected output format is JSON, because OUTPUT is a directory and no boxsize set.',
                        file=sys.stderr)
                to_form = 'json'
        else:
            try:
                to_form = file_utils.detect_format(output_path)
            except file_utils.UnknownFormatError as e:
                _exit_with_error(
                    'Error: unrecognized output coordinates file extension (' +
                    e.ext + ')')
    if verbose > 0:
        # stderr, so the note cannot end up inside data written to stdout
        print('# OUTPUT format: ' + to_form, file=sys.stderr)

    suffix = args.suffix

    t = args.threshold
    down_scale = args.down_scale
    up_scale = args.up_scale
    scale = up_scale / down_scale

    # special case when inputs and outputs are all star files
    if len(formats_detected
           ) == 1 and formats_detected[0] == 'star' and to_form == 'star':
        dfs = []
        for path in args.files:
            with open(path, 'r') as f:
                dfs.append(star.parse(f))
        table = pd.concat(dfs, axis=0)
        # apply the score threshold
        if star.SCORE_COLUMN_NAME in table.columns:
            table = table.loc[table[star.SCORE_COLUMN_NAME] >= t]
        # scale coordinates
        if scale != 1:
            x_coord = table[star.X_COLUMN_NAME].values
            x_coord = np.round(scale * x_coord).astype(int)
            table[star.X_COLUMN_NAME] = x_coord
            y_coord = table[star.Y_COLUMN_NAME].values
            y_coord = np.round(scale * y_coord).astype(int)
            table[star.Y_COLUMN_NAME] = y_coord
        # add metadata if specified
        if args.voltage > 0:
            table[star.VOLTAGE] = args.voltage
        if args.detector_pixel_size > 0:
            table[star.DETECTOR_PIXEL_SIZE] = args.detector_pixel_size
        if args.magnification > 0:
            table[star.MAGNIFICATION] = args.magnification
        if args.amplitude_contrast > 0:
            table[star.AMPLITUDE_CONTRAST] = args.amplitude_contrast
        # write output file
        if output is None:
            with open(output_path, 'w') as f:
                star.write(table, f)
        else:
            star.write(table, output)

    else:  # general case

        # read the input files, each with its own (possibly detected) format
        dfs = [
            file_utils.read_coordinates(path, format=path_form)
            for path, path_form in zip(args.files, from_forms)
        ]
        coords = pd.concat(dfs, axis=0)

        # threshold particles by score (if there is a score)
        if 'score' in coords.columns:
            coords = coords.loc[coords['score'] >= t]

        # scale coordinates
        if scale != 1:
            x_coord = coords['x_coord'].values
            x_coord = np.round(scale * x_coord).astype(int)
            coords['x_coord'] = x_coord
            y_coord = coords['y_coord'].values
            y_coord = np.round(scale * y_coord).astype(int)
            coords['y_coord'] = y_coord

        # add metadata if specified
        if args.voltage > 0:
            coords['voltage'] = args.voltage
        if args.detector_pixel_size > 0:
            coords['detector_pixel_size'] = args.detector_pixel_size
        if args.magnification > 0:
            coords['magnification'] = args.magnification
        if args.amplitude_contrast > 0:
            coords['amplitude_contrast'] = args.amplitude_contrast

        # invert y-axis coordinates if specified
        invert_y = args.invert_y
        if invert_y:
            if args.imagedir is None:
                _exit_with_error(
                    'Error: --imagedir must specify the directory of images in order to mirror the y-axis coordinates'
                )
            dfs = []
            for image_name, group in coords.groupby('image_name'):
                impath = os.path.join(args.imagedir,
                                      image_name) + '.' + args.image_ext
                # use glob in case image_ext is '*'
                matches = glob.glob(impath)
                if not matches:
                    _exit_with_error('Error: no image file found matching ' +
                                     impath)
                im = load_image(matches[0])
                height = im.height

                group = mirror_y_axis(group, height)
                dfs.append(group)
            coords = pd.concat(dfs, axis=0)

        # output file format is decided and coordinates are processed, now write files
        write_options = dict(format=to_form,
                             boxsize=boxsize,
                             image_ext=image_ext,
                             suffix=suffix)
        if to_form == 'box' or to_form == 'json':
            # these writers take the destination path rather than a stream
            file_utils.write_coordinates(output_path, coords, **write_options)
        elif output is None:
            # open the destination ourselves and make sure it gets closed
            with open(output_path, 'w') as f:
                file_utils.write_coordinates(f, coords, **write_options)
        else:
            file_utils.write_coordinates(output, coords, **write_options)