示例#1
0
文件: stack.py 项目: turalaksel/pyem
def main(args):
    """Concatenate particle images from the input files into one stack.

    Every entry of ``args.input`` must end in .star, .mrc, .mrcs or .par;
    otherwise an error is logged and the function returns before the output
    stack is opened.  Images referenced by .star files and images contained
    in .mrcs files are appended to the stack at ``args.output``.  When
    ``args.star`` is set, a STAR file describing the new stack is written
    once, after all inputs have been processed.

    Args:
        args: Parsed command-line namespace.  Reads ``loglevel``, ``input``,
            ``output``, ``star`` and (for .par inputs) ``stack_path``.

    Returns:
        0 on success, 1 if an input is unsupported or a required option is
        missing.
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    # Validate all inputs up front so no output is created for a bad list.
    supported = (".star", ".mrcs", ".mrc", ".par")
    for fn in args.input:
        if not fn.endswith(supported):
            log.error("Only .star, .mrc, .mrcs, and .par files supported")
            return 1

    first_ptcl = 0  # Index in the output stack of the next input's first image.
    dfs = []
    with mrc.ZSliceWriter(args.output) as writer:
        for fn in args.input:
            if fn.endswith(".star"):
                df = star.parse_star(fn, keep_index=False)
                star.augment_star_ucsf(df)
                star.set_original_fields(df, inplace=True)
                # Sort so that each source stack is opened once and read in
                # ascending slice order.
                df = df.sort_values([
                    star.UCSF.IMAGE_ORIGINAL_PATH,
                    star.UCSF.IMAGE_ORIGINAL_INDEX
                ])
                gb = df.groupby(star.UCSF.IMAGE_ORIGINAL_PATH)
                for name, g in gb:
                    with mrc.ZSliceReader(name) as reader:
                        for i in g[star.UCSF.IMAGE_ORIGINAL_INDEX].values:
                            writer.write(reader.read(i))
            elif fn.endswith(".par"):
                if args.stack_path is None:
                    log.error(".par file input requires --stack-path")
                    return 1
                # NOTE(review): only metadata is built for .par inputs; no
                # images are copied to the writer in this branch.
                df = metadata.par2star(metadata.parse_fx_par(fn),
                                       data_path=args.stack_path)
                star.augment_star_ucsf(df)
            elif fn.endswith(".csv"):
                # Not reachable while the validation above rejects .csv.
                return 1
            elif fn.endswith(".cs"):
                # Not reachable while the validation above rejects .cs.
                return 1
            else:
                if fn.endswith(".mrcs"):
                    with mrc.ZSliceReader(fn) as reader:
                        for img in reader:
                            writer.write(img)
                        df = pd.DataFrame({
                            star.UCSF.IMAGE_ORIGINAL_INDEX:
                            np.arange(reader.nz)
                        })
                    df[star.UCSF.IMAGE_ORIGINAL_PATH] = fn
                else:
                    # NOTE(review): .mrc passes validation but ends up here.
                    print("Unrecognized input file type")
                    return 1
            if args.star is not None:
                # Point every record at its new position in the output stack.
                df[star.UCSF.IMAGE_INDEX] = np.arange(first_ptcl,
                                                      first_ptcl + df.shape[0])
                df[star.UCSF.IMAGE_PATH] = writer.path
                df["index"] = df[star.UCSF.IMAGE_INDEX]
                star.simplify_star_ucsf(df)
                dfs.append(df)
            first_ptcl += df.shape[0]

    # Write the combined metadata once, after the stack has been closed,
    # instead of rebuilding and rewriting it after every input file.
    if args.star is not None and dfs:
        df = pd.concat(dfs, join="inner")
        star.write_star(args.star, df, reindex=True)

    return 0
示例#2
0
def main(args):
    """Apply the requested operations to STAR metadata and write the result.

    All files in ``args.input`` are parsed and concatenated (keeping only
    the columns they share).  Each option set on ``args`` is then applied in
    the fixed order of the statements below.  With ``args.info`` a summary
    is printed and nothing is written.  Rows removed by subsampling,
    ``micrograph_range`` or ``min_separation`` are collected in an auxiliary
    table that is written to ``args.auxout`` when that option is given.

    Args:
        args: Parsed command-line namespace; see the individual ``args.*``
            checks below for the options that are read.

    Returns:
        0 on success, 1 when an option combination is rejected.
    """
    if args.info:
        # In info mode the positional "output" is treated as another input.
        args.input.append(args.output)

    df = pd.concat(
        (star.parse_star(inp, augment=args.augment) for inp in args.input),
        join="inner")

    dfaux = None  # Rows split off from df, if any operation produces them.

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    if args.info:
        if star.is_particle_star(df) and star.Relion.CLASS in df.columns:
            c = df[star.Relion.CLASS].value_counts()
            print("%s particles in %d classes" %
                  ("{:,}".format(df.shape[0]), len(c)))
            print("    ".join([
                '%d: %s (%.2f %%)' % (i, "{:,}".format(s), 100. * s / c.sum())
                for i, s in iteritems(c.sort_index())
            ]))
        elif star.is_particle_star(df):
            print("%s particles" % "{:,}".format(df.shape[0]))
        if star.Relion.MICROGRAPH_NAME in df.columns:
            mgraphcnt = df[star.Relion.MICROGRAPH_NAME].value_counts()
            print(
                "%s micrographs, %s +/- %s particles per micrograph" %
                ("{:,}".format(len(mgraphcnt)), "{:,.3f}".format(
                    np.mean(mgraphcnt)), "{:,.3f}".format(np.std(mgraphcnt))))
        try:
            print("%f A/px (%sX magnification)" %
                  (star.calculate_apix(df), "{:,.0f}".format(
                      df[star.Relion.MAGNIFICATION][0])))
        except KeyError:
            # The pixel-size line is skipped when the lookup fails.
            pass
        if len(df.columns.intersection(star.Relion.ORIGINS3D)) > 0:
            print("Largest shift is %f pixels" % np.max(
                np.abs(df[df.columns.intersection(
                    star.Relion.ORIGINS3D)].values)))
        return 0

    if args.drop_angles:
        df.drop(star.Relion.ANGLES, axis=1, inplace=True, errors="ignore")

    if args.drop_containing is not None:
        containing_fields = [
            f for q in args.drop_containing for f in df.columns if q in f
        ]
        if args.invert:
            # Keep the matching columns and drop everything else instead.
            containing_fields = df.columns.difference(containing_fields)
        df.drop(containing_fields, axis=1, inplace=True, errors="ignore")

    if args.offset_group is not None:
        df[star.Relion.GROUPNUMBER] += args.offset_group

    if args.restack is not None:
        if not args.augment:
            star.augment_star_ucsf(df, inplace=True)
        star.set_original_fields(df, inplace=True)
        df[star.UCSF.IMAGE_PATH] = args.restack
        df[star.UCSF.IMAGE_INDEX] = np.arange(df.shape[0])

    if args.subsample_micrographs is not None:
        if args.bootstrap is not None:
            print("Only particle sampling allows bootstrapping")
            return 1
        mgraphs = df[star.Relion.MICROGRAPH_NAME].unique()
        # Values below 1 are a fraction of the micrographs (at least one);
        # otherwise the value is an absolute count.  The builtin int is used
        # because the np.int alias no longer exists in current NumPy.
        if args.subsample_micrographs < 1:
            args.subsample_micrographs = int(
                max(np.round(args.subsample_micrographs * len(mgraphs)), 1))
        else:
            args.subsample_micrographs = int(args.subsample_micrographs)
        ind = np.random.choice(len(mgraphs),
                               size=args.subsample_micrographs,
                               replace=False)
        mask = df[star.Relion.MICROGRAPH_NAME].isin(mgraphs[ind])
        if args.auxout is not None:
            dfaux = df.loc[~mask]
        df = df.loc[mask]

    if args.subsample is not None and args.suffix == "":
        # Same fraction-or-count convention as for micrographs above.
        if args.subsample < 1:
            args.subsample = int(
                max(np.round(args.subsample * df.shape[0]), 1))
        else:
            args.subsample = int(args.subsample)
        ind = np.random.choice(df.shape[0], size=args.subsample, replace=False)
        # ``ind`` holds row positions, so build a positional mask.  Matching
        # them against index labels picks the wrong rows whenever labels are
        # duplicated (several inputs concatenated) or no longer contiguous.
        mask = np.zeros(df.shape[0], dtype=bool)
        mask[ind] = True
        if args.auxout is not None:
            dfaux = df.loc[~mask]
        df = df.loc[mask]

    if args.copy_angles is not None:
        angle_star = star.parse_star(args.copy_angles, augment=args.augment)
        df = star.smart_merge(df,
                              angle_star,
                              fields=star.Relion.ANGLES,
                              key=args.merge_key)

    if args.copy_alignments is not None:
        align_star = star.parse_star(args.copy_alignments,
                                     augment=args.augment)
        df = star.smart_merge(df,
                              align_star,
                              fields=star.Relion.ALIGNMENTS,
                              key=args.merge_key)

    if args.copy_reconstruct_images is not None:
        recon_star = star.parse_star(args.copy_reconstruct_images,
                                     augment=args.augment)
        df[star.Relion.RECONSTRUCT_IMAGE_NAME] = recon_star[
            star.Relion.IMAGE_NAME]

    if args.transform is not None:
        # Either three comma-separated Euler angles in degrees or a JSON
        # array that is used directly.
        if args.transform.count(",") == 2:
            r = geom.euler2rot(
                *np.deg2rad([np.double(s) for s in args.transform.split(",")]))
        else:
            r = np.array(json.loads(args.transform))
        df = star.transform_star(df, r, inplace=True)

    if args.invert_hand:
        df = star.invert_hand(df, inplace=True)

    if args.copy_paths is not None:
        path_star = star.parse_star(args.copy_paths)
        star.set_original_fields(df, inplace=True)
        df[star.Relion.IMAGE_NAME] = path_star[star.Relion.IMAGE_NAME]

    if args.copy_ctf is not None:
        ctf_star = pd.concat((star.parse_star(inp, augment=args.augment)
                              for inp in glob.glob(args.copy_ctf)),
                             join="inner")
        df = star.smart_merge(df,
                              ctf_star,
                              star.Relion.CTF_PARAMS,
                              key=args.merge_key)

    if args.copy_micrograph_coordinates is not None:
        coord_star = pd.concat(
            (star.parse_star(inp, augment=args.augment)
             for inp in glob.glob(args.copy_micrograph_coordinates)),
            join="inner")
        df = star.smart_merge(df,
                              coord_star,
                              fields=star.Relion.MICROGRAPH_COORDS,
                              key=args.merge_key)

    if args.scale is not None:
        star.scale_coordinates(df, args.scale, inplace=True)
        star.scale_origins(df, args.scale, inplace=True)
        star.scale_magnification(df, args.scale, inplace=True)

    if args.scale_particles is not None:
        star.scale_origins(df, args.scale_particles, inplace=True)
        star.scale_magnification(df, args.scale_particles, inplace=True)

    if args.scale_coordinates is not None:
        star.scale_coordinates(df, args.scale_coordinates, inplace=True)

    if args.scale_origins is not None:
        star.scale_origins(df, args.scale_origins, inplace=True)

    if args.scale_magnification is not None:
        star.scale_magnification(df, args.scale_magnification, inplace=True)

    if args.scale_apix is not None:
        star.scale_apix(df, args.scale_apix, inplace=True)

    if args.recenter:
        df = star.recenter(df, inplace=True)

    if args.zero_origins:
        df = star.zero_origins(df, inplace=True)

    if args.pick:
        df.drop(df.columns.difference(star.Relion.PICK_PARAMS),
                axis=1,
                inplace=True,
                errors="ignore")

    if args.subsample is not None and args.suffix != "":
        if args.subsample < 1:
            print("Specific integer sample size")
            return 1
        sample_size = int(args.subsample)
        # Floor division keeps the number of samplings an integer, which is
        # what np.random.choice requires for ``size``.
        if args.bootstrap is not None:
            nsamplings = args.bootstrap
        else:
            nsamplings = df.shape[0] // sample_size
        inds = np.random.choice(df.shape[0],
                                size=(nsamplings, sample_size),
                                replace=args.bootstrap is not None)
        for i, ind in enumerate(inds):
            # [:-5] strips a five-character extension such as ".star".
            star.write_star(
                os.path.join(
                    args.output,
                    os.path.basename(args.input[0])[:-5] + args.suffix +
                    "_%d" % (i + 1)), df.iloc[ind])

    if args.to_micrographs:
        df = star.to_micrographs(df)

    if args.micrograph_range:
        df.set_index(star.Relion.MICROGRAPH_NAME, inplace=True)
        m, n = [int(tok) for tok in args.micrograph_range.split(",")]
        mg = df.index.unique().sort_values()
        outside = list(range(0, m)) + list(range(n, len(mg)))
        dfaux = df.loc[mg[outside]].reset_index()
        df = df.loc[mg[m:n]].reset_index()

    if args.micrograph_path is not None:
        df = star.replace_micrograph_path(df,
                                          args.micrograph_path,
                                          inplace=True)

    if args.min_separation is not None:
        gb = df.groupby(star.Relion.MICROGRAPH_NAME)
        dupes = []
        for n, g in gb:
            nb = algo.query_connected(
                g[star.Relion.COORDS].values - g[star.Relion.ORIGINS],
                args.min_separation / star.calculate_apix(df))
            dupes.extend(g.index[~np.isnan(nb)])
        dfaux = df.loc[dupes]
        df.drop(dupes, inplace=True)

    if args.merge_source is not None:
        if args.merge_fields is not None:
            if "," in args.merge_fields:
                args.merge_fields = args.merge_fields.split(",")
            else:
                args.merge_fields = [args.merge_fields]
        else:
            print("Merge fields must be specified using --merge-fields")
            return 1
        if args.merge_key is not None:
            if "," in args.merge_key:
                args.merge_key = args.merge_key.split(",")
        if args.by_original:
            args.by_original = star.original_field(args.merge_key)
        else:
            args.by_original = args.merge_key
        merge_star = star.parse_star(args.merge_source, augment=args.augment)
        df = star.smart_merge(df,
                              merge_star,
                              fields=args.merge_fields,
                              key=args.merge_key,
                              left_key=args.by_original)

    if args.revert_original:
        df = star.revert_original(df, inplace=True)

    if args.set_optics is not None:
        tok = args.set_optics.split(",")
        df = star.set_optics_groups(df,
                                    sep=tok[0],
                                    idx=int(tok[1]),
                                    inplace=True)
        df.dropna(axis=0, how="any", inplace=True)

    if args.drop_optics_group is not None:
        # Try matching by optics group first, then by optics group name.
        idx = df[star.Relion.OPTICSGROUP].isin(args.drop_optics_group)
        if not np.any(idx):
            idx = df[star.Relion.OPTICSGROUPNAME].isin(args.drop_optics_group)
        if not np.any(idx):
            print("No group found to drop")
            return 1
        df = df.loc[~idx]

    if args.split_micrographs:
        dfs = star.split_micrographs(df)
        for mg in dfs:
            star.write_star(
                os.path.join(args.output,
                             os.path.basename(mg)[:-4]) + args.suffix, dfs[mg])
        return 0

    if args.auxout is not None and dfaux is not None:
        # The auxiliary rows go to their own file and must not replace
        # ``df``, which is still written to args.output below.
        if not args.relion2:
            dfaux = star.remove_deprecated_relion2(dfaux, inplace=True)
            star.write_star(args.auxout,
                            dfaux,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=True)
        else:
            dfaux = star.remove_new_relion31(dfaux, inplace=True)
            star.write_star(args.auxout,
                            dfaux,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=False)

    if args.output is not None:
        if not args.relion2:  # Relion 3.1 style output.
            df = star.remove_deprecated_relion2(df, inplace=True)
            star.write_star(args.output,
                            df,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=True)
        else:
            df = star.remove_new_relion31(df, inplace=True)
            star.write_star(args.output,
                            df,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=False)
    return 0
示例#3
0
文件: stack.py 项目: mahabul123/pyem
def main(args):
    """Concatenate particle images from the input files into one stack.

    Every entry of ``args.input`` must end in .star, .mrc, .mrcs or .par;
    otherwise an error is logged and the function returns before the output
    stack is opened.  Images referenced by .star files (optionally limited
    to the classes in ``args.cls``) and images contained in .mrcs files are
    appended to the stack at ``args.output``.  When ``args.star`` is set, a
    STAR file describing the new stack is written after the stack is closed.

    Args:
        args: Parsed command-line namespace.  Reads ``loglevel``, ``input``,
            ``output``, ``star``, ``cls``, ``resort``, ``stack_path``,
            ``abs_path`` and ``relion2``.

    Returns:
        0 on success, 1 if an input is unsupported or a required option is
        missing.
    """
    from itertools import groupby

    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    # Validate all inputs up front so no output is created for a bad list.
    supported = (".star", ".mrcs", ".mrc", ".par")
    for fn in args.input:
        if not fn.endswith(supported):
            log.error("Only .star, .mrc, .mrcs, and .par files supported")
            return 1

    first_ptcl = 0  # Index in the output stack of the next input's first image.
    dfs = []
    with mrc.ZSliceWriter(args.output) as writer:
        for fn in args.input:
            if fn.endswith(".star"):
                df = star.parse_star(fn, augment=True)
                if args.cls is not None:
                    df = star.select_classes(df, args.cls)
                star.set_original_fields(df, inplace=True)
                if args.resort:
                    df = df.sort_values([star.UCSF.IMAGE_ORIGINAL_PATH,
                                         star.UCSF.IMAGE_ORIGINAL_INDEX])
                # Images are written in table order.  Consecutive rows that
                # share a source stack are served by one open reader rather
                # than reopening the file for every particle.
                sources = zip(df[star.UCSF.IMAGE_ORIGINAL_PATH],
                              df[star.UCSF.IMAGE_ORIGINAL_INDEX])
                for src, run in groupby(sources, key=lambda pair: pair[0]):
                    if args.stack_path is not None:
                        input_stack_path = os.path.join(args.stack_path, src)
                    else:
                        input_stack_path = src
                    with mrc.ZSliceReader(input_stack_path) as reader:
                        for _, i in run:
                            writer.write(reader.read(i))
            elif fn.endswith(".par"):
                if args.stack_path is None:
                    log.error(".par file input requires --stack-path")
                    return 1
                # NOTE(review): only metadata is built for .par inputs; no
                # images are copied to the writer in this branch.
                df = metadata.par2star(metadata.parse_fx_par(fn), data_path=args.stack_path)
                star.augment_star_ucsf(df)
            elif fn.endswith(".csv"):
                # Not reachable while the validation above rejects .csv.
                return 1
            elif fn.endswith(".cs"):
                # Not reachable while the validation above rejects .cs.
                return 1
            else:
                if fn.endswith(".mrcs"):
                    with mrc.ZSliceReader(fn) as reader:
                        for img in reader:
                            writer.write(img)
                        df = pd.DataFrame(
                            {star.UCSF.IMAGE_ORIGINAL_INDEX: np.arange(reader.nz)})
                    df[star.UCSF.IMAGE_ORIGINAL_PATH] = fn
                else:
                    # NOTE(review): .mrc passes validation but ends up here.
                    print("Unrecognized input file type")
                    return 1
            if args.star is not None:
                # Point every record at its new position in the output stack.
                df[star.UCSF.IMAGE_INDEX] = np.arange(first_ptcl,
                                                      first_ptcl + df.shape[0])
                if args.abs_path:
                    df[star.UCSF.IMAGE_PATH] = writer.path
                else:
                    # Store the stack path relative to the STAR file's folder.
                    df[star.UCSF.IMAGE_PATH] = os.path.relpath(writer.path, os.path.dirname(args.star))
                df["index"] = df[star.UCSF.IMAGE_INDEX]
                star.simplify_star_ucsf(df)
                dfs.append(df)
            first_ptcl += df.shape[0]

    if args.star is not None:
        df = pd.concat(dfs, join="inner")
        if not args.relion2:  # Relion 3.1 style output.
            df = star.remove_deprecated_relion2(df, inplace=True)
            star.write_star(args.star, df, resort_records=False, optics=True)
        else:
            df = star.remove_new_relion31(df, inplace=True)
            star.write_star(args.star, df, resort_records=False, optics=False)
    return 0