def createOutputStep(self):
        """Register the expanded particles as the output of the protocol.

        The star file ``expanded_particles.star`` (extra path) is rewritten
        in place without the ``rlnImageId`` column and then read back into a
        new set of particles, which is defined as ``outputParticles`` and
        related to the input set.
        """
        inputSet = self.inputParticles.get()
        outputSet = self._createSetOfParticles()
        outputSet.copyInfo(inputSet)
        starFn = self._getExtraPath('expanded_particles.star')

        # When Plugin.IS_GT30() holds, the particles live in a table named
        # 'particles' and an additional 'optics' table must be preserved.
        # Both tables are loaded before the file is reopened for writing.
        withOptics = Plugin.IS_GT30()
        opticsTable = (Table(fileName=starFn, tableName='optics')
                       if withOptics else None)
        particlesName = 'particles' if withOptics else ''

        # Drop the repeating rlnImageId column
        particlesTable = Table(fileName=starFn, tableName=particlesName)
        particlesTable.removeColumns("rlnImageId")

        with open(starFn, "w") as starFile:
            particlesTable.writeStar(starFile, tableName=particlesName)
            if opticsTable is not None:
                opticsTable.writeStar(starFile, tableName='optics')

        convert.createReader().readSetOfParticles(
            starFn, outputSet,
            alignType=ALIGN_PROJ,
            postprocessImageRow=self._postprocessImageRow)

        self._defineOutputs(outputParticles=outputSet)
        self._defineSourceRelation(inputSet, outputSet)
Пример #2
0
    def _showChanges(self, paramName=None):
        """Gather the per-iteration change values into one star file.

        For every iteration returned by ``self._getAllIters()`` whose
        optimiser file exists, the values of the protocol's
        ``CHANGE_LABELS`` are taken from the first row of the
        ``optimiser_general`` table and stored, together with the iteration
        number, in a new table. The table is written to the protocol's
        ``all_changes`` file.

        :param paramName: not used by this method.
        :return: one-element list with the data view of the written file.
        """
        changeLabels = self.protocol.CHANGE_LABELS
        tableChanges = Table(columns=['rlnIterationNumber'] + changeLabels)

        print(
            "Computing average changes in offset, angles, and class membership"
        )
        for it in self._getAllIters():
            # Resolve the optimiser file once; it serves both the existence
            # check and the read below.
            fn = self.protocol._getFileName('optimiser', iter=it)
            if not os.path.exists(fn):
                continue
            print("Computing data for iteration; %03d" % it)
            row = Table(fileName=fn, tableName='optimiser_general')[0]
            tableChanges.addRow(
                it, *[getattr(row, label) for label in changeLabels])

        fn = self.protocol._getFileName('all_changes')
        with open(fn, 'w') as f:
            tableChanges.writeStar(f)

        return [self.createDataView(fn)]
Пример #3
0
    def _showPMax(self, paramName=None):
        """Collect the average Pmax of every iteration and plot it.

        The ``model_general`` table of each iteration (using the first
        protocol prefix) provides ``rlnAveragePmax`` and
        ``rlnLogLikelihood``; they are stored in the ``all_avgPmax`` star
        file and the average Pmax is plotted against the iteration number.

        :param paramName: not used by this method.
        :return: list with the data view of the star file and the plotter.
        """
        pmaxTable = Table(columns=['rlnIterationNumber',
                                   'rlnAveragePmax',
                                   'rlnLogLikelihood'])

        for iteration in self._getAllIters():
            # Every iteration is listed except the first one (Pmax=1)
            if iteration != 1:
                modelKey = self.protocol.PREFIXES[0] + 'model'
                modelFn = self.protocol._getFileName(modelKey, iter=iteration)
                general = Table(fileName=modelFn,
                                tableName='model_general')[0]
                pmaxTable.addRow(int(iteration),
                                 float(general.rlnAveragePmax),
                                 float(general.rlnLogLikelihood))

        pmaxFn = self.protocol._getFileName('all_avgPmax')
        with open(pmaxFn, 'w') as starFile:
            pmaxTable.writeStar(starFile)

        plotter = RelionPlotter()
        plotter.createSubPlot("Avg PMax per Iterations", "Iterations",
                              "Avg PMax")
        plotter.plotMd(pmaxTable, 'rlnIterationNumber', 'rlnAveragePmax')
        plotter.showLegend(['rlnAveragePmax'])

        return [self.createDataView(pmaxFn), plotter]
Пример #4
0
 def _write(self, f):
     """Write every stored optics group to *f* as the 'optics' table.

     Column names and types are derived from the first group.
     """
     template = self.first()._asdict()
     columns = []
     for name, value in template.items():
         columns.append(Table.Column(name, type(value)))

     opticsTable = Table(columns=columns)
     for group in self._dict.values():
         opticsTable.addRow(*group)
     opticsTable.writeStar(f, tableName='optics')
 def convertInputStep(self, newMics, numPass):
     """Write the micrographs star file used as input for cryoassess.

     One row with the absolute file name is written per item of *newMics*
     into the input file of pass *numPass*; afterwards the total input
     star is updated through ``appendTotalInputStar``.
     """
     table = Table(columns=['rlnMicrographName'])
     absPaths = (os.path.abspath(m.getFileName()) for m in newMics)
     for path in absPaths:
         table.addRow(path)

     starFn = self.getInputFilename(numPass)
     with open(starFn, 'w') as starFile:
         starFile.write("# Star file generated with Scipion\n")
         table.writeStar(starFile, tableName='')

     self.appendTotalInputStar(numPass)
Пример #6
0
    def test_write_singleRow(self):
        """Exercise ``writeStar(singleRow=True)`` on two tables.

        The 'global_shift' table parsed from ``one_micrograph_mc`` is
        printed to stdout, and a table built by hand is written to a file
        under /tmp.
        """
        outputFn = '/tmp/test-single-row.star'
        print("Writing a single row to %s..." % outputFn)

        # Table read from an in-memory star text
        shiftTable = Table()
        shiftTable.readStar(StringIO(one_micrograph_mc),
                            tableName='global_shift')
        shiftTable.writeStar(sys.stdout, tableName='global_shift',
                             singleRow=True)

        # Table created from scratch with a single row
        columns = ['rlnImageSizeX', 'rlnImageSizeY', 'rlnMicrographMovieName']
        values = (3710, 3838,
                  'Movies/14sep05c_00024sq_00003hl_00002es.frames.out.mrc')
        movieTable = Table(columns=columns)
        movieTable.addRow(*values)

        with open(outputFn, 'w') as starFile:
            movieTable.writeStar(starFile, singleRow=True)
    def convertInputStep(self, movId, partId, postId):
        """ Write the star files built from the input movies and particles.

        Steps performed:
          * the movie set is written to the 'input_mics' file through the
            convert writer, using the optics groups taken from the movies;
          * for each movie a star file is written with a single-row
            'general' table, a 'global_shift' table and, only when the
            movie has motion coefficients, a 'local_motion_model' table;
          * the input particles are written to the 'input_particles' file.

        The arguments movId, partId and postId are not used in the body.
        """
        inputMovies = self.inputMovies.get()
        inputParts = self.inputParticles.get()
        imgStar = self._getFileName('input_particles')
        inputPartsFolder = self._getInputPath('particles')
        pwutils.makePath(inputPartsFolder)

        self.info("Converting set from '%s' into '%s'" %
                  (inputParts.getFileName(), imgStar))

        # The three tables below are created once and reused for every movie
        tableGeneral = Table(columns=[
            'rlnImageSizeX', 'rlnImageSizeY', 'rlnImageSizeZ',
            'rlnMicrographMovieName', 'rlnMicrographBinning',
            'rlnMicrographOriginalPixelSize', 'rlnMicrographDoseRate',
            'rlnMicrographPreExposure', 'rlnVoltage',
            'rlnMicrographStartFrame', 'rlnMotionModelVersion',
            'rlnMicrographGainName', 'rlnMicrographDefectFile'
        ])
        tableShifts = Table(columns=[
            'rlnMicrographFrameNumber', 'rlnMicrographShiftX',
            'rlnMicrographShiftY'
        ])
        tableCoeffs = Table(
            columns=['rlnMotionModelCoeffsIdx', 'rlnMotionModelCoeff'])

        # Create the first row, later only the movieName will be updated
        xdim, ydim, ndim = inputMovies.getDim()
        acq = inputMovies.getAcquisition()
        firstMovie = inputMovies.getFirstItem()
        a0, aN = firstMovie.getAlignment().getRange()
        moviesPixelSize = inputMovies.getSamplingRate()
        binningFactor = inputParts.getSamplingRate() / moviesPixelSize

        og = convert.OpticsGroups.fromImages(inputMovies)
        writer = convert.createWriter(optics=og)
        writer.writeSetOfMicrographs(inputMovies,
                                     self._getFileName('input_mics'),
                                     postprocessImageRow=self._updateMic)

        # Template row: 'movieName' and the two '""' values are placeholders
        # replaced per movie below (gain/defect only when available).
        # NOTE(review): the start frame (a0) comes from the first movie only
        # and is not refreshed per movie - confirm all movies share the range.
        tableGeneral.addRow(xdim, ydim, ndim, 'movieName',
                            binningFactor, moviesPixelSize,
                            acq.getDosePerFrame(), acq.getDoseInitial(),
                            acq.getVoltage(), a0, 0, '""', '""')
        row = tableGeneral[0]

        for movie in inputMovies:
            movieStar = self._getMovieStar(movie)
            # Gain and defect files are looked up in the movie's optics
            # group (group 1 when the movie has no _rlnOpticsGroup)
            ogId = movie.getAttributeValue('_rlnOpticsGroup', 1)
            gainFn = og[ogId].get('rlnMicrographGainName', None)
            defectFn = og[ogId].get('rlnMicrographDefectFile', None)

            with open(movieStar, 'w') as f:
                # Coefficients are stored as a JSON list; a missing
                # attribute is read as an empty list
                coeffs = json.loads(
                    movie.getAttributeValue('_rlnMotionModelCoeff', '[]'))
                motionMode = 1 if coeffs else 0

                # Update some params in the general table
                replaceDict = {
                    'rlnMicrographMovieName': movie.getFileName(),
                    'rlnMotionModelVersion': motionMode
                }
                if gainFn:
                    replaceDict['rlnMicrographGainName'] = gainFn
                if defectFn:
                    replaceDict['rlnMicrographDefectFile'] = defectFn

                # Always start from the template row so values set for a
                # previous movie do not leak into this one
                tableGeneral[0] = row._replace(**replaceDict)
                tableGeneral.writeStar(f, tableName='general', singleRow=True)
                # Write shifts
                tableShifts.clearRows()
                alignment = movie.getAlignment()
                shiftsX, shiftsY = alignment.getShifts()
                a0, aN = alignment.getRange()
                # Placeholder shift for frames outside the aligned range
                empty = -9999.000
                for i in range(1, a0):
                    tableShifts.addRow(i, empty, empty)
                # Adjust the shifts to be relative to the first frame
                # so let's add the opposite value
                xoff, yoff = -shiftsX[0], -shiftsY[0]
                for i in range(a0, aN + 1):
                    tableShifts.addRow(i, shiftsX[i - a0] + xoff,
                                       shiftsY[i - a0] + yoff)
                # Rows after the aligned range, up to ndim (third value
                # returned by getDim(), presumably the number of frames)
                for i in range(aN + 1, ndim + 1):
                    tableShifts.addRow(i, empty, empty)
                tableShifts.writeStar(f, tableName='global_shift')

                # Write coefficients
                tableCoeffs.clearRows()
                if coeffs:
                    for i, c in enumerate(coeffs):
                        tableCoeffs.addRow(i, c)
                    tableCoeffs.writeStar(f, tableName='local_motion_model')

        convert.writeSetOfParticles(inputParts,
                                    imgStar,
                                    outputDir=inputPartsFolder,
                                    alignType=ALIGN_PROJ,
                                    fillMagnification=True)
    def convertInputStep(self, movId, partId, postId):
        """Write the star files built from the input movies and particles.

        For every input movie a star file is written under the input path
        with a single-row ``general`` table, a ``global_shift`` table with
        one row per frame and, when the movie has local motion
        coefficients, a ``local_motion_model`` table. The list of movies
        with their star files goes to ``input_corrected_micrographs.star``
        and the particles are converted to ``input_particles.star``.

        Local motion is only considered when the first movie carries the
        ``_rlnMotionModelCoeff`` attribute. Within such a set, a movie with
        missing or empty coefficients is written with motion model version
        0 and no coefficient table instead of failing while parsing them.

        The arguments movId, partId and postId are not used in the body.
        """
        inputMovies = self.inputMovies.get()
        inputParts = self.inputParticles.get()
        imgStar = self._getPath('input_particles.star')
        inputPartsFolder = self._getInputPath('particles')
        pwutils.makePath(inputPartsFolder)

        self.info("Converting set from '%s' into '%s'" %
                  (inputParts.getFileName(), imgStar))

        tableMovies = Table(
            columns=['rlnMicrographName', 'rlnMicrographMetadata'])
        tableGeneral = Table(columns=[
            'rlnImageSizeX', 'rlnImageSizeY', 'rlnImageSizeZ',
            'rlnMicrographMovieName', 'rlnMicrographBinning',
            'rlnMicrographOriginalPixelSize', 'rlnMicrographDoseRate',
            'rlnMicrographPreExposure', 'rlnVoltage',
            'rlnMicrographStartFrame', 'rlnMotionModelVersion'
        ])
        tableShifts = Table(columns=[
            'rlnMicrographFrameNumber', 'rlnMicrographShiftX',
            'rlnMicrographShiftY'
        ])
        tableCoeffs = Table(
            columns=['rlnMotionModelCoeffsIdx', 'rlnMotionModelCoeff'])

        # Create the template row; the movie name and the motion model
        # version are replaced for each movie below
        xdim, ydim, ndim = inputMovies.getDim()
        acq = inputMovies.getAcquisition()
        firstMovie = inputMovies.getFirstItem()
        a0, aN = firstMovie.getAlignment().getRange()
        moviesPixelSize = inputMovies.getSamplingRate()
        binningFactor = inputParts.getSamplingRate() / moviesPixelSize
        hasLocal = firstMovie.hasAttribute('_rlnMotionModelCoeff')

        tableGeneral.addRow(xdim, ydim, ndim, 'movieName',
                            binningFactor, moviesPixelSize,
                            acq.getDosePerFrame(), acq.getDoseInitial(),
                            acq.getVoltage(), a0, 1 if hasLocal else 0)
        row = tableGeneral[0]

        # Placeholder shift for frames outside the aligned range
        empty = -9999.000

        for movie in inputMovies:
            movieFn = movie.getFileName()
            movieStar = self._getInputPath(
                pwutils.replaceBaseExt(movieFn, 'star'))
            tableMovies.addRow(os.path.basename(movieFn), movieStar)

            # Parse the coefficients before touching the output file. A
            # missing/empty value is treated as "no local motion" for this
            # movie (json.loads('') would raise).
            coeffs = []
            if hasLocal:
                coeffs = json.loads(
                    movie.getAttributeValue('_rlnMotionModelCoeff', None)
                    or '[]')

            with open(movieStar, 'w') as f:
                # Keep the declared model version in sync with whether a
                # coefficient table is actually written for this movie
                tableGeneral[0] = row._replace(
                    rlnMicrographMovieName=movieFn,
                    rlnMotionModelVersion=1 if coeffs else 0)
                tableGeneral.writeStar(f, tableName='general', singleRow=True)

                # Write shifts
                tableShifts.clearRows()
                alignment = movie.getAlignment()
                shiftsX, shiftsY = alignment.getShifts()
                a0, aN = alignment.getRange()
                for i in range(1, a0):
                    tableShifts.addRow(i, empty, empty)
                # Adjust the shifts to be relative to the first frame
                # so let's add the opposite value
                xoff, yoff = -shiftsX[0], -shiftsY[0]
                for i in range(a0, aN + 1):
                    tableShifts.addRow(i, shiftsX[i - a0] + xoff,
                                       shiftsY[i - a0] + yoff)
                for i in range(aN + 1, ndim + 1):
                    tableShifts.addRow(i, empty, empty)
                tableShifts.writeStar(f, tableName='global_shift')

                # Write coefficients
                if coeffs:
                    tableCoeffs.clearRows()
                    for i, c in enumerate(coeffs):
                        tableCoeffs.addRow(i, c)
                    tableCoeffs.writeStar(f, tableName='local_motion_model')

        with open(self._getPath('input_corrected_micrographs.star'), 'w') as f:
            tableMovies.writeStar(f)

        convert.writeSetOfParticles(inputParts,
                                    imgStar,
                                    outputDir=inputPartsFolder,
                                    alignType=ALIGN_PROJ,
                                    fillMagnification=True)
Пример #9
0
def _read_cryolo_box_size(job_dir):
    """Return the MEAN value of crYOLO's size distribution summary.

    The first file matching ``output/DISTR/size_distribution_summary*.txt``
    under *job_dir* is scanned for the line starting with ``MEAN,``; the last
    comma-separated field of that line is returned as an int.

    :raises Exception: if no summary file is found or it has no MEAN line.
    """
    pattern = os.path.join(job_dir, "output/DISTR",
                           'size_distribution_summary*.txt')
    summaries = glob(pattern)
    if not summaries:
        raise Exception("crYOLO size summary not found: %s" % pattern)

    with open(summaries[0]) as f:
        for line in f:
            if line.startswith("MEAN,"):
                return int(line.split(",")[-1])

    raise Exception("No MEAN entry found in %s" % summaries[0])


def run_job(args):
    """Run crYOLO prediction on a micrographs star file.

    Reads from *args*: ``in_mics``, ``out_dir``, ``threshold``, ``box_size``,
    ``model``, ``filament`` (plus ``box_distance`` and
    ``minimum_number_boxes`` when set), ``denoise``, ``gpu`` and ``threads``.

    Steps:
      1. write ``config_cryolo.json`` into the job directory;
      2. launch the prediction command through the shell;
      3. move the produced coordinate files next to the job directory
         layout and list them in ``autopick.star``;
      4. write ``job_pipeline.star`` and register the output node;
      5. unless ``output_for_relion.star`` already exists, derive suggested
         parameters from the estimated box size and write
         ``.gui_manualpickjob.star``.

    :raises Exception: if the command returns a non-zero code or the size
        distribution summary cannot be read.
    """
    start = time.time()
    in_mics = args.in_mics
    job_dir = args.out_dir
    thresh = args.threshold
    box_size = args.box_size
    distance = 0
    model = args.model
    filament = args.filament
    if filament:
        box_dist = args.box_distance
        min_boxes = args.minimum_number_boxes
    denoise = args.denoise
    gpus = args.gpu
    threads = args.threads

    config_fn = os.path.join(job_dir, "config_cryolo.json")
    autopick_fn = os.path.join(job_dir, "autopick.star")

    if SCRATCH_DIR is not None:
        filtered_dir = os.path.join(SCRATCH_DIR, "filtered_tmp")
    else:
        filtered_dir = "%s/filtered_tmp/" % job_dir

    if model == "None":
        model = CRYOLO_GEN_MODEL if not denoise else CRYOLO_GEN_JANNI_MODEL
    else:
        model = os.path.abspath(model)

    # Making a cryolo config file
    json_dict = {
        "model": {
            "architecture": "PhosaurusNet",
            "input_size": 1024,
            "max_box_per_image": 600,
            "filter": [0.1, filtered_dir]
        },
        "other": {
            "log_path": "%s/logs/" % job_dir
        }
    }
    if box_size:  # is not 0
        json_dict["model"]["anchors"] = [int(box_size), int(box_size)]
        if not filament:
            distance = int(box_size / 2)  # use half the box_size
    if denoise:
        json_dict["model"]["filter"] = [
            CRYOLO_JANNI_MODEL, 24, 3, filtered_dir
        ]

    if DEBUG:
        print("Using following config: ", json_dict)

    with open(config_fn, "w") as json_file:
        json.dump(json_dict, json_file, indent=4)

    # Reading the micrographs star file from Relion
    mictable = Table(fileName=in_mics, tableName='micrographs')
    mic_fns = mictable.getColumnValues("rlnMicrographName")

    # Launching cryolo
    args_dict = {
        '--conf': config_fn,
        '--input': in_mics,
        '--output': os.path.join(job_dir, 'output'),
        '--weights': model,
        '--gpu': gpus.replace(',', ' '),
        '--threshold': thresh,
        '--distance': distance,
        '--cleanup': "",
        '--skip': "",
        '--write_empty': "",
        '--num_cpu': -1 if threads == 1 else threads
    }

    if filament:
        args_dict.update({
            '--filament': "",
            '--box_distance': box_dist,
            '--minimum_number_boxes': min_boxes,
            '--directional_method': 'PREDICTED'
        })
        args_dict.pop('--distance')

    cmd = "%s && %s " % (CONDA_ENV, CRYOLO_PREDICT)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Moving output star files for Relion to use
    table_coords = Table(
        columns=['rlnMicrographName', 'rlnMicrographCoordinates'])
    star_dir = "EMAN_HELIX_SEGMENTED" if filament else "STAR"
    ext = ".box" if filament else ".star"
    with open(autopick_fn, "w") as mics_star:
        for mic in mic_fns:
            mic_root = os.path.splitext(os.path.basename(mic))[0]
            mic_dir = os.path.dirname(mic)
            dir_parts = mic_dir.split("/")
            if len(dir_parts) > 1 and "job" in dir_parts[1]:
                # remove JobType/jobXXX
                mic_dir = "/".join(dir_parts[2:])
            os.makedirs(os.path.join(job_dir, mic_dir), exist_ok=True)
            coord_cryolo = os.path.join(job_dir, "output", star_dir,
                                        mic_root + ext)
            coord_relion = os.path.join(job_dir, mic_dir,
                                        mic_root + "_autopick" + ext)
            # Only micrographs that produced a coordinate file are listed
            if os.path.exists(coord_cryolo):
                os.rename(coord_cryolo, coord_relion)
                table_coords.addRow(mic, coord_relion)
                if DEBUG:
                    print("Moved %s to %s" % (coord_cryolo, coord_relion))
        table_coords.writeStar(mics_star, tableName='coordinate_files')

    # Required output to mini pipeline job_pipeline.star file
    pipeline_fn = os.path.join(job_dir, "job_pipeline.star")
    table_gen = Table(columns=['rlnPipeLineJobCounter'])
    table_gen.addRow(2)
    table_proc = Table(columns=[
        'rlnPipeLineProcessName', 'rlnPipeLineProcessAlias',
        'rlnPipeLineProcessTypeLabel', 'rlnPipeLineProcessStatusLabel'
    ])
    table_proc.addRow(job_dir, 'None', 'relion.external', 'Running')
    table_nodes = Table(
        columns=['rlnPipeLineNodeName', 'rlnPipeLineNodeTypeLabel'])
    table_nodes.addRow(in_mics, "MicrographsData.star.relion")
    table_nodes.addRow(autopick_fn,
                       "MicrographsCoords.star.relion.autopick")
    table_input = Table(
        columns=['rlnPipeLineEdgeFromNode', 'rlnPipeLineEdgeProcess'])
    table_input.addRow(in_mics, job_dir)
    table_output = Table(
        columns=['rlnPipeLineEdgeProcess', 'rlnPipeLineEdgeToNode'])
    table_output.addRow(job_dir, autopick_fn)

    with open(pipeline_fn, "w") as f:
        table_gen.writeStar(f, tableName="pipeline_general", singleRow=True)
        table_proc.writeStar(f, tableName="pipeline_processes")
        table_nodes.writeStar(f, tableName="pipeline_nodes")
        table_input.writeStar(f, tableName="pipeline_input_edges")
        table_output.writeStar(f, tableName="pipeline_output_edges")

    # Register output nodes in .Nodes/
    node_dir = os.path.join(".Nodes", "MicrographsCoords", job_dir)
    os.makedirs(node_dir, exist_ok=True)
    open(os.path.join(node_dir, "autopick.star"), "w").close()

    outputFn = os.path.join(job_dir, "output_for_relion.star")
    if not os.path.exists(outputFn):
        # get estimated box size (fails with a clear message when the
        # summary is missing or has no MEAN line)
        estim_sizepx = _read_cryolo_box_size(job_dir)
        print("\ncrYOLO estimated box size %d px" % estim_sizepx)

        # calculate diameter, original (boxSize) and downsampled (boxSizeSmall) box
        optics = Table(fileName=in_mics, tableName='optics')
        angpix = float(optics[0].rlnMicrographPixelSize)

        if filament:
            # box size = 1.5x tube diam
            # NOTE(review): unlike the branch below, this value is not
            # scaled by angpix - confirm the intended unit.
            diam = 0.66 * box_size
        else:
            # use + 20% for diameter
            diam = math.ceil(estim_sizepx * angpix * 1.2)
            # use +30% for box size, make it even
            boxSize = 1.3 * estim_sizepx
            boxSize = math.ceil(boxSize / 2.) * 2

            # from relion_it.py script
            # Authors: Sjors H.W. Scheres, Takanori Nakane & Colin M. Palmer
            boxSizeSmall = None
            for box in (48, 64, 96, 128, 160, 192, 256, 288, 300, 320, 360,
                        384, 400, 420, 450, 480, 512, 640, 768, 896, 1024):
                # Don't go larger than the original box
                if box > boxSize:
                    boxSizeSmall = boxSize
                    break
                # If Nyquist freq. is better than 7.5 A, use this
                # downscaled box, otherwise continue to next size up
                small_box_angpix = angpix * boxSize / box
                if small_box_angpix < 3.75:
                    boxSizeSmall = box
                    break

            print(
                "\nSuggested parameters:\n\tDiameter (A): %d\n\tBox size (px): %d\n"
                "\tBox size binned (px): %d" % (diam, boxSize, boxSizeSmall))

            # output all params into a star file
            tableCryolo = Table(columns=[
                'rlnParticleDiameter', 'rlnOriginalImageSize', 'rlnImageSize'
            ])
            tableCryolo.addRow(diam, boxSize, boxSizeSmall)
            with open(outputFn, "w") as f:
                tableCryolo.writeStar(f, tableName='picker')

        # create .gui_manualpickjob.star for easy display
        starString = """
# version 30001

data_job

_rlnJobTypeLabel             relion.manualpick%s
_rlnJobIsContinue                       0
_rlnJobIsTomo                           0

# version 30001

data_joboptions_values

loop_
_rlnJobOptionVariable #1
_rlnJobOptionValue #2
    angpix         %f
 black_val          0
blue_value          0
color_label rlnParticleSelectZScore
  diameter         %d
  do_color         No
do_fom_threshold         No
  do_queue         No
do_startend        No
  fn_color         ""
     fn_in         ""
  highpass         -1
   lowpass         20
  micscale        0.2
min_dedicated       1
minimum_pick_fom          0
other_args         ""
      qsub       qsub
qsubscript /public/EM/RELION/relion/bin/relion_qsub.csh
 queuename    openmpi
 red_value          2
sigma_contrast      3
 white_val          0
"""
        label = ".helical" if filament else ""
        with open(".gui_manualpickjob.star", "w") as f:
            f.write(starString % (label, angpix, diam))

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n" %
          (diff // 3600, diff // 60 % 60, diff % 60))
def run_job(project_dir, args):
    """Select particles from the 2D classes that cryoassess rates as good.

    Reads from *args*: ``in_parts`` (a ``*_data.star`` file), ``out_dir``,
    ``batch_size`` and ``gpu``. The class averages stack is taken from the
    matching ``*_model.star`` file and passed to cryoassess; the class
    numbers are then parsed from the ``output/Good/particle_<N>.jpg`` file
    names. Particles whose ``rlnClassNumber`` is among them are written,
    together with the optics table, to ``particles_for_training.star``. A
    ``backup_selection.star`` with one 0/1 row per class is written as well.

    When no good class is found the Relion failure file is created and the
    process exits with status 1.

    :param project_dir: directory that relative paths are joined to.
    :raises Exception: if the cryoassess command returns a non-zero code.
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    batch = args.batch_size
    gpu = args.gpu

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    def print_duration():
        diff = time.time() - start
        print("Job duration = %dh %dmin %dsec \n" %
              (diff // 3600, diff // 60 % 60, diff % 60))

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cryoassess
    args_dict = {
        '-i': getPath(refstack),
        '-o': getPath(job_dir, 'output'),
        '-b': batch,
        '-m': CRYOASSESS_2D_MODEL,
    }
    cmd = "%s && CUDA_VISIBLE_DEVICES=%s %s " % (CONDA_ENV, gpu, CRYOASSESS_2D)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Parse output to get good classes IDs. The pattern is a raw string and
    # requires at least one digit; files whose name does not carry a class
    # number are skipped instead of crashing the job.
    goodTemplate = getPath(job_dir, "output/Good/particle_*.jpg")
    regex = re.compile(r'particle_(\d+)\.jpg')
    goodcls = set()
    for fn in glob(goodTemplate):
        match = regex.fullmatch(os.path.basename(fn))
        if match:
            goodcls.add(int(match.group(1)))

    if DEBUG:
        print("Parsing output files: %s\nGood classes: %s" %
              (goodTemplate, sorted(goodcls)))

    if not goodcls:
        print("No good classes found. Job stopped.")
        print_duration()
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        raise SystemExit(1)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    out_ptcls = Table(columns=ptcls.getColumnNames())

    # goodcls is a set, so the per-particle membership test is O(1)
    for row in ptcls:
        if row.rlnClassNumber in goodcls:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d" %
              (len(ptcls), len(out_ptcls)))

    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in goodcls else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    print_duration()
Пример #11
0
def run_job(project_dir, args):
    """Fine-tune a crYOLO model on the particles of a Relion star file.

    Reads from *args*: ``in_parts``, ``out_dir``, ``model`` (falls back to
    the general model) and ``gpu``. The function writes
    ``config_cryolo.json``, links every micrograph referenced by the
    particles into the image folder, writes one ``.box`` file per micrograph
    into the annotation folder, launches the training command and finally
    writes ``job_pipeline.star``.

    If the particles table cannot be read, a message is printed and the
    function returns without training.

    :param project_dir: directory that relative paths are joined to.
    :raises Exception: if the training command returns a non-zero code.
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    model = args.model or CRYOLO_GEN_MODEL
    gpus = args.gpu

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    if SCRATCH_DIR is not None:
        filtered_dir = os.path.join(SCRATCH_DIR, "filtered_tmp")
    else:
        filtered_dir = "%s/filtered_tmp/" % job_dir

    # Create folder structure for cryolo (tolerate folders left by a
    # previous run)
    os.makedirs(IMG_FOLDER, exist_ok=True)
    os.makedirs(ANNOT_FOLDER, exist_ok=True)

    # Reading the box size from relion. True division is used for the
    # binning factor: float floor division can drop a whole unit because of
    # rounding (0.3 // 0.1 == 2.0) and truncates non-integer binning.
    optics = Table(fileName=getPath(in_parts), tableName='optics')[0]
    box_bin = int(optics.rlnImageSize)
    binning = (float(optics.rlnImagePixelSize) /
               float(optics.rlnMicrographOriginalPixelSize))
    box_size = int(round(box_bin * binning))
    print("Using unbinned box size of %d px" % box_size)

    # Making a cryolo config file
    json_dict = {
        "model": {
            "architecture": "PhosaurusNet",
            "input_size": 1024,
            "max_box_per_image": 600,
            "anchors": [box_size, box_size],
            "filter": [
                0.1,
                filtered_dir
            ]
        },
        "train": {
            "train_image_folder": IMG_FOLDER,
            "train_annot_folder": ANNOT_FOLDER,
            "train_times": 10,
            "batch_size": 6,
            "learning_rate": 0.0001,
            "nb_epoch": 200,
            "object_scale": 5.0,
            "no_object_scale": 1.0,
            "coord_scale": 1.0,
            "class_scale": 1.0,
            "pretrained_weights": "%s" % model,
            "saved_weights_name": getPath(job_dir, TUNE_MODEL),
            "debug": True
        },
        "valid": {
            "valid_image_folder": "",
            "valid_annot_folder": "",
            "valid_times": 1
        }
    }

    if DEBUG:
        print("Using following config: ", json_dict)

    with open("config_cryolo.json", "w") as json_file:
        json.dump(json_dict, json_file, indent=4)

    # Reading the particles from relion. Only regular errors are handled so
    # that Ctrl-C / SystemExit are not swallowed.
    try:
        parttable = Table(fileName=getPath(in_parts), tableName='particles')
    except Exception as e:
        print("Could not read particles table from %s. Stopping" % in_parts)
        print("Error: %s" % e)
        return

    # Arranging files for cryolo: group the box corners by micrograph
    # (coordinates are shifted by half a box from the particle position)
    half_box = box_size / 2
    mics_dict = {}
    for row in parttable:
        xCoord = int(int(row.rlnCoordinateX) - half_box)
        yCoord = int(int(row.rlnCoordinateY) - half_box)
        mics_dict.setdefault(row.rlnMicrographName, []).append(
            (xCoord, yCoord))

    # Making symlinks for mics and creating box files
    for mic, coords_list in mics_dict.items():
        micSrc = getPath(mic)
        micDst = getPath(job_dir, IMG_FOLDER, os.path.basename(mic))
        # lexists also detects a dangling link, which os.symlink would
        # refuse to overwrite
        if not os.path.lexists(micDst):
            os.symlink(micSrc, micDst)
        if DEBUG:
            print("Link %s --> %s" % (micSrc, micDst))

        box = os.path.splitext(micDst)[0] + ".box"
        box = box.replace(IMG_FOLDER, ANNOT_FOLDER)
        with open(box, "w") as f:
            for x, y in coords_list:
                f.write("%s\t%s\t%s\t%s\n" % (x, y, box_size, box_size))
        if DEBUG:
            print("Created box file: %s" % box)

    # Launching cryolo
    args_dict = {
        '--conf': "config_cryolo.json",
        '--gpu': gpus.replace(',', ' '),
        '--warmup': 0,
        '--fine_tune': "",
        '--cleanup': ""
    }
    cmd = "%s && %s " % (CONDA_ENV, CRYOLO_TRAIN)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Required output job_pipeline.star file
    pipeline_fn = getPath(job_dir, "job_pipeline.star")
    table_gen = Table(columns=['rlnPipeLineJobCounter'])
    table_gen.addRow(2)
    table_proc = Table(columns=['rlnPipeLineProcessName', 'rlnPipeLineProcessAlias',
                                'rlnPipeLineProcessTypeLabel', 'rlnPipeLineProcessStatusLabel'])
    table_proc.addRow(job_dir, 'None', 'relion.external', 'Running')
    table_nodes = Table(columns=['rlnPipeLineNodeName', 'rlnPipeLineNodeTypeLabel'])
    table_nodes.addRow(in_parts, "ParticlesData.star.relion")
    table_input = Table(columns=['rlnPipeLineEdgeFromNode', 'rlnPipeLineEdgeProcess'])
    table_input.addRow(in_parts, job_dir)

    with open(pipeline_fn, "w") as f:
        table_gen.writeStar(f, tableName="pipeline_general", singleRow=True)
        table_proc.writeStar(f, tableName="pipeline_processes")
        table_nodes.writeStar(f, tableName="pipeline_nodes")
        table_input.writeStar(f, tableName="pipeline_input_edges")

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n" % (diff // 3600, diff // 60 % 60, diff % 60))
def run_job(project_dir, args):
    """Select particles from the 2D classes that cinderella rates as good.

    Reads from *args*: ``in_parts`` (a ``*_data.star`` file), ``out_dir``,
    ``threshold``, ``model`` (the string "None" selects the general model)
    and ``gpu``. The class averages stack is taken from the matching
    ``*_model.star`` file and passed to cinderella; its
    ``*_index_confidence.txt`` output is read until the first line whose
    confidence is not above the threshold. Particles whose
    ``rlnClassNumber`` is among the good classes are written, together with
    the optics table, to ``particles_for_training.star``. A
    ``backup_selection.star`` with one 0/1 row per class is written as well.

    When no good class is found the Relion failure file is created and the
    process exits with status 1.

    :param project_dir: directory that relative paths are joined to.
    :raises Exception: if the cinderella command returns a non-zero code.
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    thresh = args.threshold
    model = args.model
    gpus = args.gpu

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    def print_duration():
        diff = time.time() - start
        print("Job duration = %dh %dmin %dsec \n" %
              (diff // 3600, diff // 60 % 60, diff % 60))

    if model == "None":
        model = CINDERELLA_GEN_MODEL
    else:
        model = getPath(model)

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cinderella
    args_dict = {
        '-i': getPath(refstack),
        '-o': 'output',
        '-w': model,
        '--gpu': gpus,
        '-t': thresh,
    }
    cmd = "%s && %s " % (CONDA_ENV, CINDERELLA_PREDICT)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Parse output to get good classes IDs. Each line holds an index and a
    # confidence; the stored class number is index + 1. Reading stops at the
    # first line that is not above the threshold.
    outfn = os.path.basename(refstack.replace(".mrcs", "_index_confidence.txt"))
    outpath = getPath(job_dir, "output", outfn)
    goodcls = set()
    with open(outpath, "r") as f:
        for line in f:
            fields = line.split()
            if len(fields) < 2:
                # blank or incomplete line: nothing to parse
                continue
            if float(fields[1]) > thresh:
                goodcls.add(int(fields[0]) + 1)
            else:
                break

    if DEBUG:
        print("Parsing output file: %s\nGood classes: %s" %
              (outpath, sorted(goodcls)))

    if not goodcls:
        print("No good classes found. Job stopped.")
        print_duration()
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        raise SystemExit(1)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    out_ptcls = Table(columns=ptcls.getColumnNames())

    # goodcls is a set, so the per-particle membership test is O(1)
    for row in ptcls:
        if row.rlnClassNumber in goodcls:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d" %
              (len(ptcls), len(out_ptcls)))

    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in goodcls else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    print_duration()