def createOutputStep(self):
    """Register the outputs of the run: volume, particles and FSC curve.

    The volume points at 'relion_class001.mrc' in the extra folder, takes
    the sampling rate of the input particles and gets both final half maps
    attached. The output particle set copies the input set info and is
    filled from the last iteration. The FSC is read from the
    'model_class_1' table of 'relion_model.star'.
    """
    inputParts = self._getInputParticles()

    # Output volume with its two half maps
    volume = Volume()
    volume.setFileName(self._getExtraPath('relion_class001.mrc'))
    volume.setSamplingRate(inputParts.getSamplingRate())
    volume.setHalfMaps([
        self._getFileName("final_half1_volume", ref3d=1),
        self._getFileName("final_half2_volume", ref3d=1),
    ])

    # Output particles, populated from the last iteration
    outputParts = self._createSetOfParticles()
    outputParts.copyInfo(inputParts)
    self._fillDataFromIter(outputParts, self._lastIter())

    self._defineOutputs(outputVolume=volume)
    self._defineSourceRelation(self.inputParticles, volume)
    self._defineOutputs(outputParticles=outputParts)
    self._defineTransformRelation(self.inputParticles, outputParts)

    # Gold-standard FSC curve taken from the model star file
    fscCurve = FSC(objLabel=self.getRunName())
    modelStar = self._getExtraPath("relion_model.star")
    modelTable = Table(fileName=modelStar, tableName='model_class_1')
    fscCurve.setData(modelTable.getColumnValues('rlnResolution'),
                     modelTable.getColumnValues('rlnGoldStandardFsc'))

    self._defineOutputs(outputFSC=fscCurve)
    self._defineSourceRelation(volume, fscCurve)
def _plotFSC(self, a, model_star, label, legend=None):
    """Build an FSC object from the values stored in a model star file.

    The 'rlnResolution' and 'rlnGoldStandardFsc' columns of the
    'model_class_1' table in ``model_star`` are loaded into a new FSC
    labelled with ``legend`` (``label`` is used when no legend is given).
    The ``a`` argument is accepted but not used by this method.

    Returns the populated FSC object.
    """
    fscLabel = label if legend is None else legend

    modelTable = Table(fileName=model_star, tableName='model_class_1')
    invResolution = modelTable.getColumnValues('rlnResolution')
    fscValues = modelTable.getColumnValues('rlnGoldStandardFsc')

    curve = FSC(objLabel=fscLabel)
    curve.setData(invResolution, fscValues)
    return curve
def plotMdAngularDistribution(self, title, angularMd, tableName=None,
                              color='blue'):
    """Plot the angular distribution stored in a star file table.

    The table ``tableName`` of ``angularMd`` must contain the columns
    'rlnAngleRot' and 'rlnAngleTilt'; both are converted to radians and
    handed to ``plotAngularDistribution`` together with ``title``.
    The ``color`` argument is accepted but not used by this method.
    """
    anglesTable = Table(fileName=angularMd, tableName=tableName)
    rotRad = radians(anglesTable.getColumnValues('rlnAngleRot'))
    tiltRad = radians(anglesTable.getColumnValues('rlnAngleTilt'))
    self.plotAngularDistribution(title, rotRad, tiltRad)
def _plotSSNR(self, a, fn, table, label):
    """Plot log(SSNR) versus inverse resolution on the axes ``a``.

    Values come from the 'rlnSsnrMap' and 'rlnResolution' columns of table
    ``table`` in star file ``fn``. Only entries with SSNR above 0.9 are
    drawn. Entries are keyed by their SSNR value, so repeated SSNR values
    collapse into a single point.
    """
    ssnrTable = Table(fileName=fn, tableName=table)
    ssnrValues = map(float, ssnrTable.getColumnValues('rlnSsnrMap'))
    invResolution = map(float, ssnrTable.getColumnValues('rlnResolution'))
    resBySsnr = dict(zip(ssnrValues, invResolution))

    # only cross by 1 is important
    resByLogSsnr = {log(value): res
                    for value, res in resBySsnr.items()
                    if value > 0.9}

    xValues = list(resByLogSsnr.values())
    yValues = list(resByLogSsnr.keys())
    a.plot(xValues, yValues, label=label)
    a.xaxis.set_major_formatter(self._plotFormatter)
def _getGoodMicFns(self, numPass):
    """ Return the 'rlnMicrographName' values found in the output star
    file of the given pass, or an empty list if that file does not exist.
    """
    if not os.path.exists(self.getOutputFilename(numPass)):
        return []

    outputTable = Table(fileName=self.getOutputFilename(numPass),
                        tableName='')
    return outputTable.getColumnValues('rlnMicrographName')
def test_iterRows(self):
    """ Check that the different ways of iterating the 'particles' table
    (in-memory Table, Table.Reader and Table.iterRows, with and without
    a sorting key) yield consistent columns and rows.
    """
    print("Checking iterRows...")
    starPath = testfile('star', 'refine3d', 'run_it016_data.star')
    fullTable = Table(fileName=starPath, tableName='particles')

    # Open the same file a second time and iterate it through a Reader
    with open(starPath) as starFile:
        reader = Table.Reader(starFile, tableName='particles')

        for colA, colB in zip(fullTable.getColumns(), reader.getColumns()):
            self.assertEqual(colA, colB,
                             "Column c1 (%s) differs from c2 (%s)"
                             % (colA, colB))

        for rowA, rowB in zip(fullTable, reader):
            self.assertEqual(rowA, rowB)

    # Same comparison, this time going through iterRows directly
    plainRows = Table.iterRows(starPath, tableName='particles')
    for rowA, rowB in zip(fullTable, plainRows):
        self.assertEqual(rowA, rowB)

    # Iteration sorted with a callable key
    defocusSorted = sorted(float(row.rlnDefocusU) for row in fullTable)
    rowsByDefocus = Table.iterRows(starPath, tableName='particles',
                                   key=lambda r: r.rlnDefocusU)
    for defocus, row in zip(defocusSorted, rowsByDefocus):
        self.assertAlmostEqual(defocus, row.rlnDefocusU)

    # Iteration sorted by a column name, compared against
    # getColumnValues followed by an in-place sort()
    imageIds = fullTable.getColumnValues('rlnImageName')
    imageIds.sort()

    # Sorted iteration must still return every row
    sortedRows = list(Table.iterRows(starPath, tableName='particles',
                                     key='rlnImageName'))
    self.assertEqual(len(imageIds), len(sortedRows))

    rowsByName = Table.iterRows(starPath, tableName='particles',
                                key='rlnImageName')
    for imageId, row in zip(imageIds, rowsByName):
        self.assertEqual(imageId, row.rlnImageName)

    def getIter():
        """ Test a function to get an iterator. """
        return Table.iterRows(starPath, tableName='particles',
                              key='rlnImageName')

    iterByIds = getIter()
    for imageId, row in zip(imageIds, iterByIds):
        self.assertEqual(imageId, row.rlnImageName)
def test_read_blocks(self):
    """ Read a star file with several blocks.

    The blocks 'general', 'global_shift' and 'local_motion_model' are read
    one after another from the same in-memory stream into the same Table,
    checking columns, row counts and values after each read.
    """
    print("Reading micrograph star file...")
    t1 = Table()

    goldValues = [('rlnImageSizeX', 3710),
                  ('rlnImageSizeY', 3838),
                  ('rlnImageSizeZ', 24),
                  ('rlnMicrographMovieName',
                   'Movies/20170629_00027_frameImage.tiff'),
                  ('rlnMicrographGainName', 'Movies/gain.mrc'),
                  ('rlnMicrographBinning', 1.000000),
                  ('rlnMicrographOriginalPixelSize', 0.885000),
                  ('rlnMicrographDoseRate', 1.277000),
                  ('rlnMicrographPreExposure', 0.000000),
                  ('rlnVoltage', 200.000000),
                  ('rlnMicrographStartFrame', 1),
                  ('rlnMotionModelVersion', 1)
                  ]

    # The context manager closes the stream even if an assertion fails
    with StringIO(one_micrograph_mc) as f1:
        # This is a single-row table (different text format key, value)
        print("\tread data_general ..")
        t1.readStar(f1, tableName='general')

        self._checkColumns(t1, [k for k, v in goldValues])
        row = t1[0]
        for k, v in goldValues:
            self.assertEqual(getattr(row, k), v,
                             "data_general table check failed!")

        print("\tread data_global_shift ..")
        t1.readStar(f1, tableName='global_shift')
        self.assertEqual(len(t1), 24, "Number of rows check failed!")
        self._checkColumns(t1, ['rlnMicrographFrameNumber',
                                'rlnMicrographShiftX',
                                'rlnMicrographShiftY'])

        print("\tread data_local_motion_model ..")
        t1.readStar(f1, tableName='local_motion_model')
        self.assertEqual(len(t1), 36, "Number of rows check failed!")
        self._checkColumns(t1, ['rlnMotionModelCoeffsIdx',
                                'rlnMotionModelCoeff'])
        coeffs = [int(v)
                  for v in t1.getColumnValues('rlnMotionModelCoeffsIdx')]
        self.assertEqual(coeffs, list(range(36)),
                         "rlnMotionModelCoeffsIdx check failed")
def _read_estimated_box_size(job_dir):
    """Return the MEAN size (px) from the crYOLO size distribution summary."""
    pattern = os.path.join(job_dir, "output/DISTR",
                           'size_distribution_summary*.txt')
    summaries = glob(pattern)
    if not summaries:
        raise RuntimeError(
            "No size distribution summary found matching %s" % pattern)

    with open(summaries[0]) as f:
        for line in f:
            if line.startswith("MEAN,"):
                return int(line.split(",")[-1])

    raise RuntimeError("No 'MEAN,' entry found in %s" % summaries[0])


def _suggest_binned_box_size(box_size, angpix):
    """Return a downscaled box size (px), or box_size if none is suitable."""
    # from relion_it.py script
    # Authors: Sjors H.W. Scheres, Takanori Nakane & Colin M. Palmer
    for box in (48, 64, 96, 128, 160, 192, 256, 288, 300, 320, 360,
                384, 400, 420, 450, 480, 512, 640, 768, 896, 1024):
        # Don't go larger than the original box
        if box > box_size:
            return box_size
        # If Nyquist freq. is better than 7.5 A, use this
        # downscaled box, otherwise continue to next size up
        if angpix * box_size / box < 3.75:
            return box
    # No candidate qualified (very large box and/or pixel size):
    # keep the original box instead of leaving the value undefined
    return box_size


def run_job(args):
    """Run crYOLO picking on the input micrographs and write Relion outputs.

    Steps performed:
      1. write 'config_cryolo.json' into the job directory;
      2. launch the crYOLO prediction command through the shell;
      3. move the coordinate files next to the job and list them in
         'autopick.star';
      4. write 'job_pipeline.star' and register the output node in .Nodes/;
      5. unless 'output_for_relion.star' already exists, write suggested
         diameter / box sizes to it and create '.gui_manualpickjob.star'.

    Args:
        args: object providing the attributes in_mics, out_dir, threshold,
            box_size, model, filament, denoise, gpu and threads; when
            filament is true, box_distance and minimum_number_boxes are
            read as well.

    Raises:
        Exception: if the crYOLO command exits with a non-zero return code.
        RuntimeError: if the crYOLO size distribution summary is missing or
            has no 'MEAN,' entry.
    """
    start = time.time()
    in_mics = args.in_mics
    job_dir = args.out_dir
    thresh = args.threshold
    box_size = args.box_size
    distance = 0
    model = args.model
    filament = args.filament
    if filament:
        box_dist = args.box_distance
        min_boxes = args.minimum_number_boxes
    denoise = args.denoise
    gpus = args.gpu
    threads = args.threads

    if SCRATCH_DIR is not None:
        filtered_dir = os.path.join(SCRATCH_DIR, "filtered_tmp")
    else:
        filtered_dir = "%s/filtered_tmp/" % job_dir

    if model == "None":
        model = CRYOLO_GEN_MODEL if not denoise else CRYOLO_GEN_JANNI_MODEL
    else:
        model = os.path.abspath(model)

    # Making a cryolo config file
    json_dict = {
        "model": {
            "architecture": "PhosaurusNet",
            "input_size": 1024,
            "max_box_per_image": 600,
            "filter": [0.1, filtered_dir]
        },
        "other": {
            "log_path": "%s/logs/" % job_dir
        }
    }

    if box_size:  # is not 0
        json_dict["model"]["anchors"] = [int(box_size), int(box_size)]
        if not filament:
            distance = int(box_size / 2)  # use half the box_size

    if denoise:
        json_dict["model"]["filter"] = [
            CRYOLO_JANNI_MODEL, 24, 3, filtered_dir
        ]

    if DEBUG:
        print("Using following config: ", json_dict)

    config_fn = os.path.join(job_dir, "config_cryolo.json")
    with open(config_fn, "w") as json_file:
        json.dump(json_dict, json_file, indent=4)

    # Reading the micrographs star file from Relion
    mictable = Table(fileName=in_mics, tableName='micrographs')
    mic_fns = mictable.getColumnValues("rlnMicrographName")

    # Launching cryolo
    args_dict = {
        '--conf': config_fn,
        '--input': in_mics,
        '--output': os.path.join(job_dir, 'output'),
        '--weights': model,
        '--gpu': gpus.replace(',', ' '),
        '--threshold': thresh,
        '--distance': distance,
        '--cleanup': "",
        '--skip': "",
        '--write_empty': "",
        '--num_cpu': -1 if threads == 1 else threads
    }

    if filament:
        args_dict.update({
            '--filament': "",
            '--box_distance': box_dist,
            '--minimum_number_boxes': min_boxes,
            '--directional_method': 'PREDICTED'
        })
        args_dict.pop('--distance')

    # NOTE: the command is chained after CONDA_ENV with '&&' and executed
    # through the shell, so the argument values are interpolated unquoted
    # and must come from a trusted source.
    cmd = "%s && %s " % (CONDA_ENV, CRYOLO_PREDICT)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d"
                        % proc.returncode)

    # Moving output star files for Relion to use
    autopick_fn = os.path.join(job_dir, "autopick.star")
    table_coords = Table(
        columns=['rlnMicrographName', 'rlnMicrographCoordinates'])
    star_dir = "EMAN_HELIX_SEGMENTED" if filament else "STAR"
    ext = ".box" if filament else ".star"
    with open(autopick_fn, "w") as mics_star:
        for mic in mic_fns:
            mic_base = os.path.basename(mic)
            mic_root = os.path.splitext(mic_base)[0]
            mic_dir = os.path.dirname(mic)
            dir_parts = mic_dir.split("/")
            if len(dir_parts) > 1 and "job" in dir_parts[1]:
                # remove JobType/jobXXX
                mic_dir = "/".join(dir_parts[2:])
            os.makedirs(os.path.join(job_dir, mic_dir), exist_ok=True)
            coord_cryolo = os.path.join(job_dir, "output", star_dir,
                                        mic_root + ext)
            coord_relion = os.path.join(job_dir, mic_dir,
                                        mic_root + "_autopick" + ext)
            # Micrographs without a coordinate file are left out of the table
            if os.path.exists(coord_cryolo):
                os.rename(coord_cryolo, coord_relion)
                table_coords.addRow(mic, coord_relion)
                if DEBUG:
                    print("Moved %s to %s" % (coord_cryolo, coord_relion))
        table_coords.writeStar(mics_star, tableName='coordinate_files')

    # Required output to mini pipeline job_pipeline.star file
    pipeline_fn = os.path.join(job_dir, "job_pipeline.star")
    table_gen = Table(columns=['rlnPipeLineJobCounter'])
    table_gen.addRow(2)
    table_proc = Table(columns=[
        'rlnPipeLineProcessName', 'rlnPipeLineProcessAlias',
        'rlnPipeLineProcessTypeLabel', 'rlnPipeLineProcessStatusLabel'
    ])
    table_proc.addRow(job_dir, 'None', 'relion.external', 'Running')
    table_nodes = Table(
        columns=['rlnPipeLineNodeName', 'rlnPipeLineNodeTypeLabel'])
    table_nodes.addRow(in_mics, "MicrographsData.star.relion")
    table_nodes.addRow(autopick_fn,
                       "MicrographsCoords.star.relion.autopick")
    table_input = Table(
        columns=['rlnPipeLineEdgeFromNode', 'rlnPipeLineEdgeProcess'])
    table_input.addRow(in_mics, job_dir)
    table_output = Table(
        columns=['rlnPipeLineEdgeProcess', 'rlnPipeLineEdgeToNode'])
    table_output.addRow(job_dir, autopick_fn)

    with open(pipeline_fn, "w") as f:
        table_gen.writeStar(f, tableName="pipeline_general", singleRow=True)
        table_proc.writeStar(f, tableName="pipeline_processes")
        table_nodes.writeStar(f, tableName="pipeline_nodes")
        table_input.writeStar(f, tableName="pipeline_input_edges")
        table_output.writeStar(f, tableName="pipeline_output_edges")

    # Register output nodes in .Nodes/
    nodes_dir = os.path.join(".Nodes", "MicrographsCoords", job_dir)
    os.makedirs(nodes_dir, exist_ok=True)
    # An empty file is enough to register the node
    open(os.path.join(nodes_dir, "autopick.star"), "w").close()

    outputFn = os.path.join(job_dir, "output_for_relion.star")
    if not os.path.exists(outputFn):
        # get estimated box size
        estim_sizepx = _read_estimated_box_size(job_dir)
        print("\ncrYOLO estimated box size %d px" % estim_sizepx)

        # calculate diameter, original (boxSize) and downsampled (boxSizeSmall) box
        optics = Table(fileName=in_mics, tableName='optics')
        angpix = float(optics[0].rlnMicrographPixelSize)

        if filament:
            # box size = 1.5x tube diam
            diam = 0.66 * box_size
        else:
            # use + 20% for diameter
            diam = math.ceil(estim_sizepx * angpix * 1.2)

        # use +30% for box size, make it even
        boxSize = 1.3 * estim_sizepx
        boxSize = math.ceil(boxSize / 2.) * 2

        boxSizeSmall = _suggest_binned_box_size(boxSize, angpix)

        print(
            "\nSuggested parameters:\n\tDiameter (A): %d\n\tBox size (px): %d\n"
            "\tBox size binned (px): %d" % (diam, boxSize, boxSizeSmall))

        # output all params into a star file
        tableCryolo = Table(columns=[
            'rlnParticleDiameter', 'rlnOriginalImageSize', 'rlnImageSize'
        ])
        tableCryolo.addRow(diam, boxSize, boxSizeSmall)
        with open(outputFn, "w") as f:
            tableCryolo.writeStar(f, tableName='picker')

        # create .gui_manualpickjob.star for easy display
        starString = """
# version 30001

data_job

_rlnJobTypeLabel relion.manualpick%s
_rlnJobIsContinue 0
_rlnJobIsTomo 0

# version 30001

data_joboptions_values

loop_
_rlnJobOptionVariable #1
_rlnJobOptionValue #2
angpix %f
black_val 0
blue_value 0
color_label rlnParticleSelectZScore
diameter %d
do_color No
do_fom_threshold No
do_queue No
do_startend No
fn_color ""
fn_in ""
highpass -1
lowpass 20
micscale 0.2
min_dedicated 1
minimum_pick_fom 0
other_args ""
qsub qsub
qsubscript /public/EM/RELION/relion/bin/relion_qsub.csh
queuename openmpi
red_value 2
sigma_contrast 3
white_val 0
"""
        label = ".helical" if filament else ""
        with open(".gui_manualpickjob.star", "w") as f:
            f.write(starString % (label, angpix, diam))

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n" %
          (diff // 3600, diff // 60 % 60, diff % 60))
def _getInputMicFns(self, numPass):
    """ Return the 'rlnMicrographName' values listed in the input star
    file of the given pass.
    """
    inputStar = self.getInputFilename(numPass)
    inputTable = Table(fileName=inputStar, tableName='')
    return inputTable.getColumnValues('rlnMicrographName')