def cluster_traject(resname, trajToDistribute, columnToChoose,
                    distance_contact, clusterThreshold, path_to_cluster,
                    output_path, mapping_out, epsilon=0.5,
                    report_basename="report", condition="min",
                    metricweights="linear", nclusters=5):
    """
    Cluster the contents of one path and write spawning structures from it.

    A contact-map accumulative clustering (Jaccard similarity, constant
    threshold) is run over ``path_to_cluster``, an epsilon degeneracy
    calculation is applied to the resulting clusters, the spawning initial
    structures are written for epoch 0 and the resulting processor mapping
    is stored in ``processorMapping.txt`` inside ``mapping_out``.

    :param resname: Forwarded to the clustering object as ``resname``
    :param trajToDistribute: Second argument of the degeneracy calculation
        (presumably the number of trajectories to distribute -- confirm
        against the spawning calculator)
    :param columnToChoose: Forwarded to the clustering object as
        ``columnOfReportFile``
    :param distance_contact: Forwarded to the clustering object as
        ``contactThresholdDistance``
    :param clusterThreshold: Value of the constant threshold calculator
    :param path_to_cluster: Single path handed to the clustering
    :param output_path: Folder used both to build the output path constants
        and as their temporary folder
    :param mapping_out: Folder passed to the processor mapping writer
    :param epsilon: Stored as ``epsilon`` in the spawning parameters
    :param report_basename: Report file base name, used by both the
        clustering and the spawning parameters
    :param condition: Stored as ``condition`` in the spawning parameters
    :param metricweights: Stored as ``metricWeights`` in the spawning
        parameters
    :param nclusters: Stored as ``nclusters`` in the spawning parameters
    """
    # The temporary folder is forced to be the output folder itself, and the
    # constants that derive from it are rebuilt before creating it.
    paths = constants.OutputPathConstants(output_path)
    paths.tmpFolder = output_path
    paths.buildTmpFolderConstants(paths.tmpFolder)
    utilities.makeFolder(paths.tmpFolder)

    contact_map_clustering = clustering.ContactMapAccumulativeClustering(
        thresholdcalculator.ThresholdCalculatorConstant(
            value=clusterThreshold),
        clustering.CMSimilarityEvaluator("Jaccard"),
        resname=resname,
        reportBaseFilename=report_basename,
        columnOfReportFile=columnToChoose,
        contactThresholdDistance=distance_contact,
        altSelection=True)
    contact_map_clustering.cluster([path_to_cluster], ignoreFirstRow=True)

    params = spawning.SpawningParams()
    for attribute, value in (("reportFilename", report_basename),
                             ("epsilon", epsilon),
                             ("nclusters", nclusters),
                             ("metricWeights", metricweights),
                             ("condition", condition)):
        setattr(params, attribute, value)

    calculator = spawning.EpsilonDegeneracyCalculator(
        params, densitycalculator.NullDensityCalculator())
    cluster_degeneracy = calculator.calculate(
        contact_map_clustering.clusters, trajToDistribute, params)
    calculator.log()

    # Only the processor mapping (second returned item) is needed here
    written = calculator.writeSpawningInitialStructures(
        paths, cluster_degeneracy, contact_map_clustering, 0)
    _, processor_mapping = written
    utilities.writeProcessorMappingToDisk(
        mapping_out, "processorMapping.txt", processor_mapping)
def main(jsonParams, clusteringHook=None):
    """
    Main body of the adaptive sampling program.

    Validates and loads the control file, builds the spawning calculator and
    the simulation runner, prepares the output folders and the topologies
    and then loops over the epochs. In every epoch the simulation is run
    (unless in debug mode), the epoch trajectories are clustered, the
    spawning degeneracy is calculated and, except after the last epoch, the
    initial structures and processor mapping for the next epoch are
    prepared. Sections guarded by ``processManager.isMaster()`` run in a
    single process; ``processManager.barrier()`` calls separate them from
    the code that every process executes.

    :param jsonParams: A string with the name of the control file to use
    :type jsonParams: str
    :param clusteringHook: Optional callable; when not None it is called
        after the clustering of each epoch, inside the master-only section,
        as ``clusteringHook(clusteringMethod, outputPathConstants,
        simulationRunner, i + 1)`` where ``i`` is the epoch index
    :type clusteringHook: callable or None
    :raises InitialStructuresError: If no initial structures are found or if
        there are more initial structures than working processors
    """
    controlFileValidator.validate(jsonParams)
    generalParams, spawningBlock, simulationrunnerBlock, clusteringBlock = loadParams(
        jsonParams)
    spawningAlgorithmBuilder = spawning.SpawningAlgorithmBuilder()
    spawningCalculator = spawningAlgorithmBuilder.build(spawningBlock)
    runnerbuilder = simulationrunner.RunnerBuilder()
    simulationRunner = runnerbuilder.build(simulationrunnerBlock)
    # General options: restart defaults to True, debug and writeAll to False
    restart = generalParams.get(blockNames.GeneralParams.restart, True)
    debug = generalParams.get(blockNames.GeneralParams.debug, False)
    outputPath = generalParams[blockNames.GeneralParams.outputPath]
    initialStructuresWildcard = generalParams[
        blockNames.GeneralParams.initialStructures]
    writeAll = generalParams.get(blockNames.GeneralParams.writeAllClustering,
                                 False)
    nativeStructure = generalParams.get(
        blockNames.GeneralParams.nativeStructure, '')
    resname = clusteringBlock[blockNames.ClusteringTypes.params].get(
        blockNames.ClusteringTypes.ligandResname)
    if resname is None:
        # check if resname is provided in the simulation block
        resname = simulationRunner.getResname()
    initialStructures = expandInitialStructuresWildcard(
        initialStructuresWildcard)
    if not initialStructures:
        raise InitialStructuresError("No initial structures found!!!")
    if len(initialStructures) > simulationRunner.getWorkingProcessors():
        raise InitialStructuresError(
            "Error: More initial structures than Working Processors found!!!")
    if resname is not None:
        checkSymmetryDict(clusteringBlock, initialStructures, resname)
    outputPathConstants = constants.OutputPathConstants(outputPath)
    if not debug:
        # outside debug mode, utilities.cleanup is registered to run on the
        # temporary folder at interpreter exit
        atexit.register(utilities.cleanup, outputPathConstants.tmpFolder)
    simulationRunner.unifyReportNames(
        spawningCalculator.parameters.reportFilename)
    utilities.makeFolder(outputPath)
    utilities.makeFolder(outputPathConstants.tmpFolder)
    utilities.makeFolder(outputPathConstants.topologies)
    processManager = ProcessesManager(outputPath,
                                      simulationRunner.getNumReplicas())
    firstRun = findFirstRun(outputPath,
                            outputPathConstants.clusteringOutputObject,
                            simulationRunner, restart)
    if processManager.isMaster():
        printRunInfo(restart, debug, simulationRunner, spawningCalculator,
                     clusteringBlock, outputPath, initialStructuresWildcard)
        saveInitialControlFile(jsonParams,
                               outputPathConstants.originalControlFile)
    processManager.barrier()
    # once the replicas are properly synchronized there is no need for the
    # process files, and erasing them allows us to restart simulations
    cleanProcessesFiles(processManager.syncFolder)
    topologies = utilities.Topology(outputPathConstants.topologies)
    if restart and firstRun is not None:
        # Restart path: reuse the topology files already in the topologies
        # folder, ordered with utilities.getTrajNum as the sort key
        topology_files = glob.glob(
            os.path.join(outputPathConstants.topologies, "topology*.pdb"))
        topology_files.sort(key=utilities.getTrajNum)
        topologies.setTopologies(topology_files)
        if firstRun == 0:
            createMappingForFirstEpoch(initialStructures, topologies,
                                       simulationRunner.getWorkingProcessors())
            clusteringMethod, initialStructuresAsString = buildNewClusteringAndWriteInitialStructuresInNewSimulation(
                debug, jsonParams, outputPathConstants, clusteringBlock,
                spawningCalculator.parameters, initialStructures,
                simulationRunner, processManager)
        else:
            clusteringMethod, initialStructuresAsString = buildNewClusteringAndWriteInitialStructuresInRestart(
                firstRun, outputPathConstants, clusteringBlock,
                spawningCalculator.parameters, spawningCalculator,
                simulationRunner, topologies, processManager)
        if processManager.isMaster():
            checkMetricExitConditionMultipleTrajsinRestart(
                firstRun, outputPathConstants.epochOutputPathTempletized,
                simulationRunner)
        processManager.barrier()
    if firstRun is None or not restart:
        # New simulation path: topologies come from the initial structures
        topologies.setTopologies(initialStructures)
        if processManager.isMaster():
            if not debug:
                cleanPreviousSimulation(outputPath,
                                        outputPathConstants.allTrajsPath)
            writeTopologyFiles(initialStructures,
                               outputPathConstants.topologies)
        processManager.barrier()
        firstRun = 0  # if restart false, but there were previous simulations
        if simulationRunner.parameters.runEquilibration:
            initialStructures = simulationRunner.equilibrate(
                initialStructures, outputPathConstants,
                spawningCalculator.parameters.reportFilename, outputPath,
                resname, processManager, topologies)
            # write the equilibration structures for each replica
            processManager.writeEquilibrationStructures(
                outputPathConstants.tmpFolder, initialStructures)
            if processManager.isMaster() and simulationRunner.parameters.constraints:
                # write the new constraints for synchronization
                utilities.writeNewConstraints(
                    outputPathConstants.topologies, "new_constraints.txt",
                    simulationRunner.parameters.constraints)
            processManager.barrier()
            if not processManager.isMaster() and simulationRunner.parameters.constraints:
                # non-master processes load the constraints file written by
                # the master before the barrier
                simulationRunner.parameters.constraints = utilities.readConstraints(
                    outputPathConstants.topologies, "new_constraints.txt")
            # read all the equilibration structures
            initialStructures = processManager.readEquilibrationStructures(
                outputPathConstants.tmpFolder)
            topologies.setTopologies(initialStructures,
                                     cleanFiles=processManager.isMaster())
            if processManager.isMaster():
                writeTopologyFiles(initialStructures,
                                   outputPathConstants.topologies)
            # ensure that topologies are written
            processManager.barrier()
            topology_files = glob.glob(
                os.path.join(outputPathConstants.topologies, "topology*.pdb"))
            topology_files.sort(key=utilities.getTrajNum)
            topologies.setTopologies(topology_files, cleanFiles=False)
        createMappingForFirstEpoch(initialStructures, topologies,
                                   simulationRunner.getWorkingProcessors())
        clusteringMethod, initialStructuresAsString = buildNewClusteringAndWriteInitialStructuresInNewSimulation(
            debug, jsonParams, outputPathConstants, clusteringBlock,
            spawningCalculator.parameters, initialStructures,
            simulationRunner, processManager)
    if processManager.isMaster():
        repeat, numSteps = simulationRunner.getClusteringInfo()
        clusteringMethod.updateRepeatParameters(repeat, numSteps)
        clusteringMethod.setProcessors(simulationRunner.getWorkingProcessors())
    if simulationRunner.parameters.modeMovingBox is not None and simulationRunner.parameters.boxCenter is None:
        # a moving box mode is set but no box center was given, so one is
        # selected from the initial structures
        simulationRunner.parameters.boxCenter = simulationRunner.selectInitialBoxCenter(
            initialStructuresAsString, resname)
    # Epoch loop, starting at firstRun (0 for a new simulation)
    for i in range(firstRun, simulationRunner.parameters.iterations):
        if processManager.isMaster():
            utilities.print_unbuffered("Iteration", i)
            outputDir = outputPathConstants.epochOutputPathTempletized % i
            utilities.makeFolder(outputDir)
            simulationRunner.writeMappingToDisk(
                outputPathConstants.epochOutputPathTempletized % i)
            topologies.writeMappingToDisk(
                outputPathConstants.epochOutputPathTempletized % i, i)
            if i == 0:
                # write the object to file at the start of the first epoch, so
                # the topologies can always be loaded
                topologies.writeTopologyObject()
        processManager.barrier()
        if processManager.isMaster():
            utilities.print_unbuffered("Production run...")
        if not debug:
            # in debug mode the simulation itself is skipped
            simulationRunner.runSimulation(
                i, outputPathConstants, initialStructuresAsString, topologies,
                spawningCalculator.parameters.reportFilename, processManager)
        processManager.barrier()
        if processManager.isMaster():
            if simulationRunner.parameters.postprocessing:
                simulationRunner.processTrajectories(
                    outputPathConstants.epochOutputPathTempletized % i,
                    topologies, i)
            utilities.print_unbuffered("Clustering...")
            startTime = time.time()
            clusterEpochTrajs(clusteringMethod, i,
                              outputPathConstants.epochOutputPathTempletized,
                              topologies, outputPathConstants)
            endTime = time.time()
            utilities.print_unbuffered("Clustering ligand: %s sec" %
                                       (endTime - startTime))
            if clusteringHook is not None:
                clusteringHook(clusteringMethod, outputPathConstants,
                               simulationRunner, i + 1)
            clustersList = clusteringMethod.getClusterListForSpawning()
            # one True entry per element of clusteringMethod; replaced below
            # when a box or metric filter is applied
            clustersFiltered = [True for _ in clusteringMethod]
        if simulationRunner.parameters.modeMovingBox is not None:
            # the next box is computed in every process, the cluster
            # filtering only in the master
            simulationRunner.getNextIterationBox(
                outputPathConstants.epochOutputPathTempletized % i, resname,
                topologies, i)
            if processManager.isMaster():
                clustersList, clustersFiltered = clusteringMethod.filterClustersAccordingToBox(
                    simulationRunner.parameters)
        if processManager.isMaster():
            if spawningCalculator.parameters.filterByMetric:
                clustersList, clustersFiltered = clusteringMethod.filterClustersAccordingToMetric(
                    clustersFiltered,
                    spawningCalculator.parameters.filter_value,
                    spawningCalculator.parameters.condition,
                    spawningCalculator.parameters.filter_col)
            degeneracyOfRepresentatives = spawningCalculator.calculate(
                clustersList, simulationRunner.getWorkingProcessors(), i,
                outputPathConstants=outputPathConstants)
            spawningCalculator.log()
            # this method only works with MSM-based spawning methods,
            # creating a plot of the stationary distribution and the PMF, for
            # the rest of methods it does nothing
            spawningCalculator.createPlots(outputPathConstants, i,
                                           clusteringMethod)
            if degeneracyOfRepresentatives is not None:
                if simulationRunner.parameters.modeMovingBox is not None or spawningCalculator.parameters.filterByMetric:
                    degeneracyOfRepresentatives = mergeFilteredClustersAccordingToBox(
                        degeneracyOfRepresentatives, clustersFiltered)
                utilities.print_unbuffered("Degeneracy",
                                           degeneracyOfRepresentatives)
                # after merging, there must be one degeneracy value per
                # element of clusteringMethod
                assert len(degeneracyOfRepresentatives) == len(
                    clusteringMethod)
            else:
                # When using null or independent spawning the calculate method returns None
                assert spawningCalculator.type in spawningTypes.SPAWNING_NO_DEGENERACY_TYPES, "calculate returned None with spawning type %s" % spawningTypes.SPAWNING_TYPE_TO_STRING_DICTIONARY[
                    spawningCalculator.type]
            clusteringMethod.writeOutput(
                outputPathConstants.clusteringOutputDir % i,
                degeneracyOfRepresentatives,
                outputPathConstants.clusteringOutputObject % i, writeAll)
            simulationRunner.cleanCheckpointFiles(
                outputPathConstants.epochOutputPathTempletized % i)
            if i > 0:
                # Remove old clustering object, since we already have a newer one
                try:
                    os.remove(outputPathConstants.clusteringOutputObject %
                              (i - 1))
                except OSError:
                    # In case of restart
                    pass
        # Prepare for next pele iteration
        if i != simulationRunner.parameters.iterations - 1:
            # Differentiate between null spawning and the rest of spawning
            # methods
            if spawningCalculator.shouldWriteStructures():
                if processManager.isMaster():
                    _, procMapping = spawningCalculator.writeSpawningInitialStructures(
                        outputPathConstants,
                        degeneracyOfRepresentatives,
                        clusteringMethod,
                        i + 1,
                        topologies=topologies)
                    utilities.writeProcessorMappingToDisk(
                        outputPathConstants.tmpFolder, "processMapping.txt",
                        procMapping)
                processManager.barrier()
                if not processManager.isMaster():
                    # the other processes read the mapping that the master
                    # wrote before the barrier
                    procMapping = utilities.readProcessorMappingFromDisk(
                        outputPathConstants.tmpFolder, "processMapping.txt")
                simulationRunner.updateMappingProcessors(procMapping)
                topologies.mapEpochTopologies(i + 1, procMapping)
                initialStructuresAsString = simulationRunner.createMultipleComplexesFilenames(
                    simulationRunner.getWorkingProcessors(),
                    outputPathConstants.tmpInitialStructuresTemplate, i + 1)
        if processManager.isMaster():
            topologies.writeTopologyObject()
            if clusteringMethod.symmetries and nativeStructure:
                fixReportsSymmetry(
                    outputPathConstants.epochOutputPathTempletized % i,
                    resname, nativeStructure, clusteringMethod.symmetries,
                    topologies)
            # check exit condition, if defined
            if simulationRunner.hasExitCondition():
                if simulationRunner.checkExitCondition(
                        clusteringMethod,
                        outputPathConstants.epochOutputPathTempletized % i):
                    utilities.print_unbuffered(
                        "Simulation exit condition met at iteration %d, stopping"
                        % i)
                    # send a signal to all possible adaptivePELE copies to stop
                    for pid in processManager.lockInfo:
                        if pid != processManager.pid:
                            os.kill(pid, signal.SIGTERM)
                    # the master leaves the loop here without reaching the
                    # barrier at the end of the iteration
                    break
                else:
                    utilities.print_unbuffered(
                        "Simulation exit condition not met at iteration %d, continuing..."
                        % i)
        processManager.barrier()