Example #1
def multipleCore(inputFile, outputDir, skipHDF5):

    from importlib import import_module
    from os import makedirs
    from os.path import join

    import h5py
    import numpy as np

    from mpi4py import MPI
    from geobipy.src.base import MPI as myMPI
    # Names such as customFunctions, fileIO, Initialize, Results, LineResults,
    # masterTask and workerTask are assumed to be imported at module level in
    # the full geobipy script this example is taken from.

    world = MPI.COMM_WORLD
    myMPI.rankPrint(world, 'Running EMinv1D_MCMC')
    myMPI.rankPrint(world, 'Using user input file {}'.format(inputFile))
    rank = world.rank
    nRanks = world.size
    masterRank = rank == 0

    # Start keeping track of time.
    t0 = MPI.Wtime()
    t1 = t0

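    # Import the user input file as a Python module so its parameters are accessible as attributes.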
    UP = import_module(inputFile, package=None)

    # Make data and system filenames lists of str.
    if isinstance(UP.dataFilename, str):
        UP.dataFilename = [UP.dataFilename]
    if isinstance(UP.systemFilename, str):
        UP.systemFilename = [UP.systemFilename]

    # Everyone needs the system classes read in early.
    Dataset = eval(customFunctions.safeEval(UP.dataInit))
    Dataset.readSystemFile(UP.systemFilename)

    # Get the number of points in the file.
    if masterRank:
        nPoints = Dataset._readNpoints(UP.dataFilename)
        assert nRanks - 1 <= nPoints, (
            'Do not ask for more cores than you have data points! Cores:nData {}:{}'
            .format(nRanks, nPoints))

    # Create a communicator containing only the master rank.
    allGroup = world.Get_group()
    masterGroup = allGroup.Incl([0])
    masterComm = world.Create(masterGroup)

    # Create a parallel RNG on each rank with a different seed.
    prng = myMPI.getParallelPrng(world, MPI.Wtime)

    myMPI.rankPrint(world,
                    'Creating HDF5 files, this may take a few minutes...')
    myMPI.rankPrint(
        world,
        'Files are being created for data files {} and system files {}'.format(
            UP.dataFilename, UP.systemFilename))
    ### Only do this using the Master subcommunicator!
    # Here we initialize the HDF5 files.
    if (masterComm != MPI.COMM_NULL):

        # Make sure the results folders exist
        try:
            makedirs(outputDir)
        except OSError:
            pass

        # Prepare the dataset so that we can read a point at a time.
        Dataset._initLineByLineRead(UP.dataFilename, UP.systemFilename)
        # Get a datapoint from the file.
        DataPoint = Dataset._readSingleDatapoint()

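        # Close the data files before re-reading the raw columns below; they are re-opened afterwards.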
        Dataset._closeDatafiles()

        # While preparing the file, we need access to the line numbers and fiducials in the data file
        tmp = fileIO.read_columns(UP.dataFilename[0],
                                  Dataset._indicesForFile[0][:2], 1, nPoints)

        Dataset._openDatafiles(UP.dataFilename)

        # Get the line numbers in the data
        lineNumbers = np.unique(tmp[:, 0])
        lineNumbers.sort()
        nLines = lineNumbers.size
        fiducials = tmp[:, 1]

        # Read in the user parameters
        paras = UP.userParameters(DataPoint)

        # Check the parameters
        paras.check(DataPoint)

        # Initialize the inversion to obtain the sizes of everything
        [paras, Mod, D, prior, posterior, PhiD] = Initialize(paras,
                                                             DataPoint,
                                                             prng=prng)

        # Create the results template
        Res = Results(D,
                      Mod,
                      save=paras.save,
                      plot=paras.plot,
                      savePNG=paras.savePNG,
                      nMarkovChains=paras.nMarkovChains,
                      plotEvery=paras.plotEvery,
                      parameterDisplayLimits=paras.parameterDisplayLimits,
                      reciprocateParameters=paras.reciprocateParameters)

        # For each line. Get the fiducials, and create a HDF5 for the Line results.
        # A line results file needs an initialized Results class for a single data point.
        for line in lineNumbers:
            fiducialsForLine = np.where(tmp[:, 0] == line)[0]
            nFids = fiducialsForLine.size
            # Create a filename for the current line number
            fName = join(outputDir, '{}.h5'.format(line))
            # Open a HDF5 file in parallel mode.
            with h5py.File(fName, 'w', driver='mpio', comm=masterComm) as f:
                LR = LineResults()
                LR.createHdf(f, tmp[fiducialsForLine, 1], Res)
            myMPI.rankPrint(
                world,
                'Time to create the line with {} data points: {:.3f} s'.format(
                    nFids,
                    MPI.Wtime() - t0))
            t0 = MPI.Wtime()

        myMPI.print('Initialized Results for writing.')

    # Everyone needs the line numbers in order to open the results files collectively.
    if masterRank:
        DataPointType = DataPoint.hdfName()
    else:
        lineNumbers = None
        DataPointType = None
    lineNumbers = myMPI.Bcast(lineNumbers, world)
    nLines = lineNumbers.size

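    # Broadcast the data point class name so that every rank can instantiate its own data point.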
    DataPointType = world.bcast(DataPointType)

    # Open the files collectively
    LR = [None] * nLines
    for i, line in enumerate(lineNumbers):
        fName = join(outputDir, '{}.h5'.format(line))
        LR[i] = LineResults(fName,
                            hdfFile=h5py.File(fName,
                                              'a',
                                              driver='mpio',
                                              comm=world))

    world.barrier()
    myMPI.rankPrint(world,
                    'Files Created in {:.3f} s'.format(MPI.Wtime() - t1))
    t0 = MPI.Wtime()

    # Carry out the master-worker tasks
    if (world.rank == 0):
        masterTask(Dataset, world)
    else:
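        # Instantiate an empty data point of the broadcast class on this worker.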
        DataPoint = eval(customFunctions.safeEval(DataPointType))
        workerTask(DataPoint, UP, prng, world, lineNumbers, LR)

    world.barrier()
    # Close all the files. Must be collective.
    for i in range(nLines):
        LR[i].close()

    if masterRank:
        Dataset._closeDatafiles()
Example #2
def multipleCore(inputFile, outputDir, skipHDF5):

    from importlib import import_module
    from os import makedirs
    from os.path import join

    import h5py
    import numpy as np

    from mpi4py import MPI
    from geobipy.src.base import MPI as myMPI
    # Initialize, Results, LineResults, masterTask and workerTask are assumed to
    # be imported at module level in the full geobipy script this example is
    # taken from.

    world = MPI.COMM_WORLD
    myMPI.rankPrint(world, 'Running EMinv1D_MCMC')

    UP = import_module(inputFile, package=None)

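    # Instantiate the dataset object defined by the dataInit string in the user input file.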
    AllData = eval(UP.dataInit)
    # Initialize the data object on master
    if (world.rank == 0):
        AllData.read(UP.dataFname, UP.sysFname)

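    # Broadcast the dataset to every rank; the master re-uses its original copy.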
    myData = AllData.Bcast(world)
    if (world.rank == 0): myData = AllData

    myMPI.rankPrint(world, 'Data Broadcast')

    assert world.size <= myData.N + 1, (
        'Do not ask for more cores than you have data points! Cores:nData {}:{}'
        .format(world.size, myData.N))

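    # Create a communicator containing only the master rank.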
    allGroup = world.Get_group()
    masterGroup = allGroup.Incl([0])
    masterComm = world.Create(masterGroup)

    t0 = MPI.Wtime()
    t1 = t0

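    # Create a parallel RNG on each rank with a different seed.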
    prng = myMPI.getParallelPrng(world, MPI.Wtime)

    # Make sure the line results folders exist
    try:
        makedirs(outputDir)
    except OSError:
        pass

    # Get a datapoint; it doesn't matter which one.
    DataPoint = myData.getDataPoint(0)
    # Read in the user parameters
    paras = UP.userParameters(DataPoint)
    # Check the parameters
    paras.check(DataPoint)
    # Initialize the inversion to obtain the sizes of everything
    [paras, Mod, D, prior, posterior, PhiD] = Initialize(paras,
                                                         DataPoint,
                                                         prng=prng)
    # Create the results template
    Res = Results(paras.save, paras.plot, paras.savePNG, paras, D, Mod)

    world.barrier()
    myMPI.rankPrint(world, 'Initialized Results')

    # Get the line numbers in the data
    lines = np.unique(myData.line)
    lines.sort()
    nLines = lines.size

    world.barrier()

    myMPI.rankPrint(world,
                    'Creating HDF5 files, this may take a few minutes...')
    ### Only do this using the subcommunicator!
    if (masterComm != MPI.COMM_NULL):
        for i in range(nLines):
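            # Indices of the data points that fall on this line.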
            j = np.where(myData.line == lines[i])[0]
            fName = join(outputDir, str(lines[i]) + '.h5')
            with h5py.File(fName, 'w', driver='mpio', comm=masterComm) as f:
                LR = LineResults()
                LR.createHdf(f, myData.id[j], Res)
            myMPI.rankPrint(
                world,
                'Time to create the line with {} data points: {:.3f} s'.format(
                    j.size,
                    MPI.Wtime() - t0))
            t0 = MPI.Wtime()

    world.barrier()

    # Open the files collectively
    LR = [None] * nLines
    for i in range(nLines):
        fName = join(outputDir, str(lines[i]) + '.h5')
        LR[i] = LineResults(fName,
                            hdfFile=h5py.File(fName,
                                              'a',
                                              driver='mpio',
                                              comm=world))
        # myMPI.print("rank {} line {} iDs {}".format(world.rank, i, LR[i].iDs))

    world.barrier()
    myMPI.rankPrint(world,
                    'Files Created in {:.3f} s'.format(MPI.Wtime() - t1))
    t0 = MPI.Wtime()

    # Carry out the master-worker tasks
    if (world.rank == 0):
        masterTask(myData, world)
    else:
        workerTask(myData, UP, prng, world, LR)

    world.barrier()
    # Close all the files. Must be collective.
    for i in range(nLines):
        LR[i].close()
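
Both examples define the same multipleCore(inputFile, outputDir, skipHDF5) entry point and are meant to be launched under MPI (e.g. with mpirun). A minimal, hypothetical driver is sketched below; the module name my_driver_module and the command-line handling are illustrative assumptions, not part of geobipy's documented interface.

# driver.py -- illustrative sketch only.
# Run with e.g. `mpirun -np 4 python driver.py userInput.py results/`
import sys

# Hypothetical import: pull multipleCore from wherever the example above is defined.
from my_driver_module import multipleCore

if __name__ == '__main__':
    inputFile = sys.argv[1]   # user parameter file, e.g. userInput.py
    outputDir = sys.argv[2]   # directory that will hold the per-line HDF5 results
    multipleCore(inputFile, outputDir, skipHDF5=False)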