Example no. 1
def transpose(args):
    print("Loading survey...")
    survey_info = common.getdict(args.survey_file)

    print("Success!")
    with Pool(processes=12) as pool:
        pool.map(proconesurvey, survey_info)
Example no. 2
def statsrun(args):
    all_settings = common.getdict(args.settings)
    binsize = int(all_settings["binsize"])
    outputFile = all_settings["output_filename"]
    filename = all_settings["survey_filename"]
    chop = float(all_settings["chop"])
    if all_settings["model_override"] is not None:
        override = common.getdict(all_settings["model_override"])
    else:
        override = None

    if "many" in all_settings and all_settings["many"] == True:
        num_files = all_settings["num_files"]
        for x in range(num_files):
            singlerun(filename.format(x), outputFile.format(x), binsize, chop,
                      override)
    else:
        singlerun(filename, outputFile, binsize, chop, override)
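
For reference, here is a minimal sketch of the settings dictionary that statsrun reads through common.getdict, assuming getdict simply parses a JSON file. The key names come from the lookups above; every value, and the settings_example.json file name, is a made-up placeholder.

import json

# Hypothetical example only: key names mirror the lookups in statsrun above,
# values are placeholders.
example_settings = {
    "binsize": 5,
    "output_filename": "output/stats_{}.json",
    "survey_filename": "surveys/survey_{}.mil",
    "chop": 300.0,
    "model_override": None,   # or a path to another JSON file with model parameters
    "many": True,             # when True, the two filenames are .format()-ed with 0..num_files-1
    "num_files": 10
}

with open("settings_example.json", "w") as settingsfile:
    json.dump(example_settings, settingsfile, indent=4)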
Example no. 3
def statsrun(args):

    #When we have trouble with division by zero, it's easier to debug if execution
    #stops completely when we encounter one, instead of continuing with only a warning.

    np.seterr(all='raise')
    all_settings = common.getdict(args.settings)
    binsize = all_settings["binsize"]
    outputFile = all_settings["output_filename"]
    filename = all_settings["survey_filename"]
    chop = float(all_settings["chop"])
    if all_settings["model_override"] is not None:
        override = common.getdict(all_settings["model_override"])
    else:
        override = None

    if "many" in all_settings and all_settings["many"] == True:
        num_files = all_settings["num_files"]
        for x in range(num_files):
            singlerun(filename.format(x), outputFile.format(x), binsize, chop,
                      override)
    else:
        singlerun(filename, outputFile, binsize, chop, override)
Example no. 4
def transpose(args):
    print("Loading survey...")
    hubble_constant = 100
    fractional_error = 0.1
    use_dvs = False
    survey_info = common.getdict(args.survey_file)
    #print(survey_info)
    print("Success!")
    for survey in survey_info:
        outCF2String = ""
        print("Processing survey {}.".format(survey['name']))
        center = survey['center']
        rotationMatrix = np.matrix(survey['rot'])
        with open(survey['name'],'r') as csvFile:
            for line in csvFile:
                if line[0] == '#':
                    continue
                galaxy = common.MillenniumGalaxy(line)
                ontoGalaxy = np.array([galaxy.x-center[0],galaxy.y-center[1],galaxy.z-center[2]])
                #ontoGalaxy is the vector from the survey origin to the galaxy
                rotatedCoord = ontoGalaxy #* rotationMatrix
                x = rotatedCoord.item(0)
                y = rotatedCoord.item(1)
                z = rotatedCoord.item(2)
                rho = space.distance.euclidean(ontoGalaxy,[0,0,0])
                phi = math.degrees(math.acos(z/rho)) - 90
                theta = math.degrees(math.atan2(y,x))+ 180
                peculiarVel = np.dot(ontoGalaxy,[galaxy.velX,galaxy.velY,galaxy.velZ])/rho
                #posVec = ontoGalaxy/space.distance.euclidean(ontoGalaxy,(0,0,0))
                cf2row = [rho*hubble_constant+peculiarVel,#cz
                          rho,#distance (mpc/h)
                          peculiarVel,#peculiar velocity km/sec
                          rho*hubble_constant*0.2,#dv
                          theta,#longitude degrees - 0 - 360
                          phi]#latitude degrees - -90 - 90
                outCF2String = outCF2String + '{}  {}  {}  {}  {}  {}'.format(*cf2row)
                if use_dvs:
                    dvs = np.random.normal(peculiarVel,rho*hubble_constant*0.2,20)
                    for x in dvs:
                        outCF2String = outCF2String + '  {}'.format(x)
                outCF2String = outCF2String + '\n'
 
        with open(survey['name'] + '_cf2.txt', 'w') as cf2outfile:
            cf2outfile.write(outCF2String)
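
The coordinate conversion buried in the inner loop above is easier to read as a standalone helper. The sketch below repeats the same arithmetic (distance, latitude, longitude, and the line-of-sight peculiar velocity); cartesian_to_cf2 is a name invented here, not part of the original module, and the Hubble constant of 100 mirrors the hard-coded value above.

import math
import numpy as np

def cartesian_to_cf2(onto_galaxy, velocity, hubble_constant=100.0):
    """Sketch of the conversion used above: position and velocity relative to the
    survey center -> (cz, distance, peculiar velocity, longitude, latitude)."""
    rho = np.linalg.norm(onto_galaxy)                   # distance in Mpc/h
    x, y, z = onto_galaxy
    phi = math.degrees(math.acos(z / rho)) - 90         # latitude, -90 to 90 degrees
    theta = math.degrees(math.atan2(y, x)) + 180        # longitude, 0 to 360 degrees
    peculiar_vel = np.dot(onto_galaxy, velocity) / rho  # line-of-sight component, km/s
    cz = rho * hubble_constant + peculiar_vel           # observed redshift velocity
    return cz, rho, peculiar_vel, theta, phi

print(cartesian_to_cf2(np.array([10.0, 0.0, 0.0]), np.array([300.0, 0.0, 0.0])))
# -> (1300.0, 10.0, 300.0, 180.0, 0.0)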
Example no. 5
def main(args):
    """ Compute the velocity correlations on one or many galaxy surveys. 
    """
    np.seterr(divide='ignore',invalid='ignore')
    #Get setup information from the settings file
    settings = common.getdict(args.settings)
    if settings['num_files'] != 10000 and settings['use_npy']:
        print("Sorry! We can only handle 100x100 surveys. Try turning off the use_npy flag.")
        exit()
    numpoints =    settings['numpoints']
    dr =           settings['dr']
    min_r =        settings['min_r']
    orig_outfile = settings['output_file_name']
    step_type =    settings['step_type']
    infile =       settings['input_file']
    unitslist =    settings['binunits']
    maxd_master =  settings['max_distance']
    numpy =        settings['use_npy']
    use_tmp =      settings['use_tmp']
    if settings['many_squared']:
        distance_args_master = list(zip(dr,min_r,numpoints))
        file_schemes  = list(zip(infile,orig_outfile,settings['readable_name']))
        xintervals = [common.genBins(x[1],x[2],x[0],step_type) for x in distance_args_master]
        xs_master = [a[0] for a in xintervals]
        intervals_master = [a[1] for a in xintervals]
    else:
        #Everything is built around lists now, so we just build lists of length one!
        distance_args_master = [(dr,min_r,numpoints)]
        file_schemes = [(infile,orig_outfile,settings['readable_name'])]
        xs_master,intervals_master = common.genBins(min_r,numpoints,dr,step_type)
        xs_master = [xs_master]
        intervals_master = [intervals_master]
    if numpy:
        if args.override:
            print(args.override)
            indices = args.override.split(':')
            a = int(indices[0])
            b = int(indices[1])
            file_schemes = file_schemes[a:b]
        print(file_schemes)
    else:    
        infileindices = [x + settings['offset'] for x in range(settings['num_files'])]
    for rawInFile, outfile, readName in file_schemes:
        for units in unitslist:
            if units == 'km/s':
                
                xs = [[x * 100 for x in y] for y in xs_master]
                intervals = [[x * 100 for x in y] for y in intervals_master]
                distance_args = [(x[0]*100,x[1]*100,x[2]) for x in distance_args_master]
                maxd = maxd_master * 100
            else:
                xs = xs_master
                intervals = intervals_master
                distance_args = distance_args_master
                maxd = maxd_master
                
            if settings['many'] and not numpy:
                d = filemanagement(rawInFile,infileindices)
                i = d
            elif not numpy:
                galaxies = common.loadData(rawInFile,dataType='CF2')
                d = [np.array([(g.normx,
                                g.normy,
                                g.normz,
                                g.redx,
                                g.redy,
                                g.redz,
                                g.dv,
                                g.d,
                                g.v) for g in galaxies])]
                i = ['nothing']
            else:
                print("Loading numpy file...")
                data = np.load(rawInFile)
                print("Handling NaNs")
                nansremoved = [ data[x][np.invert(np.isnan(data[x][:,0]))] for x in range(100)]
                del data
                #for x in range(100):
                #    np.save('/tmp/c156r133-{}/vcorr-{}'.format(b,x),nansremoved[x])
                #df = ['/tmp/c156r133-{}/vcorr-{}.npy'.format(b,x//100) for x in range(10000) ]
                d = [ nansremoved[x//100] for x in range(10000) ]
                i = [ x%100  for x in range(10000) ]
                #print(d[542].shape)
            print("Opening Pool...")
            gc.collect()
            with Pool(processes=NUM_PROCESSES) as pool:
                print("Generating Histograms...")
                histogramData = list(pool.starmap(turboRun,zip(d,i,itertools.repeat(numpy),
                                                               itertools.repeat(maxd),
                                                               itertools.repeat(units),
                                                               itertools.repeat(xs),
                                                               itertools.repeat(intervals),
                                                               itertools.repeat(use_tmp)
                                                           )))
                """
                Each turbo run returns a list of histograms [ 5-length histogram, 10-length histogram, 20-length etc]
                so histogramData is a list of turbo runs, which means data is a jagged array
                data = [
                
                [ [ ----- ],
                  [ ---------- ],
                  [ -------------------- ] ],
                
                [ [ ----- ],
                  [ ---------- ],
                  [ -------------------- ] ],
                
                ]
                """
            for scheme_index in range(len(intervals)):
                hist_for_scheme = np.array([turbo_data[scheme_index] for turbo_data in histogramData])
                saveOutput(hist_for_scheme,outfile.format('',distance_args[scheme_index][0],units.replace('/','')))
            print(" Done!")
Example no. 6
import hashlib
#import pdb
from numpy.core.umath_tests import inner1d #Note: this function has no documentation and could easily be deprecated.
#if that happens, you can always use the syntax (a*b).sum(axis=1), which is ever so slightly slower and much more
#ugly.
#Alternate definition:
# def inner1d(a,b):
#     return (a*b).sum(axis=1)
import gc
import time
import os
import numpy as np
import common   #project module that provides getdict(); used below
#import smtplib
#import matplotlib.ticker as mtick

#These are loaded in by every process. You can't modify them, or the processes will act weird.
GLOBAL_SETTINGS = common.getdict('global_settings_vcorr.json')
TEMP_DIRECTORY = GLOBAL_SETTINGS['tmp']#"tmp/"
NUM_PROCESSES= GLOBAL_SETTINGS['num_processors']#8
STAGGER_PROCESSES = GLOBAL_SETTINGS['stagger']

#PERFECT_LOCATION = "output/PERFECT_DONTTOUCH/COMPOSITE-MOCK-bin-{:.0f}-{}.npy"
def main(args):
    """ Compute the velocity correlations on one or many galaxy surveys. 
    """
    np.seterr(divide='ignore',invalid='ignore')
    #Get setup information from the settings file
    settings = common.getdict(args.settings)
    if settings['num_files'] != 10000 and settings['use_npy']:
        print("Sorry! We can only handle 100x100 surveys. Try turning off the use_npy flag.")
        exit()
    numpoints =    settings['numpoints']
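
As the comment above notes, numpy.core.umath_tests.inner1d is undocumented and has since been deprecated in newer NumPy releases. A drop-in replacement along the lines the comment already suggests (or the equivalent einsum) would look roughly like this:

import numpy as np

def inner1d(a, b):
    """Row-wise inner product of two (N, 3) arrays, matching the fallback
    suggested in the comment above."""
    # (a * b).sum(axis=1) also works; einsum avoids the temporary array.
    return np.einsum('ij,ij->i', a, b)

# Quick check against the obvious definition.
a = np.arange(6.0).reshape(2, 3)
b = np.ones((2, 3))
assert np.allclose(inner1d(a, b), (a * b).sum(axis=1))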
Example no. 7
def dice(args):
    #algorithm: go through the box and find its bounds
    #Divide that volume into sub-boxes using the x_divs, y_divs and z_divs from the settings file
    #go through the box again and test each data point and put it into the right boxfile(s)
    #remember that each box has a standard and an extended data set.
    #filename convention: "<NAME>_xindex_yindex_zindex(.extended).box"
    #settings convention: "<NAME>"

    #Think of these as macro definitions. I want to be able to say sizes[Z]
    #because I'll feel "cool" doing that.
    X = 0
    Y = 1
    Z = 2
    MIN = 0
    MAX = 1
    filebounds = dict()
    input(
        "MAKE SURE that your box is 100Mpc on a side! OTHERWISE THIS WON'T WORK! (press enter to continue)"
    )
    all_settings = common.getdict(args.settings)
    settings = all_settings["Divide"]
    inFileName = settings["filename"]
    sizes = (settings["x_box_size"], settings["y_box_size"],
             settings["z_box_size"])
    radius = settings["expected_radius"]
    outFileName = all_settings["boxname"]
    directory = os.path.dirname(outFileName)
    if not os.path.exists(directory):
        os.makedirs(directory)
    """
    minmax = [[None,None],[None,None],[None,None]]
#    Here we loop through the file, finding the global maximum and minimum of the data set.
#    This is an expensive operation (for HDD time) but optimally it will only have to run once ever
#    so that's how I'm justifying it.
    
    with open(inFileName,'r') as infile:
        first = True
        for line in infile:
            line = line.strip()
            if line[0] != "#":
                row = line.split(',')
                coord = None
                try:
                    coord = (float(row[14]),float(row[15]),float(row[16]))
                except ValueError:
                    pass
                if coord is not None:    
                    if first:
                        minmax[X][MIN] = coord[X]
                        minmax[X][MAX] = coord[X]
                        minmax[Y][MIN] = coord[Y]
                        minmax[Y][MAX] = coord[Y]
                        minmax[Z][MIN] = coord[Z]
                        minmax[Z][MAX] = coord[Z]
                        first = False
                    else:
                        for dimension in range(3):
                            #if the current coordinate is bigger than the current max
                            if coord[dimension]>minmax[dimension][MAX]:
                                #store the coordinate in the max part of the file
                                minmax[dimension][MAX] = coord[dimension]
                            #same ish for the minimums
                            elif coord[dimension]<minmax[dimension][MIN]:
                                minmax[dimension][MIN] = coord[dimension]
   """ """    
#    We now know the size of the box and will be able to use that to decide the chopping points for
#    the galaxies. Who knows if we will ever need any more information out of the box (it's entirely possible)
#    so we are going to copy entire lines.

#    Side note: wouldn't it be cool to store the file as a KD tree in a file?

    Also: be careful of the box's edges because since they are minimums and maximums they refer to an
    ACTUAL COORDINATE and will probably cause inequalities to explode a bit.

    This is actually the hard part that requires a lot of thought. Wish me luck.
    
    macroBoxSize = [dimension[MAX]-dimension[MIN] for dimension in minmax]
    microBoxSize = tuple(macroBoxSize[dimension]/divs[dimension] for dimension in range(3))
    #so first figure out the size in Mpc of the huge box
    #then figure the sizes in Mpc of the small boxes
    #Use that to partition the box into smaller boxes
    boxPartitions = 
    """
    #I would like the padding that I described in my iPad file to be implemented.

    #note: box 0,0,0 will contain the box with defining corners (0,0,0)Mpc and (xsize,ysize,zsize).
    #all boxes will be inclusive on lower-numbered faces and exclusive on higher-numbered faces

    #We'll build "normal" boxes starting at minimum + expectedRadius
    #We will also build the extended boxes each centered around a normal box
    #These operations will run concurrently so as to minimize unnecessary disk i/o usage

    fileType = inFileName.split('.')[-1].lower()
    fileParms = {
        'dat': {
            'x': 0,
            'y': 1,
            'z': 2,
            'split': None
        },
        'csv': {
            'x': 14,
            'y': 15,
            'z': 16,
            'split': ','
        },
        'box': {
            'x': 0,
            'y': 1,
            'z': 2,
            'split': ','
        }
    }

    with open(inFileName, 'r') as infile:
        for rawline in infile:
            line = rawline.strip()
            if line[0] != "#":
                row = line.split(fileParms[fileType]['split'])
                coord = None
                try:
                    coord = (float(row[fileParms[fileType]['x']]),
                             float(row[fileParms[fileType]['y']]),
                             float(row[fileParms[fileType]['z']]))
                except ValueError:
                    pass
                if coord is not None:
                    boxIDX = random.randrange(
                        0, 16)  # CHANGE THIS TO USE AN INPUT FILE SOMETIME
                    boxfilename = common.getBoxNameJackknife(
                        outFileName, boxIDX)
                    #NOTE: The asterisk passes each part of the tuple as one argument.
                    #Which is REALLY HANDY and also REALLY OBSCURE. Be careful!

                    filebounds[boxfilename] = ((0, 0, 0), (100, 100, 100))
                    #calculate the bounding box of this box and add it to a dictionary for later use.
                    #WARNING: Not accurate unless the boxes fit evenly into the big box! e.g. does not
                    #work for arbitrary box sizes!
                    #WARNING: NOT ACCURATE FOR ANY BOX THAT IS NOT 100MPC ON A SIDE!!!
                    with open(boxfilename, 'a') as boxfile:
                        boxfile.write(
                            str(coord[0]) + ',' + str(coord[1]) + ',' +
                            str(coord[2]) + '\n')

    genericInfo = {
        "list_of_files": filebounds,
        "box_x_size": 100,
        "box_y_size": 100,
        "box_z_size": 100  #BLAH
    }
    with open(outFileName, 'w') as infofile:
        infofile.write(
            json.dumps(genericInfo,
                       sort_keys=True,
                       indent=4,
                       separators=(',', ': ')))
    with open(outFileName + '_README', 'w') as readmefile:
        readmefile.write("""User's guide to the {0} file.
{0} is in JSON format, and contains information about the boxes in this folder.
box_x_size, box_y_size, and box_z_size are all floats that describe the size of each box
in the x, y and z dimensions. 
List_of_files is a dictionary. Its keys are names of boxes. Iterate through all the keys
to make sure you've processed each box. The file paths assume that you are running the
python script from the 'millenium/matplot' folder. 
The values are lists of two lists. The first list tells you the x, y, and z coordinates (in that order)
of the smallest corner of the box. The second list tells you the x, y, and z coordinates in that order
of the largest corner of the box. From there you can figure out everything about the bounding box."""
                         .format(outFileName))
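
The filebounds values written above are hard-coded to the full 100 Mpc cube, as the warnings say. If real per-box bounds are ever needed, a sketch along the lines of the commented-out macroBoxSize/microBoxSize idea could look like the following; sub_box_bounds and the 2x2x2 example grid are illustrative assumptions, not part of the original code.

def sub_box_bounds(minimum, maximum, divs, index):
    """Sketch: bounds of sub-box `index` = (ix, iy, iz) when the region from
    `minimum` to `maximum` is split into divs[0] x divs[1] x divs[2] boxes."""
    size = [(maximum[d] - minimum[d]) / divs[d] for d in range(3)]
    lower = tuple(minimum[d] + index[d] * size[d] for d in range(3))
    upper = tuple(minimum[d] + (index[d] + 1) * size[d] for d in range(3))
    return lower, upper

# Example: the (1, 0, 1) box of a 100 Mpc cube cut 2x2x2.
print(sub_box_bounds((0, 0, 0), (100, 100, 100), (2, 2, 2), (1, 0, 1)))
# -> ((50.0, 0.0, 50.0), (100.0, 50.0, 100.0))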
Example no. 8
def statistics(args):
    data = common.getdict(args.datafile)
    #Create a new figure. Each page in the pdf file is a figure
    fig = plt.figure(1, figsize=(8, 6), dpi=400)
    #Create a subplot axis
    ax = fig.add_subplot(111)
    #Set the title, x and y labels of this page's plot.
    ax.set_title("Correlation Function, Davis and Peebles Estimator")
    plt.xlabel("Correlation distance, Mpc/h")
    plt.ylabel("Correlation")

    fig2 = plt.figure(2, figsize=(8, 6), dpi=400)
    ax2 = fig2.add_subplot(111)
    ax2.set_title("Correlation Function, Hamilton Estimator")
    plt.xlabel("Correlation distance, Mpc/h")
    plt.ylabel("Correlation")

    fig3 = plt.figure(3, figsize=(8, 6), dpi=400)
    ax3 = fig3.add_subplot(111)
    plt.ylabel("Correlation")
    plt.xlabel("Correlation distance, Mpc/h")
    ax3.set_title("Correlation Function, Landy and Szalay Estimator")

    fig4 = plt.figure(4, figsize=(8, 6), dpi=400)
    ax4 = fig4.add_subplot(111)
    plt.ylabel("Correlation+1")
    plt.xlabel("Correlation distance, Mpc/h")
    ax4.set_title(
        "Correlation Function from random points, modified Davis and Peebles Estimator"
    )

    fig5 = plt.figure(5, figsize=(8, 6), dpi=400)
    ax5 = fig5.add_subplot(111)
    plt.ylabel("Correlation")
    plt.xlabel("Correlation distance, Mpc/h")
    ax5.set_title("Correlation Function, average. Landy and Szalay estimator.")
    ax5.set_xscale('log', nonposx='clip')
    ax5.set_yscale('log', nonposy='clip')

    fig6 = plt.figure(6, figsize=(8, 6), dpi=400)
    ax6 = fig6.add_subplot(111)
    plt.ylabel("Correlation Residuals")
    plt.xlabel("Correlation distance, Mpc/h")
    ax6.set_title("Correlation Function residuals")
    ax6.set_xscale('log', nonposx='clip')
    #ax6.set_yscale('log', nonposx = 'clip')

    #Set all the axes to log scale

    ax.set_xscale("log", nonposx='clip')
    ax2.set_xscale("log", nonposx='clip')
    ax3.set_xscale("log", nonposx='clip')
    ax4.set_xscale("log", nonposx='clip')
    ax.set_yscale("log", nonposy='clip')
    ax2.set_yscale("log", nonposy='clip')
    ax3.set_yscale("log", nonposy='clip')
    ax4.set_yscale("log", nonposy='clip')
    ys = []
    maxY = 10**1
    minY = 10**-2
    maxRandom = 5 * 10**1
    minRandom = 10**-1
    numboxes = 0
    for box in data['raw_runs'][0].items():
        #The .items() function returns a tuple (Key, value)
        #That's why there are all the box[1]'s running around.
        if box[0] != "ALL_BOXES":
            #Each box has its own data associated with it, so first we plot ALL the data
            plt.figure(1)
            plt.plot(box[1]["rs"], box[1]["Davis_Peebles"], '.')
            plt.figure(2)
            plt.plot(box[1]["rs"], box[1]["Hamilton"], '.')
            plt.figure(3)
            plt.plot(box[1]["rs"], box[1]["Landy_Szalay"], '.')
            plt.figure(4)
            plt.plot(box[1]["rs"],
                     [x + 1 for x in box[1]["Random_Correlation"]], '.')
            ys.append(box[1]["Landy_Szalay"])
            #minY, maxY = updateMinMax(minY, maxY, box[1]["Davis_Peebles"])
            #minY, maxY = updateMinMax(minY, maxY, box[1]["Hamilton"])
            #minY, maxY = updateMinMax(minY, maxY, box[1]["Landy_Szalay"])
            #minRandom, maxRandom = updateMinMax(minRandom, maxRandom, box[1]["Random_Correlation"])
            #This was an attempt to give all of the graphs the same scales. I don't know why it didn't work...
            numboxes += 1
            #Here we count the number of boxes so that we know whether we can use the standard deviation
            #for error bars
    power = lambda r, r0, gamma: (r / r0)**(-gamma)
    #power law for estimating correlation and its relation to distance.
    #Used in the curvefit scipy function

    allys = list(zip(*ys))
    #This list contains tuples of y-values for a certain x value for use in calculating the standard
    #deviation easily. Format [(10,9.8,10.25),(7.776,7.90,7.745) etc] except with possibly more values per tuple
    #and definitely way more tuples.

    #Calculate the 95% confidence interval, two times the standard deviation of all the ys for a certain x.

    #yerrs = jackknife(allys)

    ys = data['raw_runs'][0]["ALL_BOXES"]["Landy_Szalay"]
    xs = data['raw_runs'][0]["ALL_BOXES"]["rs"]
    xerrs = [
        data['raw_runs'][0]["ALL_BOXES"]["dr_left"],
        data['raw_runs'][0]["ALL_BOXES"]["dr_right"]
    ]
    #Take the raw xs and ys from the dataset that was averaged over all of the boxes.
    if numboxes == 1:
        popt, pcov = scipy.optimize.curve_fit(
            power, xs, ys, p0=(10, 1.5))  #,sigma=yerrs,absolute_sigma=True)
        #When we only have one box, we need to tell the curve fit that all of the errors are "The Same"
        yerrs = [
            300 * ys[i] *
            math.sqrt(data['raw_runs'][0]["ALL_BOXES"]["DDs"][i]) /
            data['raw_runs'][0]["ALL_BOXES"]["DDs"][i] for i in range(len(ys))
        ]
    else:
        yerrs = [np.std(y) for y in allys]
        popt, pcov = scipy.optimize.curve_fit(power,
                                              xs,
                                              ys,
                                              p0=(10, 1.5),
                                              sigma=yerrs,
                                              absolute_sigma=True)
        #More than one box means that the standard deviation errors are correct.

    print(yerrs)
    # print(pcov)
    # print(popt)
    plt.figure(5)
    dot = plt.errorbar(xs,
                       ys,
                       yerr=yerrs,
                       xerr=xerrs,
                       fmt='.',
                       label="Averaged Correlation Data")
    model = [power(x, *popt) for x in xs]
    line = plt.plot(
        xs,
        model,
        label="Model fit: $(r/r_0)^{{-\\gamma}}$\n$r_0 = {:.3f}$\n$\\gamma = {:.3f}$".format(
            popt[0], popt[1]))
    #We need {{ and }} to escape the .format thingy and pass { and } to LaTeX

    plt.legend()
    plt.figure(6)
    residuals = [y / mod for y, mod in zip(ys, model)]

    #A residual data point is y / model, so its relative error would ideally be
    #sqrt( relative error in model ^2 + relative error in point ^2 ).
    #Here we propagate only the data point's relative error and treat the fitted model as exact.
    residuals_errors = [
        res * (dy / y) for y, dy, res in zip(ys, yerrs, residuals)
    ]
    plt.errorbar(xs,
                 residuals,
                 yerr=residuals_errors,
                 fmt='.',
                 label="Residuals")
    plt.plot(xs, [1 for x in xs], label="Model")
    plt.legend()
    ax6.axis([0, max(xs) + 1, 0, 2])  #min(residuals)*.9,max(residuals)*1.1])
    plt.figure(5)
    #Here, we set the scale of each axis. We need a better method of dynamically deciding what the bounds should be
    ax5.axis([min(xs) - 0.15, max(xs) + 3, minY, maxY])
    plt.figure(4)
    ax4.axis([min(xs) - 0.15, max(xs) + 3, minRandom, maxRandom])
    plt.figure(3)
    ax3.axis([min(xs) - 0.15, max(xs) + 3, minY, maxY])
    plt.figure(2)
    ax2.axis([min(xs) - 0.15, max(xs) + 3, minY, maxY])
    plt.figure(1)
    ax.axis([min(xs) - 0.15, max(xs) + 3, minY, maxY])
    #plt.legend([dot,line],["Data","Best fit"])
    basefilename = args.datafile.replace("rawdata.json", "")
    with open("output/statistics.csv", 'a') as outfile:
        #WARNING: I usually don't like using static file paths!
        #ANOTHER WARNING: every time you run stats it appends a line to this file. So, be careful and only use the
        #statistics file after cleaning it and doing a very controlled run.
        line = ""
        for datapt in [float(x) for x in popt]:
            line = line + str(datapt) + ","
        for error in np.sqrt(np.diag(pcov)):
            line = line + str(error) + ','
        line = line + str(data["settings"]["Divide"]["x_box_size"] *
                          data["settings"]["Divide"]["y_box_size"] *
                          data["settings"]["Divide"]["z_box_size"])
        outfile.write(line + '\n')
    with pdfback.PdfPages(basefilename + 'graphs.pdf') as pdf:
        pdf.savefig(fig)
        pdf.savefig(fig2)
        pdf.savefig(fig3)
        pdf.savefig(fig4)
        pdf.savefig(fig5)
        pdf.savefig(fig6)
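
The comment above residuals_errors describes the full error propagation for a ratio, which the code itself does not implement. Below is a sketch of that rule with placeholder numbers; ratio_error is a name invented here, and the model error dmod would have to come from the fit's own uncertainty (for example via pcov), which the code does not currently provide.

import math

def ratio_error(y, dy, mod, dmod):
    """Absolute error of the residual y / mod when both the data point and the
    model carry uncertainty: (y/mod) * sqrt((dy/y)^2 + (dmod/mod)^2)."""
    relative = math.sqrt((dy / y) ** 2 + (dmod / mod) ** 2)
    return (y / mod) * relative

# e.g. a 10% data error and a 5% model error on a residual of 1.2:
print(ratio_error(1.2, 0.12, 1.0, 0.05))   # ~0.134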
Example no. 9
def mainrun(args):
    print("Setting things up...")
    all_settings = common.getdict(args.settings)

    boxname = all_settings["boxname"]
    settings = all_settings["Correlation"]
    numpoints = settings["numpoints"]
    dr = settings["dr"]
    runs = settings["num_runs"]
    min_r = settings["min_r"]
    step_size = settings["step_size"]
    step_type = "lin"
    try:
        step_type = settings["step_type"]
    except KeyError:
        print(
            "We've added a new argument \"step_type\" to the settings file.\nPlease update {} to include this argument.\nDefault value is \"lin\" for linear steps on the x axis.\nOther values are \"log\" for logarithmic point spacing. More types to come. Maybe."
            .format(args.settings))
    boxinfo = common.getdict(boxname)
    print("Computing correlation function...")
    argslist = [(x, boxinfo, numpoints, dr, step_size, min_r, step_type)
                for x in range(runs)]
    start = time.time()
    correlation_func_of_r = list(map(calculate_correlations, argslist))
    finish = time.time() - start
    print("That took {} seconds.".format(finish))
    """
    the structure of correlation_func_of_r:
    [
      [ (x1,dx1,y1), (x2,dx2,y2), ... (xn,dxn,yn) ], <- run 0
      [ (x1,dx1,y1), (x2,dx2,y2), ... (xn,dxn,yn) ], <- run 1
      [ (x1,dx1,y1), (x2,dx2,y2), ... (xn,dxn,yn) ]  <- run 2
    ]
    
                                                                  
    print("Computing statistics...")

    Correlation Func of r is a list containing tuples of xs and ys (and dxs).
    To compute the error bars, we'll want to take the y values out of all the tuples, grouped by x value,
    find the standard deviation, and multiply by two to calculate the error bars for each number.
    Then find the average of the y values to place the center point of the error bars.
    Finally, save that information to file and then use it to build a graph!

    We're actually going to move the "computing statistics" thingies to a different file.
    
    final_data = []
    for x_value in range(len(correlation_func_of_r[0])): 
        ys_for_this_x = [] #a list of the y values of a specific x value
        for y in range(len(correlation_func_of_r)):
            ys_for_this_x.append(correlation_func_of_r[y][x_value][2])
        final_data.append((correlation_func_of_r[0][x_value][0],
                           correlation_func_of_r[0][x_value][1],
                           np.average(ys_for_this_x),
                           2*np.std(ys_for_this_x)))
    """
    print("Complete.")
    dataFileName = settings['output_data_folder'] + boxname.split(
        '/')[-1] + '---'
    dataFileName = dataFileName + args.settings.split('/')[-1].split(
        '.')[0] + '---rawdata.json'

    dictionary = {
        'raw_runs': correlation_func_of_r,
        'settings': all_settings,
        'time': finish
    }
    #return dictionary
    #print(dictionary)
    common.writedict(dataFileName, dictionary)
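
The commented-out block above describes how the per-run curves were meant to be averaged before that step moved to another file. Here is a standalone sketch of that aggregation (mean y with a 2-sigma error bar per x value); aggregate_runs is a name invented here and the two runs are made up.

import numpy as np

def aggregate_runs(correlation_func_of_r):
    """Sketch: collapse [[(x, dx, y), ...] per run] into
    [(x, dx, mean_y, 2 * std_y), ...], as the commented-out code describes."""
    final_data = []
    for column in zip(*correlation_func_of_r):   # group the same x value across runs
        xs, dxs, ys = zip(*column)
        final_data.append((xs[0], dxs[0], np.average(ys), 2 * np.std(ys)))
    return final_data

runs = [[(1, 0.5, 10.0), (2, 0.5, 7.8)],
        [(1, 0.5, 9.8),  (2, 0.5, 7.9)]]
print(aggregate_runs(runs))
# -> [(1, 0.5, 9.9, ~0.2), (2, 0.5, 7.85, ~0.1)]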
Example no. 10
def selectrun(args):
    #I'll need from args:
    #The input file folder name (5+gb csv file from millennium)
    #Density of the input file name (in shells around the survey centers)
    #The survey function (the filename of the function parameter json)
    #minimum distance between surveys
    #number of surveys
    global USE_GPU
    #I'll need to go through the database in blocks using the usual sub-box method
    USE_GPU = args.gpu and USE_GPU
    #USE_GPU represents whether we can and want to use the gpu.
    settings = common.getdict(args.settings)

    hugeFile = settings["dataset_filename"]
    #density        = settings['dataset_density']
    surveyOverride = settings['survey_position_override']
    boxSize = settings['box_size']
    if boxSize[0] != 500 or boxSize[1] != 500 or boxSize[2] != 500:
        print("Not designed with other simulation boxes in mind.")
        exit()
    outFileName = settings['survey_output_files']

    if os.path.isdir(hugeFile):
        files = [hugeFile + x for x in os.listdir(hugeFile)]
    else:
        files = hugeFile
        print(
            "[WARN] Using the gigantic file is no longer supported and will probably cause really weird errors."
        )

    #Now: make a list of the survey starting points. The data structure should be:
    #list of origin tuples, (x,y,z). The index will be the ID of the survey. So we save each
    #survey in a file named based on the list index.

    if surveyOverride is not None:
        surveys = surveyOverride
    else:
        surveySeparation = settings['survey_separation_distance']
        numSurveys = settings['num_surveys']
        surveys = genSurveyPos(surveySeparation, boxSize, numSurveys, hugeFile)

    #Generate a coordinate system for each survey to use
    #Method: Since the normal distribution is symmetric around its mean, if each coordinate is
    #normally distributed around a mean of zero then the distribution is spherically symmetric
    #While the variable name is "up vector," it more closely represents the rotation angle to
    #rotate the coordinate system around.
    rawSurveyUpVectors = np.random.normal(0, 0.1, (len(surveys), 3))
    #axis=1 takes the norm of each row (one 3-vector per survey),
    #not of the whole N x 3 array.
    upVectorLengths = np.linalg.norm(rawSurveyUpVectors, axis=1)

    #Normalized version of the original 'raw' survey up vectors
    surveyUpVectors = np.array([
        vec / length
        for vec, length in zip(rawSurveyUpVectors, list(upVectorLengths))
    ])
    surveyRotationAngles = np.random.uniform(0, 2 * math.pi,
                                             surveyUpVectors.shape[0])
    #Rotation angles is the angle by which the coordinate system is rotated about the 'up' vector

    #Now we make a rotation matrix out of the vector-angle rotation
    #Definition from http://en.wikipedia.org/wiki/Rotation_matrix#Conversion_from_and_to_axis-angle
    def axisAngletoMatrix(r, theta):
        return (math.cos(theta) * np.identity(3)
                + math.sin(theta) * common.crossProductMatrix(r)
                + (1 - math.cos(theta)) * np.outer(r, r))
    #It turns out that the rows of these rotation matrices form the basis vectors for the new coordinate system
    rotationMatrices = [
        axisAngletoMatrix(r, theta)
        for r, theta in zip(surveyUpVectors, list(surveyRotationAngles))
    ]

    selectionParams = common.getdict(settings['selection_function_json'])

    #Grab the pre-computed distance files if they exist, or if not generate them.
    distFileBase = hugeFile.rstrip('/') + "_distances_{:x}/".format(
        hash(tuple([tuple(x) for x in surveys])))
    distanceFiles = [
        distFileBase + os.path.basename(os.path.splitext(milFile)[0])
        for milFile in files
    ]
    #Distance file format: outFile location + hash of survey centerpoints / xi.yi.zi.npy

    pool = multiprocessing.Pool(processes=NUM_PROCESSORS)

    # if not os.path.exists(distFileBase):
    #     start = time.time()
    #     print("Generating distance data...")
    #     os.mkdir(distFileBase)
    #     pool.starmap(distanceOneBox,zip(files,
    #                                     itertools.repeat(surveys),
    #                                     distanceFiles))
    #     print("Generating distance data took {} seconds.".format(time.time()-start))
    #     #Single core version for use with profiling
    #     #os.mkdir(distFileBase)
    #     #[distanceOneBox(afile,surveys,distanceFile) for afile,distanceFile in zip(files,distanceFiles)]
    # else:
    #     print("Found distance data!")

    #Generate lookup-tables for 'original-number-of-galaxies' if they don't already exist
    boxMaxDistance = 350  # space.distance.euclidean([0,0,0],boxSize)
    print("Assuming chop distance of 350 Mpc/h")
    if not os.path.exists(
            distFileBase +
            'hist{}.npy'.format(selectionParams["info"]["shell_thickness"])):
        print("Generating histograms...")
        start = time.time()

        # listOfHistograms = pool.starmap(surveyBins,zip(distanceFiles,
        #                                                itertools.repeat(selectionParams["info"]["shell_thickness"]),
        #                                                itertools.repeat(boxMaxDistance)))
        listOfHistograms = pool.starmap(
            distsurvey,
            zip(files, itertools.repeat(surveys),
                itertools.repeat(selectionParams["info"]["shell_thickness"]),
                itertools.repeat(boxMaxDistance)))
        full_histogram = sum(listOfHistograms)
        np.save(
            distFileBase +
            'hist{}.npy'.format(selectionParams["info"]["shell_thickness"]),
            full_histogram)
        print("Generating histograms took {} seconds.".format(time.time() -
                                                              start))
        #because the surveyBins function returns a numpy array, the sum function will add them all together element-wise!
    else:
        print("Found histogram!")
        full_histogram = np.load(
            distFileBase +
            'hist{}.npy'.format(selectionParams["info"]["shell_thickness"]))

    print("Generating surveys...")
    start = time.time()
    if USE_GPU:
        pool.close()
        listOfSurveyContents = itertools.starmap(
            surveyOneFile,
            zip(files, itertools.repeat(surveys),
                itertools.repeat(selectionParams),
                itertools.repeat(full_histogram),
                itertools.repeat(boxMaxDistance)))
    else:
        listOfSurveyContents = pool.starmap(
            surveyOneFile,
            zip(files, itertools.repeat(surveys),
                itertools.repeat(selectionParams),
                itertools.repeat(full_histogram),
                itertools.repeat(boxMaxDistance)))
    listOfSurveyContents = list(listOfSurveyContents)
    print("Generating surveys took {} seconds.".format(time.time() - start))

    #Format of listOfSurveyContents:
    #List of 1000 elements.
    #Each 'element' is a list of numSurveys elements, each element of which is a list of rows that belong to that
    #survey.

    #e.g. [
    #       [
    #         [rows for survey 1],
    #         [rows for survey 2],
    #         ...],
    #       [],[],...,[]]
    info = []
    #Jam the arrays back together
    surveyContent = transposeMappedSurvey(listOfSurveyContents)
    #write them to the disk
    for i, surveyFinal in enumerate(surveyContent):
        surveyFileName = outFileName + str(i) + '.mil'
        with open(surveyFileName, 'w') as surveyFile:
            for line in surveyFinal:
                surveyFile.write(line)
        info.append({
            'name': surveyFileName,
            'center': surveys[i],
            'rot': [[d for d in c] for c in rotationMatrices[i]]
        })
    common.writedict(outFileName + '.json', info)
    common.writedict(outFileName + '_info.json', {
        'selection_params': selectionParams,
        'settings': settings
    })
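
The axisAngletoMatrix helper above is the standard axis-angle (Rodrigues) rotation formula. A small self-contained check that the resulting matrices really are proper rotations can help when debugging the generated coordinate systems; cross_product_matrix below is assumed to behave like common.crossProductMatrix, and the example axis and angle are arbitrary.

import math
import numpy as np

def cross_product_matrix(r):
    """Skew-symmetric matrix [r]_x with [r]_x @ v == np.cross(r, v)."""
    return np.array([[0.0, -r[2], r[1]],
                     [r[2], 0.0, -r[0]],
                     [-r[1], r[0], 0.0]])

def axis_angle_to_matrix(r, theta):
    """Rodrigues formula, as used in selectrun above."""
    return (math.cos(theta) * np.identity(3)
            + math.sin(theta) * cross_product_matrix(r)
            + (1 - math.cos(theta)) * np.outer(r, r))

axis = np.array([0.0, 0.0, 1.0])                 # unit vector, like surveyUpVectors
R = axis_angle_to_matrix(axis, math.pi / 4)
assert np.allclose(R @ R.T, np.identity(3))      # orthonormal
assert np.isclose(np.linalg.det(R), 1.0)         # proper rotation, determinant +1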
Example no. 11
def main(args):
    """ Compute the velocity correlations on one or many galaxy surveys. 
    """
    print("Incomplete function - see comments")
    exit()
    #Get setup information from the settings file
    settings = common.getdict(args.settings)
    numpoints = settings["numpoints"]
    outfolder = settings["output_data_folder"]
    outfile = settings["output_file_name"]
    rawInFile = settings["input_file"]
    step_type = settings["step_type"]
    dr = settings["dr"]
    min_r = settings["min_r"]
    if settings["many"]:
        #If there are lots of files, set them up accordingly.
        inFileList = [
            rawInFile.format(x + settings['offset'])
            for x in range(settings["num_files"])
        ]
    else:
        inFileList = [rawInFile]
    xs, intervals = common.genBins(min_r, numpoints, dr, step_type)
    for index, infile in enumerate(inFileList):
        #Load the survey
        galaxies = np.array(common.loadData(infile, dataType="millVel"))
        print(galaxies.shape)
        #Put just the galaxy positions into one array
        positions = galaxies[:, 0:3]  # [(x,y,z),...]
        velocities = galaxies[:, 3:6]

        kd = cKDTree(positions)
        pairs = kd.query_pairs(max(intervals))
        npPairs = np.array(list(pairs))
        g1pos = positions[npPairs[:, 0]]
        g2pos = positions[npPairs[:, 1]]

        g1vs = velocities[npPairs[:, 0]]
        g2vs = velocities[npPairs[:, 1]]

        distBetweenG1G2 = np.linalg.norm(g2pos - g1pos, axis=1)

        velocityCorrelation = inner1d(g1vs, g2vs) / 10**4

        c11 = g1vs[:, 0] * g2vs[:, 0]
        c12 = g1vs[:, 0] * g2vs[:, 1]
        c13 = g1vs[:, 0] * g2vs[:, 2]
        c21 = g1vs[:, 1] * g2vs[:, 0]
        c22 = g1vs[:, 1] * g2vs[:, 1]
        c23 = g1vs[:, 1] * g2vs[:, 2]
        c31 = g1vs[:, 2] * g2vs[:, 0]
        c32 = g1vs[:, 2] * g2vs[:, 1]
        c33 = g1vs[:, 2] * g2vs[:, 2]

        n, bins = np.histogram(distBetweenG1G2, bins=intervals)

        correlation11, bins = np.histogram(distBetweenG1G2,
                                           bins=intervals,
                                           weights=c11)
        correlation12, bins = np.histogram(distBetweenG1G2,
                                           bins=intervals,
                                           weights=c12)
        correlation13, bins = np.histogram(distBetweenG1G2,
                                           bins=intervals,
                                           weights=c13)
        correlation21, bins = np.histogram(distBetweenG1G2,
                                           bins=intervals,
                                           weights=c21)
        correlation22, bins = np.histogram(distBetweenG1G2,
                                           bins=intervals,
                                           weights=c22)
        correlation23, bins = np.histogram(distBetweenG1G2,
                                           bins=intervals,
                                           weights=c23)
        correlation31, bins = np.histogram(distBetweenG1G2,
                                           bins=intervals,
                                           weights=c31)
        correlation32, bins = np.histogram(distBetweenG1G2,
                                           bins=intervals,
                                           weights=c32)
        correlation33, bins = np.histogram(distBetweenG1G2,
                                           bins=intervals,
                                           weights=c33)

        a11 = correlation11 / n
        a12 = correlation12 / n
        a13 = correlation13 / n
        a21 = correlation21 / n
        a22 = correlation22 / n
        a23 = correlation23 / n
        a31 = correlation31 / n
        a32 = correlation32 / n
        a33 = correlation33 / n

        f, ((ax11, ax12, ax13), (ax21, ax22, ax23),
            (ax31, ax32, ax33)) = plt.subplots(3,
                                               3,
                                               sharex='col',
                                               sharey='row',
                                               figsize=(11, 8.5))

        ax11.plot(xs, a11)
        ax12.plot(xs, a12)
        ax13.plot(xs, a13)
        ax21.plot(xs, a21)
        ax22.plot(xs, a22)
        ax23.plot(xs, a23)
        ax31.plot(xs, a31)
        ax32.plot(xs, a32)
        ax33.plot(xs, a33)

        #set x axis and y axis to be the same
        #go out to until correlation is zero

        f.suptitle('3-D velocity correlation')
        ax31.set_xlabel('Distance, Mpc/h')
        ax32.set_xlabel('Distance, Mpc/h')
        ax33.set_xlabel('Distance, Mpc/h')

        ax11.set_ylabel('correlation, $(km/s)^2$')
        ax21.set_ylabel('correlation, $(km/s)^2$')
        ax31.set_ylabel('correlation, $(km/s)^2$')

        with pdfback.PdfPages(outfolder + outfile.format(index)) as pdf:
            pdf.savefig(f)
        pylab.close('all')
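
The nine c11..c33 arrays above are the components of the per-pair outer product of the two velocity vectors, so the same quantities can be produced in a single einsum call. This is shown only as an illustrative alternative with made-up data, not as what the original code does.

import numpy as np

# Made-up velocities for three galaxy pairs, shape (N, 3).
g1vs = np.random.normal(0, 300, (3, 3))
g2vs = np.random.normal(0, 300, (3, 3))

# outer[k, i, j] == g1vs[k, i] * g2vs[k, j], i.e. c11..c33 stacked per pair.
outer = np.einsum('ki,kj->kij', g1vs, g2vs)

# For example, the c12 column used above is outer[:, 0, 1]:
assert np.allclose(outer[:, 0, 1], g1vs[:, 0] * g2vs[:, 1])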