Exemplo n.º 1
0
def getTheGenes(theStartDate, templateList, dirr=os.getcwd()):
    """Collect gene statistics from every matching simulation directory.

    Walks the tree rooted at `dirr`. A directory is processed when it holds
    an 'InputParameters.json' file whose date, as returned by
    ppma.loadTheDateFromParamFile(), is not earlier than `theStartDate` and
    whose settings pass ppma.compareParams() against `templateList`.
    For such a directory a 'HostGenomesFile.<N>.csv' file other than
    'HostGenomesFile.0.csv' is loaded with loadHostPopulation() and handed to
    analiseGeneContent().

    theStartDate -- has to be a datetime.date() data type.
    templateList -- template given to ppma.lookForVARinList() and
                    ppma.compareParams().
    dirr         -- root directory of the search. NOTE: the default is
                    evaluated once, when the function is defined, so it is
                    the working directory at that moment.

    Returns a list with one item per processed directory: the object
    returned by analiseGeneContent() with its 'spp' key set to the run's
    VARX value and its 'patho_mut' key set to the run's VAR value.
    Directories without a usable host genome file are reported and skipped.
    """
    vv = ppma.lookForVARinList(templateList)
    datOut = []
    # Dots are escaped so that they match literally and not "any character".
    firstGenFile = re.compile(r'HostGenomesFile\.0\.csv')
    for dirName, subdirList, fileList in os.walk(dirr):
        if 'InputParameters.json' not in fileList:
            continue
        filepath = os.path.join(dirName, 'InputParameters.json')
        if not (ppma.loadTheDateFromParamFile(filepath) >= theStartDate):
            continue
        paramzList = ppma.loadParamSettings(filepath)
        if not ppma.compareParams(templateList, paramzList):
            continue
        print("Data from:", dirName, end=" ")
        # Reset for every directory: otherwise a directory without a
        # suitable file would reuse the file found in a previous directory
        # (or hit an unbound name on the very first one).
        hostPopFile = None
        popFiles = os.path.join(dirName, "HostGenomesFile.*.csv")
        for fil in glob.glob(popFiles):
            # When several files qualify, the last one listed by glob wins.
            if not firstGenFile.search(fil):
                hostPopFile = fil
        if hostPopFile is None:
            print("- no host genome file found, skipped!")
            continue
        Gene_list = loadHostPopulation(hostPopFile)
        geneStats = analiseGeneContent(Gene_list)
        var = float(paramzList[vv['VAR']])
        varx = float(paramzList[vv['VARX']])
        geneStats['spp'] = varx
        geneStats['patho_mut'] = var
        datOut.append(geneStats)
        print("- done!")
    return datOut
def getTheData(theStartDate, templateList, dirr=os.getcwd()):
    """Run a linear regression for every matching simulation directory.

    Walks the tree rooted at `dirr`. A directory qualifies when it holds an
    'InputParameters.json' file dated (ppma.loadTheDateFromParamFile) not
    earlier than `theStartDate` - which has to be a datetime.date() - and
    whose settings pass ppma.compareParams() against `templateList`.

    For a qualifying directory the host genome file of the generation given
    by the 'number_of_host_generations' parameter is read with
    loadPathoExposed() and loadHostPopulation(); the two series produced by
    calculateTheNumbers() each get a single 0 appended and are then passed
    to linregress() and plotted with plotMHCvsPathoPresent().

    Returns an array of dtype `outType` sorted by the fields 'VAR', 'VARX',
    'slope' and 'intercept'. Each record is built from the tuple
    (VAR, VARX, slope, intercept, r_value**2, p_value, std_err,
    number of pathogen species, directory name). Directories whose data
    cannot be loaded are reported on stdout and skipped.
    """
    varIndex = ppma.lookForVARinList(templateList)
    sortKeys = ['VAR', 'VARX', 'slope', 'intercept']
    records = []
    for dirName, subdirList, fileList in os.walk(dirr):
        # Only directories holding the parameter file are simulation runs.
        if 'InputParameters.json' not in fileList:
            continue
        paramPath = os.path.join(dirName, 'InputParameters.json')
        if not (ppma.loadTheDateFromParamFile(paramPath) >= theStartDate):
            continue
        settings = ppma.loadParamSettings(paramPath)
        if not ppma.compareParams(templateList, settings):
            continue

        with open(paramPath) as handle:
            rawParams = json.load(handle)
        path_spp = float(rawParams['number_of_pathogen_species'])
        lastGen = rawParams['number_of_host_generations']
        genomePath = os.path.join(
            dirName, "HostGenomesFile." + str(lastGen) + ".csv")
        var = float(settings[varIndex['VAR']])
        varx = float(settings[varIndex['VARX']])

        try:
            print(dirName, end=' : ')
            pathos = loadPathoExposed(genomePath)
            hosts = loadHostPopulation(genomePath)
            if hosts is not None and pathos is not None:
                print("Done")
            else:
                print("Failed to read data")
                continue
        except Exception:
            print("ERROR in getTheData(): cant's load the host",
                  "population data")
            continue

        uniqNumb, pathoNumb = calculateTheNumbers(hosts, pathos)
        # Both series are extended with one trailing zero before the fit.
        uniqNumb = np.hstack((uniqNumb, 0))
        pathoNumb = np.hstack((pathoNumb, 0))
        slope, intercept, r_val, p_val, std_err = linregress(uniqNumb,
                                                             pathoNumb)
        plotMHCvsPathoPresent(uniqNumb, pathoNumb, slope, intercept, dirName)
        records.append((var, varx, slope, intercept, r_val**2, p_val,
                        std_err, path_spp, dirName))
    return np.sort(np.array(records, dtype=outType), order=sortKeys)
Exemplo n.º 3
0
def getTheData(theStartDate, templateList, dirr=os.getcwd()):
    """Gather (VAR, VARX, meanINV) tuples from matching simulation runs.

    Walks the tree rooted at `dirr`. A directory qualifies when it holds an
    'InputParameters.json' file whose date (ppma.loadTheDateFromParamFile)
    is not earlier than `theStartDate` and whose settings pass
    ppma.compareParams() against `templateList`. For each such directory
    awkMeanINV() is called first, then loadMeanInvdMhcNumb() supplies the
    third element of the tuple.

    Returns a plain list of tuples, in the order os.walk() visits the
    directories.
    """
    collected = []
    varIndex = ppma.lookForVARinList(templateList)
    for dirName, subdirList, fileList in os.walk(dirr):
        # Only directories holding the parameter file are simulation runs.
        if 'InputParameters.json' not in fileList:
            continue
        paramPath = os.path.join(dirName, 'InputParameters.json')
        if not (ppma.loadTheDateFromParamFile(paramPath) >= theStartDate):
            continue
        settings = ppma.loadParamSettings(paramPath)
        if not ppma.compareParams(templateList, settings):
            continue
        var = float(settings[varIndex['VAR']])
        varx = float(settings[varIndex['VARX']])
        # awkMeanINV() runs before the load; presumably it prepares the data
        # that loadMeanInvdMhcNumb() reads - confirm against the helpers.
        awkMeanINV(dirName)
        collected.append((var, varx, loadMeanInvdMhcNumb(dirName)))
        print("Done dir:", dirName)
    return collected
def getTheData(theStartDate, templateList, dirr=os.getcwd(), genLast=0):
    """Compute one array per matching simulation run found under `dirr`.

    A directory qualifies when it holds an 'InputParameters.json' file whose
    date (ppma.loadTheDateFromParamFile) is not earlier than `theStartDate`
    - which has to be a datetime.date() - and whose settings pass
    ppma.compareParams() against `templateList`. `genLast` is forwarded
    unchanged to loadTheParents().

    For each qualifying directory the three 'NumberOfMhc*.csv' files are
    loaded, trimmed, reshaped and grouped by the helper functions; the
    result is a transposed stack of three rows: the group values returned
    by pickMotherSizeGroups(), the NaN-ignoring mean of (father - mean)
    differences per group, and the size of each father group.

    Returns a list with one such array per successfully processed
    directory. Directories whose rows cannot be stacked are reported on
    stdout and left out.
    """
    results = []
    for dirName, subdirList, fileList in os.walk(dirr):
        # Only directories holding the parameter file are simulation runs.
        if 'InputParameters.json' not in fileList:
            continue
        paramPath = os.path.join(dirName, 'InputParameters.json')
        if not (ppma.loadTheDateFromParamFile(paramPath) >= theStartDate):
            continue
        settings = ppma.loadParamSettings(paramPath)
        if not ppma.compareParams(templateList, settings):
            continue

        print("Processing dir:", dirName, end=" ")
        motherPath, fatherPath, matingPath = (
            os.path.join(dirName, csvName)
            for csvName in ('NumberOfMhcInMother.csv',
                            'NumberOfMhcInFather.csv',
                            'NumberOfMhcBeforeMating.csv'))
        mothr, fathr, bmate = loadTheParents(
            genLast, motherPath, fatherPath, matingPath)
        moth, fath, bmate = trimData(mothr, fathr, bmate, 2, 100)
        mmMt = avrgMateMHCnumb(bmate)
        rMom, rDad, rMmMt = reshapeMatherFather(moth, fath, mmMt)
        ww, Fatrs, meanM = pickMotherSizeGroups(rMom, rDad, rMmMt)

        # Number of entries in every father group, kept as floats.
        bSize = np.array([len(group) for group in Fatrs], dtype=float)
        deltas = [np.nanmean(Fatrs[k] - meanM[k]) for k, _ in enumerate(ww)]
        justPlotDeviantFromMeanFather(ww, deltas, bSize, dirName)
        try:
            stacked = np.vstack((ww, np.array(deltas), bSize))
            xx = np.transpose(stacked)
        except Exception:
            print(" - failed to stack the data! Check if the",
                  "input file sizes (e.g. line numbers) are OK.")
            continue
        results.append(xx)
        print(" - done.")
    return results
def serchTheDirs(FILE, template, dirr=os.getcwd()):
    """Walk the directory tree in search of model runs and process each
    simulation individually.

    Every file under `dirr` whose path equals os.path.join(directory, FILE)
    is treated as the data file of one run. The run is used when the
    'InputParameters.json' file in the same directory can be loaded and
    passes ppma.compareParams() against `template`; its data file is then
    read with processDataOneFile() and plotted with plotTheTimes().

    Returns the gathered results as a Numpy array of dtype `outType`,
    sorted by the fields 'VAR', 'VARX', 'MRCA_time', 'maxMutNumb' and
    'numOfGenes'. Each record is built from the tuple (VAR, VARX, DATA[6],
    DATA[0].shape[1], DATA[0].shape[0], directory name). When nothing was
    gathered an error message is printed and None is returned. Runs whose
    parameters or data cannot be loaded are reported on stdout and skipped.
    """
    varIndex = ppma.lookForVARinList(template)
    sortKeys = ['VAR', 'VARX', 'MRCA_time', 'maxMutNumb', 'numOfGenes']
    rows = []
    for dirName, subdirList, fileList in os.walk(dirr):
        for file in fileList:
            filepath = os.path.join(dirName, file)
            if filepath != os.path.join(dirName, FILE):
                continue
            paramPath = os.path.join(dirName, "InputParameters.json")
            try:
                paramList = ppma.loadParamSettings(paramPath)
            except Exception:
                print("Cannot load the parameters. in dir", dirName)
                continue
            if not ppma.compareParams(template, paramList):
                continue
            try:
                DATA = processDataOneFile(filepath)
            except Exception:
                print("Cannot load the data. in dir", dirName)
                continue
            plotTheTimes(DATA[0], DATA[1], DATA[2], DATA[3], DATA[4],
                         dirName)
            var = float(paramList[varIndex['VAR']])
            varx = float(paramList[varIndex['VARX']])
            firstTable = DATA[0]
            rows.append((var, varx, DATA[6], firstTable.shape[1],
                         firstTable.shape[0], dirName))
    if not rows:
        print("ERROR in serchTheDirs(): output array is empty")
        return None
    return np.sort(np.array(rows, dtype=outType), order=sortKeys)