示例#1
0
def shapeWrfIOfile(fileList, fileExt, keyWord):
    """Collect per-domain values from log lines that mention *keyWord*.

    Only paths in *fileList* ending with *fileExt* are read.  A line is
    considered when it contains both *keyWord* and the word "domain"; the
    text after the first "domain" is expected to look like
    ``<domain id>: <number>``.

    Args:
        fileList: iterable of file paths.
        fileExt: suffix a path must end with to be read.
        keyWord: substring a line must contain to be parsed.

    Returns:
        ``{filePath: {domainId: [value, ...]}}``.  Domain ids and values are
        kept as the strings captured from the file, in file order.  Files
        that yield no value are left out of the result.
    """
    dataKey = "domain"
    # Compiled once instead of once per matching line.
    # TODO (from the original author): replace this regex with a
    # word-segmentation based parser; the number of domains is not checked.
    pattern = re.compile(r'\D*(\d+):\D*(\d+\.*\d*)')
    wrfIODict = {}
    for filePath in fileList:
        if not filePath.endswith(fileExt):
            continue
        domain = {}
        # The context manager closes the handle; it used to be left open.
        with open(filePath) as fr:
            for line in fr:
                keyPos = line.find(dataKey)
                if keyWord not in line or keyPos < 0:
                    continue
                tmpStr = line[keyPos + len(dataKey):]
                match = pattern.findall(tmpStr)
                if match and len(match[0]) == 2:
                    domainId, value = match[0]
                    domain.setdefault(domainId, []).append(value)
                else:
                    print("%s  %d: Error Pattern [%s]" %
                          (myDebug.file(), myDebug.line(), line))
        if domain:
            wrfIODict[filePath] = domain
    return wrfIODict
示例#2
0
def fillComputList(domainNum, writeNum, taskNum, nTaskX, nTaskY, compInfo,
                   currentDir):
    """Build one row per task from the profiling values of a single run.

    Each row is ``[taskNum, nTaskX, nTaskY, writeNum / domainNum - 1]``
    followed by the items of ``compInfo[i]`` for task index ``i``.

    The inputs are rejected (a diagnostic is printed and an empty list is
    returned) when any count is zero, ``writeNum`` is not a multiple of
    ``domainNum``, a task-grid size is still ``np.inf``, or ``compInfo`` is
    empty.  *currentDir* is only used in that diagnostic.
    """
    # Order matters: domainNum is tested for zero before it is used as a
    # modulus.
    profileIncomplete = (domainNum == 0 or writeNum == 0
                         or writeNum % domainNum != 0 or taskNum == 0
                         or nTaskX == np.inf or nTaskY == np.inf
                         or not compInfo)
    if profileIncomplete:
        print(
            "%s  %d: Failed read profiling information in %s [Domanin:%d   WriteNum:%d   TaskNum:%d]"
            % (myDebug.file(), myDebug.line(), currentDir, domainNum, writeNum,
               taskNum))
        return []

    # writeNum / domainNum is the number of writes per domain; per the
    # original author the first write is special and is not counted.
    rowHead = [taskNum, nTaskX, nTaskY, writeNum / domainNum - 1]
    return [rowHead + list(compInfo[rank]) for rank in range(taskNum)]
示例#3
0
from __future__ import division
import shapeFile as sF
import random
from numpy import *
import math
import matplotlib.pyplot as plt
import writeTrainingResult as wTR
import regression 
import os.path
import myUtils
import myDebug

# Load the training profiles found under Data/train (path is relative to the
# working directory).
files = sF.listAllFiles("Data/train")
trainDataList = sF.shapeWrfComputingfile(files)
if not trainDataList:
    # Nothing usable was parsed: report where we looked and stop with a
    # non-zero status.
    print("%s  %d: Get profiling information from %s failed." %
          (myDebug.file(), myDebug.line(), files))
    # SystemExit is raised directly: the bare exit() helper is installed by
    # the site module for interactive use and is not guaranteed to exist.
    raise SystemExit(1)

#for i in range(0,2):#0 for task 0 and 1 for other tasks
#The list is separated into two parts: [0] holds all task 0 job information, [1] holds all other tasks
#generate training dataset
#task 0 first
'''
print "%d, %d, %d" % (len(trainDataList), len(trainDataList[0]), len(trainDataList[1]))
print trainDataList
print "==========================="
exit(1)
'''
# Candidate "k" values (their exact role is not visible in this part of the
# script -- presumably a regression parameter, TODO confirm).
# Alternative value sets noted by the original author:
#   [0.6, 0.8, 0.7, 0.9, 1, 0.5, 0.4, 0.2, 1.1, 1.2, 1.3]
#   [100, 80, 60, 40, 28, 10, 3, 0.7, 0.3, 0.07, 0.01]
bestKList = [
    28, 10, 3,
    0.7, 0.3, 0.07, 0.01,
    40, 60, 80, 100,
]
# Hour values and task sizes named for prediction; task sizes run from 40 to
# 420 in steps of 20.
predictHourList = [6, 180]
predictTaskSizeList = range(40, 440, 20)
示例#4
0
import shapeFile as sF
import random
from numpy import *
import math
import matplotlib.pyplot as plt
import writeTrainingResult as wTR
import regression
import os.path
import myUtils
import myDebug

# Load the training profiles.
files = sF.listAllFiles("/home/yu/workspace/Data/train")
trainDataList = sF.shapeWrfComputingfile(files)
if not trainDataList:
    print("%s  %d: Get profiling information from %s failed." %
          (myDebug.file(), myDebug.line(), files))
    # SystemExit is raised directly: the bare exit() helper is installed by
    # the site module for interactive use and is not guaranteed to exist.
    raise SystemExit(1)
# Load the test profiles the same way; both sets are checked together later.
files = sF.listAllFiles("/home/yu/workspace/Data/test")
testDataList = sF.shapeWrfComputingfile(files)
if not testDataList:
    # Report the list that was just scanned.  This used to reference an
    # undefined name (testFile), so the failure path died with NameError
    # before printing anything.
    print("%s  %d: Get profiling information from %s failed." %
          (myDebug.file(), myDebug.line(), files))
    raise SystemExit(1)
#for i in range(0,2):#0 for task 0 and 1 for other tasks
#The list is separated into two parts: [0] holds all task 0 job information, [1] holds all other tasks
#generate training dataset
#task 0 first
'''
print "%d, %d, %d" % (len(trainDataList), len(trainDataList[0]), len(trainDataList[1]))
print trainDataList
示例#5
0
def _parseWrfOutFile(filePath, nTaskX, nTaskY):
    """Scan one ``.0000`` log for output-write records and the task grid.

    Args:
        filePath: path of the log file to read.
        nTaskX: value to keep when the file has no parsable "ntasks" line.
        nTaskY: value to keep when the file has no parsable "ntasks" line.

    Returns:
        ``(domainNum, writeNum, nTaskX, nTaskY)``: the largest domain id seen
        on a line containing both "wrfout" and "domain", the number of such
        lines that parsed, and the task counts from the last "ntasks" line
        that parsed (otherwise the values passed in).
    """
    keyWord = "wrfout"
    dataKey = "domain"
    # "<domain id>: <number>" following the word "domain".
    # TODO (from the original author): replace with word segmentation.
    writePattern = re.compile(r'\D*(\d+):\D*(\d+\.*\d*)')
    # Example line: "Ntasks in X  5 , ntasks in Y  8"
    ntasksPattern = re.compile(r'\D*(\d+)\ *,\D*(\d+)')

    domainNum = 0
    writeNum = 0
    with open(filePath) as fr:
        for line in fr:
            keyPos = line.find(dataKey)
            if keyWord in line and keyPos >= 0:
                match = writePattern.search(line[keyPos + len(dataKey):])
                if match:
                    domainNum = max(domainNum, int(match.group(1)))
                    writeNum += 1
                else:
                    # A non-matching line used to raise IndexError here.
                    print("%s  %d: Error Pattern [%s]" %
                          (myDebug.file(), myDebug.line(), line))
            if "ntasks" in line.lower():
                match = ntasksPattern.search(line)
                if match:
                    nTaskX = int(match.group(1))
                    nTaskY = int(match.group(2))
                else:
                    print("%s  %d: Error Pattern [%s]" %
                          (myDebug.file(), myDebug.line(), line))
    return domainNum, writeNum, nTaskX, nTaskY


def _parseWrfProfileFile(filePath, taskNumMark):
    """Read the task count and per-task timing table from one ``.0`` file.

    Args:
        filePath: path of the profile file to read.
        taskNumMark: substring identifying the line that carries the rank
            and the task count.

    Returns:
        ``(taskNum, compInfo)``.  ``taskNum`` is the second number on the
        *taskNumMark* line when the first number is 0 (otherwise it stays
        0).  ``compInfo`` has one ``[field2 - field1, field1]`` pair per
        table row, which the original comments describe as computing time
        and communication time.
    """
    rankPattern = re.compile(r'\D*(\d+)\D*(\d+):')
    taskNum = 0
    compInfo = []
    inTable = False
    with open(filePath) as fr:
        for line in fr:
            if inTable:
                # Rows after the header: whitespace separated, with
                # comm(s) in field 1 and elapsed(s) in field 2.
                fields = line.split()
                elapsed = float(fields[2])
                comm = float(fields[1])
                compInfo.append([elapsed - comm, comm])
                if len(compInfo) == taskNum:
                    inTable = False  # all expected rows were read
                continue

            if taskNumMark in line:
                match = rankPattern.search(line)
                if match is None:
                    # A non-matching line used to raise IndexError here.
                    print("%s  %d: Error Pattern [%s]" %
                          (myDebug.file(), myDebug.line(), line))
                elif int(match.group(1)) != 0:
                    # Only a record for rank 0 is expected (per the
                    # original author, based on the logs seen so far).
                    # The rank is converted to int because %d rejects the
                    # captured string.
                    print("%s  %d: Find a not expected record [%s] [%d]" %
                          (myDebug.file(), myDebug.line(), line,
                           int(match.group(1))))
                else:
                    taskNum = int(match.group(2))
            if ("taskid" in line and "comm(s)" in line
                    and "elapsed(s)" in line):
                # Table header found: timing rows start on the next line.
                inTable = True
    return taskNum, compInfo


def shapeWrfComputingfile(fileList):
    """Assemble the per-task profiling rows for every directory in *fileList*.

    Files are processed in the given order.  A file with extension ``.0000``
    provides the domain count, write count and task grid; a file with
    extension ``.0`` provides the task count and per-task timings.  Whenever
    the directory of the current file differs from the previous one (and
    once more after the last file) the collected values are turned into rows
    by ``fillComputList`` and merged with ``fillwrfComputeList``.

    Args:
        fileList: iterable of file paths; files of one directory are
            expected to be adjacent.

    Returns:
        The accumulated result of ``fillwrfComputeList`` (an empty list when
        *fileList* is empty), or ``None`` as soon as one directory yields no
        rows.
    """
    wrfOutFileExt = ".0000"
    wrfProfileExt = ".0"
    taskNumMark = "Data for MPI rank"
    wrfComputeList = []

    # Values collected for the directory currently being read.
    # NOTE(review): they are not reset when the directory changes, so a
    # directory missing one of the two files reuses the previous values.
    domainNum = 0
    writeNum = 0
    taskNum = 0
    nTaskX = np.inf
    nTaskY = np.inf
    compInfo = []
    currentDir = ""
    filePath = ""

    for filePath in fileList:
        fileDir = os.path.dirname(filePath)
        if currentDir != "" and currentDir != fileDir:
            # Directory changed: store what was collected for the previous
            # one.  currentDir is passed so that a failure message names
            # the directory the data came from, not the one being entered.
            tmpList = fillComputList(domainNum, writeNum, taskNum, nTaskX,
                                     nTaskY, compInfo, currentDir)
            if not tmpList:
                return None
            wrfComputeList = fillwrfComputeList(wrfComputeList, tmpList)
        currentDir = fileDir

        fileExt = os.path.splitext(filePath)[1]
        if fileExt == wrfOutFileExt:
            domainNum, writeNum, nTaskX, nTaskY = _parseWrfOutFile(
                filePath, nTaskX, nTaskY)
        elif fileExt == wrfProfileExt:
            taskNum, compInfo = _parseWrfProfileFile(filePath, taskNumMark)

    # Store the data of the last directory.
    if filePath != "":
        tmpList = fillComputList(domainNum, writeNum, taskNum, nTaskX, nTaskY,
                                 compInfo, os.path.dirname(filePath))
        if not tmpList:
            return None
        wrfComputeList = fillwrfComputeList(wrfComputeList, tmpList)
    return wrfComputeList
示例#6
0
import shapeFile as sF
import random
from numpy import *
import math
import matplotlib.pyplot as plt
import writeTrainingResult as wTR
import regression
import os.path
import myUtils
import myDebug

# Load the training profiles found under Data/train (path is relative to the
# working directory).
files = sF.listAllFiles("Data/train")
trainDataList = sF.shapeWrfComputingfile(files)
if not trainDataList:
    # Nothing usable was parsed: report where we looked and stop with a
    # non-zero status.
    print("%s  %d: Get profiling information from %s failed." %
          (myDebug.file(), myDebug.line(), files))
    # SystemExit is raised directly: the bare exit() helper is installed by
    # the site module for interactive use and is not guaranteed to exist.
    raise SystemExit(1)

#for i in range(0,2):#0 for task 0 and 1 for other tasks
#The list is separated into two parts: [0] holds all task 0 job information, [1] holds all other tasks
#generate training dataset
#task 0 first
'''
print "%d, %d, %d" % (len(trainDataList), len(trainDataList[0]), len(trainDataList[1]))
print trainDataList
print "==========================="
exit(1)
'''
# Candidate "k" values (their exact role is not visible in this part of the
# script -- presumably a regression parameter, TODO confirm).
# Alternative value sets noted by the original author:
#   [0.6, 0.8, 0.7, 0.9, 1, 0.5, 0.4, 0.2, 1.1, 1.2, 1.3]
#   [100, 80, 60, 40, 28, 10, 3, 0.7, 0.3, 0.07, 0.01]
bestKList = [
    28, 10, 3,
    0.7, 0.3, 0.07, 0.01,
    40, 60, 80, 100,
]