def dataGeneratorMain(args):
    """Create the data-set directory for a task and fill the empty files with samples.

    The work is done in three stages:

    1. A bash script (run through ``runScript``) creates the data-set
       directory and one empty file per requested power-of-two size.
    2. Every still-writable file in that directory is filled with
       ``2 ** <file name>`` rows of the form ``y x1 x2 ... xn``, where the
       ``x`` values are drawn with ``uniform(0, 10)`` and ``y`` comes from the
       function returned by ``trainingFunc(args.function)``.
    3. A second bash script makes everything in the directory read-only
       (``chmod 444``), so files that are already populated are skipped by
       stage 2 on later runs.

    Parameters:
        args: parsed command-line namespace. Attributes read here:
            ``function`` (task name), ``data_size`` (largest power of two),
            and either ``struct`` (when ``function == 'ANN-bp'``; first and
            last entries are used as input/output sizes) or
            ``input_size`` / ``output_size`` (all other tasks).

    Raises:
        AssertionError: if the input size is outside 1..10, the data-set size
            is outside 3..23, or a writable data file has an unexpected size
            name or is not empty.
        NotImplementedError: if a data file has to be populated for the
            ``'ANN-bp'`` task, for which no target generation exists here.

    NOTE(review): ``trainingDirName`` and ``dataSetSizeStart`` are not defined
    in this function; they are presumably module-level settings -- confirm.
    """
    taskName = args.function
    isAnnBp = args.function == 'ANN-bp'
    if isAnnBp:
        inputSize = args.struct[0]
        outputSize = args.struct[-1]
    else:
        inputSize = args.input_size
        outputSize = args.output_size
    dataSetSize = args.data_size
    assert inputSize > 0 and inputSize <= 10
    assert dataSetSize <= 23 and dataSetSize >= 3

    #################################
    #  format the data set dir      #
    #################################
    try:
        # shell script to be run as subprocess
        scriptFormatDir = """
        #!/bin/bash
        set -eu
        # setup data set dir
        orig_dir="`pwd`/"
        dir_name={}
        task_name="$0"
        ip_size="$1"
        set_size_pow="$2"
        set_size_pow_start=0"$3"
        if [ ! -d $orig_dir$dir_name ]
        then
            mkdir $orig_dir$dir_name
            echo "created dir: $orig_dir$dir_name"
        fi
        if [ ! -d $orig_dir$dir_name$task_name ]
        then
            mkdir $orig_dir$dir_name$task_name
            echo "created dir: $orig_dir$dir_name$task_name"
        fi
        echo "input size: "$ip_size
        echo "set pow size: "$set_size_pow
        task_dir=$orig_dir$dir_name$task_name
        ##################
        cd $task_dir     #
        ##################
        existing_set="`ls`"
        for size_pow in $(eval echo "{{$set_size_pow_start..$set_size_pow}}")
        do
            file_name=$size_pow
            if [ "`find . -maxdepth 1 -type f -printf '%f\n' | grep $file_name`" ]
            then
                echo "data set already exists: "$file_name
            else
                touch $file_name
                echo "created data set file: "$file_name
            fi
        done
        ##################
        cd $orig_dir     #
        ##################
        """.format(trainingDirName)
        # args to the script:
        #   $0: name for the training set (e.g.: sin: the data set should
        #       show characteristic of sin function)
        #   $1: number of input to the sigmoid neuron
        #   $2: indicate number of tuples in the data-set:
        #       range: 3 ~ 23 (enforced by the assert above)
        #       e.g.:
        #           suppose user provide 10, then data set with size
        #           2^3, 2^4, 2^5, ... 2^10 will be generated,
        #           and be stored as separate files in the corresponding folder
        #   $3: indicate min number of tuples in the data-set
        if not isAnnBp:
            taskName += '_in-{}-out-{}'.format(inputSize, outputSize)
        stdout, stderr = runScript(
            scriptFormatDir,
            [taskName, str(inputSize), str(dataSetSize), str(dataSetSizeStart)])
        print("============================")
        print("script msg: \n{}".format(stdout.decode('ascii')))
        print("============================")
    except ScriptException as se:
        # best effort: report the script failure and carry on
        print(se)

    #########################################
    #  generate data and write to file      #
    #########################################
    genY = trainingFunc(args.function)
    taskDir = trainingDirName + taskName
    for dFile in os.listdir(taskDir):
        dFileFull = taskDir + '/' + dFile
        # all old files should already be read-only, so a file with no write
        # bit at all (0o222 == u+w | g+w | o+w) is treated as already populated.
        # i don't use os.access(file, os.W_OK) here, as it will always be true
        # if you launch python3 as root in ec2
        if not (os.stat(dFileFull).st_mode & 0o222):
            continue
        if 'conf' in dFile or 'ignore' in dFile:
            continue
        # the file name is the power of two giving the number of rows
        dSize = int(dFile)
        assert dSize >= dataSetSizeStart and dSize <= dataSetSize
        assert os.stat(dFileFull).st_size == 0
        if isAnnBp:
            # The original code left the target list as None for this task and
            # then crashed with "NoneType + list"; fail with a clear message
            # instead, and before the file is opened.
            raise NotImplementedError(
                "data generation for 'ANN-bp' is not implemented "
                "(cannot populate {})".format(dFileFull))
        numEntry = pow(2, dSize)
        # the context manager closes the file even if generation fails mid-way
        with open(dFileFull, 'w') as f:
            for _ in range(numEntry):
                # randomly generate input list, within range 0 ~ 10
                xList = [uniform(0, 10) for _k in range(inputSize)]
                yList = [genY(xList)]
                # one row per sample: target first, then the inputs
                dataList = yList + xList
                print(' '.join(str(v) for v in dataList), file=f)

    ##########################################################
    #  always enforce read-only policy for data set dir      #
    ##########################################################
    try:
        scriptChmod = """
        #!/bin/bash
        orig_dir="`pwd`/"
        dir_name={}
        task_name="$0"
        chmod 444 $orig_dir$dir_name$task_name/*
        """.format(trainingDirName)
        stdout, stderr = runScript(scriptChmod, [taskName])
    except ScriptException as se:
        print(se)
# e.g.: # suppose user provide 10, then data set with size 2^3, 2^4, 2^5, ... 2^10 will be generated, # and be stored as separate files in the corresponding folder # $3: indicate min number of tuples in the data-set stdout, stderr = runScript(scriptFormatDir, [taskName, str(inputSize), str(dataSetSize), str(dataSetSizeStart)]) print("============================") print("script msg: \n" + str(stdout)) print("============================") except ScriptException as se: print(se) ######################################### # generate data and write to file # ######################################### genY = trainingFunc(taskName) for dFile in os.listdir(trainingDirName + taskName): dFileFull = trainingDirName + taskName + '/' + dFile # all old files should already be read-only if not os.access(dFileFull, os.W_OK): continue ipSize, dSize = dFile.split("_") ipSize = int(ipSize) dSize = int(dSize) assert ipSize == inputSize assert dSize >= dataSetSizeStart and dSize <= dataSetSize assert os.stat(dFileFull).st_size == 0 f = open(dFileFull, 'w') numEntry = pow(2, dSize) for i in range(0, numEntry):