def config_marge(user_data, marge_output_dir):
    user_path = user_data['user_path']
    marge_input_dir = os.path.join(user_path, 'upload')
    # back up the original config.json
    config_file = os.path.join(marge_output_dir, 'config.json')
    config_file_bak = os.path.join(marge_output_dir, 'config.json.bak')
    shutil.copyfile(config_file, config_file_bak)
    with open(config_file) as data_file:
        data = json.load(data_file)
    # tool paths in MARGE/config.json
    data["tools"]["MACS2"] = MACS2_path
    data["tools"]["bedClip"] = bedClip_path
    data["tools"]["bedGraphToBigWig"] = bedGraphToBigWig_path
    data["tools"]["bigWigAverageOverBed"] = bigWigAverageOverBed_path
    data["tools"]["bigWigSummary"] = bigWigSummary_path
    data["ASSEMBLY"] = user_data["assembly"]
    data["MARGEdir"] = os.path.join(MARGE_DIR, "marge")
    data["REFdir"] = os.path.join(MARGE_LIB_DIR, data["ASSEMBLY"] + "_all_reference")
    if user_data['dataType'] == "ChIP-seq":
        data["EXPSDIR"] = ""
        data["EXPS"] = ""
        data["EXPTYPE"] = ""
        data["ID"] = ""
        data["SAMPLESDIR"] = marge_input_dir
        data["SAMPLES"] = utils.get_files_in_dir("ChIP", data["SAMPLESDIR"])
    elif user_data['dataType'] == "Geneset":
        data["SAMPLESDIR"] = ""
        data["SAMPLES"] = ""
        data["EXPSDIR"] = marge_input_dir
        data["EXPS"] = utils.get_files_in_dir("GeneList", data["EXPSDIR"])
        # Gene_Only & Gene_Response
        data["EXPTYPE"] = user_data["gene_exp_type"]
        # GeneSymbol & RefSeq
        data["ID"] = user_data["gene_id_type"]
    elif user_data['dataType'] == "Both":
        data["SAMPLESDIR"] = marge_input_dir
        data["EXPSDIR"] = marge_input_dir
        data["SAMPLES"] = utils.get_files_in_dir("ChIP", data["SAMPLESDIR"])
        data["EXPS"] = utils.get_files_in_dir("GeneList", data["EXPSDIR"])
        # Gene_Only & Gene_Response
        data["EXPTYPE"] = user_data["gene_exp_type"]
        # GeneSymbol & RefSeq
        data["ID"] = user_data["gene_id_type"]
    with open(config_file, 'w') as data_file:
        json.dump(data, data_file)
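# A minimal usage sketch for config_marge, assuming the module-level constants
# (MACS2_path, MARGE_DIR, MARGE_LIB_DIR, ...) are defined elsewhere. The user_data
# keys below simply mirror the ones the function reads; the paths and values are
# placeholders, not taken from the original project.
user_data = {
    'user_path': '/data/users/alice',   # must contain an 'upload' subdirectory
    'assembly': 'hg38',
    'dataType': 'Both',                 # "ChIP-seq", "Geneset", or "Both"
    'gene_exp_type': 'Gene_Only',       # Gene_Only or Gene_Response
    'gene_id_type': 'GeneSymbol',       # GeneSymbol or RefSeq
}
config_marge(user_data, '/data/users/alice/marge_output')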
def retrieve_tweets(dataset_path, log_path, index_path, apis, from_main=False, main_index=""):
    if from_main:
        Split_Index_Per_Day(main_index, index_path)
    files = get_files_in_dir(index_path)
    api = apis[0]
    num = len(apis)
    n = len(files)
    for i in range(0, n):
        file = random.choice(files)
        files.remove(file)
        if api == apis[-1]:
            print("Wait for a while")
            time.sleep(240)
            api = apis[i % num]
        else:
            api = apis[i % num]
        print("----------------\n", join(index_path, file))
        build_dataset_file(join(index_path, file), log_path, dataset_path, api)
def evaluate(train=False):
    dataset_path = 'datasets/dataset'
    model_path = 'models/model_expectation_bd'
    test = '2012-10-29.txt'
    dataset_files = get_files_in_dir(dataset_path)
    dataset_files.remove(test)
    features_burst = [
        'Storm', 'Episode', 'Obama', 'Hurricane', 'Sandy', 'Game', 'Football',
        'Giants', 'Cowboys', 'Romney', 'Debat', 'Frankenstorm', 'Halloween',
        'TheWalkingDead', 'WalkingDead', 'Walking', 'Dead', '#Sandy',
        '#Hurricanesandy'
    ]
    if train:
        for file in dataset_files:
            print("Training on file: " + file + "...")
            run_burst_detection(join(dataset_path, file), model_path)
    get_and_evaluate(join(dataset_path, test), model_path, features_burst)
    # run_burst_detection(join(dataset_path, test), model_path, train=False)


# evaluate()
def diagnosis_by_fuzzing_entropy(program, fuzzing_dir, entropy_threshold, ratio_min, ratio_max,
                                 fuzzed_files_per_iter=10, stop_iter=500, pre_fuzz_count=0):
    seedfiles_dir = os.path.join(fuzzing_dir, consts.SEEDFILES)
    matrix_file = None
    for granularity in [DLL_GRANULARITY, FUNCTION_GRANULARITY, DOMINATOR_GRANULARITY, XREF_GRANULARITY]:
        instances_dir = utils.clear_dir(os.path.join(fuzzing_dir, consts.INSTANCES, granularity,
                                                     str(entropy_threshold)))
        current_entropy = float('inf')
        previous_entropy = float('-inf')
        tracing_data = generate_tracing_data(granularity, matrix_file)
        matrix_file = os.path.join(fuzzing_dir,
                                   consts.FUZZING_MATRIX.format("{0}_{1}".format(granularity, str(entropy_threshold))))
        if os.path.exists(matrix_file):
            os.remove(matrix_file)
        working_dir = utils.clear_dir(os.path.join(fuzzing_dir, consts.WORKING_DIR, granularity,
                                                   str(entropy_threshold)))
        diagnosis_result = os.path.join(fuzzing_dir,
                                        consts.DLL_DIAGNOSIS_RESULT if granularity == DLL_GRANULARITY
                                        else consts.FUNCTION_DIAGNOSIS_RESULT)
        # trace each seed file once before fuzzing
        for seed_example in utils.get_files_in_dir(seedfiles_dir):
            shutil.copy2(seed_example, instances_dir)
            instance_path = os.path.join(instances_dir, os.path.basename(seed_example))
            run_debugger_on_files(program, [instance_path], working_dir, config, granularity, tracing_data)
        fuzzed_files = fuzz_project_dir(seedfiles_dir, instances_dir, pre_fuzz_count, ratio_min, ratio_max)
        run_debugger_on_files(program, fuzzed_files, working_dir, config, granularity, tracing_data)
        iter_ind = 0
        # keep fuzzing until the component entropy converges (or the iteration cap is hit)
        while abs(current_entropy - previous_entropy) > entropy_threshold:
            fuzzed_files = fuzz_project_dir(seedfiles_dir, instances_dir, fuzzed_files_per_iter,
                                            ratio_min, ratio_max)
            run_debugger_on_files(program, fuzzed_files, working_dir, config, granularity, tracing_data)
            diagnoser.campaign_matrix.create_matrix_for_dir(working_dir, diagnosis_result, matrix_file)
            sfl_matrix = readPlanningFile(matrix_file)
            sfl_matrix.diagnose()
            results = Diagnosis_Results(sfl_matrix.diagnoses, sfl_matrix.initial_tests, sfl_matrix.error)
            previous_entropy = current_entropy
            current_entropy = results.component_entropy
            iter_ind = iter_ind + 1
            if iter_ind > stop_iter:
                break
def fuzz_project_dir(seedfiles_dir, output_dir, iterations, ratio_min=0.0, ratio_max=1.0):
    fuzzed_files = []
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    for seed_example in utils.get_files_in_dir(seedfiles_dir):
        fuzzed_files.extend(fuzz_seed_file(seed_example, output_dir, iterations, ratio_min, ratio_max))
    return fuzzed_files
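# fuzz_seed_file is not shown in this collection. One plausible sketch, under the
# assumption that ratio_min/ratio_max bound the fraction of bytes mutated per fuzzed
# copy; this is an illustration, not the project's actual mutator.
import os
import random


def fuzz_seed_file(seed_path, output_dir, iterations, ratio_min=0.0, ratio_max=1.0):
    fuzzed = []
    with open(seed_path, 'rb') as f:
        data = bytearray(f.read())
    for i in range(iterations):
        mutated = bytearray(data)
        # mutate a random fraction of the bytes, drawn from [ratio_min, ratio_max]
        n_bytes = int(len(mutated) * random.uniform(ratio_min, ratio_max))
        for pos in random.sample(range(len(mutated)), n_bytes):
            mutated[pos] = random.randrange(256)
        out_path = os.path.join(output_dir, "{}_{}".format(i, os.path.basename(seed_path)))
        with open(out_path, 'wb') as f:
            f.write(mutated)
        fuzzed.append(out_path)
    return fuzzed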
def extract_MusicalFeatures(folder, descriptors, filename):
    file_count = 0
    segment_files = get_files_in_dir(folder)
    print(segment_files)
    data_file = os.path.join(folder, filename)
    with open(data_file, 'w') as writer:
        # add the column names as the first line of the csv
        line2write = ','.join(descriptors + ['instrument']).replace('lowlevel.', '') + '\n'
        writer.write(line2write)
        for file in segment_files:
            if '.wav' in file:
                file_count += 1
                if file_count % 20 == 0:
                    # print the name of a file every 20 files
                    print(file_count, "files processed, current file: ", file)
                features, features_frames = ess.MusicExtractor(
                    lowlevelSilentFrames='drop',
                    lowlevelFrameSize=2048,
                    lowlevelHopSize=1024,
                    lowlevelStats=['mean', 'stdev'])(file)
                selected_features = [features[descriptor] for descriptor in descriptors]
                # class information comes from the file name
                instrument = file.split('/')[-1].split('_')[1].lower()[:-4]
                line2write = str(selected_features)[1:-1] + ',' + instrument + '\n'
                writer.write(line2write)
    print("A total of ", file_count, "files processed")
def process_song_file(cur, dirpath):
    """Process the song files and insert song and artist data."""
    filepaths = get_files_in_dir('data/song_data')
    df = pd.concat([pd.read_json(f, lines=True) for f in filepaths], ignore_index=True)
    insert_songs(cur, df)
    insert_artists(cur, df)
def count_tweets(dataset_path, log_path):
    files = get_files_in_dir(dataset_path)
    N_tweets = 0
    for file in files:
        file_ds = open(join(dataset_path, file), mode='r')
        ds = json.load(file_ds)
        N_tweets += len(ds)
        file_ds.close()
    print('Collected: ' + str(N_tweets))
    files = get_files_in_dir(log_path)
    N_tweets = 0
    for file in files:
        file_ds = open(join(log_path, file), mode='r')
        ds = json.load(file_ds)
        N_tweets += ds
        file_ds.close()
    print('Listed: ' + str(N_tweets))
def process_log_file(cur, dirpath):
    """Process the log files and insert time, user and songplay data."""
    filepaths = get_files_in_dir('data/log_data')
    df = pd.concat([pd.read_json(f, lines=True) for f in filepaths], ignore_index=True)
    df = df[df['page'] == 'NextSong']
    insert_time(cur, df)
    insert_user(cur, df)
    insert_songplay(cur, df)
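# The two loaders above assume a get_files_in_dir helper that returns every JSON file
# path under a directory; its implementation is not shown here, and other snippets in
# this collection clearly use helpers with different signatures. A minimal sketch under
# that assumption only:
import os


def get_files_in_dir(dirpath, suffix='.json'):
    """Recursively collect file paths under dirpath, optionally filtered by suffix."""
    matches = []
    for root, _dirs, names in os.walk(dirpath):
        for name in names:
            if suffix is None or name.endswith(suffix):
                matches.append(os.path.join(root, name))
    return matches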
import argparse

import utils

parser = argparse.ArgumentParser(
    description='Collect randomly sampled photos as a dataset for CrowdFlower.')
parser.add_argument('-d', help='The image data path.')
parser.add_argument('-o', help='The output file.')
parser.add_argument('-n', help='The number of sampled photos.')
parser.add_argument('-m', help='The output mode.')
parser.add_argument('-u', help='The URL prefix.')
args = parser.parse_args()


def write_file(filename, files, prefix, mode='a'):
    f = open(filename, mode)
    if mode == 'a':
        f.write("\n" + prefix)
    if mode == 'w':
        f.write(prefix)
    files.tofile(f, sep="\n" + prefix)
    f.close()


(files, fullpath_files) = utils.get_files_in_dir(args.d, True)
if args.n is not None:
    files = files[0:min(len(files), int(args.n))]
if args.m is None:
    args.m = 'a'
write_file(args.o, files, args.u, args.m)
def run_do_rest(do_convert_fits, do_photometry, do_match, do_compstars_flag, do_aperture_search,
                do_lightcurve_flag, do_pos, do_ml, do_lightcurve_plot, do_phase_diagram,
                do_field_charting, do_reporting, args):
    if do_convert_fits:
        logging.info("Converting fits...")
        write_convert_fits()
    # either read the previous reference frame or calculate a new one
    _, _, reference_frame_index = do_calibration.get_reference_frame(
        100, do_calibration.select_reference_frame_jpeg)
    logging.info(f"reference header is {settings.reference_header}")
    # get wcs model from the reference header. Used in writing world positions and field charts
    wcs = do_calibration.get_wcs(settings.reference_header)
    apertures = None
    aperture = None
    apertureidx = None
    if do_photometry:
        logging.info(f"Writing photometry with config file {settings.conf_phot}...")
        write_photometry(config_file=settings.conf_phot)
    if do_match:
        logging.info("Performing matching...")
        pool = mp.Pool(init.nr_threads, maxtasksperchild=100)
        ref_frame = do_calibration.find_reference_photometry(reference_frame_index)
        file_list = utils.get_files_in_dir(settings.photometrydir)
        file_list.sort()
        func = partial(write_match, base_photometry_file=ref_frame)
        logging.info(f"Writing matches for {len(file_list)} stars with reference frame {ref_frame}")
        trash_and_recreate_dir(settings.matchedphotometrydir)
        for _ in tqdm.tqdm(pool.imap_unordered(func, file_list, 10), total=len(file_list)):
            pass
    if do_aperture_search:
        logging.info("Searching best aperture...")
        # getting aperture
        stddevs = None
        counts = None
        # stddevs, _, apertures, apertureidx, _, _, counts = do_aperture.main(the_dir=settings.matchedphotometrydir, percentage=init.aperture_find_percentage)
        apertures = [x for x in do_aperture.get_apertures()]
        apertureidx = np.abs(np.array(apertures) - init.aperture).argmin()
        aperture = apertures[apertureidx]
        # saving all calculated data
        np.savetxt(settings.basedir + "apertures.txt", apertures, fmt='%.2f', delimiter=';')
        np.savetxt(settings.basedir + "apertureidx_best.txt", [apertureidx], fmt='%d')
        logging.debug("Done writing aperture search results")
    else:
        logging.info("Loading best aperture...")
        apertures, apertureidx, aperture = reading.read_aperture()
    logging.info(f"aperture: {aperture}, apertures:{apertures}")
    if do_pos:
        logging.info("Writing positions of all stars on the reference image...")
        reference_matched = do_calibration.find_reference_matched(reference_frame_index)
        logging.info(f"reference match is {reference_matched}")
        do_write_pos(init.star_list, aperture, reference_matched, is_resume=False)
        do_world_pos(wcs, init.star_list, reference_frame_index)
    if do_ml:
        logging.info("Doing ML detection of variable stars...")
        import do_upsilon  # do it here because it takes some time at startup
        do_upsilon.run(init.star_list)
    if do_lightcurve_flag or do_field_charting:
        logging.info("Loading photometry...")
        jd, fwhm, nrstars, star_result = read_photometry.read_photometry(init.star_list, apertureidx)
        # construction of the star descriptions list
        comparison_stars_1, comparison_stars_1_desc, star_descriptions = construct_star_descriptions(
            args, do_compstars_flag)
    if do_lightcurve_flag:
        logging.info("Writing lightcurves...")
        chosen_stars = [x.local_id for x in star_descriptions]
        do_lightcurve.write_lightcurves(chosen_stars, comparison_stars_1, aperture,
                                        int(apertureidx), jd, fwhm, star_result)
    if do_lightcurve_plot or do_phase_diagram:
        logging.info("starting charting / phase diagrams...")
        do_charts.run(star_descriptions, comparison_stars_1_desc, do_lightcurve_plot, do_phase_diagram)
    if do_field_charting:
        logging.info("Starting field chart plotting...")
        vsx_stars = list(filter(vsx_filter, star_descriptions))
        do_charts_field.run_standard_field_charts(vsx_stars, wcs)
        # do_charts_stats.main(fwhm)
        # import code
        # code.InteractiveConsole(locals=dict(globals(), **locals())).interact()
    if do_reporting:
        # star_descriptions_ucac4 = do_calibration.add_ucac4_to_star_descriptions(star_descriptions)
        vsx_stars = list(filter(vsx_filter, star_descriptions))
        logging.info(f"AAVSO Reporting with: {len(vsx_stars)} stars")
        trash_and_recreate_dir(settings.aavsoreportsdir)
        for star in vsx_stars:
            do_aavso_report.report(settings.aavsoreportsdir, star, comparison_stars_1_desc[0])
import sys

from utils import get_file_read, update_file, get_files_in_dir, split_path, clear_path, add_and_get_file

if __name__ == '__main__':
    test = int(sys.argv[1])
    if test == 0:
        test_path = SRC_PATH + '/FS_3'
        print(get_file_read("D1/1.1", test_path))
    if test == 1:
        test_path = SRC_PATH + '/' + 'cache'
        file_name = 'D1/1.1'
        file_content = 'Hello test'
        print(update_file(file_name, test_path, file_content))
    if test == 2:
        test_path = SRC_PATH + '/' + 'FS_1/D1'
        print(get_files_in_dir(test_path))
    elif test == 3:
        path = 'D1/1.1'
        print(split_path(path)[1])
    elif test == 4:
        test_path = SRC_PATH + '/' + 'temp'
        print(clear_path(test_path))
    elif test == 5:
        test_path = SRC_PATH + '/' + 'temp'
        file_name = 'D1/1.1'
        f = add_and_get_file(file_name, test_path)
        print(f.read())
def hierarchical_diagnosis(program, fuzzing_dir, is_continuous):
    """
    Diagnose the program in a few hierarchical steps:
    1) dll diagnoses
    1.1*) dll entry points diagnoses
    2) function diagnoses
    3) xref diagnoses
    :param program: program to diagnose
    :param fuzzing_dir: working dir
    :param is_continuous: whether to use known bugs or the bugs from the previous step
    :return:
    """
    init_dirs(fuzzing_dir)
    seedfiles_dir = os.path.join(fuzzing_dir, consts.SEEDFILES)
    exploit_dir = os.path.join(fuzzing_dir, consts.EXPLOIT_DIR)
    utils.copy_files_to_dir(exploit_dir, seedfiles_dir)
    instances_dir = os.path.join(fuzzing_dir, consts.INSTANCES)
    config = yaml.load(open(os.path.join(fuzzing_dir, "config.yaml")))
    dll_working_dir = utils.mkdir_if_not_exists(os.path.join(fuzzing_dir, consts.DLL_WORKING_DIR))
    dll_matrix_file = os.path.join(fuzzing_dir, consts.DLL_MATRIX)
    function_matrix_file = os.path.join(fuzzing_dir, consts.FUNCTION_MATRIX)
    dominator_matrix_file = os.path.join(fuzzing_dir, consts.DOMINATOR_MATRIX)
    # entry_points_file = os.path.join(fuzzing_dir, consts.ENTRY_POINTS_MATRIX)
    utils.copy_files_to_dir(seedfiles_dir, instances_dir)
    utils.copy_files_to_dir(consts.EXAMPLES_DIR, instances_dir)
    fuzz_project_dir(seedfiles_dir, instances_dir, consts.FUZZ_ITERATIONS)

    # dll diagnoses
    run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), dll_working_dir, config,
                          DLL_GRANULARITY, None, None)
    diagnoser.campaign_matrix.create_matrix_for_dir(dll_working_dir,
                                                    os.path.join(fuzzing_dir, consts.DLL_DIAGNOSIS_RESULT),
                                                    dll_matrix_file)
    dll_instance = readPlanningFile(dll_matrix_file)
    dll_instance.diagnose()

    # entry points diagnoses
    # named_diagnoses = filter(lambda diag: diag.probability > consts.DLL_DIAGNOSIS_THRESHOLD or True,
    #                          dll_instance.get_named_diagnoses())
    # entry_points_working_dir = utils.mkdir_if_not_exists(os.path.join(fuzzing_dir, consts.ENTRY_POINTS_WORKING_DIR))
    # run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), entry_points_working_dir, config,
    #                       ENTRY_POINTS_GRANULARITY,
    #                       get_binaries_to_diagnose(named_diagnoses, config), None)
    # diagnoser.campaign_matrix.create_matrix_for_dir(entry_points_working_dir,
    #                                                 os.path.join(fuzzing_dir, consts.ENTRY_POINTS_DIAGNOSIS_RESULT),
    #                                                 entry_points_file)
    # entry_points_instance = readPlanningFile(entry_points_file)
    # entry_points_instance.diagnose()

    # function diagnosis
    named_diagnoses = filter(lambda diag: diag.probability > consts.DLL_DIAGNOSIS_THRESHOLD,
                             dll_instance.get_named_diagnoses())
    binaries_to_diagnose = get_binaries_to_diagnose(named_diagnoses, config)
    function_working_dir = utils.mkdir_if_not_exists(os.path.join(fuzzing_dir, consts.FUNCTION_WORKING_DIR))
    run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), function_working_dir, config,
                          FUNCTION_GRANULARITY, binaries_to_diagnose, None)
    diagnoser.campaign_matrix.create_matrix_for_dir(function_working_dir,
                                                    os.path.join(fuzzing_dir, consts.FUNCTION_DIAGNOSIS_RESULT),
                                                    function_matrix_file)
    function_instance = readPlanningFile(function_matrix_file)
    function_instance.diagnose()

    # dominators diagnosis
    diagnosed_components = filter(lambda x: '&' in x and "crt" not in x and "sub_" not in x and "asan" not in x,
                                  map(lambda x: x[0], function_instance.get_components_probabilities_by_name()))
    tracing_data = {}
    for comp in diagnosed_components:
        dll = comp.split('#')[1]
        address = comp.split('&')[0]
        tracing_data.setdefault(dll, []).append(address)
    dominator_working_dir = utils.mkdir_if_not_exists(os.path.join(fuzzing_dir, consts.DOMINATOR_WORKING_DIR))
    run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), dominator_working_dir, config,
                          DOMINATOR_GRANULARITY, binaries_to_diagnose, tracing_data)
    diagnoser.campaign_matrix.create_matrix_for_dir(dominator_working_dir,
                                                    os.path.join(fuzzing_dir, consts.FUNCTION_DIAGNOSIS_RESULT),
                                                    dominator_matrix_file)
    dominator_instance = readPlanningFile(dominator_matrix_file)
    dominator_instance.diagnose()

    # xref diagnosis
    diagnosed_components = map(lambda x: x[0],
                               filter(lambda x: '&' in x[0] and x[1] > 0.01,
                                      dominator_instance.get_components_probabilities_by_name()))
    diagnosed_components = map(lambda x: x[0],
                               list(filter(lambda x: '&' in x[0],
                                           sorted(dominator_instance.get_components_probabilities_by_name(),
                                                  key=lambda x: x[1], reverse=True)))[:20])
    tracing_data = {}
    for comp in diagnosed_components:
        address, function_dll = comp.split('&')
        print(function_dll)
        tracing_data.setdefault(function_dll, []).extend(address.split("+"))
    xref_working_dir = utils.mkdir_if_not_exists(os.path.join(fuzzing_dir, consts.XREF_WORKING_DIR))
    run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), xref_working_dir, config,
                          XREF_GRANULARITY, binaries_to_diagnose, tracing_data)
    diagnoser.campaign_matrix.create_matrix_for_dir(xref_working_dir,
                                                    os.path.join(fuzzing_dir, consts.FUNCTION_DIAGNOSIS_RESULT),
                                                    os.path.join(fuzzing_dir, consts.XREF_MATRIX))
def train(args):
    X = []
    y = []
    train_dir = args.traindir
    detector = dlib.get_frontal_face_detector()
    sp = dlib.shape_predictor("models/shape_predictor_5_face_landmarks.dat")
    facerec = dlib.face_recognition_model_v1("models/dlib_face_recognition_resnet_model_v1.dat")
    if len(utils.get_files_in_dir(train_dir, '.csv')) != 0:
        # train using csv files
        print('Training using .csv files.')
        preds_per_person = utils.load_faces_from_csv(train_dir)
        for p in preds_per_person:
            if p != 'unknown':
                for l in preds_per_person[p]:
                    X.append(l[2])
                    y.append(p)
        if len(X) == 0:
            print('No faces found in database {}'.format(train_dir))
            return
    elif len(os.listdir(train_dir)) != 0:
        # train using the train folder: loop through each person in the training set
        print('Training using faces in subfolders.')
        for class_dir in os.listdir(train_dir):
            if not os.path.isdir(os.path.join(train_dir, class_dir)):
                continue
            images = utils.get_images_in_dir(os.path.join(train_dir, class_dir))
            if len(images) == 0:
                continue
            print('adding {} to training data'.format(class_dir))
            # Loop through each training image for the current person
            for img_path in images:
                locations, descriptors = utils.detect_faces_in_image(
                    img_path, detector, facerec, sp, use_entire_image=True)
                # Add the face descriptor for the current image to the training set
                X.append(descriptors[0])
                y.append(class_dir)
            print('{} faces used for training'.format(len(images)))
    else:
        print('Training directory does not contain valid training data.')
        return

    # Determine how many neighbors to use for weighting in the KNN classifier
    n_neighbors = int(round(math.sqrt(len(X))))
    print("Chose n_neighbors automatically:", n_neighbors)

    # Create and train the KNN classifier
    print("Training model with KNN ...")
    knn_clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors,
                                             algorithm='ball_tree',
                                             weights='distance')
    knn_clf.fit(X, y)

    # Save the trained KNN classifier
    with open(os.path.join(args.outdir, 'knn.clf'), 'wb') as f:
        pickle.dump(knn_clf, f)

    # Train the SVM
    print("Training model with an SVM ...")
    recognizer = SVC(C=1.0, kernel="linear", probability=True)
    recognizer.fit(X, y)

    # Save the trained SVM
    with open(os.path.join(args.outdir, 'svm.clf'), 'wb') as f:
        pickle.dump(recognizer, f)
    print('Trained models with {} faces'.format(len(X)))
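# A hedged sketch of how the classifiers saved by train() might be used afterwards.
# The file names knn.clf / svm.clf come from the code above; the 128-dimensional
# descriptor input reflects dlib's ResNet face descriptor, and the function name and
# variables here are illustrative only.
import os
import pickle


def load_and_predict(outdir, descriptor):
    with open(os.path.join(outdir, 'knn.clf'), 'rb') as f:
        knn_clf = pickle.load(f)
    with open(os.path.join(outdir, 'svm.clf'), 'rb') as f:
        svm_clf = pickle.load(f)
    # Both classifiers were fit on raw descriptors, so a single 128-d vector
    # is wrapped in a list before prediction.
    return knn_clf.predict([descriptor])[0], svm_clf.predict([descriptor])[0]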
def run_and_diagnose(program, instances_dir, working_dir, diagnosis_result, diagnosis_matrix,
                     granularity, config, binaries_to_diagnose, tracing_data):
    run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), working_dir, config,
                          granularity, binaries_to_diagnose, tracing_data)
    diagnoser.campaign_matrix.create_matrix_for_dir(working_dir, diagnosis_result, diagnosis_matrix)
        # add page number
        text = "{}".format(pdf.page_no())
        pdf.set_xy(cover.left, cover.top)
        pdf.multi_cell(cover.width, cover.height, text, align="C")
    else:
        text = "{}".format(pdf.page_no())
        pdf.set_xy(pdf.w - 1, pdf.h - 1)
        pdf.cell(0, 0, text, align="C")
    return pdf


from PIL import Image

if __name__ == "__main__":
    files = list(get_files_in_dir("C:/Users/andris/Desktop/00066_stabilized_adjusted"))
    step = int(len(files) / 20)
    files = files[::step]
    n = len(files)
    print("create pdf...")
    pdf = PDF(orientation='P', unit='in', format='A4')
    pdf.line_width = 0
    print("create rectangles...")
    image = Image.open(files[0])
    image_rect = Rectangle(0, 0, image.height, image.width)
    page_rect = Rectangle(0, 0, pdf.h, pdf.w)
    print_rect = page_rect.shrink(0.5)
    rect = image_rect.fitIn(print_rect)
    rectangles = [persp(rect, k=400, distance=i * step) for i in range(n)]
def load_training_batch(load_path):
    """
    Get gif arrays of shape [batch_size, pic_in_gif, 240, 240, 3]
    and the corresponding actions of shape [batch_size, pic_in_gif, 2].
    """
    batch_size = 1  # batch_size is always 1: feed one gif at a time
    print('in load_training_batch')
    all_gif_names = get_files_in_dir(root_path=load_path, suffix='.gif')
    # print(all_gif_names)
    print('len(all_gif_names) = ' + str(len(all_gif_names)))
    for start in range(0, len(all_gif_names), batch_size):
        end = min(start + batch_size, len(all_gif_names))
        # print('Batch start(%3d) -> end(%3d)' % (start, end))
        batch_gif_names = all_gif_names[start:end]
        # get batch_gifs [batch_size, pic_in_gif, 240, 240, 3]
        gifs_dict = {gif_name: imageio.mimread(gif_name) for gif_name in batch_gif_names}
        # gifs = [imageio.mimread(gif_name) for gif_name in batch_gif_names]
        check_all_gif_shape = True
        gifs_rgb = []
        # for g in gifs:
        for name, g in gifs_dict.items():
            g_ary = np.array(g, dtype='f')
            # print('g_ary.shape', g_ary.shape)
            if len(g_ary.shape) < 4:
                print('Strange g_ary shape, name = {}, g_ary.shape = {}'.format(name, g_ary.shape))
                check_all_gif_shape = False
                continue
            try:
                g_ary = g_ary[:, :, :, :3]
                g_ary[:, :, :, 0] -= 103.939
                g_ary[:, :, :, 1] -= 116.779
                g_ary[:, :, :, 2] -= 123.68
                gifs_rgb.append(g_ary)
            except Exception as e:
                # double check, maybe no need
                print('g_ary = g_ary[:3] Exception is ' + str(e))
                print('g_ary.shape = ' + str(g_ary.shape))
                # print(batch_gif_names)
                print('name = ' + name)
                check_all_gif_shape = False
        # abandon this batch if any gif had an unexpected shape
        if not check_all_gif_shape:
            print('Abandon this batch from start(%3d) -> end(%3d)' % (start, end))
            continue
        batch_gifs_normal = np.array(gifs_rgb)
        # print('batch_gifs_normal.shape = %s ' % str(batch_gifs_normal.shape))
        # get batch_actions [batch_size, pic_in_gif, 2]
        batch_actions = [
            np.loadtxt(join(load_path, '{}.csv'.format(gif_name.split('/')[-1].split('.')[0])))
            for gif_name in batch_gif_names
        ]
        batch_actions = np.array(batch_actions)
        # print('batch_actions.shape = %s ' % str(batch_actions.shape))
        # drop the leading batch dimension when batch_size == 1
        if batch_size == 1:
            batch_gifs_normal = np.squeeze(batch_gifs_normal)
            batch_actions = np.squeeze(batch_actions)
        yield batch_gifs_normal, batch_actions
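# load_training_batch is a generator; a minimal consumption sketch. The directory path
# and the bodies of the loop are placeholders, not taken from the original project.
for gif_frames, actions in load_training_batch('data/training_gifs'):
    # after np.squeeze: gif_frames is (pic_in_gif, 240, 240, 3), actions is (pic_in_gif, 2)
    print(gif_frames.shape, actions.shape)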
    try:
        data = {'dir_port': SERVER_PORT}
        requests.post(server_init_url, data=json.dumps(data), headers=cf.JSON_HEADER)
    except:
        print('Registry server not ready')
        sys.exit(1)

    # Get the directory server port
    try:
        dir_port_url = format_registry_req('dir_server', cf.REGISTRY_SERVER_PORT)
        response = json.loads(requests.get(dir_port_url).content.decode())
        dir_server_port = response['dir_port']
        if str(dir_server_port) == str(-1):
            sys.exit()
    except:
        print('No directory port up yet')
        sys.exit()

    # Get the files served by this file server
    file_names = get_files_in_dir(FILE_SERVER_PATH)
    data = {'file_names': file_names}

    # Send the batch of file names to the directory server
    url = format_node_req(SERVER_PORT, dir_server_port)
    requests.post(url, data=json.dumps(data), headers=cf.JSON_HEADER)

    app.run(host='0.0.0.0', port=SERVER_PORT, debug=False)
    print("File server node running on port: ", SERVER_PORT)