Example #1
def config_marge(user_data, marge_output_dir):
    user_path = user_data['user_path']
    marge_input_dir = os.path.join(user_path, 'upload')

    # back up the original config.json
    config_file = os.path.join(marge_output_dir, 'config.json')
    config_file_bak = os.path.join(marge_output_dir, 'config.json.bak')
    shutil.copyfile(config_file, config_file_bak)

    with open(config_file) as data_file:
        data = json.load(data_file)

    # tools path in MARGE/config.json
    data["tools"]["MACS2"] = MACS2_path
    data["tools"]["bedClip"] = bedClip_path
    data["tools"]["bedGraphToBigWig"] = bedGraphToBigWig_path
    data["tools"]["bigWigAverageOverBed"] = bigWigAverageOverBed_path
    data["tools"]["bigWigSummary"] = bigWigSummary_path

    data["ASSEMBLY"] = user_data["assembly"]
    data["MARGEdir"] = os.path.join(MARGE_DIR, "marge")
    data["REFdir"] = os.path.join(MARGE_LIB_DIR,
                                  data["ASSEMBLY"] + "_all_reference")

    if user_data['dataType'] == "ChIP-seq":
        data["EXPSDIR"] = ""
        data["EXPS"] = ""
        data["EXPTYPE"] = ""
        data["ID"] = ""
        data["SAMPLESDIR"] = marge_input_dir
        data["SAMPLES"] = utils.get_files_in_dir("ChIP", data["SAMPLESDIR"])
    elif user_data['dataType'] == "Geneset":
        data["SAMPLESDIR"] = ""
        data["SAMPLES"] = ""
        data["EXPSDIR"] = marge_input_dir
        data["EXPS"] = utils.get_files_in_dir("GeneList", data["EXPSDIR"])
        # Gene_Only & Gene_Response
        data["EXPTYPE"] = user_data["gene_exp_type"]
        # GeneSymbol & RefSeq
        data["ID"] = user_data["gene_id_type"]
    elif user_data['dataType'] == "Both":
        data["SAMPLESDIR"] = marge_input_dir
        data["EXPSDIR"] = marge_input_dir
        data["SAMPLES"] = utils.get_files_in_dir("ChIP", data["SAMPLESDIR"])
        data["EXPS"] = utils.get_files_in_dir("GeneList", data["EXPSDIR"])
        # Gene_Only & Gene_Response
        data["EXPTYPE"] = user_data["gene_exp_type"]
        # GeneSymbol & RefSeq
        data["ID"] = user_data["gene_id_type"]

    with open(config_file, 'w') as data_file:
        json.dump(data, data_file)
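Every example in this collection calls a project-local get_files_in_dir helper, but none of the snippets ship its definition, and its signature varies from repository to repository (a bare directory listing, a name-prefix filter as in the "ChIP"/"GeneList" calls above, a suffix filter, or a names-plus-full-paths pair). The sketch below is only a guess at the prefix-filtering variant that config_marge assumes; the real utils module in each project may differ.

import os

def get_files_in_dir(prefix, directory):
    # Hypothetical sketch: return the names of regular files in `directory`
    # whose file names start with `prefix`.
    return [name for name in sorted(os.listdir(directory))
            if name.startswith(prefix)
            and os.path.isfile(os.path.join(directory, name))]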
Example #2
def retrieve_tweets(dataset_path,
                    log_path,
                    index_path,
                    apis,
                    from_main=False,
                    main_index=""):

    if from_main:
        Split_Index_Per_Day(main_index, index_path)

    files = get_files_in_dir(index_path)
    api = apis[0]

    num = len(apis)

    n = len(files)

    for i in range(n):
        file = random.choice(files)
        files.remove(file)
        if api == apis[-1]:
            print("Wait for a while")
            time.sleep(240)
        api = apis[i % num]
        print("----------------\n", join(index_path, file))
        build_dataset_file(join(index_path, file), log_path, dataset_path, api)
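The loop above hands each index file to the next API client and sleeps for 240 seconds whenever the last client in the pool has just been used, to stay under rate limits. A more compact sketch of the same round-robin idea (reusing the get_files_in_dir and build_dataset_file helpers from this example) drives the rotation with itertools.cycle:

import itertools
import random
import time
from os.path import join

def retrieve_tweets_round_robin(dataset_path, log_path, index_path, apis):
    # Sketch only: same rotation as retrieve_tweets, expressed with
    # itertools.cycle instead of manual index arithmetic; pauses once per
    # full pass over the API pool.
    files = get_files_in_dir(index_path)
    random.shuffle(files)
    api_cycle = itertools.cycle(apis)
    for i, file in enumerate(files):
        if i and i % len(apis) == 0:
            print("Wait for a while")
            time.sleep(240)
        api = next(api_cycle)
        build_dataset_file(join(index_path, file), log_path, dataset_path, api)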
Example #3
def evaluate(train=False):
    dataset_path = 'datasets/dataset'
    model_path = 'models/model_expectation_bd'
    test = '2012-10-29.txt'
    dataset_files = get_files_in_dir(dataset_path)
    dataset_files.remove(test)

    features_burst = [
        'Storm', 'Episode', 'Obama', 'Hurricane', 'Sandy', 'Game', 'Football',
        'Giants', 'Cowboys', 'Romney', 'Debat', 'Frankenstorm', 'Halloween',
        'TheWalkingDead', 'WalkingDead', 'Walking', 'Dead', '#Sandy',
        '#Hurricanesandy'
    ]

    if train:
        for file in dataset_files:
            print("Training on file: " + file + "...")
            run_burst_detection(join(dataset_path, file), model_path)

    get_and_evaluate(join(dataset_path, test), model_path, features_burst)

    #run_burst_detection(join(dataset_path, test), model_path,train = False)


#evaluate()
Example #4
def diagnosis_by_fuzzing_entropy(program, fuzzing_dir, entropy_threshold, ratio_min, ratio_max,
                                 fuzzed_files_per_iter=10, stop_iter=500, pre_fuzz_count=0):
    seedfiles_dir = os.path.join(fuzzing_dir, consts.SEEDFILES)
    matrix_file = None
    for granularity in [DLL_GRANULARITY, FUNCTION_GRANULARITY, DOMINATOR_GRANULARITY, XREF_GRANULARITY]:
        instances_dir = utils.clear_dir(os.path.join(fuzzing_dir, consts.INSTANCES, granularity, str(entropy_threshold)))
        current_entropy = float('inf')
        previous_entropy = float('-inf')
        tracing_data = generate_tracing_data(granularity, matrix_file)
        matrix_file = os.path.join(fuzzing_dir, consts.FUZZING_MATRIX.format("{0}_{1}".format(granularity, str(entropy_threshold))))
        if os.path.exists(matrix_file):
            os.remove(matrix_file)
        working_dir = utils.clear_dir(os.path.join(fuzzing_dir, consts.WORKING_DIR, granularity, str(entropy_threshold)))
        diagnosis_result = os.path.join(fuzzing_dir,consts.DLL_DIAGNOSIS_RESULT if granularity == DLL_GRANULARITY else consts.FUNCTION_DIAGNOSIS_RESULT)
        for seed_example in utils.get_files_in_dir(seedfiles_dir):
            shutil.copy2(seed_example, instances_dir)
            instance_path = os.path.join(instances_dir, os.path.basename(seed_example))
            run_debugger_on_files(program, [instance_path], working_dir, config, granularity, tracing_data)
        fuzzed_files = fuzz_project_dir(seedfiles_dir, instances_dir, pre_fuzz_count, ratio_min, ratio_max)
        run_debugger_on_files(program, fuzzed_files, working_dir, config, granularity, tracing_data)
        iter_ind = 0
        while abs(current_entropy - previous_entropy) > entropy_threshold:
            fuzzed_files = fuzz_project_dir(seedfiles_dir, instances_dir, fuzzed_files_per_iter, ratio_min, ratio_max)
            run_debugger_on_files(program, fuzzed_files, working_dir, config, granularity, tracing_data)
            diagnoser.campaign_matrix.create_matrix_for_dir(working_dir, diagnosis_result, matrix_file)
            sfl_matrix = readPlanningFile(matrix_file)
            sfl_matrix.diagnose()
            results = Diagnosis_Results(sfl_matrix.diagnoses, sfl_matrix.initial_tests, sfl_matrix.error)
            previous_entropy = current_entropy
            current_entropy = results.component_entropy
            iter_ind = iter_ind + 1
            if iter_ind > stop_iter:
                break
Example #5
def fuzz_project_dir(seedfiles_dir, output_dir, iterations, ratio_min=0.0, ratio_max=1.0):
    fuzzed_files = []
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    for seed_example in utils.get_files_in_dir(seedfiles_dir):
        fuzzed_files.extend(fuzz_seed_file(seed_example, output_dir, iterations, ratio_min, ratio_max))
    return fuzzed_files
Example #6
def extract_MusicalFeatures(folder, descriptors, filename):
    file_count = 0
    segment_files = get_files_in_dir(folder)
    print(segment_files)
    data_file = os.path.join(folder, filename)

    with open(data_file, 'w') as writer:

        #adding column names as the first line in csv
        line2write = ','.join(descriptors + ['instrument']).replace(
            'lowlevel.', '') + '\n'
        writer.write(line2write)
        for file in segment_files:
            if '.wav' in file:
                file_count += 1
                if file_count % 20 == 0:  #print name of a file every 20 files
                    print(file_count, "files processed, current file: ", file)
                features, features_frames = ess.MusicExtractor(
                    lowlevelSilentFrames='drop',
                    lowlevelFrameSize=2048,
                    lowlevelHopSize=1024,
                    lowlevelStats=['mean', 'stdev'])(file)
                selected_features = [
                    features[descriptor] for descriptor in descriptors
                ]
                instrument = file.split('/')[-1].split(
                    '_')[1].lower()[:-4]  #class information
                line2write = str(
                    selected_features)[1:-1] + ',' + instrument + '\n'
                writer.write(line2write)
    print("A total of ", file_count, "files processed")
Example #7
def process_song_file(cur, dirpath):
    """Process song file to process song and artist data"""
    filepaths = get_files_in_dir('data/song_data')
    df = pd.concat([pd.read_json(f, lines=True) for f in filepaths],
                   ignore_index=True)

    insert_songs(cur, df)
    insert_artists(cur, df)
Example #8
def count_tweets(dataset_path, log_path):
    files = get_files_in_dir(dataset_path)
    N_tweets = 0

    for file in files:
        with open(join(dataset_path, file), mode='r') as file_ds:
            ds = json.load(file_ds)
        N_tweets += len(ds)
    print('Collected: ' + str(N_tweets))

    files = get_files_in_dir(log_path)
    N_tweets = 0

    for file in files:
        with open(join(log_path, file), mode='r') as file_ds:
            ds = json.load(file_ds)
        N_tweets += ds
    print('Listed: ' + str(N_tweets))
Example #9
def process_log_file(cur, dirpath):
    """Process log file to process time, user and songplay data"""
    filepaths = get_files_in_dir('data/log_data')
    df = pd.concat([pd.read_json(f, lines=True) for f in filepaths],
                   ignore_index=True)

    df = df[df['page'] == 'NextSong']

    insert_time(cur, df)
    insert_user(cur, df)
    insert_songplay(cur, df)
Example #10
parser = argparse.ArgumentParser(
    description='Collect randomly sampled photos as a dataset for CrowdFlower.')
parser.add_argument('-d', help='The image data path.')
parser.add_argument('-o', help='The output file.')
parser.add_argument('-n', help='The number of sampled photos.')
parser.add_argument('-m', help='The output mode.')
parser.add_argument('-u', help='The URL prefix.')

args = parser.parse_args()


def write_file(filename, files, prefix, mode='a'):

    f = open(filename, mode)
    if mode == 'a':
        f.write("\n" + prefix)
    if mode == 'w':
        f.write(prefix)
    files.tofile(f, sep="\n" + prefix)
    f.close()


(files, fullpath_files) = utils.get_files_in_dir(args.d, True)
if args.n is not None:
    files = files[0:min(len(files), int(args.n))]
if args.m is None:
    args.m = 'a'
write_file(args.o, files, args.u, args.m)
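For illustration, a hypothetical invocation of this sampling script (the script name, paths and URL are made up):

# Hypothetical usage; argument values are examples only:
#   python collect_photos.py -d ./photos -o sampled.txt -n 100 -m w -u http://example.org/img/
# This keeps at most 100 of the file names returned by utils.get_files_in_dir
# and writes them to sampled.txt, one per line, each prefixed with the URL.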
Example #11
def run_do_rest(do_convert_fits, do_photometry, do_match, do_compstars_flag,
                do_aperture_search, do_lightcurve_flag, do_pos, do_ml,
                do_lightcurve_plot, do_phase_diagram, do_field_charting,
                do_reporting, args):
    if do_convert_fits:
        logging.info("Converting fits...")
        write_convert_fits()

    # either read the previous reference frame or calculate a new one
    _, _, reference_frame_index = do_calibration.get_reference_frame(
        100, do_calibration.select_reference_frame_jpeg)

    logging.info(f"reference header is {settings.reference_header}")
    # get wcs model from the reference header. Used in writing world positions and field charts
    wcs = do_calibration.get_wcs(settings.reference_header)
    apertures = None
    aperture = None
    apertureidx = None

    if do_photometry:
        logging.info(
            f"Writing photometry with config file {settings.conf_phot}...")
        write_photometry(config_file=settings.conf_phot)

    if do_match:
        logging.info("Performing matching...")
        pool = mp.Pool(init.nr_threads, maxtasksperchild=100)
        ref_frame = do_calibration.find_reference_photometry(
            reference_frame_index)
        file_list = utils.get_files_in_dir(settings.photometrydir)
        file_list.sort()
        func = partial(write_match, base_photometry_file=ref_frame)
        logging.info(
            f"Writing matches for {len(file_list)} stars with reference frame {ref_frame}"
        )
        trash_and_recreate_dir(settings.matchedphotometrydir)
        for _ in tqdm.tqdm(pool.imap_unordered(func, file_list, 10),
                           total=len(file_list)):
            pass

    if do_aperture_search:
        logging.info("Searching best aperture...")
        # getting aperture
        stddevs = None
        counts = None
        # stddevs, _, apertures, apertureidx, _, _, counts = do_aperture.main(the_dir=settings.matchedphotometrydir, percentage=init.aperture_find_percentage)
        apertures = [x for x in do_aperture.get_apertures()]
        apertureidx = np.abs(np.array(apertures) - init.aperture).argmin()
        aperture = apertures[apertureidx]
        # saving all calculated data
        np.savetxt(settings.basedir + "apertures.txt",
                   apertures,
                   fmt='%.2f',
                   delimiter=';')
        np.savetxt(settings.basedir + "apertureidx_best.txt", [apertureidx],
                   fmt='%d')
        logging.debug("Done writing aperture search results")
    else:
        logging.info("Loading best aperture...")
        apertures, apertureidx, aperture = reading.read_aperture()
        logging.info(f"aperture: {aperture}, apertures:{apertures}")

    if do_pos:
        logging.info(
            "Writing positions of all stars on the reference image...")
        reference_matched = do_calibration.find_reference_matched(
            reference_frame_index)
        logging.info(f"reference match is {reference_matched}")
        do_write_pos(init.star_list,
                     aperture,
                     reference_matched,
                     is_resume=False)
        do_world_pos(wcs, init.star_list, reference_frame_index)

    if do_ml:
        logging.info("Doing ML detection of variable stars...")
        import do_upsilon  # do it here because it takes some time at startup
        do_upsilon.run(init.star_list)

    if (do_lightcurve_flag or do_field_charting):
        logging.info("Loading photometry...")
        jd, fwhm, nrstars, star_result = read_photometry.read_photometry(
            init.star_list, apertureidx)

    # construction of the star descriptions list
    comparison_stars_1, comparison_stars_1_desc, star_descriptions = construct_star_descriptions(
        args, do_compstars_flag)
    if do_lightcurve_flag:
        logging.info(f"Writing lightcurves...")
        chosen_stars = [x.local_id for x in star_descriptions]
        do_lightcurve.write_lightcurves(chosen_stars, comparison_stars_1,
                                        aperture, int(apertureidx), jd, fwhm,
                                        star_result)

    if do_lightcurve_plot or do_phase_diagram:
        logging.info("starting charting / phase diagrams...")
        do_charts.run(star_descriptions, comparison_stars_1_desc,
                      do_lightcurve_plot, do_phase_diagram)

    if do_field_charting:
        logging.info("Starting field chart plotting...")
        vsx_stars = list(filter(vsx_filter, star_descriptions))
        do_charts_field.run_standard_field_charts(vsx_stars, wcs)
        # do_charts_stats.main(fwhm)

    # import code
    # code.InteractiveConsole(locals=dict(globals(), **locals())).interact()
    if do_reporting:
        # star_descriptions_ucac4 = do_calibration.add_ucac4_to_star_descriptions(star_descriptions)
        vsx_stars = list(filter(vsx_filter, star_descriptions))
        logging.info(f"AAVSO Reporting with: {len(vsx_stars)} stars")
        trash_and_recreate_dir(settings.aavsoreportsdir)
        for star in vsx_stars:
            do_aavso_report.report(settings.aavsoreportsdir, star,
                                   comparison_stars_1_desc[0])
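The matching step above fans the per-file work out with functools.partial, mp.Pool.imap_unordered and a tqdm progress bar. A minimal standalone sketch of that pattern (process_one, the file names and the pool size are illustrative, not taken from the pipeline):

import multiprocessing as mp
from functools import partial

import tqdm

def process_one(path, base_photometry_file):
    # stand-in for write_match-style work on a single photometry file
    return path, base_photometry_file

if __name__ == "__main__":
    file_list = ["file_{}.pht".format(i) for i in range(100)]  # illustrative inputs
    func = partial(process_one, base_photometry_file="reference.pht")
    with mp.Pool(4, maxtasksperchild=100) as pool:
        for _ in tqdm.tqdm(pool.imap_unordered(func, file_list, 10),
                           total=len(file_list)):
            pass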
Example #12
import sys

from utils import get_file_read, update_file, get_files_in_dir, split_path, clear_path, add_and_get_file

if __name__ == '__main__':
    test = int(sys.argv[1])
    if test == 0:
        test_path = SRC_PATH + '/FS_3'
        print(get_file_read("D1/1.1", test_path))

    if test == 1:
        test_path = SRC_PATH + '/' + 'cache'
        file_name = 'D1/1.1'
        file_content = 'Hello test'
        print(update_file(file_name, test_path, file_content))

    if test == 2:
        test_path = SRC_PATH + '/' + 'FS_1/D1'
        print(get_files_in_dir(test_path))
    elif test == 3:
        path = 'D1/1.1'
        print(split_path(path)[1])

    elif test == 4:
        test_path = SRC_PATH + '/' + 'temp'
        print(clear_path(test_path))

    elif test == 5:
        test_path = SRC_PATH + '/' + 'temp'
        file_name = 'D1/1.1'
        f = add_and_get_file(file_name, test_path)
        print(f.read())
Example #13
def hierarchical_diagnosis(program, fuzzing_dir, is_continuous):
    """
    diagnose the program in a few hierarchical steps:
    1) dll diagnoses
    1.1*) dll entry points diagnoses
    2) function diagnoses
    3) xref diagnoses
    :param program: program to diagnose
    :param fuzzing_dir: working dir
    :param is_continuous: whether to use known bugs or the bugs from the previous step
    :return:
    """
    init_dirs(fuzzing_dir)
    seedfiles_dir = os.path.join(fuzzing_dir, consts.SEEDFILES)
    exploit_dir = os.path.join(fuzzing_dir, consts.EXPLOIT_DIR)
    utils.copy_files_to_dir(exploit_dir, seedfiles_dir)
    instances_dir = os.path.join(fuzzing_dir, consts.INSTANCES)
    config = yaml.safe_load(open(os.path.join(fuzzing_dir, "config.yaml")))
    dll_working_dir = utils.mkdir_if_not_exists(os.path.join(fuzzing_dir, consts.DLL_WORKING_DIR))
    dll_matrix_file = os.path.join(fuzzing_dir, consts.DLL_MATRIX)
    function_matrix_file = os.path.join(fuzzing_dir, consts.FUNCTION_MATRIX)
    dominator_matrix_file = os.path.join(fuzzing_dir, consts.DOMINATOR_MATRIX)
    # entry_points_file = os.path.join(fuzzing_dir, consts.ENTRY_POINTS_MATRIX)
    utils.copy_files_to_dir(seedfiles_dir, instances_dir)
    utils.copy_files_to_dir(consts.EXAMPLES_DIR, instances_dir)
    fuzz_project_dir(seedfiles_dir, instances_dir, consts.FUZZ_ITERATIONS)

    # dll diagnoses
    run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), dll_working_dir, config, DLL_GRANULARITY,
                          None, None)
    diagnoser.campaign_matrix.create_matrix_for_dir(dll_working_dir, os.path.join(fuzzing_dir, consts.DLL_DIAGNOSIS_RESULT),
                                                    dll_matrix_file)
    dll_instance = readPlanningFile(dll_matrix_file)
    dll_instance.diagnose()
    # # #
    # # # # # entry points diagnoses
    # # # # named_diagnoses = filter(lambda diag: diag.probability > consts.DLL_DIAGNOSIS_THRESHOLD or True,
    # # # #                          dll_instance.get_named_diagnoses())
    # # # # entry_points_working_dir = utils.mkdir_if_not_exists(os.path.join(fuzzing_dir, consts.ENTRY_POINTS_WORKING_DIR))
    # # # # run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), entry_points_working_dir, config,
    # # # #                       ENTRY_POINTS_GRANULARITY,
    # # # #                       get_binaries_to_diagnose(named_diagnoses, config), None)
    # # # # diagnoser.campaign_matrix.create_matrix_for_dir(entry_points_working_dir,
    # # # #                                                 os.path.join(fuzzing_dir, consts.ENTRY_POINTS_DIAGNOSIS_RESULT),
    # # # #                                                 entry_points_file)
    # # # # entry_points_instance = readPlanningFile(entry_points_file)
    # # # # entry_points_instance.diagnose()
    # # #
    # # # # function diagnosis
    named_diagnoses = filter(lambda diag: diag.probability > consts.DLL_DIAGNOSIS_THRESHOLD,dll_instance.get_named_diagnoses())
    binaries_to_diagnose = get_binaries_to_diagnose(named_diagnoses, config)
    function_working_dir = utils.mkdir_if_not_exists(os.path.join(fuzzing_dir, consts.FUNCTION_WORKING_DIR))
    run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), function_working_dir, config, FUNCTION_GRANULARITY,
                          binaries_to_diagnose, None)
    diagnoser.campaign_matrix.create_matrix_for_dir(function_working_dir, os.path.join(fuzzing_dir,
                                                                                       consts.FUNCTION_DIAGNOSIS_RESULT),
                                                    function_matrix_file)

    function_instance = readPlanningFile(function_matrix_file)
    function_instance.diagnose()

    # dominators diagnosis
    diagnosed_components = filter(lambda x: '&' in x and "crt" not in x and "sub_" not in x and "asan" not in x
                                  ,map(lambda x: x[0], function_instance.get_components_probabilities_by_name()))
    tracing_data = {}
    for comp in diagnosed_components:
        dll = comp.split('#')[1]
        address = comp.split('&')[0]
        tracing_data.setdefault(dll, []).append(address)
    dominator_working_dir = utils.mkdir_if_not_exists(os.path.join(fuzzing_dir, consts.DOMINATOR_WORKING_DIR))
    run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), dominator_working_dir , config, DOMINATOR_GRANULARITY, binaries_to_diagnose, tracing_data)
    diagnoser.campaign_matrix.create_matrix_for_dir(dominator_working_dir, os.path.join(fuzzing_dir,
                                                                                   consts.FUNCTION_DIAGNOSIS_RESULT),
                                                    dominator_matrix_file)
    dominator_instance = readPlanningFile(dominator_matrix_file)
    dominator_instance.diagnose()

    # xref diagnosis
    diagnosed_components = map(lambda x: x[0],
                               filter(lambda x: '&' in x[0] and x[1] > 0.01,
                                      dominator_instance.get_components_probabilities_by_name()))
    diagnosed_components = map(lambda x: x[0], filter(lambda x: '&' in x[0],
                                  sorted(dominator_instance.get_components_probabilities_by_name(), key=lambda x: x[1],reverse=True))[:20])
    tracing_data = {}
    for comp in diagnosed_components:
        address, function_dll = comp.split('&')
        print(function_dll)
        tracing_data.setdefault(function_dll, []).extend(address.split("+"))
    xref_working_dir = utils.mkdir_if_not_exists(os.path.join(fuzzing_dir, consts.XREF_WORKING_DIR))
    run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), xref_working_dir, config, XREF_GRANULARITY, binaries_to_diagnose, tracing_data)
    diagnoser.campaign_matrix.create_matrix_for_dir(xref_working_dir, os.path.join(fuzzing_dir,
                                                                                   consts.FUNCTION_DIAGNOSIS_RESULT),
                                                    os.path.join(fuzzing_dir, consts.XREF_MATRIX))
Example #14
def train(args):
    X = []
    y = []
    train_dir = args.traindir

    detector = dlib.get_frontal_face_detector()
    sp = dlib.shape_predictor("models/shape_predictor_5_face_landmarks.dat")
    facerec = dlib.face_recognition_model_v1(
        "models/dlib_face_recognition_resnet_model_v1.dat")

    if len(utils.get_files_in_dir(train_dir, '.csv')) != 0:
        # train using csv files
        print('Training using .csv files.')

        preds_per_person = utils.load_faces_from_csv(train_dir)
        for p in preds_per_person:
            if p != 'unknown':
                for l in preds_per_person[p]:
                    X.append(l[2])
                    y.append(p)

        if len(X) == 0:
            print('No faces found in database {}'.format(train_dir))
            return

    elif len(os.listdir(train_dir)) != 0:
        # train using train folder
        # Loop through each person in the training set
        print('Training using faces in subfolders.')
        for class_dir in os.listdir(train_dir):
            if not os.path.isdir(os.path.join(train_dir, class_dir)):
                continue

            images = utils.get_images_in_dir(os.path.join(
                train_dir, class_dir))
            if len(images) == 0:
                continue

            print('adding {} to training data'.format(class_dir))
            # Loop through each training image for the current person
            for img_path in images:
                locations, descriptors = utils.detect_faces_in_image(
                    img_path, detector, facerec, sp, use_entire_image=True)

                # Add face descriptor for current image to the training set
                X.append(descriptors[0])
                y.append(class_dir)
            print('{} faces used for training'.format(len(images)))
    else:
        print('Training directory does not contain valid training data.')
        return

    # Determine how many neighbors to use for weighting in the KNN classifier
    n_neighbors = int(round(math.sqrt(len(X))))
    print("Chose n_neighbors automatically:", n_neighbors)

    # Create and train the KNN classifier
    print("Training model with KNN ...")
    knn_clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors,
                                             algorithm='ball_tree',
                                             weights='distance')
    knn_clf.fit(X, y)

    # Save the trained KNN classifier
    with open(os.path.join(args.outdir, 'knn.clf'), 'wb') as f:
        pickle.dump(knn_clf, f)

    # train the svm
    print("Training model with an SVM ...")
    recognizer = SVC(C=1.0, kernel="linear", probability=True)
    recognizer.fit(X, y)

    # Save the trained SVM
    with open(os.path.join(args.outdir, 'svm.clf'), 'wb') as f:
        pickle.dump(recognizer, f)

    print('Trained models with {} faces'.format(len(X)))
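train() only writes the two pickled classifiers; a minimal sketch of loading them back and classifying a single face descriptor (classify_descriptor is a hypothetical helper, not part of the original script; outdir and the 128-dimensional descriptor are whatever the caller provides):

import os
import pickle

import numpy as np

def classify_descriptor(outdir, descriptor):
    # Sketch only: load the classifiers saved by train() and predict one face.
    with open(os.path.join(outdir, 'knn.clf'), 'rb') as f:
        knn_clf = pickle.load(f)
    with open(os.path.join(outdir, 'svm.clf'), 'rb') as f:
        svm_clf = pickle.load(f)
    sample = np.asarray(descriptor).reshape(1, -1)  # scikit-learn expects 2-D input
    return knn_clf.predict(sample)[0], svm_clf.predict(sample)[0]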
Example #15
def run_and_diagnose(program, instances_dir, working_dir, diagnosis_result, diagnosis_matrix,
                     granularity, config, binaries_to_diagnose, tracing_data):
    run_debugger_on_files(program, utils.get_files_in_dir(instances_dir), working_dir, config, granularity, binaries_to_diagnose, tracing_data)
    diagnoser.campaign_matrix.create_matrix_for_dir(working_dir, diagnosis_result, diagnosis_matrix)
Example #16
                # add page number
                text = "{}".format(pdf.page_no())
                pdf.set_xy(cover.left, cover.top)
                pdf.multi_cell(cover.width, cover.height, text, align="C")
        else:
            text = "{}".format(pdf.page_no())
            pdf.set_xy(pdf.w - 1, pdf.h - 1)
            pdf.cell(0, 0, text, align="C")

    return pdf


from PIL import Image
if __name__ == "__main__":
    files = list(
        get_files_in_dir("C:/Users/andris/Desktop/00066_stabilized_adjusted"))
    step = int(len(files) / 20)
    files = files[::step]
    n = len(files)

    print("create pdf...")
    pdf = PDF(orientation='P', unit='in', format='A4')
    pdf.line_width = 0

    print("create rectangles...")
    image = Image.open(files[0])
    image_rect = Rectangle(0, 0, image.height, image.width)
    page_rect = Rectangle(0, 0, pdf.h, pdf.w)
    print_rect = page_rect.shrink(0.5)
    rect = image_rect.fitIn(print_rect)
    rectangles = [persp(rect, k=400, distance=i * step) for i in range(n)]
Example #18
def load_training_batch(load_path):
    """
    Get the gif array:           [batch_size, pic_in_gif, 240, 240, 3]
    and corresponding actions:   [batch_size, pic_in_gif, 2]
    """
    batch_size = 1  # batch_size is always 1: feed one gif at a time
    print('in load_training_batch')
    all_gif_names = get_files_in_dir(root_path=load_path, suffix='.gif')
    # print(all_gif_names)
    print('len(all_gif_names) = ' + str(len(all_gif_names)))

    for start in range(0, len(all_gif_names), batch_size):
        end = min(start + batch_size, len(all_gif_names))
        # print('Batch start(%3d) -> end(%3d)' %(start,end))
        batch_gif_names = all_gif_names[start:end]

        # get batch_gifs [batch_size, pic_in_gif, 240, 240, 3]
        gifs_dict = {}
        gifs_dict = {
            gif_name: imageio.mimread(gif_name)
            for gif_name in batch_gif_names
        }
        # gifs = [imageio.mimread(gif_name)  for gif_name in batch_gif_names]

        check_all_gif_shape = True
        gifs_rgb = []
        # for g in gifs:
        for name, g in gifs_dict.items():
            g_ary = np.array(g, dtype='f')

            check_all_gif_shape = True
            # print('g_ary.shape', g_ary.shape)
            if len(g_ary.shape) < 4:
                # print('Strange g_ary shape')
                print(
                    'Strange g_ary shape, name = {}, g_ary.shape = {}'.format(
                        name, g_ary.shape))
                check_all_gif_shape = False
                continue
            try:
                g_ary = g_ary[:, :, :, :3]
                g_ary[:, :, :, 0] -= 103.939
                g_ary[:, :, :, 1] -= 116.779
                g_ary[:, :, :, 2] -= 123.68
                # g_ary.astype(float32)
                gifs_rgb.append(g_ary)
            except Exception as e:
                # double check, maybe no need
                print('g_ary = g_ary[:3] Exception is ' + str(e))
                print('g_ary.shape = ' + str(g_ary.shape))
                # print(batch_gif_names)
                print('name = ' + name)
                check_all_gif_shape = False
        #abandon this batch
        if not check_all_gif_shape:
            print('Abandon this batch from start(%3d) -> end(%3d)' %
                  (start, end))
            continue

        batch_gifs_normal = np.array(gifs_rgb)
        # print('batch_gifs_normal.shape = %s ' % str(batch_gifs_normal.shape))

        # get batch_actions [batch_size, pic_in_gif, 2]
        batch_actions = [
            np.loadtxt(
                join(load_path,
                     '{}.csv'.format(gif_name.split('/')[-1].split('.')[0])))
            for gif_name in batch_gif_names
        ]
        batch_actions = np.array(batch_actions)
        # print('batch_actions.shape = %s ' %  str(batch_actions.shape))

        # [[1,2]]
        # batch_gifs_normal.shape = 1,20, 100,100,3
        if batch_size == 1:
            batch_gifs_normal = np.squeeze(batch_gifs_normal)
            batch_actions = np.squeeze(batch_actions)

        yield batch_gifs_normal, batch_actions
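load_training_batch is a generator, so a training loop consumes it one gif at a time; a hypothetical consumption sketch (the load path and train_step are placeholders, not names from the original project):

# Hypothetical usage of the generator above:
for gifs, actions in load_training_batch('./gif_data'):
    # with batch_size == 1 the squeeze leaves
    #   gifs:    [pic_in_gif, 240, 240, 3]
    #   actions: [pic_in_gif, 2]
    train_step(gifs, actions)  # placeholder training step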
Example #19
        data = {'dir_port': SERVER_PORT}
        requests.post(server_init_url,
                      data=json.dumps(data),
                      headers=cf.JSON_HEADER)
    except:
        print('Registry server not ready')
        sys.exit(1)

    #Get dir server port
    try:
        dir_port_url = format_registry_req('dir_server',
                                           cf.REGISTRY_SERVER_PORT)
        response = json.loads(requests.get(dir_port_url).content.decode())
        dir_server_port = response['dir_port']
        if str(dir_server_port) == str(-1):
            sys.exit()
    except:
        print('No directory port up yet')
        sys.exit()

    #Get files supported by file server
    file_names = get_files_in_dir(FILE_SERVER_PATH)
    data = {'file_names': file_names}

    #Send batch of files
    url = format_node_req(SERVER_PORT, dir_server_port)
    requests.post(url, data=json.dumps(data), headers=cf.JSON_HEADER)

    app.run(host='0.0.0.0', port=SERVER_PORT, debug=False)
    print("File server node running on port: ", SERVER_PORT)