Example #1
def _push(self, app_id, app_files_dir, assert_no_files=True):
    token, signature = make_development_handshake('push', 'testuser',
        app_id)
    url = 'http://localhost:8000/v1/push/%s/?handshake_token=%s' \
        '&handshake_signature=%s' % (app_id, token, signature)
    server_hashes = get_hashes(url)
    self.assertIsInstance(server_hashes, dict)
    if assert_no_files:
        self.assertEqual(len(server_hashes), 0)
    # Push our test files
    post_archive(app_files_dir, url, server_hashes)
    # Check the server now reports one hash per pushed file
    server_hashes = get_hashes(url)
    self.assertIsInstance(server_hashes, dict)
    self.assertEqual(len(server_hashes), count_files(app_files_dir))
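The test above assumes a count_files helper that returns how many files live under a directory tree. A minimal sketch of such a helper, assuming a recursive walk is what is wanted (the project's real helper may differ):

import os

def count_files(root_dir):
    # Count regular files under root_dir, recursing into subdirectories.
    total = 0
    for _dirpath, _dirnames, filenames in os.walk(root_dir):
        total += len(filenames)
    return total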
Example #2
def log_data_info(data_dir, logger):
    """
    Log number of samples in @arg data_dir for each type.
    Return the number of samples in data_dir.
    """
    type_names = [
        dI for dI in os.listdir(data_dir) if os.path.isdir(pjoin(data_dir, dI))
    ]

    total_sample_num = 0

    for cls_name in type_names:
        cls_dir = pjoin(data_dir, cls_name)
        cls_num = utils.count_files(cls_dir, ".jpg")

        logger.info("Samples num of class {}: {}.".format(cls_name, cls_num))

        total_sample_num += cls_num

    return total_sample_num
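A minimal usage sketch, assuming the standard logging module and a data_dir laid out as one subdirectory per class (the directory and logger names here are illustrative):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("dataset")

total = log_data_info("data/train", logger)
logger.info("Total samples: %d", total)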
Example #3
data = "wikiart/wikiart/"
data_train = r"C:\Users\tgill\OneDrive\Documents\GD_AI\ArtGAN\wikipaintings_full\wikipaintings_train"
data_test = r"C:\Users\tgill\OneDrive\Documents\GD_AI\ArtGAN\wikipaintings_full\wikipaintings_val"
target_size = (224, 224)

X, y, classes = load_samples(10)
X_test, y_test, classes_test = load_samples(10, data_test)

#m = resnet_trained(20)
m = simple_gram(20)

m.summary()  # summary() already prints the model and returns None

n_files_train = count_files(data_train)
n_files_test = count_files(data_test)

nepochs = 20
epoch_size = 2500
batch_size = 32
steps_per_epoch = n_files_train // batch_size
v_step = 50  # n_files_test // batch_size
distortions = 0.1


train_paths, y_train, classes = getPaths(data_train)
test_paths, y_test, classes = getPaths(data_test)

prep_func = imagenet_preprocess_input
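The script stops short of training. A plausible continuation under the names defined above, assuming a Keras model and tf.keras's ImageDataGenerator (the original may instead build custom generators from train_paths/test_paths):

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Directory-based generators; prep_func matches the preprocessing chosen above.
train_gen = ImageDataGenerator(preprocessing_function=prep_func).flow_from_directory(
    data_train, target_size=target_size, batch_size=batch_size)
val_gen = ImageDataGenerator(preprocessing_function=prep_func).flow_from_directory(
    data_test, target_size=target_size, batch_size=batch_size)

m.fit(train_gen,
      steps_per_epoch=steps_per_epoch,
      epochs=nepochs,
      validation_data=val_gen,
      validation_steps=v_step)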
Example #4
def main():
    # parsing command-line options
    global options, db_file, _connect
    sql_dir = os.path.join(os.path.dirname(sys.argv[0]), 'sql')
    option_list = [
        make_option("-o",
                    "--out-dir",
                    dest="out_dir",
                    metavar="DIR",
                    help="save updated fb2 files to this dir"),
        make_option("-g",
                    "--generate-db",
                    dest="update_db",
                    action="store_true",
                    default=False,
                    help="generate db"),
        make_option("-d",
                    "--do-not-delete",
                    dest="nodel",
                    action="store_true",
                    default=False,
                    help="don't delete duplicate files"),
        make_option("-f",
                    "--do-not-fix",
                    dest="nofix",
                    action="store_true",
                    default=False,
                    help="don't fix an xml"),
        make_option("-u",
                    "--do-not-update",
                    dest="noup",
                    action="store_true",
                    default=False,
                    help="don't update fb2 meta info"),
        make_option("-z",
                    "--do-not-zip",
                    dest="nozip",
                    action="store_true",
                    default=False,
                    help="don't zip result files"),
        make_option("-i",
                    "--search-id",
                    dest="search_id",
                    action="store_true",
                    default=False,
                    help="search bookid in fb2"),
        make_option("-a",
                    "--save-deleted",
                    dest="save_deleted",
                    metavar="DIR",
                    help="save deleted fb2 files to this dir"),
        make_option("-c",
                    "--search-deleted",
                    dest="search_deleted",
                    metavar="DIR",
                    help="search deleted fb2 files in this dir"),
        make_option("-b",
                    "--save-bad-fb2",
                    dest="save_bad",
                    metavar="DIR",
                    help="save bad fb2 files to this dir"),
        make_option("-s",
                    "--sql-dir",
                    dest="sql_dir",
                    default=sql_dir,
                    metavar="DIR",
                    help="search sql files in this dir"),
        make_option("-e",
                    "--output-encoding",
                    dest="output_encoding",
                    default='utf-8',
                    metavar="ENC",
                    help="fb2 output encoding"),
        make_option("-l",
                    "--log-file",
                    dest="log_file",
                    metavar="FILE",
                    help="output log to this file"),
        make_option("-n",
                    "--not-found-file",
                    dest="not_found",
                    metavar="FILE",
                    help="save missing books to this file"),
        make_option("-F",
                    "--filename-pattern",
                    dest="fn_format",
                    metavar="PATTERN",
                    help="output filenames pattern"),
    ]
    parser = OptionParser(option_list=option_list,
                          usage=("usage: %prog [options] "
                                 "input-files-or-dirs"),
                          version="%prog " + prog_version)
    options, args = parser.parse_args()
    LogOptions.level = 0
    db_file = os.path.join(options.sql_dir, 'db.sqlite')
    _connect = sqlite3.connect(db_file)
    if options.update_db:
        # update db
        print_log('start update db')
        ret = update_db()
        if ret:
            print_log('done')
        else:
            print_log('fail')
            return
        if len(args) == 0:
            return
    #
    if len(args) == 0:
        sys.exit('wrong number of arguments')
    in_file = args[0]
    if not options.out_dir:
        sys.exit('option --out-dir required')
    for f in args:
        if not os.path.exists(f):
            sys.exit('file does not exist: ' + f)
    if not os.path.isdir(options.out_dir):
        sys.exit('dir does not exist: ' + options.out_dir)
    if options.save_bad and not os.path.isdir(options.save_bad):
        sys.exit('dir does not exist: ' + options.save_bad)
    if options.save_deleted and not os.path.isdir(options.save_deleted):
        sys.exit('dir does not exist: ' + options.save_deleted)
    if not os.path.exists(db_file):
        print_log('start update db')
        ret = update_db()
        if ret:
            print_log('done')
        else:
            print_log('fail')
            return
    #
    stats.total_files = count_files(args)
    print('total files:', stats.total_files)
    if options.log_file:
        LogOptions.outfile = open(options.log_file, 'w')
    stats.starttime = time.time()
    process(args)
    et = time.time() - stats.starttime
    print('elapsed time: %.2f secs' % et)
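Here count_files is applied to the positional arguments, each of which may be a file or a directory. A sketch of a compatible helper, assuming directories should be walked recursively (again, the project's real helper may differ):

import os

def count_files(paths):
    # Count files among paths; each entry may be a file or a directory.
    total = 0
    for path in paths:
        if os.path.isdir(path):
            for _dirpath, _dirnames, filenames in os.walk(path):
                total += len(filenames)
        else:
            total += 1
    return total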
Example #5
def build_mel_3seconds_groups_dataset():
    conn = MyConn()
    config = Config()
    tracks = [
        r[0] for r in conn.query(table="sub_tracks",
                                 targets=["track_id"],
                                 conditions={"is_valid": 1})
    ]
    save_dir_prefix = "/Volumes/nmusic/NetEase2020/data/mel_3seconds_groups"
    n_dir, dir_size = 1, 100
    flag, saved_files = count_files(save_dir_prefix,
                                    return_files=True)  # tracks already extracted
    saved_tracks = [x[:-4] for x in saved_files]

    q_tracks = Queue()
    for t in tracks:
        if t not in saved_tracks:
            q_tracks.put(t)
    print(q_tracks.qsize())
    lock = Lock()

    def task(thread_id, task_args):
        conn = MyConn()
        while not q_tracks.empty():
            try:
                tid = q_tracks.get()

                lock.acquire()
                dirpath = assign_dir(prefix=save_dir_prefix,
                                     flag=task_args["flag"],
                                     n_dir=n_dir,
                                     dir_size=dir_size)
                if not os.path.exists(dirpath):
                    os.makedirs(dirpath)
                filepath = os.path.join(dirpath, "{}.pkl".format(tid))
                task_args["flag"] += 1
                lock.release()

                # fetch the track's mp3_path and chorus_start from the database
                mp3_path, chorus_start = conn.query(
                    table="sub_tracks",
                    targets=["mp3_path", "chorus_start"],
                    conditions={"track_id": tid},
                    fetchall=False)

                music_vec = get_mel_3seconds_groups(mp3_path,
                                                    config,
                                                    offset=chorus_start,
                                                    duration=18)

                with open(filepath, 'wb') as f:
                    pickle.dump(music_vec, f)
            except KeyboardInterrupt:
                print("KeyboardInterrupt. q_tracks size: {}".format(
                    q_tracks.qsize()))
                break
            except Exception:
                print(tid)
                print(traceback.format_exc())
        sys.exit(0)

    task_args = {}
    task_args["flag"] = flag
    threads_group = ThreadsGroup(task=task, n_thread=5, task_args=task_args)
    threads_group.start()
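This builder leans on two project helpers: count_files(prefix, return_files=True), which evidently returns both the current file count (reused as the starting flag) and the names already saved, and assign_dir, which maps that counter to a bucket directory holding at most dir_size files. A sketch consistent with how they are called here; the bucketing rule in assign_dir is an assumption:

import os

def count_files(prefix, return_files=False):
    # Count files under prefix; optionally also return their names.
    files = []
    for _dirpath, _dirnames, filenames in os.walk(prefix):
        files.extend(filenames)
    return (len(files), files) if return_files else len(files)

def assign_dir(prefix, flag, n_dir=1, dir_size=100):
    # Assumed rule: file number `flag` lands in numbered bucket directories
    # of dir_size files each, starting from bucket n_dir.
    bucket = flag // dir_size + n_dir
    return os.path.join(prefix, str(bucket))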
Example #6
def build_vggish_embed_dataset():
    conn = MyConn()
    sql = "SELECT track_id FROM sub_tracks WHERE is_valid=1 AND vggish_embed_path IS NULL"
    tracks = [r[0] for r in conn.query(sql=sql)]

    save_dir_prefix = "/Volumes/nmusic/NetEase2020/data/vggish_embed"
    n_dir, dir_size = 1, 100
    flag, saved_files = count_files(save_dir_prefix,
                                    return_files=True)  # tracks already extracted
    saved_tracks = [x[:-4] for x in saved_files]

    q_tracks = Queue()
    vggish = torch.hub.load("harritaylor/torchvggish",
                            "vggish",
                            pretrained=True)
    for t in tracks:
        if t not in saved_tracks:
            q_tracks.put(t)
    print(q_tracks.qsize())
    lock = Lock()

    def task(thread_id, task_args):
        conn = MyConn()
        while not q_tracks.empty():
            try:
                tid = q_tracks.get()

                lock.acquire()
                dirpath = assign_dir(prefix=save_dir_prefix,
                                     flag=task_args["flag"],
                                     n_dir=n_dir,
                                     dir_size=dir_size)
                if not os.path.exists(dirpath):
                    os.makedirs(dirpath)
                filepath = os.path.join(dirpath, "{}.pkl".format(tid))
                task_args["flag"] += 1
                lock.release()

                # fetch the track's rawmusic_path from the database
                rawmusic_path = conn.query(table="sub_tracks",
                                           targets=["rawmusic_path"],
                                           conditions={"track_id": tid},
                                           fetchall=False)[0]

                with open(rawmusic_path, "rb") as f:
                    y = pickle.load(f)
                embed = vggish(y, fs=22050)

                with open(filepath, 'wb') as f:
                    pickle.dump(embed, f)
            except KeyboardInterrupt:
                print("KeyboardInterrupt. q_tracks size: {}".format(
                    q_tracks.qsize()))
                break
            except Exception:
                print(tid)
                print(traceback.format_exc())
        sys.exit(0)

    task_args = {}
    task_args["flag"] = flag
    threads_group = ThreadsGroup(task=task, n_thread=5, task_args=task_args)
    threads_group.start()
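For context, the VGGish model loaded above comes from the harritaylor/torchvggish hub repo; its forward pass accepts either a waveform array plus a sample rate (as used here) or a path to a .wav file, and returns one 128-dimensional embedding per roughly 0.96 s frame. A minimal standalone sketch (the file name is illustrative):

import torch

vggish = torch.hub.load("harritaylor/torchvggish", "vggish", pretrained=True)
vggish.eval()

embeddings = vggish.forward("example.wav")
print(embeddings.shape)  # (n_frames, 128)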
Example #7
def extract_chorus_mark_rawmusic():
    '''
    Extract the raw audio of each track based on its chorus_start and
    chorus_end. Runs in multiple threads.
    '''
    conn = MyConn()
    sql = "SELECT track_id FROM sub_tracks WHERE rawmusic_path IS NULL AND valid_bnum=0 AND chorus_start>0"
    tracks = [r[0] for r in conn.query(sql=sql)]

    save_dir_prefix = "/Volumes/nmusic/NetEase2020/data/chorus_mark_rawmusic"
    n_dir, dir_size = 1, 100
    flag, saved_files = count_files(save_dir_prefix,
                                    return_files=True)  # tracks already extracted
    saved_tracks = [x[:-4] for x in saved_files]

    q_tracks = Queue()
    for t in tracks:
        if t not in saved_tracks:
            q_tracks.put(t)
    print(q_tracks.qsize())
    lock = Lock()

    def task(thread_id, task_args):
        conn = MyConn()
        while not q_tracks.empty():
            try:
                tid = q_tracks.get()

                lock.acquire()
                dirpath = assign_dir(prefix=save_dir_prefix,
                                     flag=task_args["flag"],
                                     n_dir=n_dir,
                                     dir_size=dir_size)
                if not os.path.exists(dirpath):
                    os.makedirs(dirpath)
                filepath = os.path.join(dirpath, "{}.pkl".format(tid))
                task_args["flag"] += 1
                lock.release()

                # fetch the track's chorus_start and mp3_path from the database
                chorus_start, mp3_path = conn.query(
                    table="sub_tracks",
                    targets=["chorus_start", "mp3_path"],
                    conditions={"track_id": tid},
                    fetchall=False)

                y, sr = librosa.load(mp3_path,
                                     offset=chorus_start,
                                     duration=20)  # chorus window fixed at 20 s

                with open(filepath, 'wb') as f:
                    pickle.dump(y, f)
            except KeyboardInterrupt:
                print("KeyboardInterrupt. q_tracks size: {}".format(
                    q_tracks.qsize()))
                break
            except Exception:
                print(tid)
                print(traceback.format_exc())
        sys.exit(0)

    task_args = {}
    task_args["flag"] = flag
    threads_group = ThreadsGroup(task=task, n_thread=10, task_args=task_args)
    threads_group.start()
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--source',
                        type=str,
                        help='Directory containing receipt images to crop')
    parser.add_argument('--destination',
                        type=str,
                        help='Directory to save cropped receipt images')
    parser.add_argument(
        '--scale',
        type=float,
        default=0.5,
        help=
        'Factor by which to scale the images when processing for cropping. 0.5 generally yields best results'
    )
    parser.add_argument(
        '--size',
        type=int,
        help=
        'Height and width of final cropped image. If set, overrides individual height and width arguments'
    )
    parser.add_argument('--width',
                        type=int,
                        default=299,
                        help='Width in pixels of final cropped image')
    parser.add_argument('--height',
                        type=int,
                        default=299,
                        help='Height in pixels of final cropped image')
    parser.add_argument('--vclip',
                        type=float,
                        default=0.5,
                        help='Upper percentage of cropped image to utilize.')
    args = parser.parse_args()
    src = args.source

    dst = args.destination
    if not dst:
        dst = src + '_cropped'

    clip = args.vclip
    scale = args.scale
    resize = (args.height, args.width)
    if args.size is not None:
        resize = (args.size, args.size)

    print('Source Directory:      ', src)
    print('Destination Directory: ', dst)
    print('Scale Factor:          ', scale)
    print('Clip Percentage:       ', clip * 100.0)
    print('Output Image Size:      {}px x {}px'.format(resize[0], resize[1]))
    print()

    total_images = count_files(src)
    processed_images = 0
    times = []
    remaining_time = ''

    for filename, image in get_images(src):
        sys.stdout.write('Processed {} of {} images [{}]\r'.format(
            processed_images, total_images, remaining_time))
        sys.stdout.flush()

        t0 = time.time()
        cropped_image = crop_vertical(image,
                                      scale=scale,
                                      resize=resize,
                                      clip=clip)
        save_image(cropped_image, dst + '/' + filename)
        processed_images += 1
        t1 = time.time()
        tot_time = t1 - t0
        times.append(tot_time)

        remaining_time_secs = (total_images -
                               processed_images) * np.mean(times)
        remaining_time = '{} remaining'.format(
            datetime.timedelta(seconds=int(remaining_time_secs)))

    print('Processed {} images in {} secs (approx {} secs per image)'.format(
        total_images, np.sum(times), np.mean(times)))
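A typical invocation, assuming the script above is saved as crop_receipts.py (the file name is illustrative):

python crop_receipts.py --source ./receipts --destination ./receipts_cropped --size 299 --vclip 0.5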