Example #1
def generate_from_metadata(file, num_tracks):
    """Return track id's by looking up the name on music brainz

	Args:
		fname: The file containing the track in question.
	
	Yields:
		A set of track_id, by querying based on id3 tags
	"""
    album = file.getMDAlbumTitle()
    title = file.getMDTrackTitle()
    artist = file.getMDTrackArtist()
    if album is None or title is None or artist is None:
        return  # Can't get metadata

    util.update_progress("Searching albums by text lookup: " + ` album ` +
                         " " + ` artist `)
    for i in flatten(
            util.combinations(lookups.get_releases_by_cdtext, album, artist,
                              num_tracks)):
        release = lookups.get_release_by_releaseid(i.release.id)
        util.update_progress("Trying " + release.title + " by text lookup")
        for trackind in range(len(release.tracks)):
            rtrackname = release.tracks[trackind].title
            if type(title) != type([]):
                title = [title]
            for t in title:
                if util.comp_name(rtrackname, t):
                    print "Using album based text comparison for", artist.strip(
                    ), album.strip(), "'s track", trackind + 1, ` rtrackname `
                    yield lookups.get_track_by_id(release.tracks[trackind].id)
                else:
                    print "Failed text lookup for %s" % t
Example #2
def test_worker(model, sess, subset='kpval'):
    # build data reader
    reader = TestReader(batch_size=32,
                        subset=subset,
                        use_fb_data=FLAGS.use_fb_data)

    quest_ids = []
    result = []

    tf.logging.info('\nRunning inference on split %s...' % subset)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        mc_scores = sess.run(model._logits,
                             feed_dict=model.fill_feed_dict(outputs[:-3]))
        choice_idx = np.argmax(mc_scores, axis=1)

        cands, _qids, image_ids = outputs[-3:]
        for qid, cid, mcs in zip(_qids, choice_idx, cands):
            answer = mcs['cands'][cid]
            assert (mcs['quest_id'] == qid)
            result.append({u'answer': answer, u'question_id': qid})

        quest_ids.append(_qids)

    return quest_ids, result
Example #3
def display():
    if 'access_token' not in session:
        abort(400)

    access_token = session['access_token']
    if 'job' in session:
        job = get_job_from_key(session['job'], conn)
        # Only rely on a previous result if the same user is logged in (same access_token)
        if job is not None and access_token == job.meta.get('access_token', None):
            return render_template('display.html', username=session['username'], quota=session['quota'], used=session['used'])

    try:
        client = Dropbox(access_token)
    except Exception:
        abort(401)

    account = client.users_get_current_account()
    session['username'] = account.name.display_name

    space_usage = client.users_get_space_usage()
    allocated, used = get_space_usage_info(space_usage)
    total_bytes = used
    session['used'] = human_readable(used)
    session['quota'] = human_readable(allocated)

    job = q.enqueue(walk_entire_dropbox, access_token, total_bytes)
    job.meta['access_token'] = access_token
    job.save()
    update_progress(job, 0, "/")
    session['job'] = job.key

    return render_template('display.html', username=session['username'], quota=session['quota'], used=session['used'])
Example #4
    def fit(self):
        kmeans = MiniBatchKMeans(self.nr_centroids, init='k-means++')
        
        for it in range(self.nr_it):
            print "Iteration {0} out of {1}".format(it, self.nr_it)            
            
            batches = randbr.RandomBatchReader()
            maxIterations = batches.nbatches
            for i, batch in enumerate(batches):
                
                if self.rotational_invariant_training:  
                    # rotate the batch 90, 180 and 270 degrees
                    batch90 = self.rotate_patches_90_degrees(batch,1)
                    batch180 = self.rotate_patches_90_degrees(batch,2)
                    batch270 = self.rotate_patches_90_degrees(batch,3)
                    
                    batch = np.concatenate((batch, batch90, batch180, batch270))
                   
                    
                if self.mirror_invariant_training:
                    # mirror the batch left-right, and up-down
                    batchlr = self.mirror_patches(batch, "lr")
                    batchud = self.mirror_patches(batch, "ud")
                                       
                    batch = np.concatenate((batch, batchlr, batchud))
                   

                 # Training   
                util.update_progress((i+(it*maxIterations))/(maxIterations*self.nr_it))
                kmeans.partial_fit(batch)
                
        util.update_progress(1.0)
        print "fitting done"
        return kmeans.cluster_centers_
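Example #4 above feeds (optionally augmented) patch batches to scikit-learn's MiniBatchKMeans via partial_fit, so the centroids are updated incrementally instead of loading all patches at once. A minimal standalone sketch of the same partial_fit pattern, with random data standing in for the patch batches (not code from this repository):

import numpy as np
from sklearn.cluster import MiniBatchKMeans

kmeans = MiniBatchKMeans(n_clusters=16, init='k-means++')
for _ in range(10):                  # loop over "epochs" of batches
    batch = np.random.rand(256, 64)  # stand-in for one batch of image patches
    kmeans.partial_fit(batch)        # incremental centroid update
centroids = kmeans.cluster_centers_  # what fit() returns in the example above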
Example #5
def extract_stats(filepaths, image_size, square_function):
    print "Calculating mean, std and var of all images"

    # Online (Welford) running estimates over all images: mean and sum of squared deviations
    count_so_far = 0
    mean = np.zeros((image_size, image_size))
    M2 = np.zeros((image_size, image_size))

    n = len(filepaths)

    for i, filepath in enumerate(filepaths):

        image = misc.imread(filepath, flatten=1)

        image = process(image, square_function, image_size)
        # Online statistics
        count_so_far = count_so_far + 1
        delta = image - mean
        mean = mean + delta / count_so_far
        M2 = M2 + delta * (image - mean)

        if i % 50 == 0:
            util.update_progress(i / n)

    util.update_progress(1.0)

    mean_image = mean
    variance_image = M2 / (n - 1)
    std_image = np.sqrt(variance_image)

    print "Plotting mean image (only shows afterwards)"
    util.plot(mean_image, invert=True)

    return mean_image, variance_image, std_image
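Example #5 computes the per-pixel mean and variance with Welford's online update (the count/delta/M2 recurrence), so only one image needs to be in memory at a time. A small self-contained check of that update rule against NumPy's two-pass results, using random vectors rather than images (hypothetical helper name, not part of the repository):

import numpy as np

def online_mean_var(samples):
    # Welford's algorithm: single pass, numerically stable.
    count = 0
    mean = np.zeros_like(samples[0], dtype=np.float64)
    m2 = np.zeros_like(samples[0], dtype=np.float64)
    for x in samples:
        count += 1
        delta = x - mean
        mean += delta / count
        m2 += delta * (x - mean)
    return mean, m2 / (count - 1)

data = np.random.rand(100, 8)
mean, var = online_mean_var(data)
assert np.allclose(mean, data.mean(axis=0))
assert np.allclose(var, data.var(axis=0, ddof=1))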
Example #6
def generate_from_metadata(file, num_tracks):
	"""Return track id's by looking up the name on music brainz

	Args:
		fname: The file containing the track in question.
	
	Yields:
		A set of track_id, by querying based on id3 tags
	"""
	album = file.getMDAlbumTitle()
	title = file.getMDTrackTitle()
	artist = file.getMDTrackArtist()
	if album is None or title is None or artist is None:
		return # Can't get metadata
	
	util.update_progress("Searching albums by text lookup: "+`album`+" "+`artist`)
	for i in flatten(util.combinations(lookups.get_releases_by_cdtext,album, artist, num_tracks)):
		release = lookups.get_release_by_releaseid(i.release.id)
		util.update_progress("Trying "+release.title+" by text lookup")
		for trackind in range(len(release.tracks)):
			rtrackname = release.tracks[trackind].title
			if type(title) != type([]):
				title=[title]
			for t in title:
				if util.comp_name(rtrackname,t):
					print "Using album based text comparison for",artist.strip(),album.strip(),"'s track",trackind+1,`rtrackname`
					yield lookups.get_track_by_id(release.tracks[trackind].id)
				else:
					print "Failed text lookup for %s" % t
Example #7
def test(checkpoint_path=None):
    batch_size = 100
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET,
                              feat_type=config.feat_type, version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version,
                                                                     FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(
            prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
        generated_ans[:, -1] = 0
        top_ans = np.argmax(generated_ans, axis=1)

        ans_ids.append(top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)

    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
Example #8
def test(checkpoint_path=None):
    batch_size = 100
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size,
                              subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    # model.set_agent_ids([0])
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    # to_sentence = SentenceGenerator(trainset='trainval')

    results = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(prob,
                                 feed_dict=model.fill_feed_dict(outputs[:-2]))
        generated_ans[:, -1] = 0
        ans_cand_ids = np.argsort(-generated_ans, axis=1)

        quest_ids = outputs[-2]

        for quest_id, ids in zip(quest_ids, ans_cand_ids):
            answers = []
            for k in range(_K):
                aid = ids[k]
                ans = to_sentence.index_to_top_answer(aid)
                answers.append(ans)
            res_i = {'question_id': int(quest_id), 'answers': answers}
            results.append(res_i)

    eval_recall(results)
Example #9
def test(checkpoint_path=None):
    batch_size = 64
    config = ModelConfig()
    config.sample_negative = FLAGS.sample_negative
    config.use_fb_bn = FLAGS.use_fb_bn
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = TestReader(batch_size=batch_size,
                        subset=TEST_SET,
                        use_fb_data=FLAGS.use_fb_data)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    quest_ids = []
    result = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        mc_scores = sess.run(model._logits,
                             feed_dict=model.fill_feed_dict(outputs[:-3]))
        choice_idx = np.argmax(mc_scores, axis=1)

        cands, _qids, image_ids = outputs[-3:]
        for qid, cid, mcs in zip(_qids, choice_idx, cands):
            answer = mcs['cands'][cid]
            assert (mcs['quest_id'] == qid)
            result.append({u'answer': answer, u'question_id': qid})

        quest_ids.append(_qids)

    quest_ids = np.concatenate(quest_ids)

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
Example #10
def test(checkpoint_path=None):
    batch_size = 4
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size,
                              subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(
        trainset='trainval',
        top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(prob,
                                 feed_dict=model.fill_feed_dict(outputs[:-2]))
        generated_ans[:, -1] = 0
        top_ans = np.argmax(generated_ans, axis=1)

        ans_ids.append(top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)

    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    gt = reader._answer
    n1, n2 = (gt == ans_ids).sum(), gt.size
    acc = n1 / float(n2)
    print('\nAcc: %0.2f, %d/%d' % (acc * 100., n1, n2))
    return acc
Example #11
def test(checkpoint_path=None):
    batch_size = 128

    # build data reader
    reader = Reader(batch_size=batch_size, subset=TEST_SET, phase='test', version='v1')

    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % ('v1',
                                                                     'Fusion'))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = RerankModel(phase='test', version='v1', num_cands=5)
    model.build()

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))

    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file='../iccv_vaq/data/vqa_trainval_top2000_answers.txt')

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.pop_batch()
        model_preds = sess.run(model.preds, feed_dict=model.fill_feed_dict(outputs))
        local_index = model_preds.argmax(axis=1)
        # local_index = outputs[-3].argmax(axis=1)  # ivqa
        # local_index = outputs[-4].argmax(axis=1) # vqa
        top_ans = np.array([cand[idx] for idx, cand in zip(local_index, outputs[3])])

        ans_ids.append(top_ans)
        quest_id = outputs[-1]
        quest_ids.append(quest_id)

    ans_ids = np.concatenate(ans_ids)
    quest_ids = np.concatenate(quest_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % ('v1', TEST_SET)
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    # ana_ctx.close()
    return res_file, quest_ids
Example #12
def test(checkpoint_path=None):
    batch_size = 100
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size,
                              subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)
    if checkpoint_path is None:
        print(FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type))
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    # model.set_agent_ids([0])
    model.build()
    prob = model.qrd_prob

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    sess = tf.Session(graph=tf.get_default_graph(),
                      config=tf.ConfigProto(gpu_options=gpu_options))
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    gts = []
    preds = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        scores = sess.run(prob, feed_dict=model.fill_feed_dict(outputs))
        preds.append(scores.flatten())
        gts.append(outputs[-1])

    gts = np.concatenate(gts)
    preds = np.concatenate(preds)
    from scipy.io import savemat
    from sklearn.metrics import average_precision_score
    sv_file_name = os.path.basename(checkpoint_path)
    savemat('result/predictions_%s.mat' % sv_file_name, {
        'gt': gts,
        'preds': preds
    })
    ap = average_precision_score(1.0 - gts, 1.0 - preds)

    return float(ap)
Example #13
                def backoff_func(*args, **kwargs):
                        global lastwsquery
                        try:
                                return func(*args,**kwargs)
                        except ws.WebServiceError,e:
                                if (e.msg.find("503") != -1):
                                        util.update_progress("Caught " +webservice+ " 503, waiting 20s and trying again...")
                                else:
                                        # A bare raise will reraise the current exception
                                        raise
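Examples #13 and #21 are fragments of a web-service backoff wrapper; the code around these except clauses (the part that actually sleeps and retries after the "waiting 20s and trying again" message) is not included in this listing. A rough standalone sketch of the same retry-on-503 idea, with hypothetical names rather than the original wrapper:

import time
import functools

def ws_backoff(func, retries=5, delay=20):
    # Retry func when the web service answers 503, waiting between attempts;
    # any other error is re-raised immediately.
    @functools.wraps(func)
    def backoff_func(*args, **kwargs):
        for attempt in range(retries):
            try:
                return func(*args, **kwargs)
            except Exception as e:       # the originals catch ws.WebServiceError
                if "503" in str(e) and attempt < retries - 1:
                    time.sleep(delay)    # wait and try again
                else:
                    raise
    return backoff_func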
Example #14
def test(checkpoint_path=None):
    batch_size = 1
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % (FLAGS.model_type, config.feat_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    g_prob = model.prob
    g_att_map = model.attention_map
    # sess = tf.Session()
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    visualiser = PredictionVisualiser(FLAGS.model_type, do_plot=True)

    ans_ids = []
    quest_ids = []
    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 100 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        if i < 100:
            continue
        generated_ans, att_map = sess.run([g_prob, g_att_map],
                                          feed_dict=model.fill_feed_dict(
                                              outputs[:-2]))
        # process attention map
        att_map = att_map.reshape([batch_size, 14, 14, -1])
        att_map = np.transpose(att_map, [0, 3, 1, 2])
        generated_ans[:, -1] = 0
        top_ans = np.argmax(generated_ans, axis=1)
        gt_ans = outputs[3]

        ans_ids.append(top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)

        if np.random.rand() > 0.05:
            visualiser.plot(quest_id, generated_ans, att_map)
Example #15
def train(iteration_rounds=10, iterations=25000, filename='training'):
    print 'starting training'
    alpha_list = []
    beta_list = []

    for q in range(1, iteration_rounds + 1):
        alphas = np.ones(len(possible_pages))
        betas = np.ones(len(possible_pages))
        print 'Starting round ' + str(q)

        for i in range(0, iterations):
            randrunid = random.randint(0, 10000)
            randi = random.randint(0, 10000)
            page_index = beta_util.draw_from_beta_distributions(
                alphas=alphas, betas=betas,
                possible_pages=possible_pages)  # which arm wins

            bla = 0
            while bla < 50:
                try:
                    response = responder.respond_with_page(
                        i=randi,
                        runid=randrunid,
                        page=possible_pages[page_index],
                        teampw=teampw)
                except Exception:

                    bla = bla + 1
                    continue
                break

            success = response['effect']['Success']  # 1 or 0
            alphas, betas = beta_util.update_alphas_betas(
                index=page_index,
                success=success,
                price=possible_pages[page_index]['price'],
                alphas=alphas,
                betas=betas)
            util.update_progress(i / iterations)

        alpha_list.append(alphas)
        beta_list.append(betas)
        util.update_progress(1)
        print 'Round ' + str(q) + ' of ' + str(iteration_rounds) + ' finished.'

    alpha_list = np.array(alpha_list)
    beta_list = np.array(beta_list)

    final_alphas = alpha_list.mean(axis=0)
    final_betas = beta_list.mean(axis=0)
    beta_util.save_ab_to_filename(final_alphas, final_betas, name=filename)
    print 'training complete'
Example #16
def add_new_track(release, possible_releases, fileid, track, trackinfo,
                  impossible_releases):
    releaseid = release.id
    found_tracknumber = lookups.track_number(release.tracks, track)
    if releaseid in possible_releases:
        assert found_tracknumber not in possible_releases[releaseid]
        assert fileid not in possible_releases[releaseid].values(), (
            fileid, possible_releases[releaseid])
        possible_releases[releaseid][found_tracknumber] = fileid
        print "Found track", found_tracknumber, "(", release.tracks[
            found_tracknumber -
            1].title, ")", "of", release.title, ":", os.path.basename(
                fileid), "(tracks found: %s)\x1b[K" % (util.output_list(
                    possible_releases[releaseid].keys()))
        return
    else:
        possible_releases[releaseid] = {found_tracknumber: fileid}
        util.update_progress(
            "Considering new %s - %s (found track %d)" %
            (release.artist.name, release.title, found_tracknumber))

    # Right, lets see if we can find some other tracks quick smart
    for trackind in range(len(release.tracks)):
        # Don't waste time on things we've already found
        if (trackind + 1) in possible_releases[releaseid]:
            continue
        track = lookups.get_track_by_id(release.tracks[trackind].id)
        for fileid in trackinfo:
            if fileid in possible_releases[releaseid].values():
                continue
            if trackinfo[fileid].getPUID() in track.puids:
                # yay, found one.
                if verify_track(release, possible_releases,
                                impossible_releases, trackinfo, fileid, track):
                    possible_releases[releaseid][trackind + 1] = fileid
                    util.update_progress(
                        " Also found track %02d: %s" %
                        (trackind + 1, release.tracks[trackind].title))
                    break
    print " Found tracks: %s" % (util.output_list(
        possible_releases[releaseid].keys())),
    if util.list_difference(range(1,
                                  len(release.tracks) + 1),
                            possible_releases[releaseid].keys()):
        print " Missing tracks: %s" % (util.output_list(
            util.list_difference(range(1,
                                       len(release.tracks) + 1),
                                 possible_releases[releaseid].keys())))
    else:
        print
Example #17
def test(checkpoint_path=None):
    batch_size = 100
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = Reader(batch_size=batch_size,
                    subset=TEST_SET,
                    feat_type=config.feat_type,
                    version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version,
                                                                     FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    # model.set_agent_ids([0])
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    quest_ids = []
    ans_preds = []
    gt_labels = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(
            prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
        _gt_labels = outputs[1]
        gt_labels.append(_gt_labels)
        ans_preds.append(generated_ans)

        quest_id = outputs[-2]
        quest_ids.append(quest_id)

    ans_preds = np.concatenate(ans_preds)
    gt_labels = np.concatenate(gt_labels)
    return evaluate_result(ans_preds, gt_labels)
Example #18
def vaq_condition(checkpoint_path=None):
    subset = 'dev'
    model_config = ModelConfig()

    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = Reader(batch_size=1, subset=subset, output_attr=True, output_im=False,
                    output_qa=True, output_capt=False)

    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % FLAGS.model_type)
        checkpoint_path = ckpt.model_checkpoint_path

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'condition')
        model.build()
        saver = tf.train.Saver()

        sess = tf.Session()
        tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
        saver.restore(sess, checkpoint_path)

    fetch_op = model.losses
    num_batches = reader.num_batches

    save_file = 'data/%s_vaq_cond_score1000-2000_%s.hdf5' % ((FLAGS.model_type).lower(), subset)
    print('Save File: %s' % save_file)
    print('Running conditioning...')
    nlls, quest_ids = [], []
    for i in range(num_batches):
        update_progress(i / float(num_batches))

        outputs = reader.get_test_batch()
        im_feed, quest, _, ans_feed, quest_id, image_id = outputs

        losses = sess.run(fetch_op, feed_dict=model.fill_feed_dict(outputs[:-2]))
        scores = losses[:, :-1].mean(axis=1)
        scores = scores[np.newaxis, ::]
        nlls.append(scores)
        quest_ids.append(quest_id)

    nlls = np.concatenate(nlls, axis=0)
    quest_ids = np.concatenate(quest_ids, axis=0)
    print('\nSaving result files: %s...' % save_file)
    save_hdf5(save_file, {'nll': nlls, 'quest_ids': quest_ids})
Example #19
def _decode(fromname, towavname):
        if fromname.lower().endswith(".mp3"):
		args = ["mpg123","--quiet","--wav",towavname,fromname]
        elif fromname.lower().endswith(".flac"):
		args = ["flac","-d", "--totally-silent", "-f", "-o", towavname,fromname]
	elif fromname.lower().endswith(".ogg"):
		args = ["oggdec","--quiet","-o",towavname,fromname]
	else:
		raise DecodeFailed(fromname, "Don't know how to decode filename")
	
	try:
		util.update_progress("Decoding file")
		ret = subprocess.call(args)
	except OSError,e:
		raise DecodeFailed(fromname, "Cannot find decoder %s" % args[0])
Example #20
def test_model(ab_path='../data/alpha_beta/training.npz'):
    print 'starting training'
    alphas, betas = beta_util.load_ab(filename=ab_path)
    for q in range(10001, 10101):

        print 'Starting runId ' + str(q)
        runid = q
        revenue = 0

        for i in range(0, 10001):

            page_index = beta_util.draw_from_beta_distributions(
                alphas=alphas, betas=betas,
                possible_pages=possible_pages)  # which arm wins

            bla = 0
            while bla < 50:
                try:
                    response = responder.respond_with_page(
                        i=i,
                        runid=runid,
                        page=possible_pages[page_index],
                        teampw=teampw)
                except Exception:

                    bla = bla + 1
                    continue
                break

            success = response['effect']['Success']  # 1 or 0
            revenue = revenue + success * possible_pages[page_index]['price']

            alphas, betas = beta_util.update_alphas_betas(
                index=page_index,
                success=success,
                price=possible_pages[page_index]['price'],
                alphas=alphas,
                betas=betas)
            util.update_progress(i / 10001)

        util.save_profit(revenue)
        beta_util.save_ab(alphas, betas)

        util.update_progress(1)
        print 'RunId ' + str(q) + ' of ' + str(10100) + ' finished.'

    beta_util.save_ab_to_filename(alphas, betas, name='testing')
    print 'training complete'
Example #21
        def ws_backoff(func):
                def backoff_func(*args, **kwargs):
                        global lastwsquery
                        try:
                                return func(*args,**kwargs)
                        except ws.WebServiceError,e:
                                if (e.msg.find("503") != -1):
                                        util.update_progress("Caught " +webservice+ " 503, waiting 20s and trying again...")
                                else:
                                        # A bare raise will reraise the current exception
                                        raise
                        except ws.ConnectionError,e:
                                if (e.msg.find("urlopen error timed out") != -1):
                                        util.update_progress("Caught " +webservice+ " urlopen timeout. Retrying...")
                                else:
                                        raise
Example #22
def add_new_track(release, possible_releases, fileid, track, trackinfo, impossible_releases):
	releaseid = release.id
	found_tracknumber=lookups.track_number(release.tracks, track)
	if releaseid in possible_releases:
		assert found_tracknumber not in possible_releases[releaseid]
		assert fileid not in possible_releases[releaseid].values(),(fileid,possible_releases[releaseid])
		possible_releases[releaseid][found_tracknumber]=fileid
		print "Found track",found_tracknumber,"(",release.tracks[found_tracknumber-1].title,")","of",release.title,":",os.path.basename(fileid),"(tracks found: %s)\x1b[K" % (util.output_list(possible_releases[releaseid].keys()))
		return
	else:
		possible_releases[releaseid]={found_tracknumber:fileid}
		util.update_progress("Considering new %s - %s (found track %d)" % (
			release.artist.name,
			release.title,
			found_tracknumber))

	# Right, lets see if we can find some other tracks quick smart
	for trackind in range(len(release.tracks)):
		# Don't waste time on things we've already found
		if (trackind+1) in possible_releases[releaseid]:
			continue
		track = lookups.get_track_by_id(release.tracks[trackind].id)
		for fileid in trackinfo:
			if fileid in possible_releases[releaseid].values():
				continue
			if trackinfo[fileid].getPUID() in track.puids:
				# yay, found one.
				if verify_track(release,
						possible_releases,
						impossible_releases,
						trackinfo,
						fileid,
						track):
					possible_releases[releaseid][trackind+1]=fileid
					util.update_progress(" Also found track %02d: %s" % (trackind+1,release.tracks[trackind].title))
					break
	print " Found tracks: %s" % (
		util.output_list(possible_releases[releaseid].keys())),
	if util.list_difference(range(1,len(release.tracks)+1),
			possible_releases[releaseid].keys()):
		print " Missing tracks: %s"% (
			util.output_list(
				util.list_difference(range(1,len(release.tracks)+1),
				possible_releases[releaseid].keys())))
	else:
		print
Example #23
    def pipeline(self,
                 centroids,
                 data_file="../data/preprocessed.h5",
                 file_path="../data/activations/",
                 batch_size=-1,
                 n_pool_regions=4):
        if not os.path.exists(file_path):
            os.makedirs(file_path)

        if batch_size == -1:
            meta = util.load_metadata()
            batch_size = meta['patches_per_image']

        batches = batchreader.BatchReader(batchsize=batch_size,
                                          filepath=data_file)  #

        dimensions = (batches.nbatches, len(centroids) * n_pool_regions
                      )  # Set dimensions to #imagesx4*#centroids
        activations = np.zeros(dimensions)

        for i, batch in enumerate(batches):
            activation = self.distance_to_centroids(
                batch, centroids
            )  # Calculate activations for each patch to each centroid

            pooled = pool(activation, n_pool_regions=n_pool_regions
                          )  # Returns a vector with length 4x#centroids
            activations[i] = pooled
            util.update_progress(i / batches.nbatches)

        util.update_progress(1)

        print "Normalizing activations..."
        activations = self.normalize(activations)
        print "Normalizing done"
        print "Writing activations to file:"
        f = h5py.File(file_path + str(len(centroids)) + "activationkmeans.h5",
                      "w")
        dataSet = f.create_dataset("activations", dimensions, dtype=np.float64)
        dataSet[...] = activations
        f.close()
        print "Writing done"

        return activations
Example #24
def trainLinearChainCRF(dataset, featureFunction, iters=10, dev_set=[]):
    """
    Given |dataset|, do stochastic gradient descent to obtain a parameter vector.
    @param dataset list (list string, list string) - A collection of labeled sequences. 
    """
    stepSize = 0.9

    # Get all viable tags
    TAGS = list(set(it.chain.from_iterable(ys for _, ys in dataset)))

    # Initialize with a simple CRF with 0 parameters.
    crf = LinearChainCRF(TAGS, featureFunction)
    timer = util.Timer()

    for i in xrange(iters):
        gradientCheck(crf, dataset[0][0], dataset[0][1])
        # Print status
        lhood = sum(computeLogProbability(crf, xs, ys)
                    for xs, ys in dataset) / len(dataset)
        print "Training set confusion matrix:"
        f1 = reportF1(dataset, crf)
        print "Development set confusion matrix:"
        dev_f1 = reportF1(dev_set, crf) if dev_set else 0.
        print 'Iter %d, Likelihood %0.3f, Train F1: %0.3f, Dev F1 %0.3f' % (
            i, lhood, f1, dev_f1)

        timer.start()
        for (j, (xs, ys)) in enumerate(dataset):
            gradient = computeGradient(crf, xs, ys)
            for key, value in gradient.iteritems():
                crf.parameters[key] += stepSize * value
            util.update_progress(float(j) / len(dataset))
        util.update_progress(1.0)
        stepSize *= (1. + i) / (2. + i)
        print 'Iter %d took %0.2f seconds' % (i, timer.ticks())

    lhood = sum(computeLogProbability(crf, xs, ys)
                for xs, ys in dataset) / len(dataset)
    f1 = reportF1(dataset, crf)
    dev_f1 = reportF1(dev_set, crf) if dev_set else 0.
    print 'Iter %d, Likelihood %0.3f, Train F1: %0.3f, Dev F1 %0.3f' % (
        i, lhood, f1, dev_f1)

    return crf
Example #25
def _ensure_16bit_wave(filename):
	""" Check the 'width' of a given WAVE file and ensure it is 16-bit."""
	wav = wave.open(filename, 'rb')
	width = wav.getsampwidth()
	wav.close()
	if width != 2:
		newfile = filename + ".16bit.wav"
		try:
			util.update_progress("Forcing WAV file to 16 bit pcm")
			args = ["sndfile-convert", "-pcm16", filename, newfile]
			ret = subprocess.call(args)
			if ret != 0:
				raise DecodeFailed(filename, "Subprocess returned %d" % ret)
			if os.path.exists(filename):
				os.unlink(filename)
				os.rename(newfile, filename)
		except OSError,e:
			raise Exception("Only 16-bit sample widths are supported unless libsndfile is installed")
		finally:
Example #26
def _decode(fromname, towavname):
    if fromname.lower().endswith(".mp3"):
        args = ["mpg123", "--quiet", "--wav", towavname, fromname]
    elif fromname.lower().endswith(".flac"):
        args = [
            "flac", "-d", "--totally-silent", "-f", "-o", towavname, fromname
        ]
    elif fromname.lower().endswith(".ogg"):
        args = ["oggdec", "--quiet", "-o", towavname, fromname]
    else:
        raise DecodeFailed(fromname, "Don't know how to decode filename")

    try:
        util.update_progress("Decoding file")
        ret = subprocess.call(args)
        if ret != 0:
            raise DecodeFailed(fromname, "Subprocess returned %d" % ret)
    except OSError, e:
        raise DecodeFailed(fromname, "Cannot find decoder %s" % args[0])
Example #27
def train(iteration_rounds = 10, iterations = 25000, filename = 'training'):
    print 'starting training'
    alpha_list = []
    beta_list = []
    
    for q in range(1,iteration_rounds+1):
        alphas = np.ones(len(possible_pages))
        betas = np.ones(len(possible_pages))
        print 'Starting round ' + str(q)
        
        
        for i in range(0,iterations):
            randrunid = random.randint(0,10000)
            randi = random.randint(0,10000)
            page_index = beta_util.draw_from_beta_distributions(alphas=alphas, betas=betas, possible_pages = possible_pages) # which arm wins
            
            bla = 0
            while bla<50:
                 try:
                     response = responder.respond_with_page(i=randi, runid=randrunid, page = possible_pages[page_index], teampw = teampw)
                 except Exception:
                     
                     bla = bla+1
                     continue
                 break
             
            success = response['effect']['Success'] # 1 or 0
            alphas, betas = beta_util.update_alphas_betas(index=page_index, success = success, price = possible_pages[page_index]['price'], alphas = alphas, betas = betas)
            util.update_progress(i/iterations)
            
        alpha_list.append(alphas)
        beta_list.append(betas)
        util.update_progress(1)
        print 'Round ' + str(q) + ' of ' + str(iteration_rounds) + ' finished.'
            
    alpha_list = np.array(alpha_list)
    beta_list = np.array(beta_list)
    
    final_alphas = alpha_list.mean(axis=0)
    final_betas = beta_list.mean(axis=0)
    beta_util.save_ab_to_filename(final_alphas, final_betas, name=filename)
    print 'training complete' 
Example #28
def hist_eq(image_dir='test_hist/',
            target_dir='test_result_hist/',
            method='CLAHE'):

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    tasks = glob.glob(image_dir + '*.jpeg')
    job_total = len(tasks)

    print 'Processing images matching ' + image_dir + '*.jpeg'

    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count() * 2

    for im_name in tasks:
        jobs.put(im_name)

    for i in xrange(NUMBER_OF_PROCESSES):
        p = Thread(target=worker, args=(i, jobs, result, target_dir, method))
        p.daemon = True
        p.start()

    print 'Starting workers (', NUMBER_OF_PROCESSES, ')!'

    n_complete = 0
    for t in xrange(len(tasks)):
        r = result.get()
        n_complete += 1
        util.update_progress(n_complete / job_total)
        result.task_done()
        #print t, 'done'

    for w in xrange(NUMBER_OF_PROCESSES):
        jobs.put(None)

    print 'Done!'
    result.join()
    jobs.close()
    result.close()
Example #29
def test(checkpoint_path=None):
    batch_size = 128
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = TestReader(batch_size=batch_size, subset=TEST_SET)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir %
            (FLAGS.version, FLAGS.model_type, FLAGS.delta))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    print('Running inference on split %s...' % TEST_SET)
    aug_quest_ids, scores = [], []
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        rank_score = sess.run(model.prob,
                              feed_dict=model.fill_feed_dict(outputs[:3]))

        _, quest_ids, image_ids = outputs[3:]
        scores.append(rank_score)
        aug_quest_ids.append(quest_ids)

    aug_quest_ids = np.concatenate(aug_quest_ids)
    scores = np.concatenate(scores)
    return convert_to_questions(aug_quest_ids, scores)
Example #30
		def delay(*args,**kwargs):
			global lastwsquery
			if webservice not in lastwsquery:
				lastwsquery[webservice]=startup

			lastwsquery[webservice] = max(
				lastwsquery[webservice],
				time.time() - webservices[webservice]["freequeries"] * MINDELAY)
				
			wait=0
			if time.time()-lastwsquery[webservice]<MINDELAY:
				wait=MINDELAY-(time.time()-lastwsquery[webservice])
				if PROFILE==1:
					util.update_progress("Waiting %.2fs for %s" % (wait,func.__name__))
				time.sleep(wait)
			t=time.time()
			ret=func(*args,**kwargs)
			if PROFILE>=2:
				util.update_progress("%s took %.2fs (after a %.2fs wait)" % (func.__name__,time.time()-t,wait))
			lastwsquery[webservice]+=MINDELAY
			return ret
Example #31
def _ensure_16bit_wave(filename):
    """ Check the 'width' of a given WAVE file and ensure it is 16-bit."""
    wav = wave.open(filename, 'rb')
    width = wav.getsampwidth()
    wav.close()
    if width != 2:
        newfile = filename + ".16bit.wav"
        try:
            util.update_progress("Forcing WAV file to 16 bit pcm")
            args = ["sndfile-convert", "-pcm16", filename, newfile]
            ret = subprocess.call(args)
            if ret != 0:
                raise DecodeFailed(filename, "Subprocess returned %d" % ret)
            if os.path.exists(filename):
                os.unlink(filename)
                os.rename(newfile, filename)
        except OSError, e:
            raise Exception(
                "Only 16-bit sample widths are supported unless libsndfile is installed"
            )
        finally:
Example #32
def hist_eq(image_dir = 'test_hist/', target_dir = 'test_result_hist/', method = 'CLAHE'):
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    #pic_list = os.listdir(image_dir)
    pic_list = glob.glob(image_dir+'/*.jpeg')
    list_length = len(pic_list)
    
    util.update_progress(0)
    for j, image_path in enumerate(pic_list):
        
        img = cv2.imread(image_path,1)
        # Use file name only, without .jpeg
        image_name = image_path.split('/')[-1][:-5] 
        
        b,g,r = cv2.split(img)        
        
        if method == 'HE':
            cv2.equalizeHist(b,b)
            cv2.equalizeHist(g,g)
            cv2.equalizeHist(r,r)
        else:
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
            clahe.apply(g,g)
            if not method =='CLAHE_G':
                clahe.apply(b,b)
                clahe.apply(r,r)
            
        recombined = cv2.merge((b,g,r))
        cv2.imwrite(target_dir + image_name + method +'.jpeg', recombined)
        util.update_progress(j/list_length)
        
    util.update_progress(1)
Example #33
def hist_eq(image_dir = 'test_hist/', target_dir = 'test_result_hist/', method = 'CLAHE'):

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)


    tasks = glob.glob(image_dir+'*.jpeg')
    job_total = len(tasks)

    print 'Processing images matching ' + image_dir+ '*.jpeg'

    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()*2

    for im_name in tasks:
        jobs.put(im_name)

    for i in xrange(NUMBER_OF_PROCESSES):
        p = Thread(target=worker, args=(i, jobs, result, target_dir, method))
        p.daemon = True
        p.start()

    print 'Starting workers (', NUMBER_OF_PROCESSES, ')!'

    n_complete = 0
    for t in xrange(len(tasks)):
        r = result.get()
        n_complete += 1
        util.update_progress(n_complete/job_total)
        result.task_done()
        #print t, 'done'

    for w in xrange(NUMBER_OF_PROCESSES):
        jobs.put(None)

    print 'Done!'
    result.join()
    jobs.close()
    result.close()
Example #34
def trainLinearChainCRF(dataset, featureFunction, iters = 10, dev_set = []):
    """
    Given |dataset|, do stochastic gradient descent to obtain a parameter vector.
    @param dataset list (list string, list string) - A collection of labeled sequences. 
    """
    stepSize = 0.9

    # Get all viable tags
    TAGS = list(set( it.chain.from_iterable( ys for _, ys in dataset ) ))

    # Initialize with a simple CRF with 0 parameters.
    crf = LinearChainCRF(TAGS, featureFunction)
    timer = util.Timer()

    for i in xrange(iters):
        gradientCheck(crf, dataset[0][0], dataset[0][1])
        # Print status
        lhood = sum( computeLogProbability(crf, xs, ys) for xs, ys in dataset ) / len(dataset)
        print "Training set confusion matrix:"
        f1  = reportF1( dataset, crf ) 
        print "Development set confusion matrix:"
        dev_f1  = reportF1( dev_set, crf ) if dev_set else 0.
        print 'Iter %d, Likelihood %0.3f, Train F1: %0.3f, Dev F1 %0.3f' % (i, lhood, f1, dev_f1)

        timer.start()
        for (j, (xs, ys)) in enumerate(dataset):
            gradient = computeGradient(crf, xs, ys)
            for key, value in gradient.iteritems():
                crf.parameters[key] += stepSize * value
            util.update_progress( float(j) / len(dataset) )
        util.update_progress( 1.0 )
        stepSize *= (1. + i) / (2. + i)
        print 'Iter %d took %0.2f seconds' % (i, timer.ticks())

    lhood = sum( computeLogProbability(crf, xs, ys) for xs, ys in dataset ) / len(dataset)
    f1  = reportF1( dataset, crf ) 
    dev_f1  = reportF1( dev_set, crf ) if dev_set else 0.
    print 'Iter %d, Likelihood %0.3f, Train F1: %0.3f, Dev F1 %0.3f' % (i, lhood, f1, dev_f1)

    return crf
Example #35
	def test_run(self, startid, endid, starti, endi):
		
		n_iter  = 0 
		max_iter = (endid-startid)*(endi - starti)
		util.update_progress(0)
		start_time = time.time()
		for runid in np.arange(startid, endid+1, 1):
			for i in np.arange(starti, endi+1, 1):
				context = self.contextGetter.call(i, runid)
				page, pageindex = self.give_page(context)
				response = responder.respond_with_page(i=i, runid=runid, page = page, teampw = self.teampw)				
				success = response['effect']['Success']
				self.revenue += success*page['price']
				
				self.update_alpha_betas(success, pageindex, context)
				n_iter += 1
				util.update_progress(n_iter/max_iter)
				
				if n_iter%10 == 0:
					util.save_profit(self.revenue)
		
		util.update_progress(1)
		time_elapsed = time.time()-start_time
		av_time = float(time_elapsed/max_iter)
		print "Total time: " + str(time_elapsed) + " av_time: " + str(av_time)
Example #36
def display():
    if 'access_token' not in session:
        abort(400)

    access_token = session['access_token']
    if 'job' in session:
        job = get_job_from_key(session['job'], conn)
        # Only rely on a previous result if the same user is logged in (same access_token)
        if job is not None and access_token == job.meta.get(
                'access_token', None):
            return render_template('display.html',
                                   username=session['username'],
                                   quota=session['quota'],
                                   used=session['used'])

    try:
        client = Dropbox(access_token)
    except Exception:
        abort(401)

    account = client.users_get_current_account()
    session['username'] = account.name.display_name

    space_usage = client.users_get_space_usage()
    allocated, used = get_space_usage_info(space_usage)
    total_bytes = used
    session['used'] = human_readable(used)
    session['quota'] = human_readable(allocated)

    job = q.enqueue(walk_entire_dropbox, access_token, total_bytes)
    job.meta['access_token'] = access_token
    job.save()
    update_progress(job, 0, "/")
    session['job'] = job.key

    return render_template('display.html',
                           username=session['username'],
                           quota=session['quota'],
                           used=session['used'])
Example #37
def run(iterations = 350000, filename = '../data/alpha_beta/1.npz'):

	if not os.path.isfile(filename):
		alphas = np.ones(len(possible_pages))
		betas = np.ones(len(possible_pages))
		revenue = 0
	else:
		alphas, betas = beta_util.load_ab()
		revenue = util.load_profit()

	for i in range(0, iterations):
		randi = random.randint(0,9000)
		randrunid = random.randint(0,10000)

		page_index = beta_util.draw_from_beta_distributions(alphas=alphas, betas=betas, possible_pages = possible_pages) # which arm wins
		bla = 0
		while bla<50:
			try:
				response = responder.respond_with_page(i=randi, runid=randrunid, page = possible_pages[page_index], teampw = teampw)
			except Exception:
				bla = bla+1
				continue
			break
		success = response['effect']['Success'] # 1 or 0

		alphas, betas = beta_util.update_alphas_betas(index=page_index, success = success, price = possible_pages[page_index]['price'], alphas = alphas, betas = betas)
		revenue = revenue + success*possible_pages[page_index]['price']

		if i%100 == 0:
			beta_util.save_ab(alphas, betas)
			util.save_profit(revenue)
		util.update_progress(i/iterations)

	util.update_progress(1)
Example #38
	def run(self, iterations = 400000):
		
		
		for i in range(0, iterations):
			
			randi = random.randint(0,9900)
			randrunid = random.randint(0,10000)
			page, pageindex = self.give_page()  # which arm wins
			response = responder.respond_with_page(i=randi, runid=randrunid, page = page, teampw = self.teampw)
			
			success = response['effect']['Success'] # 1 or 0
			
			self.update_alpha_betas(success, pageindex)

			self.revenue += success*page['price']
			
			if i%10 == 0:
				self.save_ab()
#				util.save_profit(self.revenue)
			
			util.update_progress(i/iterations)
			
		util.update_progress(1)	
Example #39
def test_model(ab_path = '../data/alpha_beta/training.npz'):
    print 'starting training'
    alphas,betas = beta_util.load_ab(filename=ab_path)
    for q in range(10001,10101):
       
        print 'Starting runId ' + str(q)
        runid = q
        revenue = 0
        
        for i in range(0,10001):
            
            page_index = beta_util.draw_from_beta_distributions(alphas=alphas, betas=betas, possible_pages = possible_pages) # which arm wins
            
            bla = 0
            while bla<50:
                 try:
                     response = responder.respond_with_page(i=i, runid=runid, page = possible_pages[page_index], teampw = teampw)
                 except Exception:
                     
                     bla = bla+1
                     continue
                 break
             
            success = response['effect']['Success'] # 1 or 0
            revenue = revenue + success*possible_pages[page_index]['price']            
            
            alphas, betas = beta_util.update_alphas_betas(index=page_index, success = success, price = possible_pages[page_index]['price'], alphas = alphas, betas = betas)
            util.update_progress(i/10001)
            
        util.save_profit(revenue)
        beta_util.save_ab(alphas, betas)
        
        util.update_progress(1)
        print 'RunId ' + str(q) + ' of ' + str(10100) + ' finished.'
            
    beta_util.save_ab_to_filename(alphas, betas, name='testing')
    print 'training complete' 
Example #40
        def delay(*args, **kwargs):
            global lastwsquery
            if webservice not in lastwsquery:
                lastwsquery[webservice] = startup

            lastwsquery[webservice] = max(
                lastwsquery[webservice],
                time.time() -
                webservices[webservice]["freequeries"] * MINDELAY)

            wait = 0
            if time.time() - lastwsquery[webservice] < MINDELAY:
                wait = MINDELAY - (time.time() - lastwsquery[webservice])
                if PROFILE == 1:
                    util.update_progress("Waiting %.2fs for %s" %
                                         (wait, func.__name__))
                time.sleep(wait)
            t = time.time()
            ret = func(*args, **kwargs)
            if PROFILE >= 2:
                util.update_progress("%s took %.2fs (after a %.2fs wait)" %
                                     (func.__name__, time.time() - t, wait))
            lastwsquery[webservice] += MINDELAY
            return ret
Example #41
def clear_area_around_eye(size = 256, image_dir = 'I:/AI_for_an_eyes/test/test/', target_dir = 'I:/AI_for_an_eyes/test/test_zonder_meuk_256/'):
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    pic_list = os.listdir(image_dir)
    list_length = len(pic_list)

    util.update_progress(0)
    for j, image_name in enumerate(pic_list):

        img = cv2.imread(image_dir + image_name,1)

        cimg = copy.copy(img)
        height, width = img.shape[:2]

        # scale the image down so its height matches `size` (float division keeps the ratio exact)
        helpert = height / float(size)
        small_height = height / helpert
        small_width = width / helpert
        small_img = cv2.resize(img, (int(small_width), int(small_height)))

        ret,thresh = cv2.threshold(small_img, 10, 150, cv2.THRESH_BINARY)

        gray = cv2.cvtColor(thresh, cv2.COLOR_BGR2GRAY)
        gray = cv2.medianBlur(gray,5)

        #find circles about the size of image height
        circles = cv2.HoughCircles(gray,cv2.cv.CV_HOUGH_GRADIENT,1,20, param1=100,param2=20,minRadius=int(small_height/2.05),maxRadius=int(small_height/2)+int(small_height*0.03))

        if circles is None:
            #find circles larger than image height
            circles = cv2.HoughCircles(gray,cv2.cv.CV_HOUGH_GRADIENT,1,20, param1=100,param2=20,minRadius=int(small_height/2),maxRadius=int(small_height/2)+int(small_height*0.15))

        if circles is None:
            #find circles smaller than image height
            circles = cv2.HoughCircles(gray,cv2.cv.CV_HOUGH_GRADIENT,1,20, param1=100,param2=20,minRadius=int(small_height/2.25),maxRadius=int(small_height/2)-int(small_height*0.02))

        if circles is not None:
            circles = np.uint16(np.around(circles))
            rad=0.0

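            # keep only the circle with the largest radius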
            for i in circles[0,:]:

                if i[2]> rad:
                    rad = i[2]
                    circle = i


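            # build a filled-circle mask at full resolution and black out everything outside it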
            circle_init = np.zeros(shape = cimg.shape, dtype = cimg.dtype)
            cv2.circle(circle_init, (int((circle[0]/small_width)*width), int((circle[1]/small_height)*height)), int(circle[2]*helpert), (255,255,255), -1)
            cimg= cv2.bitwise_and(cimg, circle_init)

            cv2.imwrite(target_dir + image_name, cimg)
        util.update_progress(j / float(list_length))

    util.update_progress(1)
示例#42
0
def read_test_csv(file_path = '../data/test.csv'):
    f = open(file_path)
    try:
        reader = csv.reader(f)
        reader.next()
        util.update_progress(0)
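        # convert each CSV row to an array and save it as an image in the test set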
        for i, row in enumerate(reader):
            image = np.array(row)
            save_as_image(image, "testset",i , file_path = '../data/')
            util.update_progress(i / 28000.0)
            
    finally:
        f.close()
        util.update_progress(1)
示例#43
0
def store(runid, file_path = '../context/', i = 0):
    if not os.path.exists(file_path):
        os.makedirs(file_path)
            
    contextarray = []
    getter = context_getter.ContextGetter(runid)

    print 'getting context of run: ' + str(runid)
    util.update_progress(0)
    for i, context in enumerate(getter):
        contextarray.append(context)        
        util.update_progress(i / float(getter.max_calls))
    util.update_progress(1)
      
    print 'writing context to json'      
    with open(file_path + str(runid), 'w+') as contextfile:
        json.dump(contextarray, contextfile)
示例#44
0
def clear_area_around_eye(size = 256, image_dir = 'I:/AI_for_an_eyes/test/test/', target_dir = 'I:/AI_for_an_eyes/test/test_zonder_meuk_256/'):
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    util.update_progress(0)


    tasks = glob.glob(image_dir+'*.jpeg')
    job_total = len(tasks)

    print 'Processing images matching ' + image_dir+ '*.jpeg'

    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()*2

    for im_name in tasks:
        jobs.put(im_name)

    for i in xrange(NUMBER_OF_PROCESSES):
        p = Thread(target=worker, args=(i, jobs, result, target_dir, size))
        p.daemon = True
        p.start()

    print 'Starting workers (', NUMBER_OF_PROCESSES, ')!'

    n_complete = 0
    for t in xrange(len(tasks)):
        r = result.get()
        n_complete += 1
        util.update_progress(n_complete / float(job_total))
        result.task_done()
        #print t, 'done'

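    # put one None per worker on the job queue (poison pill) so the worker threads can exit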
    for w in xrange(NUMBER_OF_PROCESSES):
        jobs.put(None)

    util.update_progress(1)

    print 'Done!'
    time.sleep(1)
    result.join()
    jobs.close()
    result.close()
示例#45
0
				# This is a very broad error so it should go last (other errors inherit from IOError)
				except IOError, e:
					# urllib converts socket errors to cryptic IOErrors for some strange reason
					if e.errno == "socket error" and e.strerror.args[0] == "timed out":
						print ("Caught " + webservice + " IO timeout")
					else:
						# A bare raise will reraise the current exception
						raise

				# bail if we've made enough attempts
				if i >= WSATTEMPTS:
					break
				else:
					i += 1

				util.update_progress("waiting 20 seconds and trying again...")
				time.sleep(20)
				util.update_progress("20 second backoff is over. Retrying now...")
				# Reset the timer delayed uses so that we don't
				# end up with a bunch of queries causing
				# another 503
				lastwsquery[webservice] = time.time()

			print "Giving up on call to %s after %d tries." % (webservice, WSATTEMPTS)
			raise

		backoff_func.__name__ = func.__name__
		return backoff_func
	return ws_backoff

def delayed(webservice="default"):
示例#46
0
productids = np.arange(10, 26, 1)
prices = [10,15,20,25,30,35,40] 


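# read the team password that is passed along with every responder call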
pass_file = '../password.pass'
f = open(pass_file, 'rb')
teampw = f.next() 

# number of samples to train on
iterations = 10

# build the set of candidate pages
possible_pages = beta_util.create_possible_pages(headers=headers, adtypes=adtypes, colors=colors, productids=productids, prices=prices)


util.update_progress(0)




def run(iterations = 350000, filename = '../data/alpha_beta/1.npz'):

	# start with uniform Beta(1, 1) priors unless a saved alpha/beta state exists
	if not os.path.isfile(filename):
		alphas = np.ones(len(possible_pages))
		betas = np.ones(len(possible_pages))
		revenue = 0
	else:
		alphas, betas = beta_util.load_ab()
示例#47
0
    except ErrorResponse, e:
        abort(401)

    account = client.account_info()
    session['username'] = account['display_name']
    quota = float(account['quota_info']['quota'])
    shared = float(account['quota_info']['shared'])
    normal = float(account['quota_info']['normal'])
    total_bytes = int(normal + shared)
    session['used'] = human_readable(normal + shared)
    session['quota'] = human_readable(quota)

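    # enqueue the Dropbox tree walk as a background job and track it via the session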
    job = q.enqueue(walk, client, get_metadata(client, '/'), 0, total_bytes)
    job.meta['access_token'] = session['access_token']
    job.save()
    update_progress(job, 0, "/")
    session['job'] = job.key

    return render_template('display.html', username=session['username'], quota=session['quota'], used=session['used'])

@app.route('/display_result')
def display_result():
    if 'job' not in session:
        return jsonify(ready=False, progress=0)

    job = get_job_from_key(session['job'], conn)
    if job is None:
        abort(400)

    if job.result is None:
        return jsonify(ready=False, current=job.meta['current'], progress=job.meta['progress'])
示例#48
0
def guess_album2(trackinfo):
	# trackinfo is
	#  <fname> => <musicfile.MusicFile>
	#
	# returns a list of possible release id's
	#
	# This version works by trying a breadth first search of releases to try
	# and avoid wasting a lot of time finding releases which are going to
	# be ignored.
	#
	# This function returns a list of release id's
	possible_releases={}
	impossible_releases=[]
	track_generator={}
	completed_releases=[]

	start_time = time.time()

	if trackinfo=={}:
		print "No tracks to identify?"
		return

	for (fileid,file) in trackinfo.iteritems():
		track_generator[fileid]=itertools.chain(
			file.getTracks(),
			strat_musicbrainzid.generate_from_metadata(file),
			strat_transitive.generate_track_puid_possibilities(
				file.getTracks()),
			strat_metadata.generate_from_metadata(
				file,
				len(trackinfo)),
			strat_trackname.generate_track_name_possibilities(	
					file,
					fileid,
					possible_releases)
			)

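	# Keep pulling candidate tracks until every file's generator is exhausted
	# or the configured time limit runs out.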
	while track_generator!={}:
		timelimit = albumidentifyconfig.config.getint("albumidentify","timelimit")
		if  timelimit > 0 and time.time() - start_time > timelimit:
			print "TIMEOUT EXCEEDED, GIVING UP"
			break
		fileid = choose_track(
				possible_releases,
				 track_generator,
				 trackinfo)
		try:
			track = track_generator[fileid].next()
		except StopIteration:
			end_of_track(
				possible_releases,
				impossible_releases,
				track_generator,
				trackinfo,
				fileid)
			# If we have no more possible releases for the track
			# we're giving up on, we can't get any more.
			# So give up now.
			if possible_releases == {}:
				return
			continue

		for releaseid in (x.id for x in track.releases):

			# Skip releases we've already seen before.
			if releaseid in impossible_releases:
				continue

			util.update_progress("Considering %s" % (
				musicbrainz2.utils.extractUuid(releaseid)))
			try:
				release = lookups.get_release_by_releaseid(releaseid)
			except Exception,e:
				util.report("WARNING: Unexpected exception looking for "+musicbrainz2.utils.extractUuid(releaseid)+": "+str(e))
				continue

			# Is the track usable?
			if not verify_track(release, 
					possible_releases,
					impossible_releases,
					trackinfo,
					fileid,
					track):
				continue

			add_new_track(release, 
					possible_releases, 
					fileid, 
					track, 
					trackinfo, 
					impossible_releases)

			if len(possible_releases[releaseid])==len(trackinfo) \
					and releaseid not in completed_releases:
				print release.title,"seems ok\x1b[K"
				print "Musicbrainz Release Id: %s.html" % release.id
				yield releaseid, possible_releases[releaseid]
				completed_releases.append(releaseid)
示例#49
0
				# This is a very broad error so it should go last (other errors inherit from IOError)
				except IOError, e:
					# urllib converts socket errors to cryptic IOErrors for some strange reason
					if e.errno == "socket error" and e.strerror.args[0] == "timed out":
						print ("Caught " + webservice + " IO timeout")
					else:
						# A bare raise will reraise the current exception
						raise

				# bail if we've made enough attempts
				if i >= attempts:
					break
				else:
					i += 1

				util.update_progress("waiting %d seconds and trying again..." % cooldown)
				time.sleep(cooldown)
				util.update_progress("%d second backoff is over. Retrying now..." % cooldown)
				# Reset the timer delayed uses so that we don't
				# end up with a bunch of queries causing
				# another 503
				lastwsquery[webservice] = time.time()

			print "Giving up on call to %s after %d tries." % (webservice, attempts)
			raise

		backoff_func.__name__ = func.__name__
		return backoff_func
	return ws_backoff

def delayed(webservice="default"):
示例#50
0
def verify_track(release, possible_releases, impossible_releases, 
			trackinfo, fileid, track):
	# Step One: Has this file already been found on this release?
	releaseid = release.id
	if releaseid in possible_releases and fileid in possible_releases[releaseid].values():
		util.update_progress("Already found on this release:" + fileid )
		return False
	# Step Two: Check for the right number of tracks
	if len(release.tracks) != len(trackinfo):
		# Ignore release -- wrong number of tracks
		util.update_progress(release.title.encode("ascii","ignore")[:40]+": wrong number of tracks (%d not %d)" % (len(release.tracks),len(trackinfo)))
		impossible_releases.append(releaseid)
		return False

	# Step Three: Have we found a file for this track on this release?
	tracknum = lookups.track_number(release.tracks, track)
	if releaseid in possible_releases and tracknum in possible_releases[releaseid]:
		util.update_progress("Already found a file for track %02d: %s" % (tracknum,possible_releases[releaseid][tracknum]))
		return False

	# Step Four: (optionally) Check that track 'n' maps to file 'n'.
	if FORCE_ORDER:
		found_tracknumber=lookups.track_number(release.tracks, track)
		file_ids = trackinfo.keys()
		file_ids = sort.sorted_list(file_ids)
		if found_tracknumber != file_ids.index(fileid)+1:
			util.update_progress(release.title[:40]+": track at wrong position")
			return False

	# Step Five: If this file's PUID belongs to a different track on this
	# release, don't accept this mapping.
	if trackinfo[fileid].getPUID() in get_puids_for_release(releaseid):
		if trackinfo[fileid].getPUID() not in lookups.get_track_by_id(track.id).puids:
			print "Track exists elsewhere on this release"
			print "",fileid
			print "",track.title
			
			for ntrackind,ntrack in enumerate(release.tracks):
				ntrack = lookups.get_track_by_id(ntrack.id)
				if trackinfo[fileid].getPUID() in ntrack.puids:
					print " should be:",ntrack.title
					
			return False

	# Step Six: Make sure the song is within 10% of the length of the 
	# track we expect it to be.
	track = lookups.get_track_by_id(track.id)
	if track.getDuration() is not None:
		dur_ratio = track.getDuration() * 1.0 / trackinfo[fileid].getDuration()
		if dur_ratio < .9 or dur_ratio > 1.1:
			print "Track lengths differ"
			print " (%s) %s" % (
				duration_to_string(trackinfo[fileid].getDuration()),
				trackinfo[fileid].getFilename(),
				)
			print " (%s) %s" % (
				duration_to_string(track.getDuration()),
				track.title,
				)
			return False

	# Well, after passing through that gauntlet, we might consider this track!
	return True
示例#51
0
def hash_file(fname):
	t = time.time()
	# read in binary mode so the digest reflects the raw file bytes
	h = hashlib.md5(open(fname, "rb").read()).hexdigest()
	util.update_progress("Hashed file (%fs)" % (time.time()-t))
	return h
示例#52
0
def populate_fingerprint_cache(fname):
	util.update_progress("Looking up fingerprint for "+os.path.basename(fname))
	return fingerprint_any(fname)