def load_lfw_dataset_EW(use_raw=False, dimx=45, dimy=45):
    # read attrs (note: use_raw and imgs_with_attrs are unused in this variant)
    df_attrs = pd.read_csv(ATTRS_NAME, sep='\t', skiprows=1)
    df_attrs.columns = list(df_attrs.columns)[1:] + ["NaN"]
    df_attrs = df_attrs.drop("NaN", axis=1)
    imgs_with_attrs = set(map(tuple, df_attrs[["person", "imagenum"]].values))

    # read photos
    photos = []
    mapping = []
    index = 0
    with ZipFile(IMAGES_NAME_POC) as f:
        for m in tqdm_utils.tqdm_notebook_failsafe(f.namelist()):
            # prepare image
            img = decode_image_from_raw_bytes(f.open(m).read())
            img = cv2.resize(img, (dimx, dimy))

            # parse person: keep only entries whose file name ends in a photo number
            fname = os.path.split(m)[-1]
            fname_splitted = fname[:-4].replace('_', ' ').split()
            if fname_splitted[-1][-1].isdigit():
                photo_number = int(fname_splitted[-1])
                photos.append(img)
                mapping.append([index, '_'.join(fname_splitted)])
                index += 1

    photos = np.stack(photos).astype('uint8')
    return photos, mapping
def on_epoch_begin(self, epoch, logs=None):
    print('\nEpoch %d/%d' % (epoch + 1, self.epochs))
    if "steps" in self.params:
        self.use_steps = True
        self.target = self.params['steps']
    else:
        self.use_steps = False
        self.target = self.params['samples']
    self.prog_bar = tqdm_utils.tqdm_notebook_failsafe(total=self.target)
    self.log_values_by_metric = defaultdict(list)
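# For context, on_epoch_begin above is meant to live inside a Keras Callback
# subclass. A minimal wiring sketch follows; the class name
# TqdmProgressCallback and the toy model are assumptions for illustration,
# not part of the original notebooks.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import Callback

class TqdmProgressCallback(Callback):
    def __init__(self, epochs):
        super(TqdmProgressCallback, self).__init__()
        self.epochs = epochs
    # on_epoch_begin from above goes here, together with matching
    # on_batch_end / on_epoch_end hooks that advance and close prog_bar.

model = Sequential([Dense(1, input_shape=(4,))])
model.compile(optimizer='sgd', loss='mse')
X, y = np.random.rand(64, 4), np.random.rand(64, 1)
model.fit(X, y, epochs=2, verbose=0,  # verbose=0 so only the tqdm bar prints
          callbacks=[TqdmProgressCallback(epochs=2)])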
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.random.permutation(len(inputs))
    # note: the range stops at len(inputs) - batchsize + 1,
    # so the final partial batch is dropped
    for start_idx in tqdm_utils.tqdm_notebook_failsafe(
            range(0, len(inputs) - batchsize + 1, batchsize)):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
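# A quick usage sketch for iterate_minibatches on toy arrays; the shapes here
# are made up for illustration.
import numpy as np

X = np.arange(300).reshape(100, 3)
y = np.arange(100)
for xb, yb in iterate_minibatches(X, y, batchsize=32, shuffle=True):
    print(xb.shape, yb.shape)  # (32, 3) (32,) three times; the last 4 samples are dropped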
def test_validation_loss(decoder, s, generate_batch, val_img_embeds, val_captions_indexed):
    # make the evaluation reproducible
    np.random.seed(300)
    random.seed(300)
    val_loss = 0
    batches_for_eval = 1000
    for _ in tqdm_utils.tqdm_notebook_failsafe(range(batches_for_eval)):
        val_loss += s.run(
            decoder.loss,
            generate_batch(val_img_embeds, val_captions_indexed, 32, 20))  # batch_size=32, max_len=20
    val_loss /= batches_for_eval
    return val_loss
def download_file(url, file_path):
    r = requests.get(url, stream=True)
    total_size = int(r.headers.get('content-length'))
    try:
        with open(file_path, 'wb', buffering=16 * 1024 * 1024) as f:
            bar = tqdm_utils.tqdm_notebook_failsafe(total=total_size, unit='B', unit_scale=True)
            bar.set_description(os.path.split(file_path)[-1])
            for chunk in r.iter_content(32 * 1024):
                f.write(chunk)
                bar.update(len(chunk))
            bar.close()
    except Exception:
        print("Download failed")
    finally:
        # guard against the file never having been created
        if os.path.exists(file_path) and os.path.getsize(file_path) != total_size:
            os.remove(file_path)
            print("Removed incomplete download")
def download_file(url, file_path):
    r = requests.get(url, stream=True)
    total_size = int(r.headers.get('content-length'))
    bar = tqdm_utils.tqdm_notebook_failsafe(total=total_size, unit='B', unit_scale=True)
    bar.set_description(os.path.split(file_path)[-1])
    incomplete_download = False
    try:
        with open(file_path, 'wb', buffering=16 * 1024 * 1024) as f:
            for chunk in r.iter_content(4 * 1024 * 1024):
                f.write(chunk)
                bar.update(len(chunk))
    finally:
        bar.close()
        # drop partially written files so a retry starts clean
        if os.path.exists(file_path) and os.path.getsize(file_path) != total_size:
            incomplete_download = True
            os.remove(file_path)
        if incomplete_download:
            raise Exception("Incomplete download")
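# Usage is a single call; the URL and file name below are placeholders, not
# ones from the original notebooks. Unlike the earlier variant, this version
# reports failure by raising, so callers can catch and retry.
download_file("https://example.com/some_large_file.zip",  # placeholder URL
              "some_large_file.zip")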
def load_lfw_dataset(use_raw=False, dx=80, dy=80, dimx=45, dimy=45):
    # read attrs
    df_attrs = pd.read_csv(ATTRS_NAME, sep='\t', skiprows=1)
    df_attrs = pd.DataFrame(df_attrs.iloc[:, :-1].values, columns=df_attrs.columns[1:])
    imgs_with_attrs = set(map(tuple, df_attrs[["person", "imagenum"]].values))

    # read photos
    all_photos = []
    photo_ids = []
    with tarfile.open(RAW_IMAGES_NAME if use_raw else IMAGES_NAME) as f:
        for m in tqdm_utils.tqdm_notebook_failsafe(f.getmembers()):
            if m.isfile() and m.name.endswith(".jpg"):
                # prepare image
                img = decode_image_from_raw_bytes(f.extractfile(m).read())
                img = img[dy:-dy, dx:-dx]
                img = cv2.resize(img, (dimx, dimy))

                # parse person
                fname = os.path.split(m.name)[-1]
                fname_splitted = fname[:-4].replace('_', ' ').split()
                person_id = ' '.join(fname_splitted[:-1])
                photo_number = int(fname_splitted[-1])
                if (person_id, photo_number) in imgs_with_attrs:
                    all_photos.append(img)
                    photo_ids.append({'person': person_id, 'imagenum': photo_number})

    photo_ids = pd.DataFrame(photo_ids)
    all_photos = np.stack(all_photos).astype('uint8')

    # preserve photo_ids order!
    all_attrs = photo_ids.merge(df_attrs, on=['person', 'imagenum']).drop(
        ["person", "imagenum"], axis=1)

    return all_photos, all_attrs
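# Assuming ATTRS_NAME and IMAGES_NAME already point at the downloaded LFW
# attributes file and image archive, a typical call looks like this
# (shapes are indicative, not exact):
all_photos, all_attrs = load_lfw_dataset(use_raw=False, dimx=45, dimy=45)
print(all_photos.shape)  # (N, 45, 45, 3) uint8 images
print(all_attrs.shape)   # (N, n_attributes), rows aligned with all_photos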
def reading_thread(zip_fn):
    zf = zipfile.ZipFile(zip_fn)
    for fn in tqdm_utils.tqdm_notebook_failsafe(zf.namelist()):
        if kill_read_thread.is_set():
            break
        if os.path.splitext(fn)[-1] in extensions:
            buf = zf.read(fn)  # read raw bytes from zip for fn
            img = decode_image_from_buf(buf)  # decode raw bytes
            img = crop_and_preprocess(img, input_shape, preprocess_for_model)
            while True:  # retry until the queue has room or we are asked to stop
                try:
                    q.put((os.path.split(fn)[-1], img), timeout=1)  # put in queue
                except queue.Full:
                    if kill_read_thread.is_set():
                        break
                    continue
                break

    read_thread_completed.set()  # read all images
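# reading_thread above is the producer half of a producer-consumer pipeline
# and assumes module-level q, kill_read_thread, read_thread_completed,
# extensions, input_shape and preprocess_for_model. A minimal wiring sketch
# under those assumptions; the consumer drain loop is illustrative, not the
# original consumer code.
import queue
import threading

extensions = {'.jpg', '.jpeg', '.png'}
input_shape = (224, 224)              # placeholder target size
preprocess_for_model = lambda x: x    # placeholder preprocessing function
q = queue.Queue(maxsize=32)           # bounded, so the reader can't outrun us
kill_read_thread = threading.Event()  # set this to stop the reader early
read_thread_completed = threading.Event()

t = threading.Thread(target=reading_thread, args=('images.zip',))  # placeholder zip
t.start()

# consumer: drain until the reader is done and the queue is empty
while not (read_thread_completed.is_set() and q.empty()):
    try:
        fname, img = q.get(timeout=1)
    except queue.Empty:
        continue
    # ... feed (fname, img) to the model here ...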
# # Submit to Coursera

# In[25]:

# token expires every 30 min
COURSERA_TOKEN = "*****************"
COURSERA_EMAIL = "d****************m"

# In[26]:

from submit import submit_char_rnn

samples = [generate_sample(' Al') for i in tqdm_utils.tqdm_notebook_failsafe(range(25))]
submission = (history, samples)
submit_char_rnn(submission, COURSERA_EMAIL, COURSERA_TOKEN)

# # Try it out!
#
# __Disclaimer:__ This part of the assignment is entirely optional. You won't receive bonus points for it. However, it's a fun thing to do. Please share your results on the course forums.
#
# You've just implemented a recurrent language model that can be tasked with generating any kind of sequence, so there's plenty of data you can try it on:
#
# * Novels/poems/songs of your favorite author
# * News titles/clickbait titles
# * Source code of Linux or Tensorflow
# * Molecules in [smiles](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) format
# * Melody in notes/chords format
# you can load "weights_{epoch}" and continue training
# uncomment the next line if you need to load weights
# saver.restore(s, os.path.abspath("weights"))

"""Look at the training and validation losses; they should both be decreasing!"""

# actual training loop
MAX_LEN = 20  # truncate long captions to speed up training

# to make training reproducible
np.random.seed(42)
random.seed(42)

for epoch in range(n_epochs):

    train_loss = 0
    pbar = tqdm_utils.tqdm_notebook_failsafe(range(n_batches_per_epoch))
    counter = 0
    for _ in pbar:
        train_loss += s.run([decoder.loss, train_step],
                            generate_batch(train_img_embeds,
                                           train_captions_indexed,
                                           batch_size,
                                           MAX_LEN))[0]
        counter += 1
        pbar.set_description("Training loss: %f" % (train_loss / counter))

    train_loss /= n_batches_per_epoch

    val_loss = 0
    for _ in range(n_validation_batches):
        val_loss += s.run(decoder.loss,
                          generate_batch(val_img_embeds,
                                         val_captions_indexed,
                                         batch_size,
                                         MAX_LEN))
    val_loss /= n_validation_batches
def sample_probas(bsize):
    plt.title('Generated vs real data')
    plt.hist(np.exp(discriminator.predict(sample_data_batch(bsize)))[:, 1],
             label='D(x)', alpha=0.5, range=[0, 1])
    plt.hist(np.exp(discriminator.predict(generator.predict(sample_noise_batch(bsize))))[:, 1],
             label='D(G(z))', alpha=0.5, range=[0, 1])
    plt.legend(loc='best')
    plt.show()

"""### Training

Main loop. We just train the generator and discriminator in a loop and plot the results once every N iterations.
"""

from IPython import display

for epoch in tqdm_utils.tqdm_notebook_failsafe(range(3000)):
    feed_dict = {
        real_data: sample_data_batch(100),
        noise: sample_noise_batch(100),
    }

    # several discriminator updates per generator update
    for i in range(5):
        s.run(disc_optimizer, feed_dict)

    s.run(gen_optimizer, feed_dict)

    if epoch % 100 == 0:
        display.clear_output(wait=True)
        sample_images(2, 3, True)
        sample_probas(1000)
for _ in range(10):
    print(generate_sample())

# with prefix conditioning
for _ in range(10):
    print(generate_sample(' Trump'))

"""# Submit to Coursera"""

# token expires every 30 min
COURSERA_TOKEN = "XXXXXXXXXXXX"
COURSERA_EMAIL = "XXXXXXXXXXXXXXXXXXXXXXXXXX"

from submit import submit_char_rnn

samples = [
    generate_sample(' Al') for i in tqdm_utils.tqdm_notebook_failsafe(range(25))
]
submission = (history, samples)
submit_char_rnn(submission, COURSERA_EMAIL, COURSERA_TOKEN)

"""# Try it out!

__Disclaimer:__ This part of the assignment is entirely optional. You won't receive bonus points for it. However, it's a fun thing to do. Please share your results on the course forums.

You've just implemented a recurrent language model that can be tasked with generating any kind of sequence, so there's plenty of data you can try it on:

* Novels/poems/songs of your favorite author
* News titles/clickbait titles
* Source code of Linux or Tensorflow
* Molecules in [smiles](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) format
* Melody in notes/chords format
* IKEA catalog titles
* Pokemon names
* Cards from Magic, the Gathering / Hearthstone

If you're willing to give it a try, here's what you wanna look at: