def test_feature():
    """
    TO BE COMPLETED BY STUDENT

    Change the code so that multiple columns are displayed.
    Each column should contain a different (randomly picked) patch and its mean color.
    Run the script first to see what the output is.
    """
    num_cols = 3  # CHANGE THIS
    i = 0
    for col in range(num_cols):
        i += 1
        # patch_idx = np.random.randint(0, len(main.data))
        patch_idx = 11020  # CHANGE THIS
        patch = main.data[patch_idx]
        feature = main.feature(patch)
        patch_mean = feature.reshape(1, 1, 3).repeat(32, 0).repeat(32, 1)

        # grid plot of size 2 x num_cols
        plt.subplot(2, num_cols, i)
        plt.title(str(patch_idx))
        plt.imshow(patch)
        plt.subplot(2, num_cols, num_cols + i)
        plt.imshow(patch_mean)

    fig = plt.gcf()
    plt.show()
    fname = utils.datetime_filename('output/A1_test/features/grid.png')
    fig.savefig(fname, format='png', dpi=300)
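
# NOTE (sketch, not the required solution): one possible completion of the exercise
# above draws a fresh random index per column, as the commented-out
# np.random.randint line suggests. `main`, `plt` and `np` are assumed to be the
# module-level objects already used in this test file; the function name is hypothetical.
def test_feature_random_columns(num_cols=3):
    """Hypothetical variant of test_feature() that picks a random patch per column."""
    for col in range(num_cols):
        patch_idx = np.random.randint(0, len(main.data))  # random patch per column
        patch = main.data[patch_idx]
        patch_mean = main.feature(patch).reshape(1, 1, 3).repeat(32, 0).repeat(32, 1)
        plt.subplot(2, num_cols, col + 1)             # top row: the patch itself
        plt.title(str(patch_idx))
        plt.imshow(patch)
        plt.subplot(2, num_cols, num_cols + col + 1)  # bottom row: its mean color
        plt.imshow(patch_mean)
    plt.show()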
def test_neighbors():
    num_cols = 3
    num_rows = 3
    main.nn.n_neighbors = num_cols
    i = 0
    for row in range(num_rows):
        patch_idx = np.random.randint(0, len(main.data))
        patch = main.data[patch_idx]
        neighbors = main.get_patch(patch).reshape(-1, 32, 32, 3)

        for col in range(num_cols):
            i += 1
            if col == 0:
                plt.subplot(num_rows, num_cols, i)
                plt.title(str(patch_idx))
                plt.imshow(patch)
            else:
                plt.subplot(num_rows, num_cols, i)
                # plt.title(str(patch_idx))
                plt.imshow(neighbors[col])

    fig = plt.gcf()
    plt.show()
    fname = utils.datetime_filename('output/A1_test/neighbors/grid.png')
    fig.savefig(fname, format='png', dpi=300)
def main():
    """
    Assignment 3 - Neural Network Features
    """
    # The program starts execution here.
    # Change the filename to load your favourite picture.
    file = './images/lion2.jpg'
    train_features = False
    train = True

    img = Image.open(file).convert('RGB')
    img = utils.resize_proportional(img, new_height=900)
    target_image = np.array(img) / 255

    # This will execute the mosaicking algorithm of Assignment 3
    main = Assignment3()
    main.encode_features(train_features)
    main.train(train)
    output_image = main.mosaic(target_image)

    # Save the resulting image inside the project's output folder
    output_image *= 255
    im = Image.fromarray(output_image.astype('uint8'))
    im.save(utils.datetime_filename('output/A3/mosaics/mosaic.png'))
def test_cluster():
    clusters = [5, 6, 7]
    num_cols = 5
    num_rows = len(clusters)
    i = 0
    for row in range(num_rows):
        cluster = clusters[row]
        cluster_center = main.data[main.closest[cluster]]
        labels = main.kmeans.labels_
        cluster_indices = [idx for idx, l in enumerate(labels) if l == cluster]

        # distances of all patches to this cluster center; keep the num_cols closest
        d = main.kmeans.transform(main.data.reshape(len(main.data), -1))[:, cluster]
        ind = np.argsort(d)[:num_cols]
        closest = main.data[ind]

        for col in range(num_cols):
            i += 1
            # else:
            #     patch_idx = random.choice(cluster_indices)
            #     patch = main.data[patch_idx]
            patch = closest[col]

            # grid plot of size num_rows x num_cols
            plt.subplot(num_rows, num_cols, i)
            if col == 0:
                plt.title('Center')
                # plt.title(str(patch_idx))
            plt.imshow(patch)

    fig = plt.gcf()
    plt.show()
    fname = utils.datetime_filename('output/A2_test/clusters/samples.png')
    fig.savefig(fname, format='png', dpi=300)
def scrape(tweets_per_file=100000, words_per_track=50):
    """ For easier reference, we save 100k tweets per file. """
    f = open(datetime_filename(prefix=RAW_TWEET_DIR + '/gn_tweet_'), 'w')
    tweet_count = 0
    hour_count = 0  # track_grams #14 #0, track_words #73 #31 #18 #0
    try:
        # jump target: rebuild the track list and reopen the stream
        # (requires the `goto` package and its @with_goto decorator on this function)
        label.next_
        start_time = time.time()
        tracking = get_track_words(words_per_track, hour_count, track_lst_2)  # change the track list here
        print(tracking, len(tracking))
        for line in api.GetStreamFilter(follow=None,
                                        track=tracking,
                                        locations=None,
                                        languages=None,
                                        delimited=None,
                                        stall_warnings=None,
                                        filter_level=None):
            if 'text' in line:  # and line['lang'] == u'und':
                # text = line['text'].encode('utf-8').replace('\n', ' ')
                f.write('{}\n'.format(line))
                tweet_count += 1
                if tweet_count % tweets_per_file == 0:
                    # start a new batch file
                    f.close()
                    f = open(datetime_filename(prefix=RAW_TWEET_DIR + '/gn_tweet_'), 'w')
                    continue
            next_time = time.time()
            print(next_time, start_time, next_time - start_time)
            if int((next_time - start_time) / 3600.0) >= 3:
                # after ~3 hours, rotate the track list and restart the stream
                hour_count += 1
                goto.next_
    except KeyboardInterrupt:
        print('Twitter stream collection aborted')
    finally:
        f.close()
    return tweet_count
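
# NOTE (sketch, assumption): the label/goto pair above relies on the third-party
# `goto` package and a @with_goto decorator that is not shown here. The same
# "rebuild the track list every ~3 hours and restart the stream" behaviour can be
# expressed with a plain loop. `api`, `get_track_words`, `datetime_filename`,
# `RAW_TWEET_DIR` and `track_lst_2` are the same objects used above; the function
# name is hypothetical.
def scrape_loop(tweets_per_file=100000, words_per_track=50, rotate_hours=3):
    tweet_count = 0
    hour_count = 0
    f = open(datetime_filename(prefix=RAW_TWEET_DIR + '/gn_tweet_'), 'w')
    try:
        while True:
            start_time = time.time()
            tracking = get_track_words(words_per_track, hour_count, track_lst_2)
            for line in api.GetStreamFilter(track=tracking):
                if 'text' in line:
                    f.write('{}\n'.format(line))
                    tweet_count += 1
                    if tweet_count % tweets_per_file == 0:  # start a new batch file
                        f.close()
                        f = open(datetime_filename(prefix=RAW_TWEET_DIR + '/gn_tweet_'), 'w')
                if time.time() - start_time >= rotate_hours * 3600:
                    hour_count += 1
                    break  # leave the stream; the while-loop rebuilds the track list
    except KeyboardInterrupt:
        print('Twitter stream collection aborted')
    finally:
        f.close()
    return tweet_count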
def get_old_tweets(check_keys=True):
    tweet_count = 0
    csvFile = open(datetime_filename(prefix=RAW_TWEET_DIR + '/gn_tweet_'), 'a')
    csvWriter = csv.writer(csvFile)
    try:
        maxTweets = -1  # the number of tweets you require (-1 = no limit)
        for i, tweet in enumerate(
                sntwitter.TwitterSearchScraper(
                    " ".join(track_lst_3) + ' since:2006-03-21 until:2021-01-31').get_items()):  # " " = and
            if i >= maxTweets and maxTweets > -1:
                break
            print(i, [t for t in tweet.__class__.__dict__['_fields']], "=", [t for t in tweet])
            if tweet_count == 0:
                csvWriter.writerow([t for t in tweet.__class__.__dict__['_fields']])  # write header once
            if check_keys:
                text = str(tweet.content + tweet.username).lower()
                # strict check: keep the tweet only if it contains any keyword
                # (strict: e.g. key=día, text|user=dia, then exclude it)
                if any(word.lower() in text for word in track_lst_3):
                    csvWriter.writerow([attr for attr in tweet])  # write all attributes; provide fewer if you need less information
            else:
                csvWriter.writerow([attr for attr in tweet])  # write all attributes; provide fewer if you need less information
            tweet_count += 1
    except KeyboardInterrupt:
        print('Twitter scrape collection aborted')
    finally:
        csvFile.close()
    return tweet_count
def main():
    """
    Assignment 1a - Average Patch Features
    Assignment 1b - Nearest Neighbor Search
    """
    # The program starts execution here.
    # Change the filename to load your favourite picture.
    file = './images/lion2.jpg'

    # Setting these to True will train the model (or pre-compute the features).
    # All models are automatically saved in the folder 'models'.
    # After the model is trained, you can set them back to False.
    train_features = False
    train_model = False

    # Load the image and resize it to a fixed height (keeping the aspect ratio)
    img = Image.open(file).convert('RGB')
    img = utils.resize_proportional(img, new_height=2000)
    target_image = np.array(img) / 255

    # This will execute the mosaicking algorithm of Assignment 1
    main = Assignment1()
    main.encode_features(train_features)
    main.train(train_model)

    t0 = time.time()
    output_image = main.mosaic_fast(target_image)
    print(time.time() - t0, 'seconds.')

    # Save the resulting image inside the project's output folder
    output_image *= 255
    im = Image.fromarray(output_image.astype('uint8'))
    im.save(utils.datetime_filename('output/A1/mosaics/mosaic.png'))
# 2. read files
# 2.1 tweets
all_files = glob.glob(raw_tweet_dir + "/gn_*utc.csv")
usecols = ['tweet_id', 'date', 'tweet', 'lang_twitter']
raw_tweet_files = dd.read_csv(all_files,
                              usecols=lambda c: c in set(usecols),
                              dtype={'tweet_id': 'str'})
print("raw_tweet_files\n", raw_tweet_files.head(3), "\n",
      len(raw_tweet_files.tweet_id))
raw_tweet_files = raw_tweet_files.compute()

if sample1000:
    # write a 1000-row sample of the tweets and stop
    raw_tweet_files.sample(1000).to_csv(
        datetime_filename(prefix=raw_tweet_dir + '/gn_tweet_' +
                          str(len(raw_tweet_files.tweet_id)) + '_',
                          extension='_sample1000.csv'),
        encoding='utf-8',
        index=False)
    print("tweets sample exported...")
    sys.exit(0)

# 2.2 lang
all_files = glob.glob(raw_tweet_dir + "/tweets_languages_gn_tweet*utc.csv")
# iglob(raw_tweet_dir + "/tweets_languages_gn_tweet*utc.csv", recursive=True)
usecols = ['tweet_id', 'lang', 'lang_fasttext', 'lang_polyglot', 'lang_textcat']
raw_lang_files = dd.read_csv(all_files,
                             usecols=lambda c: c in set(usecols),