Example #1
def test_feature():
    """
    TO BE COMPLETED BY STUDENT

    Change the code so that multiple columns are displayed.
    Each column should contain a different (randomly picked) patch and it's mean color.
    Run the script first to see what the output is.

    """
    num_cols = 3  # CHANGE THIS

    i = 0
    for col in range(num_cols):
        i += 1
        # patch_idx = np.random.randint(0, len(main.data))
        patch_idx = 11020  # CHANGE THIS
        patch = main.data[patch_idx]
        feature = main.feature(patch)
        patch_mean = feature.reshape(1, 1, 3).repeat(32, 0).repeat(32, 1)

        # grid plot of size 2 x num_cols
        plt.subplot(2, num_cols, i)
        plt.title(str(patch_idx))
        plt.imshow(patch)

        plt.subplot(2, num_cols, num_cols + i)
        plt.imshow(patch_mean)

    fig = plt.gcf()
    plt.show()
    fname = utils.datetime_filename('output/A1_test/features/grid.png')
    fig.savefig(fname, format='png', dpi=300)
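
The plotting examples here save their figures through utils.datetime_filename, whose definition is not included in these snippets. A minimal sketch, assuming it inserts a timestamp before the file extension and creates any missing output directories (the prefix/extension keyword form used in the tweet-scraping examples below would be a separate variant):

import os
from datetime import datetime

def datetime_filename(path, fmt='%Y-%m-%d_%H-%M-%S'):
    # Hypothetical helper: 'output/grid.png' -> 'output/grid_<timestamp>.png',
    # creating the output directory if it does not exist yet.
    base, ext = os.path.splitext(path)
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    return '{}_{}{}'.format(base, datetime.now().strftime(fmt), ext)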
Example #2
def test_neighbors():
    num_cols = 3
    num_rows = 3

    main.nn.n_neighbors = num_cols  # request num_cols neighbors per query patch
    i = 0

    for row in range(num_rows):
        # pick a random query patch and look up its nearest neighbors
        patch_idx = np.random.randint(0, len(main.data))
        patch = main.data[patch_idx]
        neighbors = main.get_patch(patch).reshape(-1, 32, 32, 3)

        for col in range(num_cols):
            i += 1

            if col == 0:
                # first column: the query patch itself
                plt.subplot(num_rows, num_cols, i)
                plt.title(str(patch_idx))
                plt.imshow(patch)
            else:
                # remaining columns: its nearest neighbors
                plt.subplot(num_rows, num_cols, i)
                plt.imshow(neighbors[col])

    fig = plt.gcf()
    plt.show()
    fname = utils.datetime_filename('output/A1_test/neighbors/grid.png')
    fig.savefig(fname, format='png', dpi=300)
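
test_neighbors relies on main.nn and main.get_patch, which are defined elsewhere. A minimal self-contained sketch of the same kind of lookup, assuming scikit-learn's NearestNeighbors over flattened patch features (the toy data and names are illustrative only):

import numpy as np
from sklearn.neighbors import NearestNeighbors

data = np.random.rand(100, 32, 32, 3)      # toy stand-in for main.data
features = data.reshape(len(data), -1)     # one flat feature vector per patch

nn = NearestNeighbors(n_neighbors=3)
nn.fit(features)

query = features[0].reshape(1, -1)         # query with the first patch
_, idx = nn.kneighbors(query)
neighbors = data[idx[0]]                   # shape (3, 32, 32, 3), as used above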
Example #3
def main():
    """
    Assignment 3 - Neural Network Features

    """

    # The program will start execution here
    # Change the filename to load your favourite picture
    file = './images/lion2.jpg'
    train_features = False
    train = True

    img = Image.open(file).convert('RGB')
    img = utils.resize_proportional(img, new_height=900)
    target_image = np.array(img) / 255

    # This will execute the Mosaicking algorithm of Assignment 3
    main = Assignment3()
    main.encode_features(train_features)
    main.train(train)
    output_image = main.mosaic(target_image)

    # Save the resulting mosaic to the output folder
    output_image *= 255
    im = Image.fromarray(output_image.astype('uint8'))
    im.save(utils.datetime_filename('output/A3/mosaics/mosaic.png'))
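
Both mosaicking entry points resize the input with utils.resize_proportional, which is not shown in these snippets. A minimal sketch, assuming it scales the width so that the aspect ratio is preserved for the requested height:

from PIL import Image

def resize_proportional(img, new_height):
    # Hypothetical helper: keep the aspect ratio while resizing to new_height.
    new_width = round(img.width * new_height / img.height)
    return img.resize((new_width, new_height), Image.LANCZOS)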
Example #4
def test_cluster():
    clusters = [5, 6, 7]
    num_cols = 5
    num_rows = len(clusters)

    i = 0
    for row in range(num_rows):
        cluster = clusters[row]
        cluster_center = main.data[main.closest[cluster]]
        labels = main.kmeans.labels_
        cluster_indices = [idx for idx, l in enumerate(labels) if l == cluster]

        # distance of every patch to this cluster's center;
        # argsort picks the num_cols patches closest to it
        d = main.kmeans.transform(main.data.reshape(len(main.data),
                                                    -1))[:, cluster]
        ind = np.argsort(d)[:num_cols]
        closest = main.data[ind]

        for col in range(num_cols):
            i += 1

            patch = closest[col]
            # grid plot of size num_rows x num_cols
            plt.subplot(num_rows, num_cols, i)
            if col == 0:
                plt.title('Center')
            plt.imshow(patch)

    fig = plt.gcf()
    plt.show()
    fname = utils.datetime_filename('output/A2_test/clusters/samples.png')
    fig.savefig(fname, format='png', dpi=300)
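
A minimal self-contained illustration of the distance lookup used above, assuming scikit-learn's KMeans (the toy data is a stand-in for the flattened patches): transform() returns each sample's distance to every cluster center, so sorting one column gives the samples closest to that center.

import numpy as np
from sklearn.cluster import KMeans

X = np.random.rand(200, 32 * 32 * 3)   # toy stand-in for the flattened patches
kmeans = KMeans(n_clusters=8, n_init=10).fit(X)

d = kmeans.transform(X)[:, 0]          # distance of every sample to center 0
closest_idx = np.argsort(d)[:5]        # the five samples nearest that center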
Example #5
def scrape(tweets_per_file=100000, words_per_track=50):
    """
  for easier reference, we save 100k tweets per file
  """
    f = open(datetime_filename(prefix=RAW_TWEET_DIR + '/gn_tweet_'), 'w')
    tweet_count = 0
    hour_count = 0
    #track_grams #14 # track_grams #0
    #track_words #73 #31 #18 #0
    try:
        label.next_
        start_time = time.time()
        tracking = get_track_words(words_per_track, hour_count,
                                   track_lst_2)  # change here the track list
        print(tracking, len(tracking))
        for line in api.GetStreamFilter(follow=None,
                                        track=tracking,
                                        locations=None,
                                        languages=None,
                                        delimited=None,
                                        stall_warnings=None,
                                        filter_level=None):
            if 'text' in line:  #and line['lang'] == u'und':
                #text = line['text'].encode('utf-8').replace('\n', ' ')
                f.write('{}\n'.format(line))
                tweet_count += 1
                if tweet_count % tweets_per_file == 0:  # start new batch
                    f.close()
                    f = open(
                        datetime_filename(prefix=RAW_TWEET_DIR + '/gn_tweet_'),
                        'w')
                    continue
            next_time = time.time()
            print(next_time, start_time, next_time - start_time)
            if int((next_time - start_time) /
                   3600.0) >= 3:  # trunc 3h track1, track2;
                hour_count += 1
                goto.next_
    except KeyboardInterrupt:
        print('Twitter stream collection aborted')
    finally:
        f.close()
        return tweet_count
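
scrape() depends on api (a live python-twitter Api connection), the track_lst_2 keyword list, and get_track_words, none of which are defined in this snippet. A purely hypothetical sketch of one way such a helper could rotate through the keyword list as hour_count grows, since the streaming filter only accepts a limited number of phrases at a time:

def get_track_words(words_per_track, hour_count, track_lst):
    # Hypothetical: return a block of `words_per_track` keywords, advancing
    # to the next block of the list every time hour_count increases.
    start = (hour_count * words_per_track) % len(track_lst)
    block = track_lst[start:start + words_per_track]
    if len(block) < words_per_track:  # wrap around the end of the list
        block += track_lst[:words_per_track - len(block)]
    return block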
Example #6
def get_old_tweets(check_keys=True):
    tweet_count = 0
    csvFile = open(datetime_filename(prefix=RAW_TWEET_DIR + '/gn_tweet_'), 'a')
    csvWriter = csv.writer(csvFile)

    try:
        maxTweets = -1  # number of tweets to fetch; -1 means no limit
        for i, tweet in enumerate(
                sntwitter.TwitterSearchScraper(
                    " ".join(track_lst_3) +
                    ' since:2006-03-21 until:2021-01-31').get_items()
        ):  # joining the keywords with " " makes the search an AND query
            if i >= maxTweets and maxTweets > -1:
                break
            print(i, [t for t in (tweet.__class__.__dict__['_fields'])], "=",
                  [t for t in tweet])

            if tweet_count == 0:
                csvWriter.writerow([
                    t for t in tweet.__class__.__dict__['_fields']
                ])  # write the CSV header once
            if check_keys:
                # strict check: keep the tweet only if at least one keyword
                # appears verbatim in the text or username (e.g. key='día'
                # does not match 'dia')
                text = str(tweet.content + tweet.username).lower()
                if any(word.lower() in text for word in track_lst_3):
                    csvWriter.writerow(
                        [attr for attr in tweet]
                    )  # write all attributes; trim this list if you need fewer
            else:
                csvWriter.writerow(
                    [attr for attr in tweet]
                )  # write all attributes; trim this list if you need fewer
            tweet_count += 1
    except KeyboardInterrupt:
        print('Twitter scrape collection aborted')
    finally:
        csvFile.close()
        return tweet_count
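
The strict keyword filter above can be factored into a small helper. A minimal sketch (keep_tweet and its arguments are illustrative, not part of the original code):

def keep_tweet(text, username, keywords):
    # Keep a tweet only if at least one keyword appears verbatim
    # (case-insensitive) in its text or in the author's username.
    haystack = (text + username).lower()
    return any(word.lower() in haystack for word in keywords)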
Example #7
def main():
    """
    Assignment 1a - Average Patch Features
    Assignment 1b - Nearest Neighbor Search

    """

    # The program will start execution here
    # Change the filename to load your favourite picture
    file = './images/lion2.jpg'

    # Setting this to True will train the model (or pre-compute the features)
    # All models are automatically saved in the folder 'models'
    # Once the model is trained, you can set this back to False
    train_features = False
    train_model = False

    # Load image and resize it to a fixed size (keeping aspect ratio)
    img = Image.open(file).convert('RGB')
    img = utils.resize_proportional(img, new_height=2000)
    target_image = np.array(img) / 255

    # This will execute the Mosaicking algorithm of Assignment 1
    main = Assignment1()
    main.encode_features(train_features)
    main.train(train_model)

    t0 = time.time()
    output_image = main.mosaic_fast(target_image)
    print(time.time() - t0, 'seconds.')

    # Save the resulting mosaic to the output folder
    output_image *= 255
    im = Image.fromarray(output_image.astype('uint8'))
    im.save(utils.datetime_filename('output/A1/mosaics/mosaic.png'))
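
The comments above describe a train-once-then-reuse pattern: features and models are computed only when the flags are True and are otherwise loaded from the 'models' folder. A minimal sketch of that pattern, assuming pickle-based persistence (fit_or_load and its arguments are illustrative, not the actual Assignment1 API):

import os
import pickle

def fit_or_load(model, features, path='models/nn.pkl', retrain=False):
    # Fit and save when retraining is requested or no saved model exists;
    # otherwise load the previously saved model from disk.
    if retrain or not os.path.exists(path):
        model.fit(features)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as fh:
            pickle.dump(model, fh)
        return model
    with open(path, 'rb') as fh:
        return pickle.load(fh)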
Example #8
# 2. read files
# 2.1 tweets
all_files = glob.glob(raw_tweet_dir + "/gn_*utc.csv")
usecols = ['tweet_id', 'date', 'tweet', 'lang_twitter']
raw_tweet_files = dd.read_csv(all_files,
                              usecols=lambda c: c in set(usecols),
                              dtype={'tweet_id': 'str'})
print("raw_tweet_files\n", raw_tweet_files.head(3), "\n",
      len(raw_tweet_files.tweet_id))
raw_tweet_files = raw_tweet_files.compute()
if sample1000:
    # write tweets
    raw_tweet_files.sample(1000).to_csv(
        datetime_filename(prefix=raw_tweet_dir + '/gn_tweet_' +
                          str(len(raw_tweet_files.tweet_id)) + '_',
                          extension='_sample1000.csv'),
        encoding='utf-8',
        index=False)
    print("tweets sample exported...")
    sys.exit(0)
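
# Note (illustrative, not part of the original script): read_csv accepts a
# callable for usecols; the lambda above keeps a column only when its name is
# in the wanted set, so files whose column sets differ can still be read
# without a "Usecols do not match columns" error. A pandas-only sketch:
#   import io, pandas as pd
#   wanted = {'tweet_id', 'date', 'tweet', 'lang_twitter'}
#   text = "tweet_id,date,tweet,extra\n1,2021-01-01,hello,x\n"
#   df = pd.read_csv(io.StringIO(text), usecols=lambda c: c in wanted)
#   print(df.columns.tolist())  # ['tweet_id', 'date', 'tweet']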

# 2.2 lang
all_files = glob.glob(
    raw_tweet_dir + "/tweets_languages_gn_tweet*utc.csv"
)  # iglob(raw_tweet_dir + "/tweets_languages_gn_tweet*utc.csv", recursive=True)
usecols = [
    'tweet_id', 'lang', 'lang_fasttext', 'lang_polyglot', 'lang_textcat'
]
raw_lang_files = dd.read_csv(all_files,
                             usecols=lambda c: c in set(usecols),