def get_artists():
    """Fetch the IDs of every artist the user follows and stash them in the session.

    Walks Spotify's paginated followed-artists endpoint (following the
    'next' cursor until it is null), then redirects to /get_albums for
    the next pipeline step.
    """
    tokens = hp.get_tokens()
    hp.check_expiration(tokens)

    headers = {'Authorization': f'Bearer {tokens["access_token"]}'}
    artist_ids = []

    # Single loop handles the first page and every 'next' page alike,
    # instead of duplicating the id-collection code for page one.
    next_page_uri = MY_FOLLOWED_ARTISTS_URL
    while next_page_uri:
        r = requests.get(next_page_uri, headers=headers)
        response = r.json()
        artist_ids.extend(artist['id'] for artist in response['artists']['items'])
        next_page_uri = response['artists']['next']

    print('Retrieved artist IDs!')
    session['artist_ids'] = artist_ids
    return redirect('/get_albums')
def add_to_playlist():
    """Add all collected track URIs to the new playlist, at most 100 per request.

    Spotify's add-items endpoint caps each request at 100 URIs. The old
    2-way/3-way np.array_split could still produce chunks over 100 for
    large lists (e.g. 400 tracks -> 3 chunks of ~134); slicing in fixed
    batches of 100 guarantees the cap for any list size.
    """
    tokens = hp.get_tokens()
    hp.check_expiration(tokens)
    playlist_id = session['playlist_id']
    track_uris = hp.get_track_uris()
    tracks_list = track_uris['uris']

    # Batch in slices of 100 — always within the API limit.
    for start in range(0, len(tracks_list), 100):
        hp.add_tracks(tokens, playlist_id, tracks_list[start:start + 100])

    print('Added tracks to playlist!')
    # redirect to playlist page & shut down flask server
    hp.shutdown_server(request.environ)
    return redirect(session['playlist_url'])
def refresh_tokens():
    """Exchange the stored refresh token for a fresh access token.

    Posts to the Spotify token endpoint using HTTP Basic auth built from
    the client credentials, persists the new token pair via the helper
    module, and redirects back into the pipeline at /get_artists.
    """
    tokens = hp.get_tokens()

    credentials = f'{CLIENT_ID}:{CLIENT_SECRET}'.encode('ascii')
    basic_auth = str(base64.b64encode(credentials), 'ascii')
    headers = {'Authorization': f'Basic {basic_auth}'}
    payload = {
        'grant_type': 'refresh_token',
        'refresh_token': tokens['refresh_token'],
    }

    # post request for new tokens
    response = requests.post(SPOTIFY_TOKEN_URL, data=payload, headers=headers).json()
    hp.refresh_tokens(response['access_token'], tokens['refresh_token'],
                      response['expires_in'])
    print('Tokens refreshed!')
    return redirect('/get_artists')
def get_tracks():
    """Collect the track URIs for every album gathered by /get_albums.

    Queries the album-tracks endpoint once per album ID stored in the
    session, persists the URI list via the helper module, then redirects
    to /create_playlist.
    """
    tokens = hp.get_tokens()
    hp.check_expiration(tokens)
    album_ids = session['album_ids']

    # Auth header is identical for every request — build it once, not per loop.
    headers = {'Authorization': f'Bearer {tokens["access_token"]}'}

    track_uris = []
    for album_id in album_ids:  # 'album_id' also avoids shadowing builtin id()
        uri = f'https://api.spotify.com/v1/albums/{album_id}/tracks'
        r = requests.get(uri, headers=headers)
        response = r.json()
        track_uris.extend(track['uri'] for track in response['items'])

    hp.store_track_uris(track_uris)
    print('Retrieved tracks!')
    return redirect('/create_playlist')
def get_albums():
    """Find albums/singles released in the last 4 weeks by followed artists.

    For each artist ID in the session, queries Spotify's artist-albums
    endpoint, keeps releases newer than the 4-week cutoff, de-duplicates
    by album name (a same-named album is kept only when it comes from a
    different artist), stores the resulting album IDs in the session, and
    redirects to /get_tracks.
    """
    tokens = hp.get_tokens()
    hp.check_expiration(tokens)
    artist_ids = session['artist_ids']

    album_ids = []
    # album name -> artist name; used to check for duplicates with
    # different IDs — an issue with some albums
    album_names = {}

    # set time frame for new releases (4 weeks)
    time_frame = (datetime.now() - timedelta(weeks=4)).date()

    # Auth header never changes between requests — hoisted out of the loop.
    headers = {'Authorization': f'Bearer {tokens["access_token"]}'}

    for artist_id in artist_ids:  # 'artist_id' avoids shadowing builtin id()
        uri = (f'https://api.spotify.com/v1/artists/{artist_id}/albums'
               '?include_groups=album,single&country=US')
        r = requests.get(uri, headers=headers)
        response = r.json()

        for album in response['items']:
            # try narrowed to the only line that raises ValueError here
            try:
                # convert release_date string to datetime
                release_date = datetime.strptime(album['release_date'], '%Y-%m-%d')
            except ValueError:
                # there appear to be some older release dates that only
                # contain a year (e.g. '2007') - irrelevant
                print(f'Release date found with format: {album["release_date"]}')
                continue

            album_name = album['name']
            artist_name = album['artists'][0]['name']
            if release_date.date() > time_frame:
                # if we do find a duplicate album name, check if it's by a different artist
                if album_name not in album_names or artist_name != album_names[album_name]:
                    album_ids.append(album['id'])
                    album_names[album_name] = artist_name

    session['album_ids'] = album_ids
    print('Retrieved album IDs!')
    return redirect('/get_tracks')
def create_playlist():
    """Create a dated "New Monthly Releases" playlist for the user.

    POSTs to the create-playlist endpoint, stores the new playlist's ID
    and public URL in the session for the later pipeline steps, then
    redirects to /add_to_playlist.
    """
    tokens = hp.get_tokens()
    hp.check_expiration(tokens)

    current_date = date.today().strftime('%m-%d-%Y')
    playlist_name = f'New Monthly Releases - {current_date}'

    # make request to create_playlist endpoint
    uri = f'https://api.spotify.com/v1/users/{USER_ID}/playlists'
    headers = {'Authorization': f'Bearer {tokens["access_token"]}'}
    # requests' json= serializes the payload and sets
    # Content-Type: application/json for us — no manual json.dumps needed
    r = requests.post(uri, headers=headers, json={'name': playlist_name})
    response = r.json()

    session['playlist_id'] = response['id']  # store our new playlist's id
    session['playlist_url'] = response['external_urls']['spotify']  # store new playlist's url
    print(f'{r.status_code} - Created playlist!')
    return redirect('/add_to_playlist')
train_data = train_data1 + train_data2 valid_data = valid_data1 + valid_data2 print(len(train_data), len(valid_data)) else: raise Exception("invalid TRAIN_NOISE_TYPE") if START_EPOCH != 1: # if not training from scratch or for inference print(f"loading vocab from {VOCAB_PATH}") vocab = load_vocab_dict(VOCAB_PATH) else: print( f"loading vocab from train data itself and saving it at {VOCAB_PATH}" ) vocab = get_tokens([i[0] for i in train_data], keep_simple=True, min_max_freq=(2, float("inf")), topk=100000, intersect=vocab_ref, load_char_tokens=True) save_vocab_dict(VOCAB_PATH, vocab) print("") #print(vocab["token_freq"]) print([*vocab.keys()]) #print([(idx,vocab["idx2token"][idx]) for idx in range(100)]) print("") # see how many tokens in labels are going to be UNK # print ( num_unk_tokens([i[0] for i in train_data], vocab) ) # print ( num_unk_tokens([i[0] for i in valid_data], vocab) ) ############################################# # load ElmoSCTransformer #############################################
import random import math import helpers import spacy original_tokens = helpers.get_tokens() original_tokens = map(lambda token: token.text, original_tokens) original_tokens = list(dict.fromkeys(original_tokens)) tokens = helpers.get_tokens() tokens = map(lambda token: token.text, tokens) # drop dupes tokens = list(dict.fromkeys(tokens)) tokens_count = len(tokens) twenty_percent = math.ceil(tokens_count * 0.2) token_indices_to_scramble = random.sample(range(1, tokens_count), twenty_percent) for index in token_indices_to_scramble: operations = [ helpers.ScrambleOperations.add_letter, helpers.ScrambleOperations.remove_letter, helpers.ScrambleOperations.swap_letter ] tokens[index] = random.choice(operations)(tokens[index]) helpers.write_tokens(tokens) for index in range(len(tokens)):
open('data/preprocessed/images_val2014_all.txt', 'r').read().decode('utf8').splitlines() images_val_path = \ open('data/preprocessed/images_val2014_path.txt', 'r').read().decode('utf8').splitlines() answers_val_all = \ open('data/preprocessed/answers_val2014_all.txt', 'r').read().decode('utf8').splitlines() print ('ques_val, size = {}, sampel = {}'.format(len(questions_val), questions_val[0])) print ('ques_lengths_val, size = {}, sample = {}'.format(len(questions_lengths_val), questions_lengths_val[0])) print ('ans_val, size = {}, sample = {}'.format(len(answers_val), answers_val[0])) print ('imag_val, size = {}, sample = {}'.format(len(images_val), images_val[0])) print ('imag_val_path, size = {}, sample = {}'.format(len(images_val_path), images_val_path[0])) print ('ans_val_all, size = {}, sample = {}'.format(len(answers_val_all), answers_val_all[0])) print(temp) print(temp) ques_tokens_train = get_tokens(questions_train) ques_tokens_val = get_tokens(questions_val) ''' counts = {} count_thr = 5 for i, tokens in enumerate(ques_tokens_train):#change to train for token in tokens: counts[token] = counts.get(token, 0) + 1 cw = sorted([(count,w) for w,count in counts.iteritems()], reverse=True) print('top words and their counts:') print('\n'.join(map(str,cw[:20]))) # print some stats total_words = sum(counts.itervalues()) print('total words:', total_words) bad_words = [w for w,n in counts.iteritems() if n <= count_thr]