示例#1
0
import numpy as np

def printUsage():
    """Print the expected command-line invocation of the script to stdout."""
    usage_text = "Usage: python songsPerYear.py {user_id}"
    print(usage_text)
	
	
# The user id is the first command-line argument; without it, show the
# usage hint and stop with exit status 1.
if len(sys.argv) < 2:
    printUsage()
    sys.exit(1)
user = sys.argv[1]

# Input files are read from a folder named after the user id, relative to
# the current working directory
user_folder = './' + user

# Set up data: load the user's mapped data, hand it to mbzMeta together with
# the meta.tsv path, then join the result back onto the mapped data by track_id
allData = importMappedData(user_folder + '/mapped_data.tsv')
data = mbzMeta(user_folder + '/meta.tsv', allData).merge(allData, on='track_id')

# Select the attribute we want to visualise: occurrences of each 'year' value
attribute = data['year'].value_counts()

# One count per distinct year, in the same order as the labels below
frequency = attribute.tolist()

# Label every count with its year; an empty string stands for an unknown year
labels = ["Unknown" if year == '' else year for year in attribute.index.values]

# Generate the y positions. Later, replace them with labels
y_pos = range(len(labels))
import matplotlib.pyplot as plt
import pandas as pd
import sys
import os
from functions import importMappedData, trackMeta, mbzMeta, getUsers
from collections import OrderedDict, Counter
from operator import itemgetter

# The user id is the only command-line argument. When it is missing, exit
# with a usage hint on stderr (status 1) instead of an IndexError traceback.
if len(sys.argv) < 2:
    sys.exit("Usage: python " + sys.argv[0] + " {user_id}")
user = sys.argv[1]

data = importMappedData()
userData = getUsers()
# Rows of the user table whose user_id equals the requested id.
# NOTE(review): `user` is a str taken from argv; if the user_id column is
# numeric this comparison never matches -- confirm against getUsers().
userInfo = userData[userData["user_id"] == user]

# Defaults that stay in place when the user has no row in the user table
gender = ""
age = 0
country = ""

# Check for a match explicitly rather than catching IndexError, so that an
# unrelated IndexError is not silently swallowed.
if not userInfo.empty:
    gender = userInfo['gender'].values[0]
    age = userInfo['age'].values[0]
    country = userInfo['country'].values[0]

# Print the selected user's profile. Every field goes through str() so that a
# non-string value (for example NaN for a missing entry) is printed instead
# of raising TypeError on string concatenation.
print(user)
print("gender: " + str(gender))
print("age: " + str(age))
print("country: " + str(country))

# Create a DF with data relevant to the specified user
userPlays = pd.DataFrame(data[data["user_id"] == user],
示例#3
0
from operator import itemgetter
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.svm import SVR
import matplotlib.pyplot as plt
from functions import importMappedData, mbzMeta

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Computes ``mean(|(y_true - y_pred) / y_true|) * 100``.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values; must broadcast against
            ``y_true``.

    Returns:
        The MAPE as a NumPy float. The ratio is undefined where ``y_true``
        is zero, so the result is ``inf`` or ``nan`` in that case.
    """
    # Convert to float before subtracting: with unsigned integer inputs the
    # difference would wrap around instead of going negative.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# SET UP DATA

# Load the mapped data and pass it through mbzMeta so every song carries
# its metadata
data = mbzMeta(importMappedData())

# Clean-up genres column: remove every "[", "]" and "'" character
_GENRE_PUNCTUATION = str.maketrans('', '', "[]'")
data['genres'] = data['genres'].apply(
    lambda genres: genres.translate(_GENRE_PUNCTUATION)
)

# When joining two dataframes, track_id serves as the key
trackids = data.index

# Convert artist_id to numerical
artistids = data["artist_id"].tolist()
artistle = LabelEncoder()
# Learn the classes and encode the ids in a single pass; a separate
# transform() call afterwards would only repeat the same work.
encoded_artists = pd.Series(artistle.fit_transform(artistids))
# The distinct artist ids seen during fitting; each integer code is an
# index into this array
artist_classes = artistle.classes_