Exemplo n.º 1
0
    def train(self, test_users=None, b=None):
        """Solve one quadratic program per test user and store its score vector.

        For every user the seed items are translated to kernel indices through
        ``self.K_indexing``.  The kernel sub-matrix over those indices plus
        ``self.lambda_p`` times the identity is the quadratic term of the QP
        and the negated rows of ``self.q_`` are its linear term.  The solution
        is used to combine the full kernel rows of the seed items; ``self.q_``
        is subtracted and the result is stored in ``self.model[u]``.

        ``self.K`` is accessed as ``self.K[block][row_in_block, columns]``,
        i.e. as a sequence of row blocks holding ``block_size`` rows each.

        Args:
            test_users: mapping ``user -> sequence of seed items``.
            b: if given, only the first ``b`` seed items of every user are
                used.

        Returns:
            ``self``, so calls can be chained.
        """
        block_size = 5000
        self.model = {}
        self.test_users = test_users

        if test_users is None:
            # NOTE(review): with this default the loops below index a range
            # (``test_users[u]`` is an int) and ``set(...)`` raises TypeError.
            # The intended source of seed items is not visible here - confirm.
            test_users = range(self.n_users)

        tu = {}
        if b is not None:
            for u in test_users:
                tu[u] = set(test_users[u][:b])
        else:
            for u in test_users:
                tu[u] = set(test_users[u])

        for i, u in enumerate(tu):
            if self.verbose and (i + 1) % 100 == 0:
                print("%d/%d" % (i + 1, len(test_users)))
                utils.fl()

            Xp = [int(self.K_indexing[str(idx)]) for idx in tu[u]]
            # Floor division: the block number must stay an int on Python 3,
            # where ``/`` would produce a float that cannot be used as index.
            mtx_indexes = [a // block_size for a in Xp]
            pos_indexes = [a % block_size for a in Xp]

            npos = len(Xp)

            # Kernel restricted to the seed items (npos x npos).
            kp = np.zeros((npos, npos))
            for j in range(npos):
                kp[j] = self.K[mtx_indexes[j]][pos_indexes[j], Xp]

            kp = co.matrix(kp)

            kn = self.q_[Xp, :]

            # QP in the argument order expected by ``solver.qp``.
            # Presumably (cvxopt convention, utc helpers as named - confirm):
            # minimise 1/2 x'Px + q'x  subject to  x >= 0  and  sum(x) == 1.
            I = self.lambda_p * utc.identity(npos)
            P = kp + I
            q = -kn
            G = -utc.identity(npos)
            h = utc.zeroes_vec(npos)
            A = utc.ones_vec(npos).T
            # Named b_eq so the ``b`` argument is not overwritten.
            b_eq = co.matrix(1.0)

            solver.options['show_progress'] = False
            sol = solver.qp(P, q, G, h, A, b_eq)

            # Full kernel rows of the seed items, one row per seed item.
            band = np.zeros((npos, len(self.inverse_indexing)))
            for j in range(npos):
                band[j] = self.K[mtx_indexes[j]][pos_indexes[j], :]

            self.model[u] = (co.matrix(band.T) * (sol['x'])) - self.q_

        return self
Exemplo n.º 2
0
    def train(self, test_users=None, b=None, selection=None):
        """Fit a per-user score vector restricted to a candidate selection.

        For each user a QP is built from the kernel sub-matrix of the user's
        seed items (regularised with ``self.lambda_p``) and the matching rows
        of ``self.q_``.  Scores are then computed only for the indices in
        ``selection[u]``; every other entry of ``self.model[u]`` is ``-inf``.

        Args:
            test_users: mapping ``user -> sequence of seed items``.
            b: if given, keep only the first ``b`` seed items per user.
            selection: mapping ``user -> candidate indices`` to be scored.

        Returns:
            ``self``.
        """
        self.model = {}
        self.test_users = test_users

        if test_users is None:
            test_users = range(self.n_users)

        if b is None:
            seeds = {u: set(test_users[u]) for u in test_users}
        else:
            seeds = {u: set(test_users[u][:b]) for u in test_users}

        for count, u in enumerate(seeds, 1):
            if self.verbose and count % 100 == 0:
                print("%d/%d" % (count, len(test_users)))
                utils.fl()

            pos = list(seeds[u])
            npos = len(pos)

            # Kernel restricted to the seed items, filled row by row.
            gram = np.zeros((npos, npos))
            for r, item in enumerate(pos):
                gram[r] = self.K[item, pos].todense()
            gram = co.matrix(gram)

            linear = self.q_[pos, :]

            P = gram + self.lambda_p * utc.identity(npos)
            q = -linear
            G = -utc.identity(npos)
            h = utc.zeroes_vec(npos)
            A = utc.ones_vec(npos).T
            rhs = co.matrix(1.0)

            solver.options['show_progress'] = False
            sol = solver.qp(P, q, G, h, A, rhs)

            # Scores of the candidates only: seed rows x candidate columns.
            seed_rows = np.array(pos)[:, None]
            scores = (self.K[seed_rows, selection[u]].T).dot(
                sol['x']) - self.q_[list(selection[u])]

            # Everything outside the selection stays at -inf.
            self.model[u] = np.zeros(len(self.items_list)) - np.inf
            self.model[u][selection[u]] = scores[:, 0]

        return self
Exemplo n.º 3
0
def build_selection(k, p2s_test, global_popularity, t2t_id, S, p2t, lengths,
                    test_seed, P, power):
    """Build the top-``k`` candidate selection for every user of a test seed.

    For each user in ``p2s_test[test_seed]`` the value ``p2t[str(user)]`` is
    looked up.  If it is non-empty and a key of ``t2t_id``, the matching row of
    ``S`` is raised to ``power``, divided by ``lengths`` and projected with
    ``P.dot``; ``utils.scores_sorter`` then picks ``k`` entries from the
    result.  Otherwise the first ``k`` entries of ``global_popularity`` are
    used.

    Returns:
        dict mapping each user to its selection.
    """
    selection = {}

    for user in p2s_test[test_seed]:
        key = p2t[str(user)]
        if key != "" and key in t2t_id:
            row = S[t2t_id[key], :]**power / lengths
            selection[user] = utils.scores_sorter(P.dot(row), k)
        else:
            # Unknown or empty key: fall back to the global ranking.
            selection[user] = global_popularity[:k]

    print("selection built")
    utils.fl()
    return selection
Exemplo n.º 4
0
def multiline_xy_plot(x, ys, xlabel, ylabel, legend_labels, filename, y_errorbars=None, save_figure = True):
    """Plot several y-series against a common x-axis on one set of axes.

    Args:
        x: x values shared by all series.
        ys: list of y-series (must be a ``list``).
        xlabel, ylabel: axis label keys, passed through ``fl`` before use.
        legend_labels: one legend entry per series, indexed like ``ys``.
        filename: target path used when ``save_figure`` is true.
        y_errorbars: accepted but not used by this function.
        save_figure: when true the figure is written to ``filename``.

    Returns:
        The tuple ``(ax, fig)``.
    """
    assert isinstance(ys, list)
    palette = brewer2mpl.get_map('Set1', 'qualitative', 9)
    plotter = Ppl(palette, alpha=1)
    fig, ax = plt.subplots(1)
    ax.set_xlabel(fl(xlabel))
    ax.set_ylabel(fl(ylabel))
    ax.yaxis.get_major_formatter().set_powerlimits((0, 1))

    for series_no, series in enumerate(ys):
        plotter.plot(ax, x, series, linewidth=2,
                     label=legend_labels[series_no])
    plotter.legend(ax, loc=0, frameon=False)

    if save_figure:
        fig.savefig(filename)
    return ax, fig
Exemplo n.º 5
0
def plot_image_matrix(filename, jaccard_matrix, x_ticklabels = None, y_ticklabels = None):
    """Render a matrix as a colour mesh and save it to ``filename``.

    NaN entries of ``jaccard_matrix`` are masked before plotting.  Ticks are
    placed at ``0 .. shape`` on both axes and labelled with the given tick
    labels.  The axis labels are the fixed keys ``'ratioagent'`` (y) and
    ``'ratiolatency'`` (x), passed through ``fl``.

    Args:
        filename: path the figure is saved to.
        jaccard_matrix: 2-D array to display.
        x_ticklabels, y_ticklabels: tick labels for the x and y axis.

    Returns:
        The tuple ``(fig, ax)``.
    """
    colormap = brewer2mpl.get_map('Set1', 'qualitative', 9)
    p = Ppl(colormap, alpha=1)
    fig, ax = plt.subplots(1)
    masked_jaccard = np.ma.masked_where(np.isnan(jaccard_matrix), jaccard_matrix)
    p.pcolormesh(fig, ax, masked_jaccard)
    # Single-argument call form: prints the same on Python 2 and Python 3.
    print(x_ticklabels)
    print(y_ticklabels)
    yticks = range(jaccard_matrix.shape[0]+1)
    xticks = range(jaccard_matrix.shape[1]+1)
    ax.set_yticks(yticks)
    ax.set_xticks(xticks)
    ax.set_yticklabels(y_ticklabels)
    ax.set_xticklabels(x_ticklabels)
    ax.set_ylabel(fl('ratioagent'))
    ax.set_xlabel(fl('ratiolatency'))
    fig.savefig(filename)
    return fig, ax
Exemplo n.º 6
0
def make_scatter_plot_for_labelled_data(data_frame, x_name, y_name, labels, filename, colormap, x_function = 'dummy', y_function = 'dummy', legend = False, point_size = 5, omit_largest = 0, labels_to_plot = []):
    """Scatter-plot two columns of ``data_frame``, one colour per label group.

    Groups are sorted by size (largest first); the ``omit_largest`` biggest
    groups are skipped and the rest are drawn in that order, so smaller groups
    end up on top (higher ``zorder``).  The figure is saved to ``filename``.

    Args:
        data_frame: table holding the columns ``x_name`` and ``y_name``.
        x_name, y_name: column names; also used (through ``fl``) as axis
            labels.
        labels: per-row group labels, compared element-wise with ``==`` and
            used as index into ``colormap.mpl_colors``.
        filename: path the figure is saved to.
        colormap: object exposing ``mpl_colors``; also handed to ``Ppl``.
        x_function, y_function: NAME of a callable applied to the data and to
            the axis limits; ``'dummy'`` is the identity.
        legend: draw a legend when true.
        point_size: marker size.
        omit_largest: number of largest groups to leave out.
        labels_to_plot: labels to draw; empty means all labels.
    """
    ### Originally created for issue_28
    unique_labels = set(labels)
    if not labels_to_plot: labels_to_plot = unique_labels
    assert omit_largest < max(unique_labels), "omit_largest must be smaller than number of clusters"
    colors = colormap.mpl_colors

    def dummy(a): return a

    # NOTE(review): the transform names are resolved with eval(), so they must
    # never come from untrusted input.  Besides the local ``dummy`` any name
    # visible at module level (e.g. ``log``) can be used.
    x_transform = eval(x_function)
    y_transform = eval(y_function)
    p = Ppl(colormap, alpha=1)

    fig, ax = plt.subplots(1)
    ax.set_xlim([x_transform(min(data_frame[x_name])), x_transform(max(data_frame[x_name]))])
    ax.set_ylim([y_transform(min(data_frame[y_name])), y_transform(max(data_frame[y_name]))])
    # Axis labels come from fl(); no " (log)" suffix is appended because the
    # x_label / y_label variables it used to be added to are never defined
    # (that statement raised a NameError whenever 'log' was requested).
    ax.set_xlabel(fl(x_name))
    ax.set_ylabel(fl(y_name))
    ax.xaxis.get_major_formatter().set_powerlimits((0, 1))
    ax.yaxis.get_major_formatter().set_powerlimits((0, 1))

    cluster_size = [len(labels[labels == l]) for l in unique_labels]
    sizes, groups = zip(*sorted(zip(cluster_size, unique_labels), reverse=True))

    # Slice sizes and groups TOGETHER so every group keeps its own size in the
    # legend even when the largest groups are omitted.
    n_kept = len(groups) - omit_largest
    kept = list(zip(sizes, groups))[-n_kept:]
    for order_to_plot, (size, group) in enumerate(kept):
        if group in labels_to_plot:
            x = x_transform(data_frame[labels == group][x_name])
            y = y_transform(data_frame[labels == group][y_name])
            p.scatter(ax, x, y, label='C%s: %s'%(group, size), s=point_size, linewidth=0, zorder=order_to_plot, color=colors[group])
    if legend:
        legend = p.legend(ax, loc=0, fancybox=True, markerscale=5, frameon=False)
        # Keep the legend above every scatter layer.
        legend.set_zorder(100)

    fig.savefig(filename)
Exemplo n.º 7
0
	def mkplot(fitness):
		"""Plot the mean of every parameter as a function of one fitness measure.

		Rows of ``f`` whose ``fitness`` value lies below the 95% quantile and
		whose ``overshoot`` is below 5 are kept.  The kept fitness values are
		binned into 50 histogram bins; for every bin edge the parameters in
		``p`` are averaged over the rows whose fitness lies within one bin
		width on either side of that edge.  One line per parameter is drawn
		and saved as ``folder + '<fitness>.png'``.

		``f``, ``p`` and ``folder`` come from the enclosing scope (presumably
		pandas DataFrames of fitness values / parameters - confirm).
		"""
		mask = f[fitness] < f[fitness].quantile(0.95)
		mask &= f.overshoot < 5
		masked_fit = f[mask][fitness]
		masked_par = p[mask]
		_counts, bins = np.histogram(masked_fit, bins = 50)
		ws = (bins[1] - bins[0])
		# Real lists instead of map(): multiline_xy_plot asserts that ``ys`` is
		# a list and indexes ``legend_labels``, which iterators do not support.
		window_means = [masked_par[(masked_fit > edge - ws) & (masked_fit < edge + ws)].mean() for edge in bins]
		means = concat(window_means, axis=1).transpose()
		means_as_list = [list(means[column]) for column in means.columns]
		xlabel = fl(fitness)
		ylabel = ''
		legend_labels = [fl(column) for column in means.columns]
		filename = folder + '%s.png'%fitness
		multiline_xy_plot(bins, means_as_list, xlabel, ylabel, legend_labels, filename)
0
	def calc_and_plot(ratio_direction):
		"""Plot every fitness measure against both latency ranges, per ratio band.

		For each column of ``f`` and each ``(lower, upper)`` pair produced by
		``zip_to_tuples(ratio_range)``, the rows with ``lower < p.ratio < upper``
		are averaged inside 20-wide latency windows - once over
		``ssmmlatencyrange`` and once over ``sclatencyrange``.  Two figures per
		fitness measure are written, with one line per ratio band.

		``ratio_direction`` is only used in the legend text and the file names.
		"""
		for fitness in f.columns:
			ssmm_ys = []
			sc_ys = []
			legend_labels = []
			for ratio_lower, ratio_upper in zip_to_tuples(ratio_range):
				ratio_mask = (ratio_lower < p.ratio) & (p.ratio < ratio_upper)

				ssmm_means = [f[get_ssmmlat_mask(lat, lat+20) & ratio_mask].mean() for lat in ssmmlatencyrange]
				ssmm_lat_range = concat(ssmm_means, axis=1).transpose()
				ssmm_ys.append(ssmm_lat_range[fitness])

				sc_means = [f[get_sclat_mask(lat, lat+20) & ratio_mask].mean() for lat in sclatencyrange]
				sc_lat_range = concat(sc_means, axis=1).transpose()
				sc_ys.append(sc_lat_range[fitness])

				band_label = r'$\displaystyle %s < %s < %s$'%(round(ratio_lower,1), fl(ratio_direction, mathmode = False), round(ratio_upper,1))
				legend_labels.append(band_label)

			# The x values are taken from the frames of the last ratio band.
			mm_filename = '%s_%s_%s_mmlatency.png'%(folder, ratio_direction, fitness)
			multiline_xy_plot(ssmm_lat_range.index, ssmm_ys, xlabel = 'ssmm_latency_mu', ylabel = fitness, legend_labels = legend_labels, filename = mm_filename)

			sc_filename = '%s_%s_%s_sclatency.png'%(folder, ratio_direction, fitness)
			multiline_xy_plot(sc_lat_range.index, sc_ys, xlabel = 'sc_latency_mu', ylabel = fitness, legend_labels = legend_labels, filename = sc_filename)
0
sys.path.append(
    os.path.dirname(os.path.dirname(os.path.dirname(
        os.path.abspath(__file__)))))
import utils
'''
File used to build the kernel between songs: each song is represented by the playlists it belongs to. Each
element of the kernel matrix holds the cosine similarity between the representations of two songs.
'''

#s2p = utils.jload("../data/validation/s2p.json")
# Load the song -> playlists mapping (presumably JSON, via utils.jload) and
# turn every value into a set.
s2p = utils.jload("./s2p.json")
for s in s2p:
    s2p[s] = set(s2p[s])

# Python 2 print statement: the trailing comma suppresses the newline so that
# "done!" below ends up on the same line.
print "Creating s2d...",
utils.fl()  # presumably flushes stdout so the progress text shows immediately
# s2d: int song id -> {playlist: weight}.  Keys of s2p are converted with
# int(); each playlist of a song gets weight 1.0, incremented if the same
# playlist is met again for the same int id.
s2d = {}
for ss in s2p:
    s = int(ss)
    if s not in s2d:
        s2d[s] = {}
    for p in s2p[ss]:
        if p not in s2d[s]:
            s2d[s][p] = 1.
        else:
            s2d[s][p] += 1.
print "done!"
utils.fl()

# Start of the next stage; the code that computes the norms is not part of
# this excerpt.
print "Creating norms...",
utils.fl()
Exemplo n.º 10
0
    def train(self, test_users=None, bucket=None):
        """Fit per-user scores from a weighted sum of three kernels.

        ``self.Klist[0]`` is indexed directly with the kernel indices of the
        seed items (obtained through ``self.s2k``); ``self.Klist[1]`` and
        ``self.Klist[2]`` are densified once and indexed through ``self.k2ar``
        and ``self.k2al`` respectively.  The three kernels are weighted with
        ``self.mu[0..2]``.  For every user a QP is solved on the combined
        kernel of the seed items and the solution is used to combine the full
        kernel rows; ``self.q_`` is subtracted and the result stored in
        ``self.model[u]``.

        Args:
            test_users: mapping ``user -> sequence of seed items``; stored in
                ``self.test_users`` (``range(self.n_users)`` when ``None``).
            bucket: if given, keep only the first ``bucket`` seed items.

        Returns:
            ``self``.
        """
        started = time.time()
        self.model = {}

        Kar = self.Klist[1].todense()
        Kal = self.Klist[2].todense()

        self.test_users = test_users
        if test_users is None:
            self.test_users = range(self.n_users)

        tu = {}
        for u in self.test_users:
            if bucket is not None:
                tu[u] = set(self.test_users[u][:bucket])
            else:
                tu[u] = set(self.test_users[u])

        for n_done, u in enumerate(tu, 1):
            if self.verbose and n_done % 10 == 0:
                print("%d/%d" % (n_done, len(self.test_users)))
                print(str(time.time() - started))
                utils.fl()

            pos = [int(self.s2k[idx]) for idx in tu[u]]
            ar_idx = [self.k2ar[k] for k in pos]
            al_idx = [self.k2al[k] for k in pos]
            npos = len(pos)
            row_shape = (npos, )

            # Combined kernel over the seed items, accumulated row by row.
            # The three separate assignments are intentional: each partial sum
            # is stored back into the float row before the next term is added.
            kp = np.zeros((npos, npos))
            for j, item in enumerate(pos):
                kp[j] = self.mu[0] * self.Klist[0][item, pos].todense().reshape(row_shape)
                kp[j] = kp[j] + self.mu[1] * Kar[ar_idx[j], ar_idx].reshape(row_shape)
                kp[j] = kp[j] + self.mu[2] * Kal[al_idx[j], al_idx].reshape(row_shape)

            kp = co.matrix(kp)
            linear = self.q_[pos, :]

            P = kp + self.lambda_p * utc.identity(npos)
            q = -linear
            G = -utc.identity(npos)
            h = utc.zeroes_vec(npos)
            A = utc.ones_vec(npos).T
            rhs = co.matrix(1.0)

            solver.options['show_progress'] = False
            sol = solver.qp(P, q, G, h, A, rhs)

            # Full kernel rows of the seed items, combined with the same
            # weights as above.
            K = co.matrix(self.mu[0] * self.Klist[0][pos, :].todense())

            ar_col = np.array(ar_idx).reshape((len(ar_idx), 1))
            al_col = np.array(al_idx).reshape((len(al_idx), 1))

            K = K + co.matrix(self.mu[1] * Kar[ar_col, self.k2ar])
            K = K + co.matrix(self.mu[2] * Kal[al_col, self.k2al])

            self.model[u] = (K.T * sol['x']) - self.q_

        return self
Exemplo n.º 11
0
    def train(self, test_users=None, b=None):
        """Fit per-user scores on a kernel whose zero entries are imputed.

        Seed items are mapped to kernel indices through ``self.K_indexing``.
        Wherever the kernel value between two items is exactly zero it is
        replaced by ``self.imp_dic["<larger>,<smaller>"]``, keyed by the two
        ``len(self.u2i[...])`` counts of the items involved (larger count
        first).  This is applied both to the seed-item sub-matrix used in the
        QP and to the full kernel rows used to compute the final scores.

        Args:
            test_users: mapping ``user -> sequence of seed items``.
            b: if given, keep only the first ``b`` seed items per user.

        Returns:
            ``self``.
        """
        started = time.time()
        self.model = {}
        self.test_users = test_users

        if test_users is None:
            test_users = range(self.n_users)

        if b is None:
            tu = {u: set(test_users[u]) for u in test_users}
        else:
            tu = {u: set(test_users[u][:b]) for u in test_users}

        def count_of(kernel_index):
            # Size of the u2i entry belonging to a kernel index.
            return len(self.u2i[self.inverse_indexing[kernel_index]])

        def imputed(x, z):
            # Replacement value for a zero kernel entry; larger count first.
            hi, lo = (x, z) if x >= z else (z, x)
            return self.imp_dic["%d,%d" % (hi, lo)]

        for n_done, u in enumerate(tu, 1):
            if self.verbose and n_done % 10 == 0:
                print("%d/%d" % (n_done, len(test_users)))
                print(str(time.time() - started))
                utils.fl()

            pos = [int(self.K_indexing[str(idx)]) for idx in tu[u]]
            npos = len(pos)

            kp = np.zeros((npos, npos))
            for j, item in enumerate(pos):
                kp[j] = self.K[item, pos].todense()

            # Fill zero entries of the upper triangle and mirror them.
            for row in range(kp.shape[0]):
                x = count_of(pos[row])
                for col in range(row + 1, kp.shape[0]):
                    if kp[row, col] == 0.:
                        kp[row, col] = imputed(x, count_of(pos[col]))
                        kp[col, row] = kp[row, col]

            kp = co.matrix(kp)
            linear = self.q_[pos, :]

            P = kp + self.lambda_p * utc.identity(npos)
            q = -linear
            G = -utc.identity(npos)
            h = utc.zeroes_vec(npos)
            A = utc.ones_vec(npos).T
            rhs = co.matrix(1.0)

            solver.options['show_progress'] = False
            sol = solver.qp(P, q, G, h, A, rhs)

            # Same imputation on the full kernel rows of the seed items.
            K_imp = co.matrix(self.K[pos, :].todense())
            for row in range(K_imp.size[0]):
                x = count_of(pos[row])
                for col in range(K_imp.size[1]):
                    if K_imp[row, col] == 0.:
                        K_imp[row, col] = imputed(x, count_of(col))

            self.model[u] = (K_imp.T * sol['x']) - self.q_

        return self
Exemplo n.º 12
0
    def train(self, test_users=None, b=None, test_titles=None, c=0):
        """Fit per-user scores with one extra QP variable taken from ``Beta``.

        Besides the seed items of a user, the QP gets one additional variable
        built from ``beta = self.Beta[test_titles[u]]``: its kernel row is
        ``K[seeds] . beta^T`` with a 1 on the diagonal, and its linear term is
        ``beta . q_``.  The inequality system is ``-I x <= 0`` plus one extra
        row bounding the last variable by ``c``.  The final score vector
        combines the seed-item kernel rows and ``K . beta^T`` with the
        solution and subtracts ``self.q_``.

        Args:
            test_users: mapping ``user -> sequence of seed items``.
            b: if given, keep only the first ``b`` seed items per user.
            test_titles: mapping ``user -> index into self.Beta``.
            c: upper bound for the extra variable; stored in ``self.c``.

        Returns:
            ``self``.
        """
        self.model = {}
        self.test_users = test_users
        self.c = c
        if test_users is None:
            test_users = range(self.n_users)

        if b is None:
            tu = {u: set(test_users[u]) for u in test_users}
        else:
            tu = {u: set(test_users[u][:b]) for u in test_users}

        for i, u in enumerate(tu):
            tic = time.time()
            beta = self.Beta[test_titles[u]]
            beta_K = self.K.dot(beta.T).todense()
            # Progress is reported for every user when verbose.
            if self.verbose:
                print("%d/%d" % (i + 1, len(test_users)))
                utils.fl()

            Xp = np.array(list(tu[u]))
            n_seed = len(Xp)
            npos = n_seed + 1  # seed items plus the extra variable

            kp = np.zeros((npos, npos))
            kp[:n_seed, :n_seed] = self.K[Xp[:, None], Xp].todense()

            # Last row/column: kernel between the seed items and beta,
            # with a 1 in the corner.
            kp[n_seed, :] = np.concatenate(
                (self.K[Xp, :].dot(beta.T).todense(), np.ones(
                    (1, 1)))).flatten()
            kp[:n_seed, n_seed] = kp[n_seed, :n_seed]

            q0 = (co.matrix(beta.todense()) * self.q_)[0, 0]
            kp = co.matrix(kp)

            linear = co.matrix(
                np.concatenate(
                    (np.array(self.q_[list(Xp), :]), np.array([[q0]]))))

            P = kp + self.lambda_p * utc.identity(npos)
            q = -linear

            # npos rows of -I, plus one row selecting the last variable.
            G = np.vstack((-np.eye(npos), np.zeros(npos)))
            G[npos, -1] = 1
            G = co.matrix(G)
            h = np.zeros(npos + 1)
            h[-1] = self.c
            h = co.matrix(h)

            A = utc.ones_vec(npos).T
            rhs = co.matrix(1.0)

            solver.options['show_progress'] = False
            sol = solver.qp(P, q, G, h, A, rhs)

            self.model[u] = np.array((self.K[Xp, :].T).dot(sol['x'][:-1]) +
                                     beta_K * sol['x'][-1] -
                                     self.q_).flatten()
            print(time.time() - tic)

        return self