def main():
	"""Bicluster ``pretestData.csv`` with ChengChurch and print a summary.

	Reads ``pretestData.csv`` from the current working directory (first row
	as header, first column as index), re-labels the columns with the integer
	value of each header, multiplies the values by 100 and fits a
	``ChengChurch`` model with ``k`` clusters.

	For every cluster the result of ``get_indices`` and ``get_shape`` is
	printed.  Finally the number of distinct values seen in the first and in
	the second index array over all clusters is printed (labelled "user" and
	"movie" in the output).

	All ``print`` calls pass a single argument so the output is the same on
	Python 2 and Python 3.
	"""
	src_path = os.path.join(os.getcwd(), 'pretestData.csv')
	pd_data = pd.read_csv(src_path, header=0, index_col=0)
	pd_data.columns = [int(x) for x in pd_data.columns.values]
	data = pd_data.values * 100
	print('%s %s' % (data.shape, type(data)))

	k = 4
	model = ChengChurch(n_clusters=k, max_msr=100, deletion_threshold=1.1, inverse_rows=True, random_state=0)
	model.fit(data)

	# Only the number of distinct indices is reported, so collect them in
	# sets directly instead of growing arrays with np.append on each pass.
	clustered_users = set()
	clustered_movies = set()
	for i in range(k):
		# Query the model once per cluster and reuse the result.
		indices = model.get_indices(i)
		print(indices)
		print(model.get_shape(i))
		# ravel() mirrors np.append, which flattens its arguments.
		clustered_users.update(np.ravel(indices[0]).tolist())
		clustered_movies.update(np.ravel(indices[1]).tolist())
	# Joining with ' ' reproduces the spacing of the comma-separated
	# Python 2 print statement, including the double space after '='.
	print(' '.join([
		'user number of clustered = ', str(len(clustered_users)),
		'movie number of clustered = ', str(len(clustered_movies)),
	]))
lines = list(list(int(i) for i in line if i) for line in lines)
data = np.array(lines)
pd_data = pd.DataFrame(data)
pd_data.to_csv('lymphoma.csv')
"""

pd_data = pd.read_csv("lymphoma.csv", header=0, index_col=0)
data = pd_data.values
# single-argument print: same output on Python 2 and Python 3
print('%s %s' % (type(data), data.shape))

# replace missing values, just as in the paper: every entry equal to 999 is
# overwritten with a random integer from [-800, 800] (randint's upper bound
# is exclusive); the fixed seed keeps the replacement reproducible
# NOTE(review): `data` may share memory with `pd_data`, in which case this
# assignment also changes `pd_data` -- confirm that is acceptable
generator = np.random.RandomState(0)
idx = np.where(data == 999)
data[idx] = generator.randint(-800, 801, len(idx[0]))

# cluster with same parameters as original paper
n_clusters = 100
model = ChengChurch(n_clusters=n_clusters, max_msr=1200, deletion_threshold=1.2, inverse_rows=True, random_state=0)
model.fit(data)


def msr(a):
	"""Return the mean squared residue of the 2-D array ``a``.

	The residue of an entry is its value minus its row mean, minus its
	column mean, plus the overall mean of ``a``.
	"""
	residue = a - a.mean(axis=1, keepdims=True) - a.mean(axis=0) + a.mean()
	return np.power(residue, 2).mean()


# find bicluster with smallest msr and plot it
msrs = [msr(model.get_submatrix(i, data)) for i in range(n_clusters)]
arr = model.get_submatrix(np.argmin(msrs), data)
print('%s %s' % (type(arr), arr.shape))
df = DataFrame(arr)
# one string label per row; built as a real list because map() returns a
# lazy iterator on Python 3
df["row"] = [str(i) for i in range(arr.shape[0])]
parallel_coordinates(df, "row", linewidth=1.5)
plt.xlabel("column")
plt.ylabel("expression level")
# drop the legend that parallel_coordinates adds for the "row" labels
plt.gca().legend_ = None
plt.show()