def test_item_build():
    """Build an ``Item``, attach a data payload and check what it stores.

    Verifies that the object is an ``Item`` instance and that the string
    form of ``get_data()`` matches the expected text.
    """
    payload = {'name': 'u2', 'popularity': 5.0}

    movie = Item(MOVIEID)
    movie.add_data(payload)

    assert_true(isinstance(movie, Item))
    # NOTE(review): this compares the *string* form of the stored data; if
    # get_data() returns a plain dict the result depends on key ordering.
    expected_repr = "{'popularity': 5.0, 'name': 'u2'}"
    assert_equal(str(movie.get_data()), expected_repr)
def setUp(self):
    """Prepare the fixtures shared by the tests.

    Stores a ``Data`` object filled from the module-level ``ratings``
    triples in ``self.ratings`` and a ``{movie id: Item}`` dict built from
    the module-level ``movie_genres`` triples in ``self.movies``.
    """
    rating_data = Data()
    for stars, item_id, user_id in ratings:
        rating_data.add_tuple((stars, item_id, user_id))

    movie_index = {}
    for movie_id, title, genre_list in movie_genres:
        entry = Item(movie_id)
        entry.add_data({'name': title, 'genres': genre_list})
        movie_index[movie_id] = entry

    self.ratings = rating_data
    self.movies = movie_index
def read_items(filename):
    """Read a pipe-delimited item file into a dict of ``Item`` objects.

    Every line is split on ``'|'``. Field 0 is the item id (converted to
    ``int``), field 1 the item name, field 2 the release date, and fields
    5 through 21 are stored unchanged as the ``genres`` list. Fields 3 and
    4 are not used.

    :param filename: path of the file to read.
    :returns: dict mapping the integer item id to an ``Item`` whose data
        holds the keys ``name``, ``release_date`` and ``genres``.
    :raises ValueError: if the first field of a line is not an integer.
    :raises IndexError: if a line has fewer than three fields.
    """
    items = {}
    # The context manager closes the file even when a malformed line raises.
    with open(filename) as source:
        for line in source:
            fields = line.strip('\r\n').split('|')
            item_id = int(fields[0])
            item_name = fields[1]
            release_date = fields[2]
            genres = fields[5:22]

            item = Item(item_id)
            item.add_data({
                'name': item_name,
                'release_date': release_date,
                'genres': genres,
            })
            items[item_id] = item
    return items
def read_items(filename):
    """Read a ``::``-delimited item file and register matching problems.

    For every line an ``Item`` is created from the integer id (field 0),
    the name (field 1) and the ``'|'``-separated genres (field 2).

    Side effect: for every line, an entry is written into the module-level
    ``problems_recsys`` dict. Its key is ``problem_keys[item_id % 3300 + 1]``
    and its value holds the title looked up in ``problems`` under that same
    key plus the genres translated to tags through ``genre_list`` and
    ``tag_keys``.

    :param filename: path of the file to read.
    :returns: dict mapping the integer item id to its ``Item``.
    :raises ValueError: if an id is not an integer or a genre is not present
        in ``genre_list``.
    :raises IndexError: if a line has fewer than three fields.
    """
    items = {}
    # The context manager closes the file even when a malformed line raises.
    with open(filename) as source:
        for line in source:
            # Expected shape: 1::Toy Story (1995)::Animation|Children's|Comedy
            fields = line.strip('\r\n').split('::')
            item_id = int(fields[0])
            item_name = fields[1]
            genres = fields[2].split('|')

            item = Item(item_id)
            item.add_data({'name': item_name, 'genres': genres})
            items[item_id] = item

            # Translate each genre to the tag stored at the same position.
            tags = [tag_keys[genre_list.index(genre)] for genre in genres]

            # Look the problem key up once and reuse it for read and write.
            problem_key = problem_keys[item_id % 3300 + 1]
            problems_recsys[problem_key] = {
                'name': problems[problem_key]['title'],
                'tags': tags,
            }
    return items
def read_item_data(filename):
    """Read a ``::``-delimited item file into a dict of ``Item`` objects.

    Field 0 of each line is used, as a string, both as the ``Item`` id and
    as the dict key. When present, field 1 is stored under ``'Title'`` and
    field 2 under ``'Genres'``. A line whose id was already seen is reported
    on standard output and otherwise ignored.

    :param filename: path of the file to read.
    :returns: dict mapping the id string to its ``Item``.
    """
    itemdict = {}
    # Open the path that was passed in (the earlier code opened the unrelated
    # name ``item_file``) and let the context manager close the handle.
    with open(filename, 'r') as f:
        for r in f:
            # NOTE(review): the line is not stripped before splitting, so
            # the last field keeps its trailing newline.
            p = r.split('::')
            if p[0] in itemdict:
                # Single-argument form prints the same text on Python 2 and 3.
                print("Duplicate! %s" % (p[0],))
                continue

            idat = {}
            if len(p) > 1:
                idat['Title'] = p[1]
            if len(p) > 2:
                idat['Genres'] = p[2]

            item = Item(p[0])
            item.add_data(idat)
            itemdict[p[0]] = item
    return itemdict
def read_item_data(filename):
    """Load items from a ``::``-delimited file, keyed by their id string.

    For each line the first field becomes the ``Item`` id and the dict key
    (kept as a string). The second field, if any, is stored as ``'Title'``
    and the third, if any, as ``'Genres'``. Repeated ids are reported on
    standard output and the first occurrence is kept.

    :param filename: path of the file to read.
    :returns: dict mapping the id string to its ``Item``.
    """
    itemdict = {}
    # Read from the ``filename`` argument; the earlier code opened the
    # unrelated name ``item_file`` and never closed the handle.
    with open(filename, 'r') as handle:
        for record in handle:
            # NOTE(review): no newline stripping happens here, so the final
            # field of every line still ends with the line terminator.
            parts = record.split('::')
            key = parts[0]
            if key in itemdict:
                # Single-argument form prints the same text on Python 2 and 3.
                print("Duplicate! %s" % (key,))
            else:
                item = Item(key)
                idat = {}
                if len(parts) > 1:
                    idat['Title'] = parts[1]
                if len(parts) > 2:
                    idat['Genres'] = parts[2]
                item.add_data(idat)
                itemdict[key] = item
    return itemdict
def load_movies(filename):
    """Load movies from a comma-separated file into a dict of ``Item``.

    Each line is tokenised with a regular expression whose fields are runs
    of characters other than commas and quotes, optionally mixed with
    single- or double-quoted sections (so commas inside quotes do not
    split a field). Field 0 is the integer movie id, field 1 the name and
    field 2 the ``'|'``-separated genres.

    Lines whose first field is not an integer (for example a header row)
    are skipped.

    :param filename: path of the file to read.
    :returns: dict mapping the integer movie id to its ``Item``.
    :raises IndexError: if a line yields fewer than three fields.
    """
    pattern = re.compile(r'''((?:[^,"']|"[^"]*"|'[^']*')+)''')
    items = {}
    # The context manager closes the file even when a malformed line raises.
    with open(filename) as source:
        for line in source:
            # split() with a capturing group alternates separators and
            # captured fields; the fields sit at the odd positions.
            data = pattern.split(line)[1::2]
            try:
                item_id = int(data[0])
            except ValueError:
                # Non-numeric id: not a data row, skip it.
                continue

            item_name = data[1]
            genres = data[2].split('|')
            item = Item(item_id)
            item.add_data({'name': item_name, 'genres': genres})
            items[item_id] = item
    return items
def _read_items(filename):
    """Read a ``::``-delimited item file into a dict of ``Item`` objects.

    Field 0 is used, as a string, for both the ``Item`` id and the dict
    key; field 1 is the item name. ``genres`` is the list of every field
    after the second one. The fields are not split on ``'|'``, so for the
    sample line below the list has the single element
    ``"Animation|Children's|Comedy"``.

    :param filename: path of the file to read.
    :returns: dict mapping the id string to its ``Item``.
    :raises IndexError: if a line has fewer than two fields.
    """
    items = {}
    # The context manager closes the file even when a malformed line raises.
    with open(filename) as source:
        for line in source:
            # Sample line: 1::Toy Story (1995)::Animation|Children's|Comedy
            fields = line.strip('\r\n').split('::')
            item_id = fields[0]
            item_name = fields[1]
            genres = fields[2:]

            item = Item(item_id)
            item.add_data({'name': item_name, 'genres': genres})
            items[item_id] = item
    return items
def _read_items(filename):
    """Read a latin-1 encoded, ``::``-delimited item file.

    Field 0 is the integer item id, field 1 the item name and field 2 the
    ``'|'``-separated genres.

    :param filename: path of the file to read (decoded as ``latin1``).
    :returns: dict mapping the integer item id to an ``Item`` whose data
        holds the keys ``name`` and ``genres`` (a list of strings).
    :raises ValueError: if the first field of a line is not an integer.
    :raises IndexError: if a line has fewer than three fields.
    """
    items = {}
    # The context manager closes the file even when a malformed line raises.
    with codecs.open(filename, 'r', 'latin1') as source:
        for line in source:
            # Sample line: 1::Toy Story (1995)::Animation|Children's|Comedy
            fields = line.strip('\r\n').split('::')
            item_id = int(fields[0])
            item_name = fields[1]
            # split() already returns a fresh list; no copy loop needed.
            genres = fields[2].split('|')

            item = Item(item_id)
            item.add_data({'name': item_name, 'genres': genres})
            items[item_id] = item
    return items
def test_user_add_item():
    """Attach a named item with a weight to a user and check the listing.

    The item is named after ``ARTISTID`` and added with ``PLAYS`` as its
    weight; the string form of ``get_items()`` must match the expected text.
    """
    artist = Item(ARTISTID)
    artist.add_data({'name': ARTISTID})

    listener = User(USERID)
    listener.add_item(artist, PLAYS)

    expected_listing = '[(u2, 25)]'
    assert_equal(str(listener.get_items()), expected_listing)
if __name__ == '__main__':
    # Script entry point: load the ratings file, split it into train/test
    # sets, report their sizes and build two sample items.

    # Dataset
    PERCENT_TRAIN = 100
    data = Data()
    data.load('/Users/jennyyuejin/recommender/Data/test_0/userProd.data',
              sep='\t',
              format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

    # Train & Test data
    train, test = data.split_train_test(percent=PERCENT_TRAIN,
                                        shuffle_data=True)
    # Single-argument form prints the same text on Python 2 and Python 3.
    print('%d training data points; %d testing data points'
          % (len(train), len(test)))

    # Two sample items that share the same tag vector.
    itemId = 0
    item = Item(itemId)
    item.add_data({'name': 'project0',
                   'popularity': 0.5,
                   'tags': [0, 0, 1]})

    itemId = 1
    item2 = Item(itemId)
    item2.add_data({'name': 'project1',
                    'popularity': 0.9,
                    'tags': [0, 0, 1]})