示例#1
0
def computeHistograms():
    """Calculate color histograms for all covers.

    For every set listed in ``Sets.json`` the covers present in the
    ``Covers`` directory are converted to HSV and described by ``s_bins``
    hue histograms (one per saturation band) of ``h_bins`` bins each, as
    configured in the ``Images`` section.  Each output row holds the cover
    id followed by the histogram normalised to sum to one, and the matrix
    is written to ``<set>_hist.csv`` in the database directory.  Covers
    that cannot be read are reported on stdout and left out of the output.
    """
    imgPath = cfg.databasePath() + '/Covers'
    config = cfg.readConfig()
    hBins = config.getint('Images', 'h_bins')
    sBins = config.getint('Images', 's_bins')
    with open(cfg.databasePath() + '/Sets.json', 'r') as setsFile:
        sets = json.load(setsFile)
    # A set gives O(1) membership tests below; stray files whose name does
    # not start with a number are ignored instead of aborting the run.
    coverIds = set()
    for entry in os.listdir(imgPath):
        try:
            coverIds.add(int(entry.split('.')[0]))
        except ValueError:
            continue
    for curSet in sets:
        idList = [x for x in sets[curSet] if x in coverIds]
        data = np.zeros((len(idList), hBins * sBins + 1))
        data[:, 0] = idList
        for i, coverId in enumerate(idList):
            coverFilename = imgPath + '/{}.jpg'.format(coverId)
            img = cv2.imread(coverFilename, cv2.IMREAD_COLOR)
            if img is None or img.shape[2] != 3:
                print('Skipping ' + str(coverId))
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            hue = img[:, :, 0]
            sat = img[:, :, 1]
            for j in range(sBins):
                minS = j * 256 / sBins
                maxS = (j + 1) * 256 / sBins
                # Half-open band [minS, maxS): the lower edge is inclusive
                # so pixels sitting exactly on a band boundary (including
                # saturation 0) are counted exactly once.
                inBand = np.logical_and(sat >= minS, sat < maxS)
                # Fixed bin edges keep columns comparable across covers;
                # 8-bit OpenCV HSV stores hue in [0, 180).
                data[i, 1+hBins*j:1+hBins*(j+1)], _ = \
                    np.histogram(hue[inBand], bins=hBins, range=(0, 180))
        # Rows that stayed all-zero belong to skipped covers; drop them
        # before normalising so no 0/0 division takes place.
        totals = np.sum(data[:, 1:], axis=1)
        valid = totals > 0
        data = data[valid]
        data[:, 1:] = data[:, 1:] / totals[valid][:, np.newaxis]
        np.savetxt(cfg.databasePath() + '/{}_hist.csv'.format(curSet), data)
示例#2
0
def buildBagOfWords():
    """Convert the stored image features of every set into BoW vectors.

    The cluster centres are read from ``Centers.csv`` and every descriptor
    of a game (``Features/<id>.npy``) is assigned to its nearest centre.
    The per-centre counts are normalised to sum to one and written,
    prefixed by the game id, to ``<set>_img.csv``.  Sets whose output file
    already exists are skipped, as are games without a feature file.
    """
    root = cfg.databasePath()
    featureDir = root + '/Features'
    nClusters = cfg.readConfig().getint('Images', 'n_clusters')
    centers = np.loadtxt(root + '/Centers.csv')
    searcher = NearestNeighbors(n_neighbors=1, n_jobs=-1)
    searcher.fit(centers)
    with open(root + '/Sets.json', 'r') as setsFile:
        sets = json.load(setsFile)
    for setName, members in sets.items():
        target = root + '/{}_img.csv'.format(setName)
        if os.path.exists(target):
            print('Skipping {} set.'.format(setName))
            continue
        present = [
            gameId for gameId in members
            if os.path.exists(featureDir + '/{}.npy'.format(gameId))
        ]
        bow = np.zeros((len(present), nClusters + 1))
        bow[:, 0] = present
        # Each ``row`` is a view into ``bow``, so the increments below
        # update the matrix in place.
        for row, gameId in zip(bow, present):
            descriptors = np.load(featureDir + '/{}.npy'.format(gameId))
            nearest = searcher.kneighbors(descriptors, return_distance=False)
            for hit in nearest:
                # Column 0 holds the id, hence the offset of one.
                row[hit + 1] += 1
        counts = bow[:, 1:]
        bow[:, 1:] = counts / np.sum(counts, axis=1)[:, np.newaxis]
        np.savetxt(target, bow)
示例#3
0
def downloadSwitch():
	"""Download games for the Nintendo Switch platform.

	Queries the IGDB API (key and field list come from the ``Database``
	config section) for games with a release on platform id 130, 50 games
	per page, and stores every returned game as ``Games/<id>.json`` inside
	the database directory.  The ``X-Count`` response header divided by the
	page size determines how many scroll requests follow the first page.
	"""
	gamesDir = cfg.databasePath() + '/Games'
	if not os.path.exists(gamesDir):
		os.makedirs(gamesDir)

	def storeGames(games):
		"""Write each game dict of ``games`` to its own JSON file."""
		for entry in games:
			target = cfg.databasePath() + '/Games/{}.json'.format(entry['id'])
			with open(target, 'w') as outFile:
				json.dump(entry, outFile, indent='\t')

	settings = cfg.readConfig()['Database']
	fields = settings['fields'].split(',')
	api = igdb(settings['api_key'])
	query = {
		'fields': fields,
		'filters': {
			'[release_dates.platform][any]': 130
		},
		'scroll': 1,
		'limit': 50
	}
	firstPage = api.games(query)
	storeGames(firstPage.body)
	scrollCount = round(int(firstPage.headers['X-Count']) / 50)
	for _ in range(scrollCount):
		page = api.scroll(firstPage)
		# Only list bodies are stored; any other body type is skipped.
		if type(page.body) is not list:
			continue
		storeGames(page.body)
示例#4
0
def splitDatabase():
    """Split database into train, validation and test sets.

    Game ids are taken from the file names in the ``Games`` directory.
    ``train_size`` and ``valid_size`` from the ``Database`` config section
    are fractions of the total number of games; the train and validation
    sets are drawn at random without replacement and the remaining ids
    form the test set.  The result is written to ``Sets.json`` as a dict
    with the keys ``train``, ``valid`` and ``test``.

    Raises:
        ValueError: if a requested sample is larger than the number of
            ids still available (raised by ``random.sample``).
    """
    config = cfg.readConfig()
    dataPath = cfg.databasePath() + '/Games'
    idList = []
    for entry in os.listdir(dataPath):
        if not os.path.isfile(dataPath + '/' + entry):
            continue
        try:
            idList.append(int(entry.split('.')[0]))
        except ValueError:
            # Not an ``<id>.*`` file (e.g. a hidden system file): ignore.
            continue
    trainSize = int(config.getfloat('Database', 'train_size') * len(idList))
    validSize = int(config.getfloat('Database', 'valid_size') * len(idList))
    sets = {}
    sets['train'] = random.sample(idList, trainSize)
    # Set lookups keep the filtering linear instead of quadratic while
    # preserving the original order of the remaining ids.
    trainIds = set(sets['train'])
    remaining = [x for x in idList if x not in trainIds]
    sets['valid'] = random.sample(remaining, validSize)
    validIds = set(sets['valid'])
    sets['test'] = [x for x in remaining if x not in validIds]
    with open(cfg.databasePath() + '/Sets.json', 'w') as outFile:
        json.dump(sets, outFile)
示例#5
0
def clusterFeatures():
    """Perform clustering on the extracted features.

    Loads ``Features/<id>.npy`` for every training-set game that has such
    a file, stacks all descriptors row-wise and fits a mini-batch k-means
    model with ``n_clusters`` (``Images`` config section) clusters.  The
    resulting cluster centres are written to ``Centers.csv``.

    Raises:
        ValueError: if no feature file exists for any training-set game.
    """
    featPath = cfg.databasePath() + '/Features'
    with open(cfg.databasePath() + '/Sets.json', 'r') as setsFile:
        idList = json.load(setsFile)['train']
    featFiles = [featPath + '/' + str(gameId) + '.npy' for gameId in idList]
    # np.vstack requires a real sequence; generators are rejected by
    # current NumPy releases.
    arrays = [np.load(path, allow_pickle=False)
              for path in featFiles
              if os.path.exists(path)]
    if not arrays:
        raise ValueError(
            'No feature files found for the training set in ' + featPath)
    features = np.vstack(arrays)
    # The stacked copy is all that is needed from here on.
    del arrays
    print(features.shape)
    print('Loading complete')
    config = cfg.readConfig()
    k = config.getint('Images', 'n_clusters')
    model = MiniBatchKMeans(n_clusters=k,
                            batch_size=50000,
                            verbose=True,
                            compute_labels=False)
    model.fit(features)
    np.savetxt(cfg.databasePath() + '/Centers.csv', model.cluster_centers_)
示例#6
0
def encodeData():
	"""Encode game metadata into numeric matrices, one pair per set.

	For every set in ``Sets.json`` two files are written to the database
	directory:

	* ``<set>_data.csv`` - game id followed by a one-hot ESRB block and the
	  multi-label encodings of game modes, genres and themes (code lists
	  come from the ``Preprocessing`` config section);
	* ``<set>_y.csv`` - game id followed by its ``aggregated_rating``.

	A game without an ``esrb`` entry is treated as having rating 1; missing
	``game_modes``/``genres``/``themes`` are treated as empty lists.
	"""
	settings = cfg.readConfig()
	esrbCode = settings.get('Preprocessing', 'esrb').split(',')
	rawModes = settings.get('Preprocessing', 'game_modes').split(',')
	rawGenres = settings.get('Preprocessing', 'genres').split(',')
	rawThemes = settings.get('Preprocessing', 'themes').split(',')
	modesCode = [int(code) for code in rawModes]
	genresCode = [int(code) for code in rawGenres]
	themesCode = [int(code) for code in rawThemes]
	nEsrb = len(esrbCode)
	width = nEsrb + len(modesCode) + len(genresCode) + len(themesCode)
	# Multi-label fields in the column order used by the output matrix.
	multiLabelFields = (
		('game_modes', modesCode),
		('genres', genresCode),
		('themes', themesCode),
	)
	with open(cfg.databasePath() + '/Sets.json', 'r') as setsFile:
		sets = json.load(setsFile)
	for setName, idList in sets.items():
		features = np.zeros((len(idList), width + 1), dtype=int)
		features[:, 0] = idList
		ratings = np.zeros((len(idList), 2))
		ratings[:, 0] = idList
		for row, gameId in enumerate(idList):
			gamePath = cfg.databasePath() + '/Games/{}.json'.format(gameId)
			with open(gamePath, 'r') as gameFile:
				gameData = json.load(gameFile)
			if 'esrb' in gameData:
				esrb = gameData['esrb']['rating']
			else:
				esrb = 1
			# Column 0 is the id, so the encoded blocks start at column 1.
			col = 1
			# One-hot block: slot k is set when the rating equals k + 1.
			features[row, col:col+nEsrb] = \
				[1 if esrb - 1 == slot else 0 for slot in range(nEsrb)]
			col += nEsrb
			for key, codes in multiLabelFields:
				labels = gameData[key] if key in gameData else []
				features[row, col:col+len(codes)] = encodeMultiLabel(labels, codes)
				col += len(codes)
			ratings[row, 1] = gameData['aggregated_rating']
		np.savetxt(cfg.databasePath() + '/{}_data.csv'.format(setName), features, fmt='%d')
		np.savetxt(cfg.databasePath() + '/{}_y.csv'.format(setName), ratings, fmt='%.2f')
示例#7
0
def _loadSummaries(gameIds):
	"""Return ``(ids, summaries)`` for the games in ``gameIds`` that have a summary."""
	ids = []
	summaries = []
	for gameId in gameIds:
		with open(cfg.databasePath() + '/Games/{}.json'.format(gameId), 'r') as inFile:
			gameData = json.load(inFile)
		if 'summary' in gameData:
			ids.append(gameId)
			summaries.append(gameData['summary'])
	return ids, summaries


def vectorizeSummaries():
	"""Create word count matrix from game summaries.

	A ``CountVectorizer`` is configured from the ``Text`` config section
	and restricted to the words listed in ``Vocabulary.txt`` (one per
	line).  It is fitted on the training summaries and then applied to the
	``train``, ``valid``, ``test``, ``rank_train`` and ``rank_test`` sets.
	For each set a matrix with the game id in the first column and the
	word counts in the remaining columns is written to ``<set>_text.csv``.
	Games without a ``summary`` entry are left out.

	Raises:
		KeyError: if ``Sets.json`` lacks one of the five set names; files
			for the sets processed before the missing one are still written.
	"""
	with open(cfg.configPath() + '/Vocabulary.txt', 'r') as inFile:
		vocab = [x.rstrip() for x in inFile]
	config = cfg.readConfig()
	# NOTE(review): options read from a config section arrive as strings;
	# confirm the ``Text`` section only holds string-valued parameters.
	params = config['Text']
	vectorizer = CountVectorizer(**params)
	vectorizer.set_params(vocabulary=vocab)
	with open(cfg.databasePath() + '/Sets.json', 'r') as setsFile:
		sets = json.load(setsFile)
	for setName in ('train', 'valid', 'test', 'rank_train', 'rank_test'):
		ids, summaries = _loadSummaries(sets[setName])
		if setName == 'train':
			vectorizer.fit(summaries)
		counts = vectorizer.transform(summaries)
		# The column count is read from the transformed matrix, which does
		# not depend on the version-specific feature-name accessors.
		matrix = np.zeros((len(ids), 1 + counts.shape[1]))
		matrix[:, 0] = ids
		matrix[:, 1:] = counts.toarray()
		np.savetxt(cfg.databasePath() + '/{}_text.csv'.format(setName), matrix, fmt='%d')