def read_bandit_data(self, repo_dir, instance_type):
    """Load logged bandit-feedback records from a text file.

    File layout, as parsed below (anything after a ``#`` on a line is
    ignored):

    * Header line: ``numRecords numFeatures numLabels``.
    * One record line per instance, starting with
      ``sampledAction sampledLoss sampledPropensity``.

      - ``'MultiClass'``: ``sampledAction`` is an integer; the remaining
        tokens are ``featureId:value`` pairs.
      - ``'MultiLabel'``: ``sampledAction`` is a comma-separated list of
        label indices (or ``-1`` for "no label set"); the remaining
        tokens are ``featureId:value`` pairs.
      - ``'Brute'``: ``sampledAction`` is an integer and the fourth token
        is the number of action lines that follow. Each action line is
        ``rowIndex featureId:value ...``.

    Args:
        repo_dir: Either the full path of a ``.txt`` file, or a prefix to
            which ``instance_type + '.txt'`` is appended.
        instance_type: One of ``'MultiClass'``, ``'MultiLabel'`` or
            ``'Brute'``.

    Returns:
        A list with one ``Instance.*`` object per record, each populated
        via ``set(propensity, loss, features, sampledY)``.

    Raises:
        SystemExit: If the input file does not exist.
        ValueError: If a record is read for an unrecognised
            ``instance_type``.
    """

    def strip_comment(line):
        """Return *line* without its trailing ``#`` comment, if any."""
        return line.partition('#')[0]

    if repo_dir.endswith('.txt'):
        fileName = repo_dir
    else:
        fileName = repo_dir + instance_type + '.txt'

    if not os.path.exists(fileName):
        print("Dataset:read_bandit_data\t[ERR]\tInput file not found at ", fileName, flush=True)
        # NOTE(review): exits with status 0 although this is an error path;
        # kept as-is because callers/scripts may rely on it -- confirm.
        sys.exit(0)

    # Context manager guarantees the handle is released even if reading fails.
    with open(fileName, 'r') as f:
        allLines = f.readlines()

    headerTokens = strip_comment(allLines[0]).split()
    numRecords = int(headerTokens[0])
    numFeatures = int(headerTokens[1])
    numLabels = int(headerTokens[2])

    print("Dataset:read_bandit_data\t[LOG]\tFilename: %s Number of instances: %d" %
          (fileName, numRecords), flush=True)

    # currIndex tracks the physical line; Brute records span several lines,
    # so it advances independently of the record counter i.
    currIndex = 0
    instanceList = []
    for i in range(numRecords):
        currIndex += 1
        tokens = strip_comment(allLines[currIndex]).split()
        sampledAction = tokens[0]
        sampledLoss = float(tokens[1])
        sampledPropensity = float(tokens[2])

        if instance_type == 'MultiClass' or instance_type == 'MultiLabel':
            if instance_type == 'MultiClass':
                newInstance = Instance.MultiClass(numLabels, numFeatures)
                sampledY = int(sampledAction)
            else:
                newInstance = Instance.MultiLabel(numLabels, numFeatures)
                # Builtin int replaces the numpy.int alias, which was removed
                # in NumPy 1.24; the resulting dtype is the same.
                sampledY = numpy.zeros(numLabels, dtype=int)
                if sampledAction != '-1':
                    for eachLabel in sampledAction.split(','):
                        sampledY[int(eachLabel)] = 1

            # Single-row feature vector; a repeated feature id overwrites
            # the earlier value.
            instanceFeature = scipy.sparse.csr_matrix(
                (1, numFeatures), dtype=numpy.longdouble)
            for featureToken in tokens[3:]:
                idVal = featureToken.split(':')
                instanceFeature[0, int(idVal[0])] = float(idVal[1])

        elif instance_type == 'Brute':
            newInstance = Instance.Brute(numFeatures)
            sampledY = int(sampledAction)

            # One feature row per candidate action, read from the next
            # numActions lines of the file.
            numActions = int(tokens[3])
            instanceFeature = scipy.sparse.csr_matrix(
                (numActions, numFeatures), dtype=numpy.longdouble)
            for _ in range(numActions):
                currIndex += 1
                actionTokens = strip_comment(allLines[currIndex]).split()
                currentRow = int(actionTokens[0])
                for featureToken in actionTokens[1:]:
                    idVal = featureToken.split(':')
                    instanceFeature[currentRow, int(idVal[0])] = float(idVal[1])

        else:
            # Previously this fell through and failed with an opaque
            # AttributeError on None; fail with an explicit message instead.
            raise ValueError(
                "Dataset:read_bandit_data\t[ERR]\tUnknown instance_type: %r"
                % (instance_type,))

        newInstance.set(sampledPropensity, sampledLoss, instanceFeature, sampledY)
        instanceList.append(newInstance)

        # Lightweight progress indicator: one dot every 20 records.
        if i % 20 == 0:
            print(".", flush=True, end='')

    print('')
    print("Dataset:read_bandit_data\t[LOG]\tFinished loading filename: %s Number of instances: %d" %
          (fileName, numRecords), flush=True)
    return instanceList