def parse_dataset_size(self, info):
    # Each unset entry in info bumps the size by a factor of 10; stop at the first set entry.
    index = 100
    for item in info:
        if item:
            break
        else:
            index = index * 10
    self.train_dataset = Dataset('train_' + self.dataset_size_dict[index])
    self.test_dataset = Dataset('test_1K.xlsx')

def load_dataset_into_memory():
    # Load the dataset into memory (filename, delimiter, metadata and debug
    # are expected to be defined in the enclosing scope).
    db = Dataset(filename, delimiter=delimiter, metadata=metadata)
    if debug:
        print('Db loaded:')
        print("Attributes: %s\nPredict Class: %s\nNumerical Classes: %s"
              % (db.attributes, db.target_attribute, db.numeric))
    return db

def getDataset(self):
    # Process pending GUI events so the interface stays responsive while loading.
    QApplication.instance().processEvents()
    Validator.validateFilePath(self.inputFilePath)
    dataset = Dataset(self.inputFilePath, self.responseVariable,
                      self.predictors, self.categoricalVariables)
    dataset.validateVariableLists()
    dataset.preprocessDataset(self.standardize)
    return dataset

def load_db(self):
    # Load DB, closing any previously opened dataset first.
    try:
        self.db.close()
    except AttributeError:  # db is not set
        pass
    try:
        self.db = Dataset(self.filename,
                          delimiter=self.delimiter_char.get(),
                          metadata=self.filename_meta)
        self.Error_Message['fg'] = "#267f36"
        self.Error_Message['text'] = "Dataset has been loaded successfully"
    except Exception as e:
        self.Error_Message['fg'] = "#ff3c3c"
        self.Error_Message['text'] = e
    return

delimiter = ';'
metadata = ""
seed = None

# Minimal command-line parsing: each flag takes the following argv entry as its value.
for i in range(len(argv)):
    if argv[i] in ['--file', '-f']:
        filename = argv[i + 1]
    elif argv[i] in ['--delimiter', '-d']:
        delimiter = argv[i + 1]
    elif argv[i] in ['--meta', '-m']:
        metadata = argv[i + 1]
    elif argv[i] in ['--seed', '-s']:
        seed = int(argv[i + 1])
    elif argv[i] in ['--help', '-h']:
        print_usage(argv[0])
        exit(0)

if seed is not None:
    random.seed(seed)

# load the dataset into memory
db = Dataset(filename, delimiter=delimiter, metadata=metadata)

# generate a decision tree from the dataset
tree = DecisionTree(db.data, db.attributes, db.target_attribute, db.numeric,
                    single_tree_print=True)

# print the resulting tree on the terminal
print(tree)

layers = eval(args.layers)
reg_layers = eval(args.reg_layers)
num_negatives = args.num_neg
learner = args.learner
learning_rate = args.lr
batch_size = args.batch_size
epochs = args.epochs
verbose = args.verbose
topK = 10
evaluation_threads = 1  # mp.cpu_count()
print("MLP arguments: %s " % (args))
model_out_file = 'Pretrain/%s_MLP_%s_%d.h5' % (args.dataset, args.layers, time())

# Loading data
t1 = time()
dataset = Dataset(args.path + args.dataset)
train, testRatings, testNegatives = dataset.trainMatrix, dataset.testRatings, dataset.testNegatives
num_users, num_items = train.shape
print("Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d"
      % (time() - t1, num_users, num_items, train.nnz, len(testRatings)))

# Build model and compile it with the requested optimizer
model = get_model(num_users, num_items, layers, reg_layers)
if learner.lower() == "adagrad":
    model.compile(optimizer=Adagrad(lr=learning_rate), loss='binary_crossentropy')
elif learner.lower() == "rmsprop":
    model.compile(optimizer=RMSprop(lr=learning_rate), loss='binary_crossentropy')
elif learner.lower() == "adam":
    model.compile(optimizer=Adam(lr=learning_rate), loss='binary_crossentropy')
else:
    model.compile(optimizer=SGD(lr=learning_rate), loss='binary_crossentropy')

def loadGMF():
    # Load the ml-1m data, rebuild the GMF model (factor size 8),
    # restore the pretrained weights and evaluate them.
    dataset = Dataset('../Data/ml-1m')
    model = src.GMF.get_model(dataset.num_users, dataset.num_items, 8)
    model.load_weights('../Pretrain/ml-1m_GMF_8_1501651698.h5')
    hits, ndcgs = src.evaluate.evaluate_model(model, dataset.testRatings,
                                              dataset.testNegatives, 10, 1)

def window_sums(x, N):  # name and signature assumed; the original def line is missing
    # Sum x over consecutive, non-overlapping windows of length N via a cumulative sum.
    cumsum = np.cumsum(np.insert(x, 0, 0))
    sums = cumsum[N:] - cumsum[:-N]
    return sums[::N]


if __name__ == '__main__':
    config = ConfigParser.ConfigParser()
    config.read('config.ini')
    section = 'SEARCH'
    data_file = config.get(section, 'ARTICLES_DATA_FILE')
    search_term = config.get(section, 'SEARCH_TERM').decode('utf8')
    window = int(config.get(section, 'WINDOW_SIZE'))
    output_file = config.get(section, 'OUTPUT_FILE')

    article_dataset = Dataset(data_file)
    filtered_articles = article_dataset.search_articles(search_term)

    # Count matching articles and their word totals per date.
    counts_map = dd(lambda: [0, 0])
    for a in filtered_articles:
        date = a[0]
        articles, words = counts_map[date]
        counts_map[date] = [articles + 1, words + a[1]]

    # Fill in dates with no matching articles so every day in the range is present.
    delta = END_DATE - START_DATE
    for i in range(delta.days + 1):
        date = START_DATE + dt.timedelta(days=i)
        if date not in counts_map:
            counts_map[date] = [0, 0]