def get_basic_data_from_set(set_num):
    """
    Load the Ubisoft provided file for a set, add some basic data to it
    and split it between useful and useless according to basic_sieve.
    """
    global data_sets
    return get_data_from_file(data_sets[set_num], add_basic_data, basic_sieve)
def load_missing_profiles_data(self):
    """
    Load the profile files in the profiles directory and add them to the
    profiles database if they are not already there.
    """
    filename_re = re.compile(r'^\d+\.json$')
    # Get list of profiles in the profiles directory
    file_list = os.listdir(self.PROFILES_DIR)
    profile_list = []
    for filename in file_list:
        path = os.path.join(self.PROFILES_DIR, filename)
        if os.path.isfile(path) and filename_re.match(filename):
            profile_list.append(filename.split('.')[0])
    # Load applies data
    applies = self.get_applies()
    # Process profiles
    for uid in profile_list:
        # Check if uid is already in the database
        if uid not in self.profiles:
            # Read file contents
            path = self.get_profile_path(uid)
            profile = json.loads(get_data_from_file(path))
            profile = self.profile_sanity_check(profile)
            # Append applies data
            if uid in applies:
                profile.update(applies[uid])
            # Save profile data to database
            self.profiles[uid] = profile
    # Regenerate main files
    self.update_main_files()
def get_index(self):
    """ Get index data """
    test_and_create_file(self.INDEX_FILE, json.dumps([]))
    index = json.loads(get_data_from_file(self.INDEX_FILE))
    if not isinstance(index, list):
        raise IndexDataError(
            '%s does not contain a JSON list as root element' % self.INDEX_FILE)
    return index
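get_index and the profile loaders above lean on two small helpers, test_and_create_file and get_data_from_file, that are not shown here. A minimal sketch of what they might look like, assuming they simply seed a file with default contents and read a file back as text; the bodies below are assumptions, not the project's actual implementation:

import os

def test_and_create_file(path, default_contents):
    # Assumed behaviour: create the file with the given default contents
    # only if it does not exist yet, so existing data is never overwritten.
    if not os.path.exists(path):
        with open(path, 'w') as fd:
            fd.write(default_contents)

def get_data_from_file(path):
    # Assumed behaviour: return the whole file contents as a string,
    # ready to be passed to json.loads by the callers above.
    with open(path, 'r') as fd:
        return fd.read()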
def get_data():
    """ Load original data from file """
    signatures = []
    dataPath = "../data/Task2"
    for uid in range(1, 41):
        personSigs = []
        for sig in range(1, 41):
            fileName = "U%dS%d.TXT" % (uid, sig)
            filePath = os.path.join(dataPath, fileName)
            X, Y, T, P = utils.get_data_from_file(filePath)
            personSigs.append([X, Y, P])
        signatures.append(personSigs)
    return signatures
def get_data_from_task2(self):
    """ Load original data from svc2004 task2 """
    LOGGER.info("Getting signatures")
    signatures = []
    dataPath = "../data/Task2"
    for uid in range(1, 41):
        personSigs = []
        for sig in range(1, 41):
            fileName = "U%dS%d.TXT" % (uid, sig)
            filePath = os.path.join(dataPath, fileName)
            X, Y, T, P = utils.get_data_from_file(filePath)
            personSigs.append([X, Y, P])
        signatures.append(personSigs)
    return signatures
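Both signature loaders unpack four per-point series (X, Y, T, P) from utils.get_data_from_file. A minimal sketch of such a reader, assuming the usual SVC2004 Task2 layout (a point count on the first line, then whitespace-separated columns with X, Y and timestamp as the first three fields and pressure as the last one); this layout is an assumption, not the project's actual parser:

def get_data_from_file(filePath):
    # Assumed SVC2004 Task2 layout: first line is the point count, then one
    # point per line with X, Y, timestamp first and pressure as the last field.
    X, Y, T, P = [], [], [], []
    with open(filePath) as f:
        next(f)  # skip the point-count header line
        for line in f:
            fields = line.split()
            if not fields:
                continue
            X.append(int(fields[0]))
            Y.append(int(fields[1]))
            T.append(int(fields[2]))
            P.append(int(fields[-1]))
    return X, Y, T, P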
def client_data_callback(self, server, message, path, query, client, **kwargs):
    logging.debug('[%s] client data: Request at %s' % (message.method, path))
    # Default response and status code
    response = ''
    status_code = 404
    match = re.match(CLIENTDATA_REGEX, path[len(self.client_data_url):])
    if match:
        filename = match.groupdict()['filename']
        filepath = os.path.join(self.args['profiles_dir'], filename)
        logging.debug('Serving %s -> %s' % (filename, filepath))
        try:
            response = get_data_from_file(filepath)
            status_code = 200
        except Exception as e:
            logging.error('clientdata: %s' % e)
def get_applies(self, uid=None):
    """ Get all applies data, or the data for a specific profile given its uid """
    test_and_create_file(self.APPLIES_FILE, json.dumps({}))
    applies = json.loads(get_data_from_file(self.APPLIES_FILE))
    if not isinstance(applies, dict):
        raise AppliesDataError(
            '%s does not contain a JSON object as root element' % self.APPLIES_FILE)
    if uid:
        if uid in applies:
            return applies[uid]
        else:
            raise AppliesDataError(
                'There is no applies information for the given uid: %s' % uid)
    return applies
def load_training_data(self, training_file, uid_file, iid_file, data_copy=False):
    print('Load training data from %s' % (training_file))
    self.uids = get_id_dict_from_file(uid_file)
    self.iids = get_id_dict_from_file(iid_file)
    self.data = get_data_from_file(training_file, self.uids, self.iids)
    self.epoch_sample_limit = len(self.data)
    self.n_users = len(self.uids)
    self.n_items = len(self.iids)
    self.tr_data = self._data_to_training_dict(self.data, self.uids, self.iids)
    self.tr_users = list(self.tr_data.keys())
    if not data_copy:
        del self.data
    print('Loading finished!')
def load_training_data(self,
                       uid_file: str,
                       iid_file: str,
                       tr_file: str,
                       data_copy: bool = False) -> None:
    tprint('Load training data from %s' % (tr_file))
    self.uids = get_id_dict_from_file(uid_file)
    self.iids = get_id_dict_from_file(iid_file)
    self.data = get_data_from_file(tr_file, self.uids, self.iids)
    self.epoch_sample_limit = len(self.data)
    assert isinstance(self.uids, dict)
    assert isinstance(self.iids, dict)
    self.n_users = len(self.uids)
    assert self.n_users > 0
    self.n_items = len(self.iids)
    assert self.n_items > 0
    self.tr_data = self._data_to_training_dict(self.data, self.uids, self.iids)
    assert isinstance(self.tr_data, dict)
    self.tr_users = list(self.tr_data.keys())
    if not data_copy:
        del self.data
    tprint('Loading finished!')
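Both variants of load_training_data assume that get_id_dict_from_file maps raw ids to dense indices and that get_data_from_file turns the training file into interaction pairs restricted to known ids. A minimal sketch under those assumptions; the one-id-per-line and "user item" pair-per-line formats are themselves assumptions, not the project's actual file layout:

def get_id_dict_from_file(id_file):
    # Assumed format: one raw id per line; map each id to a dense integer index.
    ids = {}
    with open(id_file) as f:
        for line in f:
            raw_id = line.strip()
            if raw_id and raw_id not in ids:
                ids[raw_id] = len(ids)
    return ids

def get_data_from_file(data_file, uids, iids):
    # Assumed format: one "user item" pair per line; keep only pairs whose
    # ids are present in the uid/iid dictionaries, re-indexed densely.
    data = []
    with open(data_file) as f:
        for line in f:
            fields = line.split()
            if len(fields) < 2:
                continue
            uid, iid = fields[0], fields[1]
            if uid in uids and iid in iids:
                data.append((uids[uid], iids[iid]))
    return data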
print("The path of result file: " + result_file_path) result_file = open(result_file_path, "wt") result_file.write(str(options) + "\n") result_file.flush() config_dict = vars(options) with open(log_dir + "/song.{}".format(options.suffix) + "_config.json", "w") as f_out: json.dump(config_dict, f_out, indent=4) # Read in data and separate them into training part and development part print("Loading training set...") if options.infile_format == "fof": train_set, len_node, len_in_node, len_out_node, entity_size = get_data_from_fof(options) else: train_set, len_node, len_in_node, len_out_node, entity_size = get_data_from_file(options.train_path, options) random.shuffle(train_set) dev_set = train_set[:200] train_set = train_set[200:] print('Number of training samples:' + str(len(train_set))) print('Number of development samples:' + str(len(dev_set))) print("Number of node: " + str(len_node) + ", while max allowed is " + str(options.max_node_num)) print("Number of parent node: " + str(len_in_node) + ", truncated to " + str(options.max_in_node_num)) print("Number of child node: " + str(len_out_node) + ", truncated to " + str(options.max_out_node_num)) print("The entity size: " + str(entity_size) + ", truncated to " + str(options.max_entity_size)) # Build dictionary and mapping of words, characters, edges
# TF log messages have four severity levels, in increasing importance:
# INFO < WARNING < ERROR < FATAL
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'  # show INFO + WARNING + ERROR + FATAL
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'  # show WARNING + ERROR + FATAL
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # show ERROR + FATAL
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # show FATAL

if __name__ == '__main__':
    startTime = time()
    batch_size = 25
    capacity = 256  # maximum number of samples held in the input queue
    # Per-channel (R, G, B) means subtracted during VGG preprocessing; already handled inside the Vgg16 class
    means = [123.68, 116.779, 103.939]
    # xs, ys = utils.get_file('./cat_and_dog/train')  # get the image list and label list
    xs, ys = utils.get_data_from_file(
        "D:/Myproject/Python/Datasets/dogs-vs-cats/dogs-vs-cats/train"
    )  # get the image list and label list
    # Load batches of images and labels by reading from the lists
    image_batch, label_batch = utils.get_batch(xs, ys, 224, 224, batch_size, capacity)
    x = tf.placeholder(tf.float32, [None, 224, 224, 3])
    y = tf.placeholder(tf.int32, [None, 2])  # two classes: cat and dog
    vgg = Vgg16(x)
    fc8_finetuining = vgg.probs
    # Cross-entropy loss
    loss_function = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=fc8_finetuining, labels=y))
    optimizer = tf.train.GradientDescentOptimizer(
import time

import torch
import torch.nn as nn

from argparse import Namespace
from utils import asMinutes, timeSince, get_data_from_file, get_batches, array_to_vocab
from numpy.random import choice, randint

flags = Namespace(
    train_file='asimov.txt',
    seq_size=16,
    batch_size=64,
    embedding_size=64,
    lstm_size=64,
    gradients_norm=5,
    predict_top_k=5,
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

int_to_vocab, vocab_to_int, n_vocab, in_text, out_text, random_samples = get_data_from_file(
    flags.train_file, flags.batch_size, flags.seq_size)

net = RNNModule(n_vocab, flags.seq_size, flags.embedding_size, flags.lstm_size)
net = net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)


def train():
    print("Training...")
    iteration = 0
    start = time.time()
    epochs = 100
    for e in range(1, epochs):
        batches = get_batches(in_text, out_text, flags.batch_size, flags.seq_size)
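The training loop above relies on get_batches to slice in_text and out_text into (batch_size, seq_size) windows. A minimal sketch of such a generator, assuming in_text and out_text are already integer-encoded numpy arrays of shape (batch_size, total_steps); both the shape and the body are assumptions, not the project's actual helper:

import numpy as np

def get_batches(in_text, out_text, batch_size, seq_size):
    # Assumed layout: in_text/out_text have shape (batch_size, total_steps);
    # yield one (input, target) window of seq_size steps at a time.
    num_batches = np.prod(in_text.shape) // (seq_size * batch_size)
    for i in range(0, num_batches * seq_size, seq_size):
        yield in_text[:, i:i + seq_size], out_text[:, i:i + seq_size]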
def main():
    file_name = './results/0001.txt'
    data = Preprocess(get_data_from_file(file_name))
    data.extreme_filter()
    data.outlier_filter()
learning_rate = 0.001
# Number of full passes over the training set
num_epochs = 10
# Mini-batch size
batch_size = 128
# Keep probability used by the dropout operation
dropout_rate = 0.5
# Number of classes
num_classes = 2
# Layers that need to be retrained
train_layers = ['fc8', 'fc7', 'fc6']

# Read local images to build the training set; returns image_batch and label_batch
# train, train_label = utils.get_files(train_dir)
train, train_label = utils.get_data_from_file(train_dir)
x, y = utils.get_batch(train, train_label, image_size, image_size, batch_size, 2000)

# TF placeholders for graph input and output; a small chunk of data is read each step
# as the current training data for backpropagation
# x = tf.placeholder(tf.float32, [batch_size, 227, 227, 3], name='x-input')
# y = tf.placeholder(tf.float32, [batch_size, num_classes])
keep_prob = tf.placeholder(tf.float32)

# Define the network structure and initialize the model
model = AlexNet(x, keep_prob, num_classes, train_layers)
# Output of the network's forward pass
score = model.fc8
# List of trainable variables in the layers we want to train
var_list = [v for v in tf.trainable_variables() if v.name.split('/')[0] in train_layers]
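Both the VGG and AlexNet fine-tuning scripts above expect utils.get_data_from_file(train_dir) to return a list of image paths and a matching list of integer labels. A minimal sketch, assuming the Kaggle dogs-vs-cats naming convention where each file name starts with "cat" or "dog"; the body is an assumption, not the actual utility:

import os

def get_data_from_file(train_dir):
    # Assumed behaviour: walk the training directory and build parallel lists
    # of image paths and integer labels (0 for cat, 1 for dog), based on the
    # "cat.*.jpg" / "dog.*.jpg" naming convention of the dogs-vs-cats dataset.
    image_list, label_list = [], []
    for filename in os.listdir(train_dir):
        path = os.path.join(train_dir, filename)
        if filename.startswith('cat'):
            image_list.append(path)
            label_list.append(0)
        elif filename.startswith('dog'):
            image_list.append(path)
            label_list.append(1)
    return image_list, label_list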