Example #1
def get_basic_data_from_set(set_num):
    """
    Load the Ubisoft provided file for a set, add some basic data to it and 
    split it between useful and useless according to basic_sieve.
    """
    global data_sets
    return get_data_from_file(data_sets[set_num], add_basic_data, basic_sieve)
Example #2
    def load_missing_profiles_data(self):
        """
        Load the profile files in the profiles directory and add them to the
        profiles database if they do not already exist
        """
        filename_re = re.compile(r'^\d+\.json$')
        # Get list of profiles in the profiles directory
        file_list = os.listdir(self.PROFILES_DIR)
        profile_list = []
        for filename in file_list:
            path = os.path.join(self.PROFILES_DIR, filename)
            if os.path.isfile(path) and filename_re.match(filename):
                profile_list.append(filename.split('.')[0])
        # Load applies data
        applies = self.get_applies()
        # Process profiles
        for uid in profile_list:
            # Check if uid is already in database
            if uid not in self.profiles:
                # Read file contents
                path = self.get_profile_path(uid)
                profile = json.loads(get_data_from_file(path))
                profile = self.profile_sanity_check(profile)
                # Append applies data
                if uid in applies:
                    profile.update(applies[uid])
                # Save profile data to database
                self.profiles[uid] = profile

        # Regen main files
        self.update_main_files()
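
The method above relies on a get_profile_path helper that is not shown. A minimal hypothetical sketch, consistent with the <uid>.json filenames scanned by filename_re above (an assumption, not the project's actual code):

    def get_profile_path(self, uid):
        # Hypothetical helper (assumed): profiles are stored as <uid>.json
        # files inside PROFILES_DIR, matching the filename_re scan above.
        return os.path.join(self.PROFILES_DIR, '%s.json' % uid)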
Example #3
 def get_index(self):
     """
     Get index data
     """
     test_and_create_file(self.INDEX_FILE, json.dumps([]))
     index = json.loads(get_data_from_file(self.INDEX_FILE))
     if not isinstance(index, list):
         raise IndexDataError(
             '%s does not contain a JSON list as root element' %
             self.INDEX_FILE)
     return index
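
Here and in several examples below, get_data_from_file(path) is used as a plain read-the-whole-file helper whose return value is passed to json.loads, and test_and_create_file(path, contents) seeds the file if it is missing. A minimal sketch under that assumption (hypothetical, not the projects' actual implementations):

import os

def get_data_from_file(path):
    # Hypothetical sketch (assumed): return the full text contents of the file.
    with open(path, 'r') as handle:
        return handle.read()

def test_and_create_file(path, default_contents):
    # Hypothetical sketch (assumed): create the file with the given default
    # contents if it does not already exist.
    if not os.path.exists(path):
        with open(path, 'w') as handle:
            handle.write(default_contents)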
Example #4
def get_data():
    """
    Load original data from file
    """
    signatures = []
    dataPath = "../data/Task2"
    for uid in range(1, 41):
        personSigs = []
        for sig in range(1, 41):
            fileName = "U%dS%d.TXT" % (uid, sig)
            filePath = os.path.join(dataPath, fileName)
            X, Y, T, P = utils.get_data_from_file(filePath)
            personSigs.append([X, Y, P])
        signatures.append(personSigs)
    return signatures
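
A brief usage note for get_data() above: it returns a 40x40 nested list (40 users, 40 signatures each), so an individual signature can be unpacked as below (illustrative call, not part of the original file):

signatures = get_data()
# signatures[user][sample] is an [X, Y, P] triple of coordinate and pressure
# sequences for one signature; user and sample are 0-based here.
x_coords, y_coords, pressure = signatures[0][0]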
Example #5
 def get_data_from_task2(self):
     """
     Load original data from svc2004 task2
     """
     LOGGER.info("Getting signatures")
     signatures = []
     dataPath = "../data/Task2"
     for uid in range(1, 41):
         personSigs = []
         for sig in range(1, 41):
             fileName = "U%dS%d.TXT" % (uid, sig)
             filePath = os.path.join(dataPath, fileName)
             X, Y, T, P = utils.get_data_from_file(filePath)
             personSigs.append([X, Y, P])
         signatures.append(personSigs)
     return signatures
Example #6
 def client_data_callback(self, server, message, path, query, client,
                          **kwargs):
     logging.debug('[%s] client data: Request at %s' %
                   (message.method, path))
     # Default response and status code
     response = ''
     status_code = 404
     match = re.match(CLIENTDATA_REGEX, path[len(self.client_data_url):])
     if match:
         filename = match.groupdict()['filename']
         filepath = os.path.join(self.args['profiles_dir'], filename)
         logging.debug('Serving %s -> %s' % (filename, filepath))
         try:
             response = get_data_from_file(filepath)
             status_code = 200
         except Exception as e:
             logging.error('clientdata: %s' % e)
Example #7
 def get_applies(self, uid=None):
     """
     Get all applies data, or the data for a specific profile given its uid
     """
     test_and_create_file(self.APPLIES_FILE, json.dumps({}))
     applies = json.loads(get_data_from_file(self.APPLIES_FILE))
     if not isinstance(applies, dict):
         raise AppliesDataError(
             '%s does not contain a JSON object as root element' %
             self.APPLIES_FILE)
     if uid:
         if uid in applies:
             return applies[uid]
         else:
             raise AppliesDataError(
                 'There is not applies information for given uid: %s' % uid)
     return applies
Example #8
File: bpr.py Project: ziiin/top-k-rec
 def load_training_data(self,
                        training_file,
                        uid_file,
                        iid_file,
                        data_copy=False):
     print('Load training data from %s' % (training_file))
     self.uids = get_id_dict_from_file(uid_file)
     self.iids = get_id_dict_from_file(iid_file)
     self.data = get_data_from_file(training_file, self.uids, self.iids)
     self.epoch_sample_limit = len(self.data)
     self.n_users = len(self.uids)
     self.n_items = len(self.iids)
     self.tr_data = self._data_to_training_dict(self.data, self.uids,
                                                self.iids)
     self.tr_users = list(self.tr_data.keys())
     if not data_copy:
         del self.data
     print('Loading finished!')
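
load_training_data above also depends on get_id_dict_from_file; judging from how len(self.uids) is used as the user count, it presumably maps each raw id in the file to a contiguous integer index. A hypothetical sketch under that assumption (not the project's actual code):

def get_id_dict_from_file(path):
    # Hypothetical helper (assumed): one raw id per line, mapped to a
    # contiguous 0-based index in file order.
    id_dict = {}
    with open(path, 'r') as handle:
        for line in handle:
            raw_id = line.strip()
            if raw_id and raw_id not in id_dict:
                id_dict[raw_id] = len(id_dict)
    return id_dict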
Example #9
    def load_training_data(self,
                           uid_file: str,
                           iid_file: str,
                           tr_file: str,
                           data_copy: bool = False) -> None:
        tprint('Load training data from %s' % (tr_file))
        self.uids = get_id_dict_from_file(uid_file)
        self.iids = get_id_dict_from_file(iid_file)
        self.data = get_data_from_file(tr_file, self.uids, self.iids)
        self.epoch_sample_limit = len(self.data)
        assert isinstance(self.uids, dict)
        assert isinstance(self.iids, dict)
        self.n_users = len(self.uids)
        assert self.n_users > 0
        self.n_items = len(self.iids)
        assert self.n_items > 0
        self.tr_data = self._data_to_training_dict(self.data, self.uids,
                                                   self.iids)
        assert isinstance(self.tr_data, dict)
        self.tr_users = list(self.tr_data.keys())

        if not data_copy:
            del self.data
        tprint('Loading finished!')
Example #10
    print("The path of result file: " + result_file_path)
    result_file = open(result_file_path, "wt")
    result_file.write(str(options) + "\n")
    result_file.flush()

    config_dict = vars(options)
    with open(log_dir + "/song.{}".format(options.suffix) + "_config.json", "w") as f_out:
        json.dump(config_dict, f_out, indent=4)

    #  Read in data and separate them into training part and development part

    print("Loading training set...")
    if options.infile_format == "fof":
        train_set, len_node, len_in_node, len_out_node, entity_size = get_data_from_fof(options)
    else:
        train_set, len_node, len_in_node, len_out_node, entity_size = get_data_from_file(options.train_path, options)

    random.shuffle(train_set)
    dev_set = train_set[:200]
    train_set = train_set[200:]

    print('Number of training samples:' + str(len(train_set)))
    print('Number of development samples:' + str(len(dev_set)))

    print("Number of node: " + str(len_node) + ", while max allowed is " + str(options.max_node_num))
    print("Number of parent node: " + str(len_in_node) + ", truncated to " + str(options.max_in_node_num))
    print("Number of child node: " + str(len_out_node) + ", truncated to " + str(options.max_out_node_num))
    print("The entity size: " + str(entity_size) + ", truncated to " + str(options.max_entity_size))

    # Build dictionary and mapping of words, characters, edges
Example #11
# TF log messages have four levels, in increasing severity:
# INFO < WARNING < ERROR < FATAL
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'  # show INFO + WARNING + ERROR + FATAL
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'  # show WARNING + ERROR + FATAL
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # show ERROR + FATAL
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # show FATAL only

if __name__ == '__main__':
    startTime = time()
    batch_size = 25
    capacity = 256  # maximum number of samples held in memory by the input queue
    means = [123.68, 116.779, 103.939]  # per-channel (R, G, B) means subtracted in VGG image preprocessing; already handled inside the Vgg16 class

    # xs, ys = utils.get_file('./cat_and_dog/train')  # get the list of images and the list of labels
    xs, ys = utils.get_data_from_file(
        "D:/Myproject/Python/Datasets/dogs-vs-cats/dogs-vs-cats/train"
    )  # get the list of images and the list of labels

    image_batch, label_batch = utils.get_batch(xs, ys, 224, 224, batch_size,
                                               capacity)  # load batches of images and labels from the lists

    x = tf.placeholder(tf.float32, [None, 224, 224, 3])
    y = tf.placeholder(tf.int32, [None, 2])  # two classes: cat and dog

    vgg = Vgg16(x)
    fc8_finetuining = vgg.probs

    loss_function = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=fc8_finetuining,
                                                labels=y))  # cross-entropy loss
    optimizer = tf.train.GradientDescentOptimizer(
Example #12
import time

import torch
import torch.nn as nn

from argparse import Namespace
from utils import asMinutes, timeSince, get_data_from_file, get_batches, array_to_vocab
from numpy.random import choice, randint

flags = Namespace(
    train_file='asimov.txt',
    seq_size=16,
    batch_size=64,
    embedding_size=64,
    lstm_size=64,
    gradients_norm=5,
    predict_top_k=5,
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
int_to_vocab, vocab_to_int, n_vocab, in_text, out_text, random_samples = get_data_from_file(
    flags.train_file, flags.batch_size, flags.seq_size)

net = RNNModule(n_vocab, flags.seq_size, flags.embedding_size, flags.lstm_size)
net = net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)


def train():
    print("Training...")
    iteration = 0
    start = time.time()
    epochs = 100
    for e in range(1, epochs):
        batches = get_batches(in_text, out_text, flags.batch_size,
                              flags.seq_size)
Example #13
def main():
    file_name = './results/0001.txt'
    data = Preprocess(get_data_from_file(file_name))
    data.extreme_filter()
    data.outlier_filter()
learning_rate = 0.001
# number of epochs over the full training set
num_epochs = 10
# batch size
batch_size = 128

# probability used for the dropout operation
dropout_rate = 0.5
# number of classes
num_classes = 2
# layers to be retrained
train_layers = ['fc8', 'fc7', 'fc6']

# read local images to build our own training set, returning image_batch and label_batch
# train, train_label = utils.get_files(train_dir)
train, train_label = utils.get_data_from_file(train_dir)
x, y = utils.get_batch(train, train_label, image_size, image_size, batch_size, 2000)

# TF placeholders for the graph input and output; a small batch of data is read each time as the current training data for backpropagation
# x =tf.placeholder(tf.float32,[batch_size,227,227,3],name='x-input')
# y =tf.placeholder(tf.float32,[batch_size,num_classes])
keep_prob = tf.placeholder(tf.float32)

# define the network architecture and initialize the model
model = AlexNet(x, keep_prob, num_classes, train_layers)
# get the output of the network's forward pass
score = model.fc8

# get the list of trainable variables for the layers to be trained
var_list = [v for v in tf.trainable_variables() if v.name.split('/')[0] in train_layers]