Example No. 1
def make_kvs():
    print("!!!! do you konw what does this means !!!")
    id_set = set()
    with open(fname) as f:
        for line in f:
            space = re.sub('[;#\n]', ' ', line)
            words = space.split(" ")
            for word in words:
                id_set.add(word)
    id_set.remove('')  # splitting on a single space leaves an empty string '' in the set
    id_set.remove('in_links')
    id_set.remove('link_ID')
    id_set.remove('out_links')

    kvs = {}
    vks = {}
    count = 0
    for i in id_set:
        kvs[i] = str(count)
        vks[str(count)] = i
        count = count + 1
    fp.clear(fp.kv_dir)
    with open(fp.kvs, "w+") as f:
        for i in kvs:
            line = i + " " + kvs[i] + "\n"
            f.write(line)
    with open(fp.vks, "w+") as f:
        for i in vks:
            line = i + " " + vks[i] + "\n"
            f.write(line)
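
A minimal sketch of how the mapping written above could be read back in, assuming fp.kvs holds one "original_id mapped_index" pair per line as written by make_kvs(); the helper name load_kvs is illustrative only:

def load_kvs(path):
    # Rebuild the id -> index dictionary written by make_kvs().
    kvs = {}
    with open(path) as f:
        for line in f:
            parts = line.split()
            if len(parts) == 2:
                kvs[parts[0]] = parts[1]
    return kvs

# usage sketch, using the same fp.kvs path as above:
# kvs = load_kvs(fp.kvs)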
Example No. 2
def make_matrix_day():
    """
    创建数据之前,首先会清理整个文件夹
    为了创建文件使用一个很智障的操作
    所有6月的数据全部都是被抛弃了,其他的函数用于处理这一个问题
    数值缺失的问题使用的方法是:
    使用0 代替 ,然后使用其他的方法搞定数值的问题,而且同时需要处理的问题不只是这一个,
    错误的数据的处理的方法应该是相同的
    创建数据的使用的方法为:天 小时 2分钟 也就是维度为 92 24 30 的矩阵
    """
    fp.clear(fp.matrix)
    files = [
        os.path.join(fp.time_dir, f) for f in os.listdir(fp.time_dir)
        if os.path.isfile(os.path.join(fp.time_dir, f))
    ]

    # build one matrix per file; data from June is discarded
    import ntpath

    for tfile in files:
        M = np.zeros((92, 24, 30), dtype=np.float32)
        fname = ntpath.basename(tfile)
        fname = fname.split('.')[0]
        fname += "d"
        fname += ".npy"
        fname = os.path.join(fp.matrix, fname)
        with open(tfile) as f:
            for line in f:
                line = re.sub("[\n]", "", line)
                words = line.split(" ")
                if (words[1] == "06"):
                    continue
                M[map_day(words[1], words[2])][int(
                    words[3])][int(words[4]) // 2] = float(words[5])
        np.save(fname, M)
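
A small usage sketch for the saved matrices, assuming the day / hour / 2-minute layout of shape 92 x 24 x 30 described in the docstring; the file name "1d.npy" is only illustrative (make_matrix_day() appends "d" to each source file's base name):

import os
import numpy as np

M = np.load(os.path.join(fp.matrix, "1d.npy"))  # illustrative file name
assert M.shape == (92, 24, 30)
# value for day index 0, hour 8, minutes 30-31 (2-minute slot 15)
value = M[0][8][15]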
Example No. 3
def seperate_links():
    """
    由于使用的文件的处理的方式为append,所以需要首先将links 中间的文件全部删除
    处理之后文件如果放置在相同的位置,那么绝对不可以使用.txt 结尾
    """
    fp.clear(fp.links_dir)
    with open(fp.compressed_train_data) as f:
        for line in f:
            words = line.split(" ")
            link_file = os.path.join(fp.links_dir, words[0] + ".txt")
            with open(link_file, "a+") as g:
                g.write(line)
Example No. 4
def show_data():
    logdir = fp.to_dir + "/show_data"
    fp.clear(logdir)

    x = tf.placeholder(tf.float32, [30], name="x")
    tf.summary.histogram("day", x)
    train_writer = tf.summary.FileWriter(logdir)
    merged = tf.summary.merge_all()
    data = np.load(fp.matrix + "/1.npy")
    with tf.Session() as sess:
        for i in range(32 * 30):
            summary = sess.run(merged, feed_dict={x: data[:, :, i]})
            train_writer.add_summary(summary, i)
    train_writer.close()
Example No. 5
def sort_link_bytime():
    """
    相同的原因,首先会对于该文件夹中间的数据进行全部清除,然后添加
    """
    fp.clear(fp.time_dir)
    files = [
        os.path.join(fp.links_dir, f) for f in os.listdir(fp.links_dir)
        if os.path.isfile(os.path.join(fp.links_dir, f))
    ]
    import ntpath

    for linkf in files:
        fname = ntpath.basename(linkf)
        fname = fname.split('.')[0]
        fname += ".time"
        timef = os.path.join(fp.time_dir, fname)
        with open(linkf) as f, open(timef, "a+") as g:
            for line in sorted(f):
                g.write(line)
Example No. 6
def navie_full(full_data=False):
    """
    仅仅使用基本最简单的全连接来得到的
    """
    train_data = np.load(fp.train)
    test_data = np.load(fp.test)
    june = None
    source_data = None

    if (full_data):
        whole = np.load(fp.fix_data)
        whole = whole[:, :, 6:9, :]
        train_data = whole.reshape(132 * 92, 90)
        june = np.load(fp.June)
        source_data = june.reshape(132 * 30, 60)

    if (full_data):
        assert train_data.shape == (132 * 92, 90)
        if (june is not None):
            print("go on!")

    if (june is None):
        print("shoule be false")

    fp.clear(fp.logdir)

    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 60], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, 30], name='y-input')

    def weight_variable(shape):
        """Create a weight variable with appropriate initialization."""
        initial = tf.truncated_normal(shape, stddev=1)
        return tf.Variable(initial)

    def bias_variable(shape):
        """Create a bias variable with appropriate initialization."""
        initial = tf.constant(1.0, shape=shape)
        return tf.Variable(initial)

    def dense_layer(input_tensor,
                    input_dim,
                    output_dim,
                    layer_name,
                    act=tf.nn.relu):
        with tf.name_scope(layer_name):
            weights = weight_variable([input_dim, output_dim])
            biases = bias_variable([output_dim])
            preactivate = tf.matmul(input_tensor, weights) + biases
            activations = act(preactivate, name='activation')
            tf.summary.histogram("activation", activations)
            return activations

    def dense_batch_relu(input_tensor, input_dim, output_dim, layer_name,
                         is_training):
        with tf.variable_scope(layer_name):
            h1 = dense_layer(input_tensor,
                             input_dim,
                             output_dim,
                             layer_name,
                             act=tf.nn.relu)
            h2 = tf.contrib.layers.batch_norm(h1,
                                              decay=0.9,
                                              center=True,
                                              scale=True,
                                              updates_collections=None,
                                              is_training=is_training,
                                              reuse=None,
                                              trainable=True,
                                              scope=layer_name)
            return tf.nn.relu(h2, 'relu')

    keep_prob = tf.placeholder(tf.float32)
    is_training = tf.placeholder(tf.bool)
    hidden1 = dense_batch_relu(x, 60, 100, "layer1", is_training)
    dropped1 = tf.nn.dropout(hidden1, keep_prob)

    hidden2 = dense_batch_relu(dropped1, 100, 100, "layer2", is_training)
    dropped2 = tf.nn.dropout(hidden2, keep_prob)

    hidden3 = dense_batch_relu(dropped2, 100, 60, "layer3", is_training)
    dropped3 = tf.nn.dropout(hidden3, keep_prob)

    y = dense_layer(dropped3, 60, 30, 'layer4')

    # ground truth: y_, predicted values: y
    diff = tf.abs(tf.subtract(y_, y))
    ratio = tf.divide(diff, y_)
    mean = tf.reduce_sum(ratio)

    summary = tf.summary.scalar("mean", mean)

    train_step = tf.train.AdamOptimizer(
        learning_rate=Para.learningRate).minimize(mean)

    with tf.Session() as sess:
        merged = tf.summary.merge_all()
        tf.global_variables_initializer().run()
        train_writer = tf.summary.FileWriter(fp.logdir, sess.graph)

        index = 0
        for i in range(Para.epoch):
            if (i % 50 == 0):
                summary, res = sess.run(
                    [merged, mean],
                    feed_dict={
                        x: test_data[:, 0:60],
                        y_: test_data[:, 60:90],
                        keep_prob: 1,
                        is_training: False
                    })
                train_writer.add_summary(summary, i)
                print("{0} : {1}".format(i, res / 71280))

            np.random.shuffle(train_data)
            train_sample = train_data[0:Para.batch_size]

            train_x = train_sample[:, 0:60]
            train_y = train_sample[:, 60:90]
            sess.run(train_step,
                     feed_dict={
                         x: train_x,
                         y_: train_y,
                         keep_prob: 0.6,
                         is_training: True
                     })
        if (full_data):
            res_june = sess.run(y,
                                feed_dict={
                                    x: source_data,
                                    keep_prob: 1.0,
                                    is_training: False
                                })
            res_june = res_june.reshape(132, 30, 30)
            write_to_file(res_june)
    train_writer.close()
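
For reference, a minimal NumPy sketch of the error measure the graph above computes (the summed relative error |y_ - y| / y_, which the log line then divides by 71280); the function and array names here are illustrative only:

import numpy as np

def relative_error_sum(y_true, y_pred):
    # Mirrors diff / ratio / mean in navie_full(): sum of |y_true - y_pred| / y_true.
    # Assumes y_true has no zero entries (naive_conv() below masks them out instead).
    diff = np.abs(y_true - y_pred)
    return np.sum(diff / y_true)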
Example No. 7
def naive_conv(full_data=False):
    train_data = np.load(fp.train)
    test_data_y = np.load(fp.test_no_fix)[:, 60:90]
    test_data_x = np.load(fp.test)[:, 0:60]
    june = None
    source_data = None

    if (full_data):
        whole = np.load(fp.fix_data)
        whole = whole[:, :, 6:9, :]
        train_data = whole.reshape(132 * 92, 90)
        june = np.load(fp.fix_June)
        source_data = june.reshape(132 * 30, 60)

    fp.clear(fp.logdir)

    x = tf.placeholder(tf.float32, [None, n_input])
    y_ = tf.placeholder(tf.float32, [None, n_output])
    is_training = tf.placeholder(tf.bool)
    keep_prob = tf.placeholder(tf.float32)  #dropout (keep probability)

    # Create some wrappers for simplicity
    def conv1d(x, W, b):
        # 1-D convolution wrapper (implemented via tf.nn.conv2d), with bias and ReLU activation
        x = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
        x = tf.nn.bias_add(x, b)
        return tf.nn.relu(x)

    def maxpool2d(x, k=kernel_size_pool):
        # MaxPool2D wrapper
        return tf.nn.max_pool(x,
                              ksize=[1, k, 1, 1],
                              strides=[1, 2, 1, 1],
                              padding='SAME')

    # Create model
    def conv_net(x, weights, biases, dropout):
        # Reshape the 60-value input into NHWC form for conv2d
        x = tf.reshape(x, shape=[-1, 60, 1, 1])

        # Convolution Layer
        conv1 = conv1d(x, weights['wc1'], biases['bc1'])
        # Max Pooling (down-sampling)
        conv1 = maxpool2d(conv1)

        # Convolution Layer
        conv2 = conv1d(conv1, weights['wc2'], biases['bc2'])
        # Max Pooling (down-sampling)
        conv2 = maxpool2d(conv2)

        # Fully connected layer
        # Reshape conv2 output to fit fully connected layer input

        fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])

        fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])

        bn = tf.contrib.layers.batch_norm(fc1,
                                          decay=0.9,
                                          center=True,
                                          scale=True,
                                          updates_collections=None,
                                          is_training=is_training,
                                          reuse=None,
                                          trainable=True,
                                          scope="bn")
        fc1 = tf.nn.relu(bn)
        # Apply Dropout
        fc1 = tf.nn.dropout(fc1, dropout)

        # Output layer
        out = tf.nn.relu(tf.add(tf.matmul(fc1, weights['out']), biases['out']))
        return out

    # Store layers weight & bias
    weights = {
        # kernel_size x 1 conv, 1 input channel, 10 output channels
        'wc1':
        tf.Variable(tf.truncated_normal([kernel_size, 1, 1, 10], stddev=dev)),
        # kernel_size x 1 conv, 10 input channels, 15 output channels
        'wc2':
        tf.Variable(tf.truncated_normal([kernel_size, 1, 10, 15], stddev=dev)),
        # fully connected: 15 positions * 15 channels in, 200 outputs
        'wd1':
        tf.Variable(tf.truncated_normal([15 * 15, 200], stddev=dev)),
        # 200 inputs, n_output outputs
        'out':
        tf.Variable(tf.truncated_normal([200, n_output], stddev=dev))
    }

    biases = {
        'bc1': tf.Variable(tf.constant(bias_init, shape=[10])),
        'bc2': tf.Variable(tf.constant(bias_init, shape=[15])),
        'bd1': tf.Variable(tf.constant(bias_init, shape=[200])),
        'out': tf.Variable(tf.constant(bias_init, shape=[n_output]))
    }

    # Construct model
    y = conv_net(x, weights, biases, keep_prob)

    # filter the zeros
    zeros = tf.cast(tf.zeros_like(y_), dtype=tf.bool)
    ones = tf.cast(tf.ones_like(y_), dtype=tf.bool)

    loc = tf.where(tf.equal(y_, 0), zeros, ones)
    rel_y = tf.boolean_mask(y_, loc)
    pred_y = tf.boolean_mask(y, loc)

    diff = tf.abs(tf.subtract(rel_y, pred_y))
    ratio = tf.divide(diff, rel_y)
    cost = tf.reduce_sum(ratio)
    summary = tf.summary.scalar("mean", cost)

    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        merged = tf.summary.merge_all()
        sess.run(init)
        step = 1
        train_writer = tf.summary.FileWriter(fp.logdir, sess.graph)
        index = 0

        for i in range(training_iters):
            if (i % 50 == 0):
                summary, res = sess.run(
                    [merged, cost],
                    feed_dict={
                        x: test_data_x,
                        y_: test_data_y,
                        keep_prob: 1,
                        is_training: False
                    })
                train_writer.add_summary(summary, i)
                print("{0} : {1}".format(i, res / 71280))

            np.random.shuffle(train_data)
            train_sample = train_data[0:batch_size]

            train_x = train_sample[:, 0:60]
            train_y = train_sample[:, 60:90]
            sess.run(optimizer,
                     feed_dict={
                         x: train_x,
                         y_: train_y,
                         keep_prob: 0.6,
                         is_training: True
                     })
        if (full_data):
            res_june = sess.run(y,
                                feed_dict={
                                    x: source_data,
                                    keep_prob: 1.0,
                                    is_training: False
                                })
            res_june = res_june.reshape(132, 30, 30)
            full.write_to_file(res_june)
        train_writer.close()
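
A small NumPy sketch of the zero-filtering step used above (drop positions where the ground truth is zero before computing the relative error); the names are illustrative only:

import numpy as np

def masked_relative_error_sum(y_true, y_pred):
    # Same idea as the tf.where / tf.boolean_mask block in naive_conv():
    # keep only positions where the ground truth is non-zero.
    mask = y_true != 0
    diff = np.abs(y_true[mask] - y_pred[mask])
    return np.sum(diff / y_true[mask])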
Example No. 8
    def take_images(self):
        if self.noop or self.skip - test_seq_num < self.scanmode + self.transparent:
            preCols = fp.fp.getSequencerParameter("PreCols")
            readCols = fp.fp.getSequencerParameter("ReadCols")
            postCols = fp.fp.getSequencerParameter("PostCols")
            overCols = fp.fp.getSequencerParameter("OverCols")
            preRows = fp.fp.getSequencerParameter("PreRows")
            readRows = fp.fp.getSequencerParameter("ReadRows")
            postRows = fp.fp.getSequencerParameter("PostRows")
            scanMode = fp.fp.isScanEnabled()
            idleFlushTimeout = fp.fp.getConfigurationParameterValue(
                "sequencerConfig", "idleFlushTimeout")
            print "Initial sequencer parameters"

            print "preCols=", preCols
            print "readCols=", readCols
            print "postCols=", postCols
            print "overCols=", overCols

            print "preRows=", preRows
            print "readRows=", readRows
            print "postRows=", postRows

            print "scanMode=", scanMode
            print "idleFlushTimeout=", idleFlushTimeout

            # set up scan mode
            fp.fp.sequencerConfig().submitChanges({
                "underCols": self.undercols,
                "preCols": self.precols,
                "readCols": self.readcols,
                "postCols": self.postcols,
                "overCols": self.overcols,
                "preRows": self.prerows,
                "readRows": self.readrows,
                "postRows": self.postrows,
                "overRows": self.overrows,
                "scanMode": True,
                "idleFlushTimeout": -1
            })
            fp.fp.applySubmittedChanges()
            if idleFlushTimeout != -1:
                fp.clear()

        exposure = 1.0
        expose_command = lambda: time.sleep(exposure)

        for i in range(self.scanmode):
            self.take_image(exposure,
                            expose_command,
                            image_type=None,
                            symlink_image_type=None)

        if self.noop or self.skip - test_seq_num < self.transparent:
            fp.fp.sequencerConfig().submitChanges({"transparentMode": 1})
            timeout = Duration.ofSeconds(60 * 5)
            fp.fp.applySubmittedChanges(timeout=timeout)

        for i in range(self.transparent):
            self.take_image(exposure,
                            expose_command,
                            image_type=None,
                            symlink_image_type=None)

        # Restore settings
        fp.fp.dropAllChanges()

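        # Note: idleFlushTimeout is only defined when the first branch above ran;
        # if that branch was skipped, the check below would raise a NameError.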
        if idleFlushTimeout != -1:
            fp.clear()