Exemplo n.º 1
0
    def _assert_unlabelled_roundtrip_lt_256(self, matrix, delimiter):
        """Round-trip an unlabelled matrix through ``parse`` and check the HDF5.

        ``matrix`` is written row by row to a temporary delimited text file
        and converted with ``parse``. The output is expected to hold only a
        ``'resolutions'`` group with a single resolution ``'1'``, whose
        ``values`` equal ``matrix`` and whose ``nan_values`` are all zero.

        NOTE(review): the ``lt_256`` suffix presumably means the matrix is
        smaller than one 256-wide tile -- confirm against ``parse``.

        Args:
            matrix: Non-empty sequence of rows; ``matrix[0]`` sets the
                expected width of ``nan_values``.
            delimiter: Field separator used both to write the file and to
                parse it.
        """
        with TemporaryDirectory() as tmp_dir:
            csv_path = tmp_dir + '/tmp.csv'
            with open(csv_path, 'w', newline='') as csv_file:
                writer = csv.writer(csv_file, delimiter=delimiter)
                # body:
                writer.writerows(matrix)

            # The separator keeps the output inside tmp_dir so that it is
            # removed together with the directory.
            hdf5_path = tmp_dir + '/tmp.hdf5'

            is_labelled = False
            height = get_height(csv_path, is_labelled=is_labelled)
            width = get_width(csv_path, is_labelled=is_labelled)

            # Close both handles before the file is re-opened for reading.
            with open(csv_path, 'r') as csv_handle, \
                    h5py.File(hdf5_path, 'w') as hdf5_write_handle:
                parse(csv_handle,
                      hdf5_write_handle,
                      height,
                      width,
                      first_n=None,
                      is_labelled=is_labelled,
                      delimiter=delimiter)

            with h5py.File(hdf5_path, 'r') as hdf5:
                self.assertEqual(list(hdf5.keys()), ['resolutions'])
                self.assertEqual(list(hdf5['resolutions'].keys()), ['1'])
                self.assertEqual(list(hdf5['resolutions']['1'].keys()),
                                 ['nan_values', 'values'])
                assert_array_equal(hdf5['resolutions']['1']['nan_values'],
                                   [[0] * len(matrix[0])] * len(matrix))
                assert_array_equal(hdf5['resolutions']['1']['values'], matrix)
Exemplo n.º 2
0
    def test_parse(self):
        """Check ``parse`` on a labelled, tab-separated 9 x 512 matrix.

        The input has a 513-entry header row and nine body rows, each a row
        label followed by 512 values (three rows of 0, three of 1, three of
        alternating 1/-1). The output is expected to contain the column
        labels plus resolutions ``"1"`` and ``"2"``.
        """
        with TemporaryDirectory() as tmp_dir:
            csv_path = tmp_dir + "/tmp.csv"
            with open(csv_path, "w", newline="") as csv_file:
                writer = csv.writer(csv_file, delimiter="\t")
                # header:
                labels = ["col-{}".format(x) for x in range(513)]
                writer.writerow(labels)
                # body:
                for y in range(0, 3):
                    writer.writerow(["row-{}".format(y)] + [0] * 512)
                for y in range(3, 6):
                    writer.writerow(["row-{}".format(y)] + [1] * 512)
                for y in range(6, 9):
                    writer.writerow(["row-{}".format(y)] + [1, -1] * 256)

            # The separator keeps the output inside tmp_dir so that it is
            # removed together with the directory.
            hdf5_path = tmp_dir + "/tmp.hdf5"

            # Close both handles before the file is re-opened for reading.
            with open(csv_path, "r") as csv_handle, \
                    h5py.File(hdf5_path, "w") as hdf5_write_handle:
                parse(csv_handle, hdf5_write_handle)

            with h5py.File(hdf5_path, "r") as hdf5:
                self.assertEqual(list(hdf5.keys()), ["labels", "resolutions"])
                self.assertEqual([h.decode("utf8") for h in hdf5["labels"]],
                                 labels[1:])

                self.assertEqual(list(hdf5["resolutions"].keys()), ["1", "2"])

                self.assertEqual(list(hdf5["resolutions"]["1"].keys()),
                                 ["nan_values", "values"])
                assert_array_equal(hdf5["resolutions"]["1"]["nan_values"],
                                   [[0] * 512] * 512)
                res_1 = hdf5["resolutions"]["1"]["values"]
                assert_array_equal(res_1[0], [0] * 512)
                assert_array_equal(res_1[3], [1] * 512)
                assert_array_equal(res_1[6], [1, -1] * 256)
                # Rows past the nine that were written are expected to be NaN.
                assert_array_equal(res_1[9], [nan] * 512)

                self.assertEqual(list(hdf5["resolutions"]["2"].keys()),
                                 ["values"])
                res_2 = hdf5["resolutions"]["2"]["values"]
                assert_array_equal(res_2[0], [0] * 256)
                # Straddles the 0 and 1 rows
                assert_array_equal(res_2[1], [2] * 256)
                assert_array_equal(res_2[2], [4] * 256)
                # -1 and +1 cancel out
                assert_array_equal(res_2[3], [0] * 256)
                assert_array_equal(res_2[4], [0] * 256)
                assert_array_equal(res_2[5], [0] * 256)
                assert_array_equal(res_2[6], [0] * 256)
Exemplo n.º 3
0
    def _assert_unlabelled_roundtrip_1024(self,
                                          matrix,
                                          first_row=None,
                                          first_col=None,
                                          first_n=None):
        """Round-trip an unlabelled matrix and check the aggregated output.

        ``matrix`` is written as a tab-separated file and converted with
        ``parse``. The output is expected to hold resolutions ``"1"``,
        ``"2"`` and ``"4"`` (plus ``"2-nan_values"`` and
        ``"4-nan_values"``). The resolution-``"4"`` values are optionally
        compared with the expected first row and first column.

        NOTE(review): the ``1024`` suffix presumably refers to the matrix
        size this helper is meant for -- confirm against the callers.

        Args:
            matrix: Sequence of rows to write.
            first_row: If truthy, expected first row of resolution ``"4"``.
            first_col: If truthy, expected leading entries of the first
                column of resolution ``"4"``.
            first_n: Passed through unchanged to ``parse``.
        """
        delimiter = "\t"
        with TemporaryDirectory() as tmp_dir:
            csv_path = tmp_dir + "/tmp.csv"
            with open(csv_path, "w", newline="") as csv_file:
                writer = csv.writer(csv_file, delimiter=delimiter)
                # body:
                writer.writerows(matrix)

            # The separator keeps the output inside tmp_dir so that it is
            # removed together with the directory.
            hdf5_path = tmp_dir + "/tmp.hdf5"

            is_labelled = False
            height = get_height(csv_path, is_labelled=is_labelled)
            width = get_width(csv_path, is_labelled=is_labelled)

            # Close both handles before the file is re-opened for reading.
            with open(csv_path, "r") as csv_handle, \
                    h5py.File(hdf5_path, "w") as hdf5_write_handle:
                parse(
                    csv_handle,
                    hdf5_write_handle,
                    height,
                    width,
                    first_n=first_n,
                    is_labelled=is_labelled,
                    delimiter=delimiter,
                )

            with h5py.File(hdf5_path, "r") as hdf5:
                self.assertEqual(list(hdf5.keys()), ["resolutions"])
                self.assertEqual(
                    list(hdf5["resolutions"].keys()),
                    ["1", "2", "2-nan_values", "4", "4-nan_values"],
                )
                self.assertEqual(list(hdf5["resolutions"]["1"].keys()),
                                 ["nan_values", "values"])
                self.assertEqual(list(hdf5["resolutions"]["4"].keys()),
                                 ["values"])
                res_4 = hdf5["resolutions"]["4"]["values"]
                if first_row:
                    assert_array_equal(res_4[0], first_row)
                if first_col:
                    assert_array_equal(
                        [res_4[y][0] for y in range(len(first_col))],
                        first_col)
Exemplo n.º 4
0
    def _assert_unlabelled_roundtrip_1024(self,
                                          matrix,
                                          first_row=None,
                                          first_col=None,
                                          first_n=None):
        """Round-trip an unlabelled matrix and check the aggregated output.

        ``matrix`` is written as a tab-separated file and converted with
        ``parse``. The output is expected to hold resolutions ``'1'``,
        ``'2'`` and ``'4'`` (plus ``'2-nan_values'`` and
        ``'4-nan_values'``). The resolution-``'4'`` values are optionally
        compared with the expected first row and first column.

        NOTE(review): the ``1024`` suffix presumably refers to the matrix
        size this helper is meant for -- confirm against the callers.

        Args:
            matrix: Sequence of rows to write.
            first_row: If truthy, expected first row of resolution ``'4'``.
            first_col: If truthy, expected leading entries of the first
                column of resolution ``'4'``.
            first_n: Passed through unchanged to ``parse``.
        """
        delimiter = '\t'
        with TemporaryDirectory() as tmp_dir:
            csv_path = tmp_dir + '/tmp.csv'
            with open(csv_path, 'w', newline='') as csv_file:
                writer = csv.writer(csv_file, delimiter=delimiter)
                # body:
                writer.writerows(matrix)

            # The separator keeps the output inside tmp_dir so that it is
            # removed together with the directory.
            hdf5_path = tmp_dir + '/tmp.hdf5'

            is_labelled = False
            height = get_height(csv_path, is_labelled=is_labelled)
            width = get_width(csv_path, is_labelled=is_labelled)

            # Close both handles before the file is re-opened for reading.
            with open(csv_path, 'r') as csv_handle, \
                    h5py.File(hdf5_path, 'w') as hdf5_write_handle:
                parse(csv_handle,
                      hdf5_write_handle,
                      height,
                      width,
                      first_n=first_n,
                      is_labelled=is_labelled,
                      delimiter=delimiter)

            with h5py.File(hdf5_path, 'r') as hdf5:
                self.assertEqual(list(hdf5.keys()), ['resolutions'])
                self.assertEqual(
                    list(hdf5['resolutions'].keys()),
                    ['1', '2', '2-nan_values', '4', '4-nan_values'])
                self.assertEqual(list(hdf5['resolutions']['1'].keys()),
                                 ['nan_values', 'values'])
                self.assertEqual(list(hdf5['resolutions']['4'].keys()),
                                 ['values'])
                res_4 = hdf5['resolutions']['4']['values']
                if first_row:
                    assert_array_equal(res_4[0], first_row)
                if first_col:
                    assert_array_equal(
                        [res_4[y][0] for y in range(len(first_col))],
                        first_col)
Exemplo n.º 5
0
    def _assert_unlabelled_roundtrip_lt_256(self, matrix, delimiter):
        """Round-trip an unlabelled matrix through ``parse`` and check the HDF5.

        ``matrix`` is written row by row to a temporary delimited text file
        and converted with ``parse``. The output is expected to hold only a
        ``"resolutions"`` group with a single resolution ``"1"``, whose
        ``values`` equal ``matrix`` and whose ``nan_values`` are all zero.

        NOTE(review): the ``lt_256`` suffix presumably means the matrix is
        smaller than one 256-wide tile -- confirm against ``parse``.

        Args:
            matrix: Non-empty sequence of rows; ``matrix[0]`` sets the
                expected width of ``nan_values``.
            delimiter: Field separator used both to write the file and to
                parse it.
        """
        with TemporaryDirectory() as tmp_dir:
            csv_path = tmp_dir + "/tmp.csv"
            with open(csv_path, "w", newline="") as csv_file:
                writer = csv.writer(csv_file, delimiter=delimiter)
                # body:
                writer.writerows(matrix)

            # The separator keeps the output inside tmp_dir so that it is
            # removed together with the directory.
            hdf5_path = tmp_dir + "/tmp.hdf5"

            is_labelled = False
            height = get_height(csv_path, is_labelled=is_labelled)
            width = get_width(csv_path, is_labelled=is_labelled)

            # Close both handles before the file is re-opened for reading.
            with open(csv_path, "r") as csv_handle, \
                    h5py.File(hdf5_path, "w") as hdf5_write_handle:
                parse(
                    csv_handle,
                    hdf5_write_handle,
                    height,
                    width,
                    first_n=None,
                    is_labelled=is_labelled,
                    delimiter=delimiter,
                )

            with h5py.File(hdf5_path, "r") as hdf5:
                self.assertEqual(list(hdf5.keys()), ["resolutions"])
                self.assertEqual(list(hdf5["resolutions"].keys()), ["1"])
                self.assertEqual(list(hdf5["resolutions"]["1"].keys()),
                                 ["nan_values", "values"])
                assert_array_equal(
                    hdf5["resolutions"]["1"]["nan_values"],
                    [[0] * len(matrix[0])] * len(matrix),
                )
                assert_array_equal(hdf5["resolutions"]["1"]["values"], matrix)
Exemplo n.º 6
0
    def test_wide_labelled_square(self):
        """Check ``parse`` on a labelled, tab-separated 9 x 512 matrix.

        The input has a 513-entry header row and nine body rows, each a row
        label followed by 512 values (three rows of 0, three of 1, three of
        alternating 1/-1). The output is expected to contain column and row
        labels plus resolutions ``'1'`` and ``'2'``.
        """
        with TemporaryDirectory() as tmp_dir:
            csv_path = tmp_dir + '/tmp.csv'
            with open(csv_path, 'w', newline='') as csv_file:
                writer = csv.writer(csv_file, delimiter='\t')
                # header:
                col_labels = ['col-{}'.format(x) for x in range(513)]
                writer.writerow(col_labels)
                # body:
                for y in range(0, 3):
                    writer.writerow(['row-{}'.format(y)] + [0] * 512)
                for y in range(3, 6):
                    writer.writerow(['row-{}'.format(y)] + [1] * 512)
                for y in range(6, 9):
                    writer.writerow(['row-{}'.format(y)] + [1, -1] * 256)

            # The separator keeps the output inside tmp_dir so that it is
            # removed together with the directory.
            hdf5_path = tmp_dir + '/tmp.hdf5'

            height = get_height(csv_path)
            width = get_width(csv_path, is_labelled=True)

            # Close both handles before the file is re-opened for reading.
            with open(csv_path, 'r') as csv_handle, \
                    h5py.File(hdf5_path, 'w') as hdf5_write_handle:
                parse(csv_handle,
                      hdf5_write_handle,
                      height,
                      width,
                      delimiter='\t',
                      first_n=None,
                      is_labelled=True)

            with h5py.File(hdf5_path, 'r') as hdf5:
                self.assertEqual(list(hdf5.keys()),
                                 ['col_labels', 'resolutions', 'row_labels'])
                self.assertEqual(list(hdf5['col_labels']), col_labels[1:])
                self.assertEqual(list(hdf5['row_labels']),
                                 ['row-{}'.format(r) for r in range(9)])

                self.assertEqual(list(hdf5['resolutions'].keys()),
                                 ['1', '2', '2-nan_values'])

                self.assertEqual(list(hdf5['resolutions']['1'].keys()),
                                 ['nan_values', 'values'])
                assert_array_equal(hdf5['resolutions']['1']['nan_values'],
                                   [[0] * 512] * 512)
                res_1 = hdf5['resolutions']['1']['values']
                assert_array_equal(res_1[0], [0] * 512)
                assert_array_equal(res_1[3], [1] * 512)
                assert_array_equal(res_1[6], [1, -1] * 256)
                # Rows past the nine that were written are expected to be NaN.
                assert_array_equal(res_1[9], [nan] * 512)

                self.assertEqual(list(hdf5['resolutions']['2'].keys()),
                                 ['values'])
                # TODO: We are missing nan_values at higher aggregations: Bug?
                # https://github.com/higlass/clodius/issues/62
                res_2 = hdf5['resolutions']['2']['values']
                assert_array_equal(res_2[0], [0] * 256)
                # Straddles the 0 and 1 rows
                assert_array_equal(res_2[1], [2] * 256)
                assert_array_equal(res_2[2], [4] * 256)
                # -1 and +1 cancel out
                assert_array_equal(res_2[3], [0] * 256)
                assert_array_equal(res_2[4], [0] * 256)
                assert_array_equal(res_2[5], [0] * 256)
                assert_array_equal(res_2[6], [0] * 256)
Exemplo n.º 7
0
    def test_wide_labelled_square(self):
        """Check ``parse`` on a labelled, tab-separated 9 x 512 matrix.

        The input has a 513-entry header row and nine body rows, each a row
        label followed by 512 values (three rows of 0, three of 1, three of
        alternating 1/-1). The output is expected to contain column and row
        labels plus resolutions ``"1"`` and ``"2"``.
        """
        with TemporaryDirectory() as tmp_dir:
            csv_path = tmp_dir + "/tmp.csv"
            with open(csv_path, "w", newline="") as csv_file:
                writer = csv.writer(csv_file, delimiter="\t")
                # header:
                col_labels = ["col-{}".format(x) for x in range(513)]
                writer.writerow(col_labels)
                # body:
                for y in range(0, 3):
                    writer.writerow(["row-{}".format(y)] + [0] * 512)
                for y in range(3, 6):
                    writer.writerow(["row-{}".format(y)] + [1] * 512)
                for y in range(6, 9):
                    writer.writerow(["row-{}".format(y)] + [1, -1] * 256)

            # The separator keeps the output inside tmp_dir so that it is
            # removed together with the directory.
            hdf5_path = tmp_dir + "/tmp.hdf5"

            height = get_height(csv_path)
            width = get_width(csv_path, is_labelled=True)

            # Close both handles before the file is re-opened for reading.
            with open(csv_path, "r") as csv_handle, \
                    h5py.File(hdf5_path, "w") as hdf5_write_handle:
                parse(
                    csv_handle,
                    hdf5_write_handle,
                    height,
                    width,
                    delimiter="\t",
                    first_n=None,
                    is_labelled=True,
                )

            with h5py.File(hdf5_path, "r") as hdf5:
                self.assertEqual(list(hdf5.keys()),
                                 ["col_labels", "resolutions", "row_labels"])
                self.assertEqual(list(hdf5["col_labels"]), col_labels[1:])
                self.assertEqual(list(hdf5["row_labels"]),
                                 ["row-{}".format(r) for r in range(9)])

                self.assertEqual(list(hdf5["resolutions"].keys()),
                                 ["1", "2", "2-nan_values"])

                self.assertEqual(list(hdf5["resolutions"]["1"].keys()),
                                 ["nan_values", "values"])
                assert_array_equal(hdf5["resolutions"]["1"]["nan_values"],
                                   [[0] * 512] * 512)
                res_1 = hdf5["resolutions"]["1"]["values"]
                assert_array_equal(res_1[0], [0] * 512)
                assert_array_equal(res_1[3], [1] * 512)
                assert_array_equal(res_1[6], [1, -1] * 256)
                # Rows past the nine that were written are expected to be NaN.
                assert_array_equal(res_1[9], [nan] * 512)

                self.assertEqual(list(hdf5["resolutions"]["2"].keys()),
                                 ["values"])
                # TODO: We are missing nan_values at higher aggregations: Bug?
                # https://github.com/higlass/clodius/issues/62
                res_2 = hdf5["resolutions"]["2"]["values"]
                assert_array_equal(res_2[0], [0] * 256)
                # Straddles the 0 and 1 rows
                assert_array_equal(res_2[1], [2] * 256)
                assert_array_equal(res_2[2], [4] * 256)
                # -1 and +1 cancel out
                assert_array_equal(res_2[3], [0] * 256)
                assert_array_equal(res_2[4], [0] * 256)
                assert_array_equal(res_2[5], [0] * 256)
                assert_array_equal(res_2[6], [0] * 256)