示例#1
0
    def get_content(self):
        """Extract the title and the main content block for ``self.url``.

        Runs a parse -> partition -> judge pipeline and keeps the helper
        objects on the instance (``parser``, ``title``, ``partitioner``,
        ``judge``).

        Returns:
            A 7-tuple ``(flag, title, content, content_with_format, srcs,
            confidence, detail)``. ``flag``, ``confidence`` and ``detail``
            are taken unchanged from the ``Judge.select`` result; ``title``
            is the stripped title string; ``content`` is the selected block
            rendered before images are inserted and ``content_with_format``
            is rendered afterwards; ``srcs`` is the first item returned by
            ``self.get_images``.
        """
        # Step 1: extract the basic text strings and the title.
        self.parser = Parser(self.url)
        ns_list = self.parser.ns()
        self.title = self.parser.get_title()

        # Step 2: group the text strings into blocks.
        self.partitioner = Partitioner()
        blocks = self.partitioner.partition(ns_list)

        # Step 3: select the main-content block; the analysis information
        # (confidence, detail) is a by-product of the selection.
        self.judge = Judge(self.title.string, ns_list)
        verdict = self.judge.select(blocks, ns_list)
        flag, main_block = verdict['flag'], verdict['block']
        confidence, detail = verdict['confidence'], verdict['detail']

        # NOTE: the flag is only reported, not acted upon. The content is
        # rendered even when the flag is falsy (an earlier variant that
        # returned empty content in that case has been disabled).
        content = main_block.to_str()
        srcs, images = self.get_images(main_block)
        main_block = self.insert_images(main_block, images)
        content_with_format = main_block.to_str_with_format()

        title_text = self.title.string.strip()
        return (flag, title_text, content, content_with_format,
                srcs, confidence, detail)
示例#2
0
 def __init__(self, context, partition_set):
     """Set up aggregation state, the partitioner, counters and logging.

     Args:
         context: mapping read for ``c.KEY_PARTITIONS`` and
             ``c.KEY_SEPERATOR_PARTITION``; also kept on the instance.
         partition_set: mapping stored by reference (not copied). Its
             ``c.KEY_TABLES`` entry is reset to an empty dict here, so the
             object passed in by the caller is modified as well.
     """
     self.__aggregation_sets = partition_set
     partition_set[c.KEY_TABLES] = {}

     self.__partitioner = Partitioner(
         context[c.KEY_PARTITIONS],
         context[c.KEY_SEPERATOR_PARTITION],
     )
     self.__context = context

     # Running totals, all starting at zero.
     self.__info = {
         c.INFO_TOTAL_BYTES: 0,
         c.INFO_TOTAL_ROWS: 0,
         c.INFO_TOTAL_MESSAGES: 0,
     }

     # getLogger() without a name returns the root logger, so the level
     # set here applies to the root logger, not only to this instance.
     root_logger = logging.getLogger()
     root_logger.setLevel(logging.ERROR)
     self.__logger = root_logger
示例#3
0
文件: disk.py 项目: k0da/kiwi-1
    def __init__(self, table_type, storage_provider):
        """Initialise partition bookkeeping and create the partitioner.

        :param table_type: forwarded to ``Partitioner`` and stored on the
            instance as ``table_type``
        :param storage_provider: block device providing instance; forwarded
            to ``Partitioner`` and stored as ``storage_provider``
        """
        # Keep a reference to the underlying block device provider
        # (e.g. loop), if present, so that destructors run in the correct
        # order when the device should be released.
        self.storage_provider = storage_provider

        # Bookkeeping starts out empty and unmapped.
        self.partition_map = {}
        self.partition_id_map = {}
        self.partition_id = {}
        self.is_mapped = False

        self.partitioner = Partitioner(table_type, storage_provider)

        self.table_type = table_type
示例#4
0
    data_orig = xr.open_dataarray(filepath)
    # let's first try only one var
    data = data_orig[0, :, :, :].copy()
    shape = np.shape(data)
    nx = shape[0]
    ny = shape[1]
    nz = shape[2]
# Broadcast the shape parameters from rank 0 so that they are available on
# every rank.
nx = comm.bcast(nx, root=0)
ny = comm.bcast(ny, root=0)
nz = comm.bcast(nz, root=0)

print(nx, ny, nz)
# Set up the partitioner.
# The field dimensions passed in must be the real ones minus the halo points;
# here 2 halo points per side are subtracted from ny and nz.
# NOTE(review): nx is passed unchanged -- presumably the first axis carries
# no halo; confirm against the Partitioner implementation.
p = Partitioner(comm, [nx, ny - 2 * 2, nz - 2 * 2], num_halo=2)

# Distribute the work onto the ranks.
# NOTE(review): `data` is assigned in the indented section above; confirm it
# is also defined on ranks that do not execute that section.
data_work = p.scatter(data)
# The bare string literal below is disabled code kept for reference; as an
# expression statement it has no effect at runtime.
"""
# subset more for speedup of first tests
print(f'subset even more because very large dataset')
data = data[:,::10,:,:]
"""

# Create a mask of the NaNs: True where a value is present, False where the
# value is NaN.
mask = ~np.isnan(data_work)  # nan values have zero weight (i.e. are False)

# Gap-filling the missing values with the spatiotemporal mean.
print('gapfilling missing values with spatiotemporal mean')
# Start timestamp; presumably used to time the gap-filling that follows.
tic = datetime.now()
示例#5
0
def main(nx, ny, nz, num_iter, num_halo=2, plot_result=False):
    """Driver for apply_diffusion that sets up fields and does timings.

    Rank 0 builds the initial field of shape
    ``(nz, ny + 2 * num_halo, nx + 2 * num_halo)``, which is scattered
    through a ``Partitioner``. The gathered input and output fields are
    saved by rank 0 as ``in_field.npy`` / ``out_field.npy`` and, when
    ``plot_result`` is true, plotted to ``in_field.png`` / ``out_field.png``.

    Args:
        nx: field size in x, must satisfy ``0 < nx <= 1024 * 1024``.
        ny: field size in y, must satisfy ``0 < ny <= 1024 * 1024``.
        nz: field size in z, must satisfy ``0 < nz <= 1024``.
        num_iter: iteration count passed to the timed ``apply_diffusion``
            call, must satisfy ``0 < num_iter <= 1024 * 1024``.
        num_halo: number of halo points, must satisfy
            ``0 < num_halo <= 256``.
        plot_result: when true, rank 0 also writes PNG plots of the middle
            z-level of the input and output fields.

    Raises:
        AssertionError: if any of the size arguments is out of range.
    """

    assert 0 < nx <= 1024 * 1024, 'You have to specify a reasonable value for nx'
    assert 0 < ny <= 1024 * 1024, 'You have to specify a reasonable value for ny'
    assert 0 < nz <= 1024, 'You have to specify a reasonable value for nz'
    assert 0 < num_iter <= 1024 * 1024, 'You have to specify a reasonable value for num_iter'
    assert 0 < num_halo <= 256, 'Your have to specify a reasonable number of halo points'
    alpha = 1. / 32.

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    p = Partitioner(comm, [nz, ny, nx], num_halo)

    if rank == 0:
        f = np.zeros((nz, ny + 2 * num_halo, nx + 2 * num_halo))
        # Option 1: Original stencil2d-mpi during HPC4WC course:
        # f[nz // 4:3 * nz // 4, num_halo + ny // 4:num_halo + 3 * ny // 4, num_halo + nx // 4:num_halo + 3 * nx // 4] = 1.0

        # Option 2: Similar to option 1, but positive region extended towards tile edges:
        # f[nz // 10:9 * nz // 10, num_halo + ny // 10:num_halo + 9 * ny // 10, num_halo + nx // 10:num_halo + 9 * nx // 10] = 1.0

        # Option 3: One positive region in bottom-left (0-0) corner, one positive region in top-right (ny-nx) corner
        # f[nz // 4:3 * nz // 4, num_halo:num_halo + ny // 4, num_halo:num_halo + nx // 4] = 1.0
        # f[nz // 4:3 * nz // 4, num_halo + 3 * ny // 4:-num_halo, num_halo + 3 * nx // 4:-num_halo] = 1.0

        # Option 4 (active): a band of ones between ny // 7 and 2 * ny // 7
        # in y (i.e. off-center), spanning the whole interior x-range.
        f[nz // 4:3 * nz // 4, num_halo + ny // 7:num_halo + 2 * ny // 7,
          num_halo:-num_halo] = 1.0

    else:
        # Non-root ranks only pass a placeholder into scatter().
        f = np.empty(1)
    in_field = p.scatter(f)

    out_field = np.copy(in_field)

    f = p.gather(in_field)
    if rank == 0:
        np.save('in_field', f)
        if plot_result:
            plt.ioff()
            plt.imshow(f[in_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('in_field.png')
            plt.close()

    # warmup caches (uses apply_diffusion's default iteration count)
    apply_diffusion(in_field, out_field, alpha, num_halo, p=p)

    comm.Barrier()

    # Time the actual work. perf_counter() is monotonic and high-resolution,
    # so the interval cannot be distorted by system clock adjustments the
    # way a time.time() difference can.
    tic = time.perf_counter()
    apply_diffusion(in_field,
                    out_field,
                    alpha,
                    num_halo,
                    num_iter=num_iter,
                    p=p)
    toc = time.perf_counter()

    comm.Barrier()

    if rank == 0:
        # The reported value is rank 0's own measurement.
        print("Elapsed time for work = {} s".format(toc - tic))

    update_halo(out_field, num_halo, p)

    f = p.gather(out_field)
    if rank == 0:
        np.save('out_field', f)
        if plot_result:
            plt.imshow(f[out_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('out_field.png')
            plt.close()
示例#6
0
def main(nx, ny, nz, num_iter, num_halo=2, plot_result=False):
    """Driver for apply_diffusion that sets up fields and does timings.

    Rank 0 builds the initial field of shape
    ``(nz, ny + 2 * num_halo, nx + 2 * num_halo)`` with a box of ones in
    the middle, which is scattered through a ``Partitioner``. The gathered
    input and output fields are saved by rank 0 as ``in_field.npy`` /
    ``out_field.npy`` and, when ``plot_result`` is true, plotted to
    ``in_field.png`` / ``out_field.png``.

    Args:
        nx: field size in x, must satisfy ``0 < nx <= 1024 * 1024``.
        ny: field size in y, must satisfy ``0 < ny <= 1024 * 1024``.
        nz: field size in z, must satisfy ``0 < nz <= 1024``.
        num_iter: iteration count passed to the timed ``apply_diffusion``
            call, must satisfy ``0 < num_iter <= 1024 * 1024``.
        num_halo: number of halo points, must satisfy
            ``0 < num_halo <= 256``.
        plot_result: when true, rank 0 also writes PNG plots of the middle
            z-level of the input and output fields.

    Raises:
        AssertionError: if any of the size arguments is out of range.
    """

    assert 0 < nx <= 1024 * 1024, 'You have to specify a reasonable value for nx'
    assert 0 < ny <= 1024 * 1024, 'You have to specify a reasonable value for ny'
    assert 0 < nz <= 1024, 'You have to specify a reasonable value for nz'
    assert 0 < num_iter <= 1024 * 1024, 'You have to specify a reasonable value for num_iter'
    assert 0 < num_halo <= 256, 'Your have to specify a reasonable number of halo points'
    alpha = 1. / 32.

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    p = Partitioner(comm, [nz, ny, nx], num_halo)

    if rank == 0:
        f = np.zeros((nz, ny + 2 * num_halo, nx + 2 * num_halo))
        # Box of ones covering the middle half of each axis (quarter to
        # three quarters), with y and x offset by the halo width.
        f[nz // 4:3 * nz // 4, num_halo + ny // 4:num_halo + 3 * ny // 4,
          num_halo + nx // 4:num_halo + 3 * nx // 4] = 1.0
    else:
        # Non-root ranks only pass a placeholder into scatter().
        f = np.empty(1)
    in_field = p.scatter(f)

    out_field = np.copy(in_field)

    f = p.gather(in_field)
    if rank == 0:
        np.save('in_field', f)
        if plot_result:
            plt.ioff()
            plt.imshow(f[in_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('in_field.png')
            plt.close()

    # warmup caches (uses apply_diffusion's default iteration count)
    apply_diffusion(in_field, out_field, alpha, num_halo, p=p)

    comm.Barrier()

    # Time the actual work. perf_counter() is monotonic and high-resolution,
    # so the interval cannot be distorted by system clock adjustments the
    # way a time.time() difference can.
    tic = time.perf_counter()
    apply_diffusion(in_field,
                    out_field,
                    alpha,
                    num_halo,
                    num_iter=num_iter,
                    p=p)
    toc = time.perf_counter()

    comm.Barrier()

    if rank == 0:
        # The reported value is rank 0's own measurement.
        print("Elapsed time for work = {} s".format(toc - tic))

    update_halo(out_field, num_halo, p)

    f = p.gather(out_field)
    if rank == 0:
        np.save('out_field', f)
        if plot_result:
            plt.imshow(f[out_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('out_field.png')
            plt.close()