Example #1
0
def build_offline_time():
    """Collect the recorded local-push run time of every dataset in
    NEW_DATA into one persistent JSON file (./local_push_time.json)."""
    with PersistentDict("./local_push_time.json",
                        format="json") as time_local_push:
        for name in NEW_DATA:
            print("processing", name)
            residual_path = LOCAL_PUSH_INDEX + name + "_R.json"
            # the residual file stores the offline run time under "time"
            with PersistentDict(residual_path, flag="r") as residual:
                time_local_push[name] = residual["time"]
Example #2
0
def load_local_push_index(data_name):
    '''
    Load the pre-computed local-push index for a dataset.

    data_name: dataset name; used to locate the "<name>_P.json" and
        "<name>_R.json" files under LOCAL_PUSH_INDEX.
    Returns a dict {"P": estimate store, "R": residual store}, both
    opened read-only.
    '''
    print("loading local push index", data_name)
    P = PersistentDict(LOCAL_PUSH_INDEX + data_name + "_P.json",
                       flag="r",
                       format="json")
    R = PersistentDict(LOCAL_PUSH_INDEX + data_name + "_R.json",
                       flag="r",
                       format="json")
    # dict literal instead of piecewise key assignment
    return {"P": P, "R": R}
Example #3
0
def local_push_simrank(A, indegrees, epsilon=0.01, delta=0.01, r_max=None,
                       c=0.6, is_sync=False, data_name=None):
    '''
    The local push algorithm for all-pairs SimRank: keep pushing
    residuals until every residual is below the threshold r_max.

    A: adjacency matrix, csr sparse matrix
    indegrees: in-degree of each node, 1darray
    epsilon, delta: accuracy parameters used to derive r_max via
        cal_rmax() when r_max is not given explicitly
    r_max: the maximum residual allowed to remain unpushed
    c: SimRank decay factor
    is_sync: unused in this implementation (the synchronous variant is
        disabled)
    data_name: basename for the on-disk P/R index files written under
        LOCAL_PUSH_INDEX
    Returns True on completion; persists the estimate (P) and residual
    (R) dictionaries as JSON files.
    to-do: make it parallel
    '''
    n = A.shape[0]
    m = A.nnz
    d = m / n  # average degree
    if r_max is None:
        r_max = cal_rmax(d, epsilon, delta)
    print("threshold r", r_max)

    # encode the node pair (a1, a2) as a single integer dict key
    @jit
    def get_key(a1, a2):
        return a1 * n + a2

    with PersistentDict(LOCAL_PUSH_INDEX + data_name + "_P.json", flag="c", format="json") as estimate, \
            PersistentDict(LOCAL_PUSH_INDEX + data_name + "_R.json", flag="c", format="json") as residual:

        estimate.clear()
        residual.clear()
        # initialize: every node is fully similar to itself
        for i in range(0, n):
            residual[get_key(i, i)] = 1
        Q = set([(i, i)
                 for i in range(0, n)])  # use set to avoid duplicate keys
        # one-element lists so the nested closure below can mutate them
        step = [0]
        current_sum = [0]  # the sum of current estimates

        @jit
        def push(a, b):
            """
            Push the residual at position (a, b) into the estimate and
            propagate it to the out-neighbour pairs.
            """
            k_ab = get_key(a, b)
            step[0] += 1
            # pairs only enter Q when their residual exists, so r is a
            # number here (NOTE(review): relies on that invariant)
            r = residual.pop(k_ab, None)  # delete current key
            # move the residual mass into the estimate
            estimate.setdefault(k_ab, 0)
            estimate[k_ab] += r
            current_sum[0] += r
            # out-neighbours of a and b from the CSR row slices
            out_a = A.indices[A.indptr[a]:A.indptr[a + 1]]
            out_b = A.indices[A.indptr[b]:A.indptr[b + 1]]
            is_singleton_node = a == b
            for (oa, ob) in product(out_a, out_b):
                if oa == ob:  # don't push to singleton nodes
                    continue
                if oa > ob:
                    if is_singleton_node:
                        # residuals from a singleton node are only pushed
                        # to the canonical (oa < ob) half of the pairs
                        continue
                    else:
                        oa, ob = ob, oa  # canonicalize the pair ordering
                k_oaob = get_key(oa, ob)
                indeg_a = indegrees[oa]
                indeg_b = indegrees[ob]
                total_in = indeg_a * indeg_b
                if total_in > 0:
                    inc = (c * r) / total_in
                    residual.setdefault(k_oaob, 0)
                    residual[k_oaob] += inc  # update residual value
                    # only pairs whose residual exceeds the threshold
                    # need further pushing
                    if residual[k_oaob] > r_max:
                        Q.add((oa, ob))

        t1 = time.time()
        while len(Q) > 0:
            i, j = Q.pop()
            push(i, j)
        t2 = time.time()

        # save off-line index to disk
        print("total ", t2 - t1, "seconds")
        # recorded run time is read back later by build_offline_time()
        residual['time'] = t2 - t1
        print('origin sum', current_sum[0])
        # only the upper triangle (plus diagonal) is stored, hence *2 - n
        print("sum", current_sum[0] * 2 - n)

        return True
Example #4
0
            metaStr.write(line)
            metaStr.write("\n")

        if metaStr.pos > self._size:
            raise se.MetadataOverflowError()

        # Clear out previous data - it is a volume, not a file
        metaStr.write('\0' * (self._size - metaStr.pos))

        data = metaStr.getvalue()
        with fileUtils.DirectFile(self.metavol, "r+d") as f:
            f.seek(self._offset)
            f.write(data)

def LvBasedSDMetadata(vg, lv):
    """Return a validated metadata dict backed by an LV of the given VG.

    Plain ``def`` instead of a named lambda (PEP 8 E731) for proper
    tracebacks and repr; call signature is unchanged.
    """
    return DictValidator(
        PersistentDict(LvMetadataRW(vg, lv, 0, SD_METADATA_SIZE)),
        BLOCK_SD_MD_FIELDS)


def TagBasedSDMetadata(vg):
    """Return a validated metadata dict backed by the VG's tags."""
    return DictValidator(
        PersistentDict(VGTagMetadataRW(vg)),
        BLOCK_SD_MD_FIELDS)


def selectMetadata(sdUUID):
    """Pick the metadata provider for a block domain: prefer the
    LV-based store when it already holds entries, otherwise fall back
    to the VG-tag-based store."""
    lvProvider = LvBasedSDMetadata(sdUUID, sd.METADATA)
    if len(lvProvider) > 0:
        return lvProvider
    return TagBasedSDMetadata(sdUUID)

Example #5
0
                line = line.encode('utf-8')
            metadata[i] = line

        metadata = [i + '\n' for i in metadata]
        tmpFilePath = self._metafile + ".new"
        try:
            self._oop.writeLines(tmpFilePath, metadata)
        except IOError as e:
            if e.errno != errno.ESTALE:
                raise
            self._oop.writeLines(tmpFilePath, metadata)
        self._oop.os.rename(tmpFilePath, self._metafile)


def FileSDMetadata(metafile):
    """Return a validated metadata dict backed by the given metadata file.

    Plain ``def`` instead of a named lambda (PEP 8 E731) for proper
    tracebacks and repr; call signature is unchanged.
    """
    return DictValidator(
        PersistentDict(FileMetadataRW(metafile)), FILE_SD_MD_FIELDS)


class FileStorageDomain(sd.StorageDomain):
    def __init__(self, domainPath):
        # Using glob might look like the simplest thing to do but it isn't
        # If one of the mounts is stuck it'll cause the entire glob to fail
        # and you wouldn't be able to access any domain
        self.log.debug("Reading domain in path %s", domainPath)
        # domainPath is <mountpoint>/<sdUUID>; derive both parts from it
        self.mountpoint = os.path.dirname(domainPath)
        # NOTE(review): the remote path appears to be encoded in the
        # mountpoint's basename — confirm against the mounting code
        self.remotePath = os.path.basename(self.mountpoint)
        self.metafile = os.path.join(domainPath, sd.DOMAIN_META_DATA,
                                     sd.METADATA)

        sdUUID = os.path.basename(domainPath)
        # fail early if the backing filesystem lacks required features
        validateFileSystemFeatures(sdUUID, self.mountpoint)
Example #6
0
            metaStr.write("\n")

        if metaStr.pos > self._size:
            raise se.MetadataOverflowError()

        # Clear out previous data - it is a volume, not a file
        metaStr.write('\0' * (self._size - metaStr.pos))

        data = metaStr.getvalue()
        with fileUtils.DirectFile(self.metavol, "r+d") as f:
            f.seek(self._offset)
            f.write(data)


def LvBasedSDMetadata(vg, lv):
    """Return a validated metadata dict backed by an LV of the given VG.

    Plain ``def`` instead of a named lambda (PEP 8 E731) for proper
    tracebacks and repr; call signature is unchanged.
    """
    return DictValidator(
        PersistentDict(LvMetadataRW(vg, lv, 0, SD_METADATA_SIZE)),
        BLOCK_SD_MD_FIELDS)


def TagBasedSDMetadata(vg):
    """Return a validated metadata dict backed by the VG's tags."""
    return DictValidator(
        PersistentDict(VGTagMetadataRW(vg)), BLOCK_SD_MD_FIELDS)


def selectMetadata(sdUUID):
    """Choose the metadata backend for a block domain.

    The LV-based store wins when it is non-empty; an empty one means
    the domain keeps its metadata in VG tags instead.
    """
    lvMetadata = LvBasedSDMetadata(sdUUID, sd.METADATA)
    return lvMetadata if len(lvMetadata) > 0 else TagBasedSDMetadata(sdUUID)


def metadataValidity(vg):