Example #1
    def getXmlData(self, save=False, relative=True):
        """
        保存id和data数据
        :return:
        """

        if relative:
            path = Properties.getRootPath() + Properties.getXmlLocation() + self.name + ".xml"
        else:
            path = self.name
        images = parse(path)
        id = []
        data = []
        for node in images.getElementsByTagName(self.tag):
            idNode = node.getElementsByTagName("id")[0].childNodes[0].data
            id.append(idNode)
            dataNode = node.getElementsByTagName("data")[0].childNodes[0].data
            dataNode = dataNode[1:-1].split(',')
            data.append(dataNode)
        id = np.asarray(id)
        id = Series(id)
        data = np.asarray(list(map(conv, data)), dtype=float)  # np.float was removed from NumPy
        if save:
            if not os.path.exists(Properties.getRootPath()+Properties.getDefaultDataFold() + "/cache/" + self.name):
                # f=open(Properties.getDefaultDataFold()+"/csv/threshold.csv","w")
                # f.close()
                os.makedirs(Properties.getRootPath()+Properties.getDefaultDataFold() + "/cache/" + self.name)
            np.save(Properties.getRootPath() + "/data/cache/" + self.name + "/id.npy", id)
            np.save(Properties.getRootPath() + "/data/cache/" + self.name + "/data.npy", data)

        return id, data
Example #2
def get_threshold():
    from context.resource_manager import Properties
    from view import shape_view
    from view import plot_utils
    from cluster import density_cluster
    id = np.load(Properties.getRootPath() + "/data/cache/id.npy")
    data = np.load(Properties.getRootPath() + "/data/cache/data.npy")
    image_size = round(math.sqrt(float(data[0].shape[0])))
    #plot_utils.plot_image( data[551], w, w)
    data = density_cluster.binary_array(data)
    # shape_view.pandas_view_record((data))
    import numpy
    import multiprocessing
    threshold = DataFrame([], columns=['H', 'd_c', 'cluster'])
    N = 20
    pool = multiprocessing.Pool(processes=N)
    log.info("init " + str(N) + " workers")
    for i in range(N):
        pool.apply_async(density_cluster.multi_processing_cluster,
                         (N, i, threshold, id, data))
        # d = numpy.concatenate([c, c], axis=0)
    pool.close()
    pool.join()
    log.debug(threshold)
    if not os.path.exists(Properties.getDefaultDataFold() + "/csv"):
        #f=open(Properties.getDefaultDataFold()+"/csv/threshold.csv","w")
        #f.close()
        os.makedirs(Properties.getDefaultDataFold() + "/csv")
    threshold.to_csv(Properties.getDefaultDataFold() + "/csv/threshold.csv")
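
Note: in Example #2 the `threshold` DataFrame handed to `pool.apply_async` is pickled into each worker process, so rows appended inside a worker never reach the parent, and the AsyncResult handles are discarded. A minimal corrected sketch, assuming `density_cluster.multi_processing_cluster` is changed to return its partial DataFrame:

import multiprocessing
import pandas
from pandas import DataFrame

def get_threshold_collected(id, data, N=20):
    from cluster import density_cluster
    pool = multiprocessing.Pool(processes=N)
    # keep the AsyncResult handles so the partial frames can be fetched later
    handles = [
        pool.apply_async(density_cluster.multi_processing_cluster,
                         (N, i, DataFrame([], columns=['H', 'd_c', 'cluster']), id, data))
        for i in range(N)
    ]
    pool.close()
    pool.join()
    # concatenate the per-worker frames back in the parent process
    return pandas.concat([h.get() for h in handles], ignore_index=True)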
Example #3
def save(name='default'):
    """
    保存id和data数据
    :return:
    """
    from context.resource_manager import Properties
    from pandas import DataFrame, Series
    path = Properties.getXmlLocation() + name + ".xml"
    from xml.dom.minidom import parse, parseString
    images = parse(path)
    id = []
    data = []
    for node in images.getElementsByTagName("Image"):
        idNode = node.getElementsByTagName("id")[0].childNodes[0].data
        id.append(idNode)
        dataNode = node.getElementsByTagName("data")[0].childNodes[0].data
        dataNode = dataNode[1:-1].split(',')
        data.append(dataNode)
    id = np.asarray(id)
    id = Series(id)
    data = np.asarray(list(map(conv, data)), dtype=float)  # np.float was removed from NumPy
    if not os.path.exists(Properties.getDefaultDataFold() + "/cache/" + name):
        #f=open(Properties.getDefaultDataFold()+"/csv/threshold.csv","w")
        #f.close()
        os.makedirs(Properties.getDefaultDataFold() + "/cache/" + name)
    np.save(Properties.getRootPath() + "/data/cache/" + name + "/id.npy", id)
    np.save(Properties.getRootPath() + "/data/cache/" + name + "/data.npy",
            data)
Example #4
def get_threshold(name='default'):
    """
    计算信息熵
    :return:
    """
    from context import resource_manager
    from context.resource_manager import Properties
    from view import shape_view
    from view import plot_utils
    from cluster import density_cluster_demo
    id = np.load(Properties.getRootPath() + "/data/cache/" + name + "/id.npy")
    data = np.load(Properties.getRootPath() + "/data/cache/" + name +
                   "/data.npy")
    #image_size= round(math.sqrt(float(data[0].shape[0])))
    #plot_utils.plot_image( data[551], w, w)
    # data=density_cluster_demo.binary_array(data)
    # shape_view.pandas_view_record((data))
    threshold = density_cluster_demo.cluster(id, data, dataset=name)
    if not os.path.exists(Properties.getDefaultDataFold() + "/csv/" + name +
                          "/" + resource_manager.Properties.name_str_static()):
        #f=open(Properties.getDefaultDataFold()+"/csv/threshold.csv","w")
        #f.close()
        os.makedirs(Properties.getDefaultDataFold() + "/csv/" + name + "/" +
                    resource_manager.Properties.name_str_static())
    threshold.to_csv(Properties.getDefaultDataFold() + "/csv/" + name + "/" +
                     resource_manager.Properties.name_str_static() +
                     "/threshold.csv")
Example #5
def get_data_from_xml(path=Properties.getImageXmlResource()):
    """
    解析xml文件,获得对应的id,data,作为运算的基础
    :param path:
    :return: list, NumpArray  用于显示控制与用于计算
    list is used to make the index to location the value
    """
    log.info("starting running compute_distance_from_xml function.")
    from context.resource_manager import Properties
    from pandas import DataFrame, Series
    path = os.path.join(Properties.getRootPath(), path)  # honor the path argument instead of discarding it
    from xml.dom.minidom import parse, parseString
    images = parse(path)
    id = []
    data = []
    for node in images.getElementsByTagName("Image"):
        idNode = node.getElementsByTagName("id")[0].childNodes[0].data
        id.append(idNode)
        dataNode = node.getElementsByTagName("data")[0].childNodes[0].data
        dataNode = dataNode[1:-1].split(',')
        data.append(dataNode)
    id = np.asarray(id).tolist()
    data = np.asarray(list(map(_conv, data)), dtype=float)  # np.float was removed from NumPy
    return id, data
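
The `conv` / `_conv` helper referenced in these examples is not shown anywhere on this page. A plausible minimal sketch, assuming it only converts the comma-split string tokens of an XML <data> node into floats:

def _conv(tokens):
    # hypothetical reconstruction: numeric strings -> floats, ready for np.asarray
    return [float(t) for t in tokens]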
Example #6
 def __init__(self,type):
     self.configParse = config_parser_extender.CapitalCaseConfigParser()
     self.type = type
     if type == 'D':
         self.configParse.read(Properties.getRootPath()+'conf' + resource_manager.getSeparator() + 'directory.ini')
         # self.f = open(Properties.getRootPath()+'conf' + resource_manager.getSeparator() + 'directory.ini', 'w+')
     elif type == 'F':
         self.configParse.read(Properties.getRootPath()+'conf' + resource_manager.getSeparator() + 'document.ini')
Example #7
def save_dataframe_csv(threshold=DataFrame(), name="default", relative=True):
    if not os.path.exists(Properties.getDefaultDataFold() + "/csv/" + name +
                          "/" + resource_manager.Properties.name_str_static()):
        #f=open(Properties.getDefaultDataFold()+"/csv/threshold.csv","w")
        #f.close()
        os.makedirs(Properties.getDefaultDataFold() + "/csv/" + name + "/" +
                    resource_manager.Properties.name_str_static())
    threshold.to_csv(Properties.getDefaultDataFold() + "/csv/" + name + "/" +
                     resource_manager.Properties.name_str_static() +
                     "/threshold.csv")
Example #8
 def save(self):
     if self.type == 'D':
         with open(Properties.getRootPath() + 'conf' + resource_manager.getSeparator() + 'directory.ini', 'w+') as f:
             self.configParse.write(f)
         # self.f = open(Properties.getRootPath()+'conf' + resource_manager.getSeparator() + 'directory.ini', 'w+')
     elif self.type == 'F':
         with open(Properties.getRootPath() + 'conf' + resource_manager.getSeparator() + 'document.ini', 'w+') as f:
             self.configParse.write(f)
Example #9
def compute_distance(data=np.array([])):
    """
    :param data:
    :return:
    the numpy array 按行进行查看
    """
    if data.size <= 0:
        # report the failure before raising, so the message is actually logged
        log.critical(
            "cluster needs the data, and there is no data in the numpy array collection."
        )
        raise Exception(
            "compute_distance received an empty data array")
    else:
        log.info("start running compute distance. data count: " +
                 str(data.shape[0]) + " points to compute.")
    row = data.shape[0]
    result = np.zeros((row, row))
    for i in range(row):
        # the matrix is symmetric, so each pair is computed once and mirrored
        for j in range(i + 1, row):
            k = compute_point_distance(data[i], data[j])
            result[i][j] = k
            result[j][i] = k
        result[i][i] = np.nan
    np.save(Properties.getRootPath() + "/data/cache/distance/data.npy", result)
    return result
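
For comparison, the same symmetric distance matrix can be computed without the double Python loop by broadcasting, assuming `compute_point_distance` is the Euclidean distance. A minimal sketch:

import numpy as np

def compute_distance_vectorized(data):
    # pairwise Euclidean distances via ||a-b||^2 = ||a||^2 + ||b||^2 - 2 a.b
    sq = np.sum(data ** 2, axis=1)
    d2 = sq[:, None] + sq[None, :] - 2.0 * (data @ data.T)
    result = np.sqrt(np.maximum(d2, 0.0))  # clamp tiny negative rounding errors
    np.fill_diagonal(result, np.nan)       # match the loop version's diagonal
    return result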
Example #10
 def __init__(self, dataset):
     self.dataset = dataset
     try:
         self.recorder_csv = pandas.read_csv(
             Properties.getDefaultDataFold() + "/csv/recorder_csv_" + self.dataset + ".csv", index_col=0)
     except Exception:
         # no recorder CSV yet: start from an empty frame
         self.recorder_csv = DataFrame([], columns=['id', 'start', 'end', 'd_c', 'max_distance_c', 'dataset',
                                                    'pile_size', 'H', 'note'])
Example #11
def save():
    from context.resource_manager import Properties
    from pandas import DataFrame, Series
    path = os.path.join(Properties.getRootPath(), Properties.getImageXmlResource())
    from xml.dom.minidom import parse, parseString
    images = parse(path)
    id = []
    data = []
    for node in images.getElementsByTagName("Image"):
        idNode = node.getElementsByTagName("id")[0].childNodes[0].data
        id.append(idNode)
        dataNode = node.getElementsByTagName("data")[0].childNodes[0].data
        dataNode = dataNode[1:-1].split(',')
        data.append(dataNode)
    id = np.asarray(id)
    id = Series(id)
    data = np.asarray(list(map(conv, data)), dtype=float)  # np.float was removed from NumPy
    np.save(Properties.getRootPath() + "/data/cache/id.npy", id)
    np.save(Properties.getRootPath() + "/data/cache/data.npy", data)
Example #12
    def loadCacheData(self, name='default', tag="Image", relative=True):
        """
        返回xml数据
        :param name:
        :param tag:
        :param relative:
        :return:
        """

        if relative:
            path = Properties.getRootPath() + "/data/cache/" + name
        else:
            path = name
        try:
            id = np.load(path + "/id.npy")
            data = np.load(path + "/data.npy")
        except (IOError, OSError):
            # cache miss: rebuild from the XML source
            return self.getXmlData(name=name, tag=tag, save=True)
        return id, data
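
A short usage sketch of this cache-first loader; the owning class name `XmlDataLoader` is assumed, since only its methods appear in these examples:

# hypothetical usage: try the .npy cache first, fall back to parsing the XML
loader = XmlDataLoader()  # assumed name of the class that owns loadCacheData
id, data = loader.loadCacheData(name='flame', tag='Image')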
Example #13
def record_expriment(name='path', save_name='default'):
    from context.resource_manager import Properties
    from context import resource_manager
    from view import shape_view
    record_img_path = resource_manager.Properties.getDefaultDataFold() + "result/temp/" + name + "/" + save_name + "/"
    record_csv_path = Properties.getDefaultDataFold() + "/csv/" + name + "/" + save_name + "/"
    path = resource_manager.Properties.getDefaultDataFold() + "/result/" + name + "/" + save_name

    if not os.path.exists(record_csv_path):
        # shutil.rmtree(resource_manager.Properties.getDefaultDataFold()+"result/temp/"+save_name+ "/")
        os.makedirs(record_csv_path)
    threshold = pandas.read_csv(record_csv_path + "threshold.csv")

    save_plot(name, threshold, save_name)

    log.debug(threshold['cluster'].sort_values(ascending=False))
    shutil.copytree(record_img_path, path)
    shutil.copy(record_csv_path + "threshold.csv", path)
    log.warn("finished")
Example #14
 def setValue(self, row, columns, value):
     # DataFrame.set_value was removed in pandas 1.0; .at is the direct replacement
     self.recorder_csv.at[row, columns] = value
     self.recorder_csv.at[row, 'end'] = Properties.name_str_FULL()
Example #15
def save_show_cluster(index_id, data, distance_c, pile_id, dataset="/", level="INFO", level_info='scatter figure'):
    from view import plot_utils
    from context import resource_manager
    path = resource_manager.Properties.getDefaultDataFold() + "result" + resource_manager.getSeparator() + "temp/" + dataset + "/" + resource_manager.Properties.name_str_static() + "/"

    level_path = resource_manager.Properties.getDefaultDataFold() + "result" + resource_manager.getSeparator() + "temp/" + level + "/" + resource_manager.Properties.name_str_static() + "/" + str(
        distance_c) + "/"

    if not os.path.exists(path[:path.rfind('/')]):
        os.makedirs(path[:path.rfind('/')])
    if not os.path.exists(level_path[:level_path.rfind('/')]):
        os.makedirs(level_path[:level_path.rfind('/')])

    pile_id = pile_id.sort_values('size', ascending=False)
    x = []
    y = []
    label = []
    i = 1
    for m in range(len(pile_id)):
        # l=pile_id.irow(m)['pile']
        l = pile_id.iloc[m]['pile']
        # size=pile_id.irow(m)['size']
        size = pile_id.iloc[m]['size']

        if not pile_id.loc[m, 'outlier']:
            for node in l:
                index = index_id[node]
                x.append(data[index][0])
                y.append(data[index][1])
                label.append(i)
            i = i + 1
        else:
            for node in l:
                index = index_id[node]
                x.append(data[index][0])
                y.append(data[index][1])
                label.append(0)
    if level is "SEE":
        plot_utils.plot_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title=level_info, label=label)
    if level is "DEBUG":
        # plot_utils.save_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title='scatter figure', label=label,path=level_path+resource_manager.Properties.name_str_FULL()+".png")

        plot_utils.save_all_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title=level_info, label=label,
                                            path=level_path + resource_manager.Properties.name_str_FULL() + ".png")
    else:
        plot_utils.save_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title='scatter figure', label=label,
                                        path=path + str(
                                            distance_c) + ".png")
        plot_utils.save_all_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title='scatter figure',
                                            label=label,
                                            path=path + str(
                                                distance_c) + ".png")

    # plot_utils.plot_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title='scatter figure', label=label)
    log.debug(("\n") + str(pile_id))
    try:
        p = Properties.getDefaultDataFold() + "/csv/" + dataset + "/" + resource_manager.Properties.name_str_static() + "/" + str(
            distance_c) + ".csv"
        pile_id.to_csv(p)
    except (IOError, OSError):
        # parent directory missing: create it, then retry the write
        os.makedirs(p[:p.rfind('/')], exist_ok=True)
        pile_id.to_csv(p)
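
The exists()-then-makedirs() pattern used throughout these examples has a race window between the check and the creation; since Python 3.2 the same effect is available atomically:

import os

# race-free equivalent of the exists()/makedirs() pairs above
os.makedirs("data/result/temp", exist_ok=True)  # path shown is only illustrative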
Example #16
def load_data():
    id = np.load(Properties.getRootPath() + "/data/cache/flame/id.npy")
    data = np.load(Properties.getRootPath() + "/data/cache/flame/data.npy")

    return id, data
Example #17
def ent_dc_step_by_step(id_index,
                        index_id,
                        data,
                        threshold,
                        distance,
                        distance_c,
                        dataset='none'):
    """
    :param id_index:
    :param index_id:
    :param data:
    :param threshold:
    :param distance:
    :param distance_c:
    :return:
    """
    # TODO modify the clustering without outlier
    i = 0
    N = int(index_id.shape[0])
    # next_distance_c=get_next_distance_c(distance,distance_c)
    max_distance_c = max_distance(distance, distance_c)
    learning_rate = 0
    gradient = 0.00001
    jarge_now = 0
    jarge_pre = 5
    pre = 65535

    # step size: standard deviation of each point's nearest-neighbour distance
    temp = distance.copy()
    temp[np.isnan(temp)] = 0
    stand = np.std(temp)
    temp = distance.copy()
    temp[np.isnan(temp)] = stand
    temp = temp.min(axis=0)
    next_distance_c = np.std(temp)

    clusterRecorder = ClusterRecorder(dataset)
    cr_i = str(Properties.name_str_static() + "#" + str(i))

    #DataFrame([], columns=['id', 'start', 'end', 'd_c', 'max_distance_c', 'dataset', 'pile_size', 'H','note'])

    clusterRecorder.setValue(str(cr_i), 'id', Properties.name_str_static())
    clusterRecorder.setValue(str(cr_i), 'start', Properties.name_str_HMS())
    clusterRecorder.setValue(str(cr_i), 'd_c', distance_c)
    clusterRecorder.setValue(str(cr_i), 'max_distance_c', max_distance_c)
    clusterRecorder.setValue(str(cr_i), 'dataset', dataset)
    clusterRecorder.setValue(str(cr_i), 'pile_size', N)
    clusterRecorder.setValue(str(cr_i), 'H', 65535)
    clusterRecorder.setValue(str(cr_i), 'note', 'total running time of the whole algorithm')
    if learning_rate != 0:
        distance_c = distance_c + learning_rate
    log.debug("init the first max distance_c:" + str(max_distance_c) +
              " distance shape:" + str(distance.shape))
    start_time = Properties.name_str_HMS()

    while max_distance_c >= distance_c:
        i = i + 1
        last_time = Properties.name_str_HMS()
        # pile = 0
        # record each pile's member elements and the pile's member count
        pile_id = DataFrame([], columns=['p_id', 'pile', 'size', 'outlier'])
        # delta_id, data_id = delta_function(id_index, index_id, rho_id, distance)
        # data = DataFrame([], columns=['gamma', 'rho', 'delta', 'pile'], index=index_id.index)
        data_id = DataFrame([],
                            columns=[
                                'i_id', 'j_id', 'rho', 'delta', 'gamma', 'i',
                                'j', 'pile'
                            ],
                            index=id_index.values)
        pile_id = pile_function(pile_id, id_index, index_id, data_id, distance,
                                distance_c, next_distance_c)
        pile_size = pile_id['size']
        pile = pile_id.shape[0] - np.sum(pile_id['outlier'])
        # id_index, index_id
        e = _calc_ent(pile_size.values / N)
        merge = list([e, distance_c, pile])
        threshold = add_row(threshold, merge)
        jarge_now = pre - e
        # if jarge_now > jarge_pre:
        if jarge_now > 0:
            cr_j = str(Properties.name_str_static() + "#" + str(i))
            #DataFrame([], columns=['id', 'start', 'end', 'd_c', 'max_distanc', 'dataset', 'pile_size', 'H','note'])
            clusterRecorder.setValue(str(cr_j), 'id',
                                     Properties.name_str_static())
            clusterRecorder.setValue(str(cr_j), 'start', last_time)
            clusterRecorder.setValue(str(cr_j), 'd_c', distance_c)
            clusterRecorder.setValue(str(cr_j), 'max_distance_c',
                                     max_distance_c)
            clusterRecorder.setValue(str(cr_j), 'dataset', dataset)
            clusterRecorder.setValue(str(cr_j), 'pile_size', len(pile_id))
            clusterRecorder.setValue(str(cr_j), 'H', e)
            clusterRecorder.setValue(str(cr_j), 'note', 'new entropy decrease detected')
            clusterRecorder.setValue(str(cr_j), 'end',
                                     Properties.name_str_HMS())

            save_show_cluster(index_id, data, distance_c, pile_id)

            cr_j = str(Properties.name_str_static() + "#" + str(i))
            #DataFrame([], columns=['id', 'start', 'end', 'd_c', 'max_distanc', 'dataset', 'pile_size', 'H','note'])
            clusterRecorder.setValue(str(cr_j), 'id',
                                     Properties.name_str_static())
            clusterRecorder.setValue(str(cr_j), 'start', start_time)
            clusterRecorder.setValue(str(cr_j), 'd_c', distance_c)
            clusterRecorder.setValue(str(cr_j), 'max_distance_c',
                                     max_distance_c)
            clusterRecorder.setValue(str(cr_j), 'dataset', dataset)
            clusterRecorder.setValue(str(cr_j), 'pile_size', len(pile_id))
            clusterRecorder.setValue(str(cr_j), 'H', e)
            clusterRecorder.setValue(str(cr_j), 'note', 'new entropy decrease detected')
            clusterRecorder.setValue(str(cr_j), 'end',
                                     Properties.name_str_HMS())
            save_show_cluster(index_id, data, distance_c, pile_id)
            start_time = Properties.name_str_HMS()

        pre = e
        # jarge_now = jarge_now + 1
        # jarge_pre = jarge_now
        # next_distance_c = get_next_distance_c(distance, distance_c)
        # next_distance_c = 0
        distance_c = distance_c + next_distance_c

        # if gradient==0.00005:
        #
        #     distance_c = distance_c + gradient
        #     gradient = gradient + 0.00001
        # elif learning_rate != 0:
        #     distance_c = distance_c + learning_rate
        #     gradient = 0.00001
        # else:
        #     distance_c = distance_c + next_distance_c
        #     gradient = 0.00001

        # distance_c = distance_c + learning_rate
        if e == 0:
            log.debug("e is 0.")
            break

        log.info(
            str(i) + " time, finished the next_distance_c about: " +
            str(next_distance_c) + " distance_c:" + str(distance_c) +
            " next-learning_rate:" + str(learning_rate) + " H:" + str(e))
    clusterRecorder.setValue(str(cr_i), 'end', Properties.name_str_HMS())
    clusterRecorder.save()
    log.debug(threshold)
    return threshold
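
`_calc_ent` does not appear among these examples; since it is applied to the vector of pile-size proportions, a minimal Shannon-entropy sketch (an assumption, not the project's verified helper) would be:

import numpy as np

def _calc_ent(p):
    # Shannon entropy H = -sum(p_i * log2 p_i) over the nonzero proportions
    p = np.asarray(p, dtype=float)
    p = p[p > 0]
    return float(-np.sum(p * np.log2(p)))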
Example #18
    rho_id=Series(rho,index=id)
    log.critical(rho)
    """
    from cluster import density_cluster
    from pandas import Series, DataFrame
    from context.resource_manager import Properties
    from view import shape_view

    name = 'path'
    distance_c = 12.3972318748
    m = '3_44'
    pile = 0
    id = np.load(Properties.getRootPath() + "/data/cache/" + name + "/id.npy")
    data = np.load(Properties.getRootPath() + "/data/cache/" + name + "/data.npy")
    id_index = Series(id.tolist())
    index_id = Series(id_index.index, index=id_index.values)
    distance = density_cluster.compute_distance(data)
    pile_id = DataFrame([], columns=['pile', 'size'])
    rho_id = density_cluster.rho_function(index_id, distance, distance_c=distance_c)
    rho_id = Series(rho_id, index=index_id.index)
    rho_id = rho_id.sort_values(ascending=False)
    #delta_id, data_id = density_cluster.delta_function(id_index, index_id, rho_id, distance)
    log.debug(rho_id)
    pile = ['3_44']
    pile_max = 14
    pile = density_cluster.pile_brother(index_id, id_index, distance, distance_c, pile, pile_max)
    log.debug("pile info:")
Example #19
def save_show_cluster(index_id, data, distance_c, pile_id):
    from view import plot_utils
    from context import resource_manager
    base = (resource_manager.Properties.getDefaultDataFold() + "result" +
            resource_manager.getSeparator() + "temp" +
            resource_manager.getSeparator() +
            resource_manager.Properties.name_str_static() +
            resource_manager.getSeparator())
    path = base + str(distance_c) + ".png"
    if not os.path.exists(base):
        os.makedirs(base)
    pile_id = pile_id.sort_values('size', ascending=False)
    x = []
    y = []
    label = []
    i = 1
    for m in range(len(pile_id)):
        # l=pile_id.irow(m)['pile']
        l = pile_id.iloc[m]['pile']
        # size=pile_id.irow(m)['size']
        size = pile_id.iloc[m]['size']
        if size >= 1 and i < 15:
            for node in l:
                index = index_id[node]
                x.append(data[index][0])
                y.append(data[index][1])
                label.append(i)
            i = i + 1
        else:
            for node in l:
                index = index_id[node]
                x.append(data[index][0])
                y.append(data[index][1])
                label.append(0)
    plot_utils.save_scatter_diagram(None,
                                    x=x,
                                    y=y,
                                    x_label='x',
                                    y_label='y',
                                    title='scatter figure',
                                    label=label,
                                    path=path)
    # plot_utils.plot_scatter_diagram(None, x=x, y=y, x_label='x', y_label='y', title='scatter figure', label=label)
    log.debug(pile_id)
    try:
        p = (Properties.getDefaultDataFold() + "/csv/" +
             resource_manager.Properties.name_str_static() + "/" +
             str(distance_c) + ".csv")
        pile_id.to_csv(p)
    except (IOError, OSError):
        # parent directory missing: create it, then retry the write
        os.makedirs(p[:p.rfind('/')], exist_ok=True)
        pile_id.to_csv(p)
Example #20
 def reload(self):
     if self.type == 'D':
         self.configParse.read(Properties.getRootPath() + 'conf' + resource_manager.getSeparator() + 'directory.ini')
         # self.f = open(Properties.getRootPath()+'conf' + resource_manager.getSeparator() + 'directory.ini', 'w+')
     elif self.type == 'F':
         self.configParse.read(Properties.getRootPath() + 'conf' + resource_manager.getSeparator() + 'document.ini')
Example #21
 def save(self):
     self.recorder_csv.to_csv(Properties.getDefaultDataFold() + "/csv/recorder_csv_" + self.dataset + ".csv")
Example #22
    def removeConfigFiles(cls):
        if os.path.isfile(Properties.getRootPath()+'conf' + resource_manager.getSeparator() + 'directory.ini'):
            os.remove(Properties.getRootPath()+'conf' + resource_manager.getSeparator() + 'directory.ini')

        if os.path.isfile(Properties.getRootPath()+'conf' + resource_manager.getSeparator() + 'document.ini'):
            os.remove(Properties.getRootPath()+'conf' + resource_manager.getSeparator() + 'document.ini')
Example #23
def ent_dc_step_by_step(id_index, index_id, data, threshold, distance, distance_c, dataset='none'):
    """
    :param id_index:
    :param index_id:
    :param data:
    :param threshold:
    :param distance:
    :param distance_c:
    :return:
    """
    # TODO modify the clustering without outlier
    i = 0
    level = "INFO"
    N = int(index_id.shape[0])
    # next_distance_c=get_next_distance_c(distance,distance_c)
    max_distance_c = max_distance(distance, distance_c)
    learning_rate = 0
    gradient = 0.00001
    jarge_now = 0
    jarge_pre = 5
    pre = 65535


    # step size: standard deviation of each point's nearest-neighbour distance
    temp = distance.copy()
    # cache=temp.ravel()

    # percent = 0.2
    # position = int(index_id.shape[0] * (index_id.shape[0] + 1) / 2 * percent / 100)
    # log.debug("init the first max distance_c:" + str(max_distance_c) + " distance shape:" + str(distance.shape)+" start:"+str(sorted(cache)[position * 2 + index_id.shape[0]]))
    # distance_c = sorted(cache)[position * 2 + index_id.shape[0]]

    temp[np.isnan(temp)] = 0
    stand = np.std(temp)
    temp = distance.copy()

    temp[np.isnan(temp)] = stand

    temp = temp.min(axis=0)
    next_distance_c = np.std(temp)

    clusterRecorder = ClusterRecorder(dataset)
    cr_i = str(Properties.name_str_static() + "#" + str(i))

    # DataFrame([], columns=['id', 'start', 'end', 'd_c', 'max_distance_c', 'dataset', 'pile_size', 'H','note'])

    clusterRecorder.setValue(str(cr_i), 'id', Properties.name_str_static())
    clusterRecorder.setValue(str(cr_i), 'start', Properties.name_str_HMS())
    clusterRecorder.setValue(str(cr_i), 'd_c', distance_c)
    clusterRecorder.setValue(str(cr_i), 'max_distance_c', max_distance_c)
    clusterRecorder.setValue(str(cr_i), 'dataset', dataset)
    clusterRecorder.setValue(str(cr_i), 'pile_size', N)
    clusterRecorder.setValue(str(cr_i), 'H', 65535)
    clusterRecorder.setValue(str(cr_i), 'note', 'total running time of the whole algorithm')
    if learning_rate != 0:
        distance_c = distance_c + learning_rate

    start_time = Properties.name_str_HMS()

    while max_distance_c >= distance_c:
        i = i + 1
        last_time = Properties.name_str_HMS()
        # pile = 0
        # record each pile's member elements and the pile's member count
        pile_id = DataFrame([], columns=['p_id', 'pile', 'size', 'outlier'])
        # delta_id, data_id = delta_function(id_index, index_id, rho_id, distance)
        # data = DataFrame([], columns=['gamma', 'rho', 'delta', 'pile'], index=index_id.index)
        data_id = DataFrame([], columns=['i_id', 'j_id', 'rho', 'delta', 'gamma', 'i', 'j', 'pile'],
                            index=id_index.values)
        pile_id = pile_function(pile_id, id_index, index_id, data, distance, distance_c, next_distance_c, dataset)
        pile_size = pile_id['size']
        pile = pile_id.shape[0] - np.sum(pile_id['outlier'])
        # id_index, index_id

        e = []
        e_outlier = 0
        # log.fatal(pile_size.values)

        pile_id = pile_id.sort_values('size', ascending=False)
        pile_id = pile_id.reset_index(drop=True)
        # use a dedicated loop variable: `i` is the outer while-loop counter
        for k in range(len(pile_id)):
            if not pile_id.loc[k, 'outlier']:
                e.append(pile_id.loc[k, 'size'])
            else:
                e_outlier += pile_id.loc[k, 'size']
        # all outliers count as a single pile, appended once after the loop
        if e_outlier > 0:
            e.append(e_outlier)

        ee = np.array(e)
        e = _calc_ent(ee / N)
        merge = list([e, distance_c, pile])
        threshold = add_row(threshold, merge)
        jarge_now = pre - e
        # if jarge_now > jarge_pre:
        if jarge_now > 0:
            cr_j = str(Properties.name_str_static() + "#" + str(i))
            # DataFrame([], columns=['id', 'start', 'end', 'd_c', 'max_distanc', 'dataset', 'pile_size', 'H','note'])
            clusterRecorder.setValue(str(cr_j), 'id', Properties.name_str_static())
            clusterRecorder.setValue(str(cr_j), 'start', last_time)
            clusterRecorder.setValue(str(cr_j), 'd_c', distance_c)
            clusterRecorder.setValue(str(cr_j), 'max_distance_c', max_distance_c)
            clusterRecorder.setValue(str(cr_j), 'dataset', dataset)
            clusterRecorder.setValue(str(cr_j), 'pile_size', len(pile_id))
            clusterRecorder.setValue(str(cr_j), 'H', e)
            clusterRecorder.setValue(str(cr_j), 'note', 'new entropy decrease detected')
            clusterRecorder.setValue(str(cr_j), 'end', Properties.name_str_HMS())

            save_show_cluster(index_id, data, distance_c, pile_id, dataset)

            cr_j = str(Properties.name_str_static() + "#" + str(i))
            # DataFrame([], columns=['id', 'start', 'end', 'd_c', 'max_distanc', 'dataset', 'pile_size', 'H','note'])
            clusterRecorder.setValue(str(cr_j), 'id', Properties.name_str_static())
            clusterRecorder.setValue(str(cr_j), 'start', start_time)
            clusterRecorder.setValue(str(cr_j), 'd_c', distance_c)
            clusterRecorder.setValue(str(cr_j), 'max_distance_c', max_distance_c)
            clusterRecorder.setValue(str(cr_j), 'dataset', dataset)
            clusterRecorder.setValue(str(cr_j), 'pile_size', len(pile_id))
            clusterRecorder.setValue(str(cr_j), 'H', e)
            clusterRecorder.setValue(str(cr_j), 'note', 'new entropy decrease detected')
            clusterRecorder.setValue(str(cr_j), 'end', Properties.name_str_HMS())
            save_show_cluster(index_id, data, distance_c, pile_id, dataset)
            start_time = Properties.name_str_HMS()
            clusterRecorder.save()
            if level is "DEBUG":
                pile_id = pile_function(pile_id, id_index, index_id, data, distance, distance_c - next_distance_c,
                                        next_distance_c, dataset, level="DEBUG")
                save_show_cluster(index_id, data, distance_c, pile_id, dataset, level="DEBUG")



        pre = e
        # jarge_now = jarge_now + 1
        # jarge_pre = jarge_now
        # next_distance_c = get_next_distance_c(distance, distance_c)
        # next_distance_c = 0


        distance_c = distance_c + next_distance_c

        # if gradient==0.00005:
        #
        #     distance_c = distance_c + gradient
        #     gradient = gradient + 0.00001
        # elif learning_rate != 0:
        #     distance_c = distance_c + learning_rate
        #     gradient = 0.00001
        # else:
        #     distance_c = distance_c + next_distance_c
        #     gradient = 0.00001

        # distance_c = distance_c + learning_rate
        if e == 0:
            log.debug("e is 0.")
            break

        log.info(
            str(i) + " time, finished the next_distance_c about: " + str(next_distance_c) + " distance_c:" + str(
                distance_c) + " next-learning_rate:" + str(learning_rate) + " H:" + str(e))
    clusterRecorder.setValue(str(cr_i), 'end', Properties.name_str_HMS())
    clusterRecorder.save()
    log.debug(threshold)
    return threshold
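
`add_row` is likewise not shown; judging from how `merge = [e, distance_c, pile]` mirrors the `['H', 'd_c', 'cluster']` columns, a minimal sketch could be:

def add_row(threshold, merge):
    # append one [H, d_c, cluster] record; loc-assignment works on a RangeIndex
    threshold.loc[len(threshold)] = merge
    return threshold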
Example #24
    pool.join()
    log.debug(threshold)
    if not os.path.exists(Properties.getDefaultDataFold() + "/csv"):
        #f=open(Properties.getDefaultDataFold()+"/csv/threshold.csv","w")
        #f.close()
        os.makedirs(Properties.getDefaultDataFold() + "/csv")
    threshold.to_csv(Properties.getDefaultDataFold() + "/csv/threshold.csv")


if __name__ == '__main__':
    get_threshold()
    from context.resource_manager import Properties
    from view import shape_view
    from view import plot_utils
    from cluster import density_cluster
    threshold = pandas.read_csv(Properties.getDefaultDataFold() +
                                "/csv/threshold.csv")
    d_c = np.asarray(threshold['d_c'].values)
    log.debug(d_c)
    log.critical(type(d_c))
    plot_utils.plot_scatter_diagram(None,
                                    x=d_c,
                                    y=threshold['H'].values,
                                    x_label='delta',
                                    y_label='H',
                                    title='threshold scatter figure')
    """
    delta_index=Series(id,index=id,dtype=np.float)

    i=0
    order_id=Series(result[:,0],index=id_index.values)