Пример #1
0
def load_data():
    """Build the list of boll samples for the currently selected code.

    The code comes from ``MyCode.get()`` and its data bundle from
    ``jbs.getData`` (the original author notes this is read locally).
    A bundle whose frame has fewer than 100 rows contributes nothing.

    return: list with one ``getBolls(index, bundle)`` result for every
        index in ``range(100, len(frame) - jbs.g_scope_len)``
    """
    samples = []
    for symbol in [MyCode.get()]:
        bundle = jbs.getData(symbol)   # from local
        # The bundle must unpack into exactly five parts; only the frame
        # is needed here, the whole bundle is handed on to getBolls.
        _upper, _middle, _lower, frame, _adx = bundle
        row_count = len(frame)
        if row_count >= 100:
            stop = row_count - jbs.g_scope_len
            samples.extend(getBolls(pos, bundle) for pos in range(100, stop))
    return samples
Пример #2
0
def myhclust(datas, indexs):
    """Try hierarchical clustering on a contiguous window of samples.

    Original author's note: large sample sets make the computation too
    slow, so slices as large as 2000 cannot be used.

    datas: list or np.ndarray, the sample source
    indexs: ordered, consecutive indexes into ``datas``; shuffled
        indexes are not supported
    return: DataFrame with one row per cluster describing its center
        sample (columns: datas_index, code, dt, tick_period, clust_id,
        label_id, label_desc)
    """
    offset = indexs[0]
    print("num=%d, total_num = %d" % (len(indexs), len(datas)))

    # NOTE(review): the slice is half-open, so the sample at indexs[-1]
    # itself is not part of the window -- confirm this is intended.
    last = indexs[-1]
    window = datas[offset:last]
    positions = range(last - offset)
    assert(len(positions) == len(window))

    # Source data set the center positions are mapped back onto.
    code = MyCode.get()
    tick_period = jbs.g_scope_len
    df_datas = jbs.getData(code)[3]

    # Each feature is just the window-relative position wrapped in an array.
    features = [np.array([pos]) for pos in positions]

    # Original author's note: with 100 samples a factor of 0.2 works well.
    tree, distances = hcluster.hcluster(features, window, distfcn=distfn)
    clusters = tree.extract_clusters(0.2 * tree.distance)
    print('clusters num=', len(clusters))

    agl.removeDir('html/')

    rows = []        # one entry per cluster center
    for clust_id, cluster in enumerate(clusters):
        members = cluster.get_cluster_elements()
        # calc_center works on window-relative positions; shift the result
        # back by the offset before indexing the source frame.
        center_index = calc_center(members, distances) + offset
        dt = df_datas.index[center_index]
        # label_id and label_desc are placeholders: 0 and ''.
        rows.append([center_index, code, dt, tick_period, clust_id, 0, ''])
        print(clust_id, center_index, len(members), members)

    df = pd.DataFrame(
        rows,
        columns=['datas_index', 'code', 'dt', 'tick_period',
                 'clust_id', 'label_id', 'label_desc'],
    )
    print(df)
    return df
Пример #3
0
 def test():
     """Manual check: score the data of code '000001' and show the result."""
     quotes = getData('000001')
     boll_judge_score(quotes)._show()