예제 #1
0
def TestA(signalDate='2019-12-21',
          outputPath="c:/workspace/tmp/1224.xlsx"):
    """Merge signal, statistics, backtest and price data and export it to Excel.

    Loads the rows of the ``stock_signal`` collection named ``signalDate``
    whose ``操作`` field equals 1, then left-joins (on the index) the
    ``dvYears`` statistics, the ``all_dv1_digest`` backtest collection, the
    ``quarterSpeed`` statistics, three columns of ``stock_list`` and the frame
    returned by ``util.LastPriceNone``. A ``总市值(亿)`` column is added as
    ``price * 总股本(亿)`` and the result is written to ``outputPath``.

    Args:
        signalDate: Name of the ``stock_signal`` collection to read. Defaults
            to the value that used to be hard-coded.
        outputPath: Destination of the Excel export. Defaults to the path
            that used to be hard-coded.

    Returns:
        The merged DataFrame that was written to ``outputPath``.
    """
    df1 = util.LoadData('stock_signal',
                        signalDate,
                        condition={'操作': 1},
                        sort=[('百分比', -1)])
    # NOTE(review): 'stock_statistcs' looks like a misspelling of
    # 'stock_statistics'; it is kept because it has to match the name of the
    # real database - confirm.
    df2 = util.LoadData('stock_statistcs',
                        'dvYears',
                        condition={},
                        sort=[('百分比', -1)])

    mergeData = df1.join(df2, how='left', rsuffix='right')
    # The index labels are the stock codes; reading them from the index avoids
    # building a Series per row the way iterrows() does.
    codes = list(mergeData.index)

    df3 = util.LoadData2('stock_backtest', 'all_dv1_digest', codes)
    df4 = util.LoadData2('stock_statistcs', 'quarterSpeed', codes)
    df5 = util.LoadData2('stock', 'stock_list', codes)
    df6 = util.LastPriceNone(codes)
    df5 = df5[['所属行业', '地区', '总股本(亿)']]

    # Each extra frame gets its own suffix so clashing column names stay
    # distinguishable after the join.
    for suffix, extra in (('right2', df3), ('right3', df4), ('right4', df5),
                          ('right5', df6)):
        mergeData = mergeData.join(extra, how='left', rsuffix=suffix)

    # 'price' is expected to be supplied by one of the joined frames
    # (presumably util.LastPriceNone - confirm).
    mergeData['总市值(亿)'] = mergeData['price'] * mergeData['总股本(亿)']
    mergeData.to_excel(outputPath)
    return mergeData
예제 #2
0
    def Run(codes):
        """Summarise, per stock, how often the ``sjltz`` figure dropped below -10.

        For every entry of ``codes`` the ``yjbg2-<_id>`` collection of the
        ``stock`` database is loaded sorted by ``_id`` ascending. Rows whose
        ``sjltz`` value converts to NaN are ignored. For the remaining rows
        the function tracks the first and last report date, the number of
        rows and the number of rows with a value below -10, both over all
        rows and over the rows dated 2010 or later. The resulting summaries
        are saved to ``stock_statistics2`` / ``dangerousQuarterRatio``.

        A failure while processing one stock is passed to
        ``util.PrintException`` and that stock is left out of the result.

        Args:
            codes: Iterable of mappings providing at least an ``'_id'`` key.
        """
        summaries = []
        for stock in codes:
            try:
                total = hits = 0
                total2010 = hits2010 = 0
                first = last = None
                first2010 = last2010 = None

                reports = util.LoadData('stock',
                                        'yjbg2-' + stock['_id'],
                                        condition={},
                                        sort=[('_id', 1)])
                for quarter, row in reports.iterrows():
                    reportDate = datetime.strptime(quarter, '%Y-%m-%d')
                    growth = util.String2Number(row['sjltz'])
                    if np.isnan(growth):
                        # No usable figure for this quarter.
                        continue

                    since2010 = reportDate.year >= 2010
                    isHit = growth < -10

                    if first is None:
                        first = reportDate
                    last = reportDate
                    total += 1
                    if isHit:
                        hits += 1

                    if since2010:
                        if first2010 is None:
                            first2010 = reportDate
                        last2010 = reportDate
                        total2010 += 1
                        if isHit:
                            hits2010 += 1

                # A stock without usable rows gets a ratio of 0.
                ratio = hits / total if total > 0 else 0
                ratio2010 = hits2010 / total2010 if total2010 > 0 else 0

                summaries.append({
                    '_id': stock['_id'],
                    'begin': first,
                    'end': last,
                    'base': total,
                    'hit': hits,
                    'percent': ratio,
                    'begin2010': first2010,
                    'end2010': last2010,
                    'base2010': total2010,
                    'hit2010': hits2010,
                    'percent2010': ratio2010
                })
            except Exception as e:
                util.PrintException(e)

        util.SaveMongoDB_DF(pd.DataFrame(summaries), 'stock_statistics2',
                            'dangerousQuarterRatio')
예제 #3
0
def mirs(args):
    """Load the stored data for ``args.dir`` and run the requested retriever output.

    The folder path is given a trailing ``/`` when it lacks one, the three
    values returned by ``util.LoadData`` are handed to a
    ``retriever.Retriever``, and then:

    * ``PrintTopo`` is called when ``args.topo`` is not ``None``;
    * ``PrintSelect`` is called when ``args.queryTokens`` differs from ``[]``.

    Args:
        args: Object exposing ``dir``, ``sortAlg``, ``amountToShow``,
            ``verbose``, ``topo``, ``regex``, ``regexneg`` and
            ``queryTokens`` (presumably an argparse namespace - confirm).
    """
    folder = args.dir
    if folder[-1] != "/":
        folder += "/"

    primInfo, aInfo, rmInfo = util.LoadData(folder, False)
    engine = retriever.Retriever(
        primInfo,
        args.sortAlg,
        args.amountToShow,
        args.verbose,
        aInfo,
        rmInfo,
    )

    if args.topo is not None:
        engine.PrintTopo(args.topo, args.regex, args.regexneg)

    if args.queryTokens != []:
        engine.PrintSelect(args.queryTokens)
예제 #4
0
 def __init__(self, parentFolder, fixedEncodeFilename, verbose, showToken):
     """Initialise the base class and cache the main index's file information.

     After delegating to the parent constructor, the data returned by
     ``util.LoadData(parentFolder, True)`` is read. When it is ``None`` a
     message is printed and the process exits with status 1. Otherwise
     ``self.mainFiles`` receives the ``util.LISTFILES`` entry and
     ``self.mainFilesModified`` the ``"modificado"`` value of the
     ``util.ENCODINGS`` entry at each position of ``self.mainFiles``.

     Args:
         parentFolder: Folder passed to the parent constructor and to
             ``util.LoadData``.
         fixedEncodeFilename: Forwarded unchanged to the parent constructor.
         verbose: Forwarded unchanged to the parent constructor.
         showToken: Forwarded unchanged to the parent constructor.

     Raises:
         SystemExit: With code 1 when no main index data could be loaded.
     """
     # Local import: the file's import block is not visible from this block.
     import sys

     super().__init__(parentFolder, fixedEncodeFilename, verbose, showToken)
     princ = util.LoadData(parentFolder, True)
     if princ is None:
         print("Indexamento principal nao encontrado. Abortando...")
         # sys.exit instead of the site-provided exit(): the latter is meant
         # for the interactive shell and is missing when site is not loaded.
         sys.exit(1)
     self.mainFiles = princ[util.LISTFILES]
     encs = princ[util.ENCODINGS]
     # Indexed by position so only as many entries as there are main files
     # are read from encs.
     self.mainFilesModified = [
         encs[i]["modificado"] for i in range(len(self.mainFiles))
     ]
예제 #5
0
def LastSignal2File(collectionName, path):
    """Write selected columns of a backtest collection to an Excel file.

    Only documents that have a ``priceFromDate`` field are loaded from the
    ``stock_backtest`` database, sorted by ``priceBuy`` ascending.

    Args:
        collectionName: Collection of ``stock_backtest`` to read.
        path: Destination passed to ``DataFrame.to_excel``.
    """
    # Roughly the Mongo shell query
    #   db.getCollection(<collectionName>)
    #     .find({'priceFromDate': {"$exists": true}}).sort({'priceBuy': 1})
    hasSignal = {'priceFromDate': {"$exists": True}}
    signals = util.LoadData('stock_backtest',
                            collectionName,
                            condition=hasSignal,
                            sort=[('priceBuy', 1)])

    exportColumns = [
        '_id',
        'name',
        'priceBuy',
        'priceFrom',
        'priceSell',
        'priceFromDate',
        'priceWhere',
    ]
    signals.loc[:, exportColumns].to_excel(path)
예제 #6
0
  def Show(codes):
    """Print the stored ``dangerousQuarterRatio`` rows of the given stocks.

    For each entry of ``codes`` the matching rows are loaded from
    ``stock_statistics2`` / ``dangerousQuarterRatio``, tagged with the
    entry's ``name`` and collected; the collected frames are printed as one
    table. A stock that fails to load has its exception printed and is
    skipped. When nothing could be loaded a short notice is printed instead
    of failing.

    Args:
      codes: Iterable of mappings providing ``'_id'`` and ``'name'`` keys.
    """
    # Local import: the file's import block is not visible from this block.
    import pandas as pd

    frames = []
    for one in codes:
      try:
        df = util.LoadData('stock_statistics2', 'dangerousQuarterRatio', condition={'_id': one['_id']})
        df['name'] = one['name']
        frames.append(df)
      except Exception as e:
        # Best effort: report the problem and carry on with the next stock.
        print(e)

    if not frames:
      # Indexing frames[0] here would raise IndexError.
      print('No data loaded.')
      return

    # DataFrame.append was removed in pandas 2.0; concat stacks the frames
    # in a single step.
    print(pd.concat(frames))
예제 #7
0
def CalcHoldTime(stockList, collectionName, toDBName, beginDate, endDate):
    """Count, per month-end date, which stocks were held according to a backtest.

    One bucket ``{'number': 0, 'stockList': set()}`` is created for every
    date of ``pd.date_range(beginDate, endDate, freq='M')``. The documents of
    ``stock_backtest`` / ``collectionName`` whose ``_id`` is among the ids of
    ``stockList`` are then loaded. For every pair in a document's
    ``holdStockDateVec``, each month-end date between the pair's two dates
    that has a bucket gets its ``number`` incremented and the document's
    ``name`` added to its ``stockList``. Finally the name sets are turned
    into lists and the whole mapping is saved with ``util.SaveMongoDBDict``
    to ``stock_hold`` / ``toDBName``.

    Args:
        stockList: Iterable of mappings providing an ``'_id'`` key.
        collectionName: Collection of ``stock_backtest`` to read.
        toDBName: Target name passed to ``util.SaveMongoDBDict``.
        beginDate: Start of the bucket range.
        endDate: End of the bucket range.
    """
    utcTZ = timezone('UTC')

    # Buckets are built straight from the date range; no scratch DataFrame is
    # needed just to enumerate the dates.
    # NOTE(review): newer pandas releases spell the month-end alias 'ME';
    # 'M' is kept for compatibility with the version this file targets.
    result = {
        date: {
            'number': 0,
            'stockList': set()
        }
        for date in pd.date_range(start=beginDate, end=endDate, freq='M')
    }

    codes = [one['_id'] for one in stockList]
    condition = {'_id': {'$in': codes}}
    df2 = util.LoadData('stock_backtest', collectionName, condition)
    for _, row in df2.iterrows():
        name = row['name']
        for datePair in row['holdStockDateVec']:
            # Truncate both ends to whole days and make them UTC-aware.
            # Distinct local names keep the endDate parameter intact.
            holdStart = datetime(datePair[0].year,
                                 datePair[0].month,
                                 datePair[0].day,
                                 tzinfo=utcTZ)
            holdEnd = datetime(datePair[1].year,
                               datePair[1].month,
                               datePair[1].day,
                               tzinfo=utcTZ)
            # Progress output kept from the original implementation.
            print(holdStart)
            heldMonths = pd.date_range(start=holdStart, end=holdEnd, freq='M')
            print(heldMonths)
            # NOTE(review): these timestamps are UTC-aware, so they can only
            # match a bucket key when beginDate/endDate yield UTC-aware
            # timestamps as well - confirm against the callers.
            for oneDate in heldMonths:
                if oneDate in result:
                    result[oneDate]['number'] += 1
                    result[oneDate]['stockList'].add(name)

    # Sets are converted to lists before the mapping is stored.
    for bucket in result.values():
        bucket['stockList'] = list(bucket['stockList'])
    util.SaveMongoDBDict(result, 'stock_hold', toDBName)
예제 #8
0
    def Run(codes):
        """Derive a per-quarter trend score from ``sjltz`` and store it per stock.

        For every code the ``yjbg-<code>`` collection of the ``stock``
        database is loaded sorted by ``_id`` ascending. Rows whose ``sjltz``
        converts to NaN are skipped. Each remaining quarter is scored -1 when
        the value is negative, -0.5 when it is lower than the previous usable
        value, and 1 otherwise. The score, its running total and - for
        quarters dated 2010 or later - a second running total that only
        accumulates those quarters are saved to ``stock_statistics`` under
        the stock's code.

        An exception raised while handling one code is printed and the loop
        moves on to the next code.

        Args:
            codes: Iterable of stock code strings.
        """
        # Computed over the whole report history, plus a running total that
        # takes 2010 as its starting year.
        for code in codes:
            try:
                records = []
                previousGrowth = 0
                runningTrend = 0
                runningTrend2010 = 0
                reports = util.LoadData('stock',
                                        'yjbg-' + code,
                                        condition={},
                                        sort=[('_id', 1)])
                for quarter, row in reports.iterrows():
                    reportDate = datetime.strptime(quarter, '%Y-%m-%d')
                    growth = util.String2Number(row['sjltz'])
                    if np.isnan(growth):
                        continue

                    # This condition is not too strict: even a company whose
                    # business stopped growing entirely should still grow in
                    # nominal terms once inflation is taken into account.
                    if growth < 0:
                        trend = -1
                    elif growth - previousGrowth < 0:
                        trend = -0.5
                    else:
                        trend = 1
                    previousGrowth = growth
                    runningTrend += trend

                    record = {
                        '_id': reportDate,
                        'nowPMT': trend,
                        'continuityPMT': runningTrend
                    }
                    if reportDate.year >= 2010:
                        runningTrend2010 += trend
                        record['continuityPMTFrom2010'] = runningTrend2010
                    records.append(record)

                util.SaveMongoDB_DF(pd.DataFrame(records), 'stock_statistics',
                                    code)
            except Exception as e:
                print(e)
예제 #9
0
def TestB(outputPath="c:/workspace/tmp/1222-3.xlsx"):
    """Build a filtered report of ``stock_hold.dv1`` rows and export it to Excel.

    Loads the ``dv1`` rows of ``stock_hold`` whose ``diff`` is below 0.1
    (sorted by ``diff`` ascending) and left-joins, on the index, the
    ``dvYears`` statistics, the ``all_dv1`` backtest collection, the
    ``quarterSpeed`` statistics, three columns of ``stock_list`` and the
    frame returned by ``util.LastPriceNone``. A ``markValue`` column is added
    as ``price * 总股本(亿)``; rows are kept only when ``first``, ``second``
    and ``third`` are all positive and ``markValue`` is at least 50. A fixed
    selection of columns is then written to ``outputPath``.

    Args:
        outputPath: Destination of the Excel export. Defaults to the path
            that used to be hard-coded.

    Returns:
        The filtered, column-reduced DataFrame that was written out.
    """
    df1 = util.LoadData('stock_hold',
                        'dv1',
                        condition={'diff': {
                            '$lt': 0.1
                        }},
                        sort=[('diff', 1)])
    # NOTE(review): 'stock_statistcs' looks like a misspelling of
    # 'stock_statistics'; it is kept because it has to match the name of the
    # real database - confirm.
    df2 = util.LoadData('stock_statistcs',
                        'dvYears',
                        condition={},
                        sort=[('百分比', -1)])

    mergeData = df1.join(df2, how='left', rsuffix='right')
    # The index labels are the stock codes; reading them from the index avoids
    # building a Series per row the way iterrows() does.
    codes = list(mergeData.index)

    df3 = util.LoadData2('stock_backtest', 'all_dv1', codes)
    df4 = util.LoadData2('stock_statistcs', 'quarterSpeed', codes)
    df5 = util.LoadData2('stock', 'stock_list', codes)
    df6 = util.LastPriceNone(codes)
    df5 = df5[['所属行业', '地区', '总股本(亿)']]

    # Each extra frame gets its own suffix so clashing column names stay
    # distinguishable after the join.
    for suffix, extra in (('right2', df3), ('right3', df4), ('right4', df5),
                          ('right5', df6)):
        mergeData = mergeData.join(extra, how='left', rsuffix=suffix)

    # 'price' is expected to be supplied by one of the joined frames
    # (presumably util.LastPriceNone - confirm).
    mergeData['markValue'] = mergeData['price'] * mergeData['总股本(亿)']
    mergeData = mergeData.query('first > 0 and second > 0 and third > 0')
    mergeData = mergeData.query('markValue >= 50')
    mergeData = mergeData[[
        'name', 'diff', 'percent', '统计年数', '分红年数', 'maxValue:value', '所属行业',
        '地区', 'markValue'
    ]]

    mergeData.to_excel(outputPath)
    return mergeData
예제 #10
0
  def Show(codes):
    """Print one stored ``stock_statistics`` row for each of the given stocks.

    For each entry of ``codes`` the collection named after its ``_id`` is
    read from ``stock_statistics`` with ``_id`` sorted descending and a limit
    of 1. The loaded frame's index is copied into a ``date`` column, the
    entry's ``name`` is added, and the stock's ``_id`` becomes the index. All
    frames are printed as one table. A stock that fails to load has its
    exception printed and is skipped. When nothing could be loaded a short
    notice is printed instead of failing.

    Args:
      codes: Iterable of mappings providing ``'_id'`` and ``'name'`` keys.
    """
    # Local import: the file's import block is not visible from this block.
    import pandas as pd

    frames = []
    for one in codes:
      try:
        df = util.LoadData('stock_statistics', one['_id'], condition={}, sort=[('_id', -1)], limit=1)
        df['_id'] = one['_id']
        df['name'] = one['name']
        # Keep the original index as a column before it is replaced.
        df['date'] = df.index
        df.set_index('_id', inplace=True)
        frames.append(df)
      except Exception as e:
        # Best effort: report the problem and carry on with the next stock.
        print(e)

    if not frames:
      # Indexing frames[0] here would raise IndexError.
      print('No data loaded.')
      return

    # DataFrame.append was removed in pandas 2.0; concat stacks the frames
    # in a single step.
    print(pd.concat(frames))
예제 #11
0
def TimeWarpAnnotations(data_root_path, sample_rate=1):
   """Warp each task's raw annotations with DTW, save them as CSV and plot them.

   For every task returned by ``util.LoadData`` the annotation columns (all
   but the first column of the ``annotations`` frame) are passed to
   ``agree.DTWReference`` together with the second column of the
   ``objective_truth`` frame, allowing a warp distance of
   ``3 * sample_rate``. Each warped column is written, next to the first
   annotation column, to
   ``<data_root_path>/<task>/annotations_<sample_rate>hz_dtw_aligned/<task>_<column>.csv``.
   A figure with one subplot per annotation column shows the objective truth,
   the warped annotation and the raw annotation. All figures are displayed
   once every task has been processed.

   Args:
      data_root_path: Root folder given to ``util.LoadData`` and used as the
         base of the output folders.
      sample_rate: Passed to ``util.LoadData``; also scales the warp distance
         and names the output folder.
   """
   max_warp_seconds = 3
   hz_suffix = str(sample_rate)+'hz'

   data_dict = util.LoadData(data_root_path, sample_rate)
   for task in data_dict:
      # Apply DTW
      anno_df = data_dict[task]['raw']['annotations']
      ot_df = data_dict[task]['raw']['objective_truth']
      dtw_anno_df = agree.DTWReference(anno_df.iloc[:,1:], ot_df.iloc[:,1], max_warp_distance=max_warp_seconds*sample_rate)

      # Output DTW annotations
      dtw_output_task_path = os.path.join(data_root_path, task, 'annotations_'+hz_suffix+'_dtw_aligned')
      os.makedirs(dtw_output_task_path, exist_ok=True)

      for anno_col in dtw_anno_df.columns:
         output_task_file = os.path.join(dtw_output_task_path, task+'_'+anno_col+'.csv')
         dtw_single_anno_df = pd.concat((anno_df.iloc[:,0], dtw_anno_df[anno_col]), axis=1)
         dtw_single_anno_df.to_csv(output_task_file, index=False, header=True)

      # Plot
      num_annos = len(anno_df.columns)-1
      if num_annos < 1:
         # Nothing to draw; also avoids dividing by zero rows below.
         continue
      subplot_rows = int(math.floor(math.sqrt(num_annos)))
      subplot_cols = int(math.ceil(float(num_annos)/subplot_rows))
      # squeeze=False keeps axs two-dimensional even for a single row or a
      # single subplot, so the [row, col] indexing below always works.
      fig, axs = plt.subplots(subplot_rows, subplot_cols, squeeze=False)
      for anno_idx in range(num_annos):
         anno_col = anno_df.columns[anno_idx+1] # Skip time col
         ax = axs[anno_idx//subplot_cols, anno_idx%subplot_cols]
         ax.plot(ot_df.iloc[:,0], ot_df.iloc[:,1], 'm-')
         # Assumes the warped columns come back in the same order as the
         # annotation columns - TODO confirm against agree.DTWReference.
         ax.plot(ot_df.iloc[:,0], dtw_anno_df.iloc[:,anno_idx], 'k-')
         ax.plot(ot_df.iloc[:,0], anno_df[anno_col].values, 'b--')
         ax.title.set_text(anno_col)
      fig.suptitle(task + '  DTW')
   plt.show()

   return
예제 #12
0
def mirs(args):
    """Load the stored data for ``args.dir`` and print the requested views.

    The folder path is given a trailing ``/`` when it lacks one. The three
    values returned by ``util.LoadData`` are combined by ``BuildInfo`` into
    an index map and an offset, after which:

    * ``PrintTopo`` is called when ``args.topo`` is not ``None``;
    * ``PrintSelect`` is called when ``args.queryTokens`` differs from ``[]``.

    Args:
        args: Object exposing ``dir``, ``topo``, ``regex``, ``regexneg`` and
            ``queryTokens`` (presumably an argparse namespace - confirm).
    """
    folder = args.dir
    if folder[-1] != "/":
        folder += "/"

    primInfo, aInfo, rmInfo = util.LoadData(folder, False)
    indMap, aOffset = BuildInfo(primInfo, aInfo, rmInfo)

    if args.topo is not None:
        topoArgs = (
            primInfo[util.INDEX],
            aInfo[util.INDEX],
            indMap,
            aOffset,
            args.topo,
            args.regex,
            args.regexneg,
        )
        PrintTopo(*topoArgs)

    if args.queryTokens != []:
        selectArgs = (
            args.queryTokens,
            primInfo[util.INDEX],
            primInfo[util.LISTFILES],
            aInfo[util.INDEX],
            aInfo[util.LISTFILES],
            indMap,
            aOffset,
        )
        PrintSelect(*selectArgs)
예제 #13
0
  #   stock.CloseAccount()
  #   stock.Store2DB()

  # CSI 300 (HS300) statistics ##############################
  # df = util.QueryHS300All()
  # out = []
  # for code, row in df.iterrows():
  #   out.append({'code': code, 'name': row['股票名称']})
  #
  # strategy.dv1.CompareAll('hs300_dv1', out)

  # Plotting



  df = util.LoadData('stock_result', 'dv_jusths300_w')
  df[['total', 'capital']].plot()
  plt.show()
  # All stocks ##############################################
  # df = util.QueryAll()
  # out = []
  # client = MongoClient()
  # db = client['stock_backtest']
  # collection = db['all_dv1']
  # cursor = collection.find()
  # already = set()
  # for c in cursor:
  #   already.add(c['_id'])
  #
  # for code, row in df.iterrows():
  #   # if code in already:
예제 #14
0
def AgreementMeasureExamples(data_root_path,
                             output_path,
                             show_plots=True,
                             sample_rate=1):
    """Print and plot examples of how common agreement measures react to distortion.

    Steps, in order:

    1. Create ``output_path`` when it is not an existing directory and load
       the data with ``util.LoadData``.
    2. Fit a degree-3 polynomial regression from the ``ann7`` annotation of
       ``TaskA`` (``aligned_shifted``) to the objective truth, apply it to the
       objective truth itself, and print Pearson and CCC values of the truth
       against itself and against that warped copy.
    3. Print Spearman and Kendall values for a hand-made step signal against
       itself and against a slightly altered copy, plot both signals side by
       side and save that figure as
       ``agreement_examples_spearmankendall.tex`` in ``output_path``.
    4. Plot a hard-coded simulated annotation together with a scaled and
       shifted copy. No agreement measure is computed on these in this
       function, despite the printed message.

    Args:
        data_root_path: Root folder given to ``util.LoadData``.
        output_path: Folder that receives the TikZ export.
        show_plots: When true, ``plt.show()`` is called at the end.
        sample_rate: Passed to ``util.LoadData``.
    """

    def report(header, self_score, distorted_score):
        # Shared layout of the "self vs. distorted" console output.
        print(header)
        print('  Self correlation: %f' % (self_score))
        print('  Distorted correlation: %f' % (distorted_score))

    if not os.path.isdir(output_path):
        os.makedirs(output_path)

    data_dict = util.LoadData(data_root_path, sample_rate)
    print('Computing examples of drawbacks to existing measures...')

    # Pearson: warp the objective truth with a cubic fitted on annotator ann7.
    shifted = data_dict['TaskA']['aligned_shifted']
    annotation = shifted['annotations'].loc[:, 'ann7'].values
    truth = shifted['objective_truth'].iloc[:, 1].values
    cubic = PolynomialFeatures(degree=3)
    annotation_feats = cubic.fit_transform(annotation.reshape(-1, 1))
    regressor = LinearRegression()
    regressor.fit(annotation_feats, truth)
    truth_warped = regressor.predict(
        cubic.fit_transform(truth.reshape(-1, 1)))
    report('Pearson example, ann7:',
           pearsonr(truth, truth)[0],
           pearsonr(truth, truth_warped)[0])

    # CCC on the same pair of signals.
    report('CCC example, ann7:',
           agree._CCCHelper(truth, truth),
           agree._CCCHelper(truth, truth_warped))

    # Spearman and Kendall on a step signal whose last plateau moves from
    # just above to just below the first one.
    step_anno = 10 * [4] + 3 * [0] + 8 * [4.1]
    step_anno_distort = 10 * [4] + 3 * [0] + 8 * [3.9]
    spearman_scores = (spearmanr(step_anno, step_anno)[0],
                       spearmanr(step_anno, step_anno_distort)[0])
    kendall_scores = (kendalltau(step_anno, step_anno)[0],
                      kendalltau(step_anno, step_anno_distort)[0])
    report('Spearman example:', *spearman_scores)
    report('Kendall example:', *kendall_scores)

    fig, axs = plt.subplots(1, 2)
    panels = (
        (axs[0], step_anno, 'b-', 'Example Annotation'),
        (axs[1], step_anno_distort, 'r-', 'Similar Annotation'),
    )
    for panel, series, line_style, panel_title in panels:
        panel.plot(range(len(series)), series, line_style)
        panel.axes.set_ylim(-1, 5)
        panel.title.set_text(panel_title)
    tex_file = os.path.join(output_path,
                            'agreement_examples_spearmankendall.tex')
    tikzplotlib.save(tex_file,
                     figure=fig,
                     axis_width='\\figureWidth',
                     textsize=12)

    print('Simulating annotations and computing agreement measures....')
    _, sim_axs = plt.subplots(1, 1)
    sim_anno = np.array([
        8, 8, 8, 7, 5.2, 3, 2.5, 2, 1.8, 1.7, 1.6, 1.5, 1.7, 1.9, 2.2, 3, 4,
        5.5, 6.8, 7.9, 9.5, 10.5, 11, 11.2, 11.3, 11.3, 11.3, 11.3, 11.2, 11,
        10.7, 10.4, 10, 9, 7.5, 7.3, 7.2, 7.2, 7.2, 7.2
    ])

    # Second annotator: same shape, stretched from sample 16 on, then offset.
    sim_anno2 = sim_anno.copy()
    sim_anno2[16:] *= 1.5
    sim_anno2 += 2

    for series, line_style in ((sim_anno, 'r-'), (sim_anno2, 'b--')):
        sim_axs.plot(range(len(series)), series, line_style)
    sim_axs.title.set_text('Simulated annotations')

    if show_plots:
        plt.show()