Пример #1
0
 def daily_compute(self, busi_date):
     """
     """
     spark = self.spark
     yesterday = get_date(self.date_order, self.order_date, busi_date, -1)
     unclose = spark.sql("select * from %s.%s where busi_date='%s'" %
                         (self.fdata, self.unclose_cal, yesterday))
     trd = spark.sql("select * from %s.%s where busi_date='%s'" % (
         self.odata, self.cash_flow_table, busi_date))\
         .filter("trd_type='short_related' and prd_no!='0.0'")\
         .filter("trd_qty!=0 or amortize_label!=0")\
         .withColumn("now_trd", func.struct("timestamp", "trd_qty",
                                            "trd_cash_flow",
                                            "amortize_label"))\
         .groupBy("trade_id", "secu_acc_id", "prd_no").\
         agg(func.collect_list("now_trd").alias("now_trd")).\
         persist(StorageLevel.DISK_ONLY)
     trd.count()
     debt = spark.sql("select * from %s.%s where busi_date='%s'" % (
         self.odata, self.debt_table, busi_date))\
         .filter("prd_no!='0.0'")\
         .select("trade_id", "secu_acc_id", "prd_no", "liab_qty", "mkt_val")
     df = full_outer_join(unclose, trd,
                          ["trade_id", "secu_acc_id", "prd_no"])
     df = full_outer_join(df, debt, ["trade_id", "secu_acc_id", "prd_no"])
     df.persist(StorageLevel.DISK_ONLY).count()
     data = df.rdd.map(
         lambda x: _compute(x.asDict(recursive=True), busi_date, yesterday))
     if data.count() > 0:
         data.persist(StorageLevel.DISK_ONLY).count()
         self.save_trade_close_data(data, spark, busi_date)
         self.save_trade_unclose_data(data, spark, busi_date,
                                      lambda t: int(t["unclose_qty"]) < 0)
     else:
         print '没有做空开仓交易的数据'
Пример #2
0
 def _get_base_data(self, spark, table, enddate, odataTable, type):
     sqlTemp = "select * from {0}.{1} where busi_date='{2}'"
     startdate = get_date(self.date_order, self.order_date, enddate, -1)
     unCloseCalDf = spark.sql(sqlTemp.format(self.fdata, table, startdate))
     trd_type = "long_related" if type == 'long' else "short_related"
     cashDetailDf = spark.sql(
         sqlTemp.format(self.odata, self.cash_flow_detail, enddate)).filter(
         "trd_type='%s' and prd_no!='0.0'" % trd_type).filter(
         "trd_qty!=0 or amortize_label!=0 ").persist(
         storageLevel=StorageLevel.DISK_ONLY).\
         withColumn("trd_detail",
                    sqlf.struct("timestamp", "trd_qty", "trd_cash_flow",
                                "amortize_label"))\
         .groupBy("trade_id", "secu_acc_id", "prd_no").agg(
         sqlf.collect_list("trd_detail").alias("trd_detail_list"))
     cashDetailDf.persist(StorageLevel.DISK_ONLY).count()
     moneyDf = spark.sql(sqlTemp.format(
         self.odata, odataTable, enddate)).filter("prd_no!='0.0'").select(
             "trade_id", "secu_acc_id", "prd_no",
             "qty" if type == 'long' else "liab_qty", "mkt_val")
     tempDf = full_outer_join(unCloseCalDf, cashDetailDf,
                              ["trade_id", "secu_acc_id", "prd_no"])
     baseDf = full_outer_join(tempDf, moneyDf,
                              ["trade_id", "secu_acc_id", "prd_no"])
     baseDf.persist(StorageLevel.DISK_ONLY).count()
     return baseDf
Пример #3
0
 def daily_compute(self, startdate=None, enddate=None):
     """
     daily_compute
     :param startdate:
     :param enddate:
     :return:
     """
     # 月度计算,需要从一号开始
     yesterday = get_date(self.date_order, self.order_date, enddate, -1)
     diff_days = days_between(enddate, yesterday)
     LongBaseDf = self._get_base_data(
         self.sparkSession, self.stock_unclose_tradebyday_long_cal, enddate,
         self.asset_table, "long")
     longBaseRdd = LongBaseDf.rdd.mapPartitions(
         lambda rows: _travel_long_iter(rows, enddate, diff_days))
     longBaseRdd.persist(StorageLevel.DISK_ONLY)
     if longBaseRdd.count() > 0:
         self.save_all_data(longBaseRdd, enddate, "long")
     ShortBaseDf = \
         self._get_base_data(self.sparkSession,
                             self.stock_unclose_tradebyday_short_cal,
                             enddate, self.debt_table, "short")
     shortBaseRdd = ShortBaseDf.rdd.mapPartitions(
         lambda rows: _travel_short_iter(rows, enddate, diff_days))
     shortBaseRdd.persist(StorageLevel.DISK_ONLY)
     if shortBaseRdd.count() > 0:
         self.save_all_data(shortBaseRdd, enddate, "short")
Пример #4
0
    def _cal_asset_debt(self, startdate=None, enddate=None):
        """
        T日的asset,debt,net_asset
        :param startdate:
        :param enddate:
        :return:
        """
        sqlTmp1 = """
            select nvl(a.trade_id,b.trade_id) trade_id,nvl(a.mkt_val,0) asset,nvl(b.mkt_val,0) debt,
            nvl(a.mkt_val,0)+nvl(b.mkt_val,0) net_asset
            from (
                select trade_id, sum(mkt_val) mkt_val
                from {1}.{2}
                where busi_date='{0}'
                group by trade_id
            ) a full outer join (
                select trade_id, sum(mkt_val) mkt_val
                from {1}.{3}
                where busi_date='{0}'
                group by trade_id
            ) b on a.trade_id=b.trade_id
        """
        selectSql1 = sqlTmp1.format(enddate, self.odata,
                                    self.stock_asset_holding,
                                    self.stock_debt_holding)
        tempTable1 = "cust_daily_asset_debt_tmp"
        self._reg_table(selectSql1, tempTable1)

        startdate = get_date(self.date_order, self.order_date, enddate, -1)
        selectSql2 = sqlTmp1.format(startdate, self.odata,
                                    self.stock_asset_holding,
                                    self.stock_debt_holding)
        tempTable2 = "cust_daily_asset_debt_before_tmp"
        self._reg_table(selectSql2, tempTable2)
        return tempTable1, tempTable2
Пример #5
0
    def __init__(self, spark, busi_date, mode):
        LeekSparkJob.__init__(self, spark)
        # T-1Date
        self.enddate = busi_date
        if not self.is_trading_day(self.enddate):
            print("task start error day[%s] is not trading day" % self.enddate)
            sys.exit(0)
        # T-2Date
        if self.logLevel != 'debug':
            self.startdate = get_date(self.date_order, self.order_date,
                                      self.enddate, -1)
        else:
            self.startdate = self.enddate

        self.mode = mode  # 如果不是交易日,则不进行计算
Пример #6
0
 def daily_compute(self, busi_date):
     """
     """
     spark = self.spark
     yesterday = get_date(self.date_order, self.order_date, busi_date, -1)
     unclose = spark.sql("select * from %s.%s where busi_date='%s'" %
                         (self.fdata, self.unclose_cal, yesterday))
     trd = spark.sql("select * from %s.%s where busi_date='%s'" %
                     (self.odata, self.cash_flow_table, busi_date))
     trd = trd.filter("trd_type='long_related' and prd_no!='0.0'")
     # trd_qty=0时,一定有amortize_label!=0 (==1)
     trd = trd.filter("trd_qty!=0 or amortize_label!=0")
     asset = spark.sql("select * from %s.%s where busi_date='%s'" %
                       (self.odata, self.asset_table, busi_date))
     asset = asset.filter("prd_no!='0.0'")
     asset = asset.select("trade_id", "secu_acc_id", "prd_no", "qty",
                          "mkt_val")
     trd = trd.withColumn(
         "now_trd",
         func.struct("timestamp", "trd_qty", "trd_cash_flow",
                     "amortize_label"))
     trd = trd.groupBy("trade_id", "secu_acc_id", "prd_no") \
         .agg(func.collect_list("now_trd").alias("now_trd"))
     trd.persist(StorageLevel.DISK_ONLY)
     trd.count()
     df = full_outer_join(unclose, trd,
                          ["trade_id", "secu_acc_id", "prd_no"])
     df = full_outer_join(df, asset, ["trade_id", "secu_acc_id", "prd_no"])
     df.persist(StorageLevel.DISK_ONLY)
     df.count()
     data = df.rdd.map(
         lambda x: _compute(x.asDict(recursive=True), busi_date, yesterday))
     data.persist(StorageLevel.DISK_ONLY)
     if data.count() > 0:
         self.save_close_data(data, busi_date,
                              lambda t: int(t["unclose_qty"]) == 0,
                              sort_long_close_data)
         self.save_unclose_data(data, busi_date,
                                lambda t: int(t["unclose_qty"]) > 0,
                                sort_long_unclose_data)
         self.save_unclose_cal(data, busi_date,
                               lambda t: int(t["unclose_qty"]) > 0,
                               sort_long_unclose_cal)
Пример #7
0
 def daily_compute(self, startdate=None, enddate=None):
     startdate = get_date(self.date_order, self.order_date, enddate, -19)
     sql = """
         select busi_date,trade_id,long_return,short_return,total_return,
         nvl(total_return_rate,0.0) total_return_rate,
         exception_label ac_exception_label,
         case when total_return>=0 and total_return_rate>=0 then 0
              when total_return<0 and total_return_rate<0 then 0
              else 1 end pos_neg_exception,
         case when abs(long_return+short_return-total_return)<0.01 then 0
              else 1 end detail_sum_exception,
         case when abs(total_return_rate)<0.223 then 0
              else 1 end rr_outlier_exception
         from {1}.{2}
         where busi_date='{0}'
     """
     sqlCmd = sql.format(enddate, self.adata, self.stock_cust_daily_return)
     df = self.sparkSession.sql(sqlCmd)
     sqllast20 = """
         select trade_id,
         avg(total_return_rate)+3*stddev(total_return_rate) std_return_rate,
         count(busi_date) num_data
         from {2}.{3}
         where busi_date>='{0}' and busi_date<='{1}'
         group by trade_id
     """.format(startdate, enddate, self.adata,
                self.stock_cust_daily_return)
     dflast20 = self.sparkSession.sql(sqllast20)
     df = df.join(dflast20, "trade_id", "inner")\
         .select(df["*"],
                 fn.when((dflast20.num_data == 20) &
                         (dflast20.std_return_rate <= df.total_return_rate), 0)
                 .when(dflast20.num_data < 20, 0)
                 .when(dflast20.std_return_rate == 0, 0)
                 .otherwise(1).alias("rr_sp_exception"))\
         .where("""
                 ac_exception_label!=0 or pos_neg_exception!=0 or
                 detail_sum_exception!=0 or rr_outlier_exception!=0 or
                 rr_sp_exception!=0
              """)\
         .persist(StorageLevel.DISK_ONLY).repartition(5)
     save_data(self.sparkSession, self.adata,
               self.stock_dr_check_exception_data, enddate, df)
Пример #8
0
 def daily_compute(self, busi_date):
     """
     """
     spark = self.spark
     yesterday = get_date(self.date_order, self.order_date, busi_date, -1)
     unclose = spark.sql("select * from %s.%s where busi_date='%s'" %
                         (self.fdata, self.unclose_table, yesterday))
     trd = spark.sql("select * from %s.%s where busi_date='%s'" %
                     (self.odata, self.cash_flow_table, busi_date))
     trd = trd.filter("trd_type='short_related' and prd_no!='0.0'")
     trd = trd.withColumn(
         "now_trd", func.struct("trd_qty", "trd_cash_flow", "timestamp"))
     trd = trd.groupBy("trade_id", "secu_acc_id", "prd_no") \
         .agg(func.collect_list("now_trd").alias("now_trd"))
     trd.persist(StorageLevel.DISK_ONLY)
     trd.count()
     debt = spark.sql("select * from %s.%s where busi_date='%s'" %
                      (self.odata, self.debt_table, busi_date))
     debt = debt.select("trade_id", "secu_acc_id", "prd_no", "liab_qty",
                        "mkt_val")
     debt = debt.filter("prd_no!='0.0'")
     df = full_outer_join(unclose, trd,
                          ["trade_id", "secu_acc_id", "prd_no"])
     df = full_outer_join(df, debt, ["trade_id", "secu_acc_id", "prd_no"])
     df.persist(StorageLevel.DISK_ONLY)
     df.count()
     data = df.rdd.map(lambda x: Row(**short_compute(
         x.asDict(recursive=True), busi_date)))
     if data.count() > 0:
         data = data.toDF()
         close = data.filter("remain_liab_qty = 0")
         close = close.withColumn("close_date", close.busi_date)
         self.check_data(close)
         unclose = data.filter("remain_liab_qty != 0")
         self.save_close_data(close, busi_date)
         self.save_unclose_data(unclose, busi_date)
     else:
         print "清仓股票收益做空没有数据输出"
Пример #9
0
    def daily_compute(self, startdate=None, enddate=None):

        #计算T日的return
        df_o, df_o_long, df_o_short, df_tbd, df_tbd_long, df_tbd_short = self._cal_return(
            enddate)
        startdate = get_date(self.date_order, self.order_date, enddate, -1)
        df_o_1, df_o_long_1, df_o_short_1, df_tbd_1, df_tbd_long_1, df_tbd_short_1\
            = self._cal_return(startdate,oreturn="o_return_1", tbdreturn="tbd_return_1",
                    oept="o_exception_label_1", tbdept="tbd_exception_label_1",
                    ounclose="o_unclose_amt_1", tbdunclose="tbd_unclose_amt_1", type='unclose')

        # cal:long/
        df1 = full_outer_join(df_o, df_o_1,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df1.registerTempTable("temp_o_return_table")
        sqlOTemp = """
                  select trade_id,secu_acc_id,prd_no,
                        nvl(o_return,0.0)-nvl(o_return_1,0.0) {1},
                        nvl(o_unclose_amt,0.0)-nvl(o_unclose_amt_1,0.0) {2},
                        nvl(o_exception_label,o_exception_label_1) o_exception_label
            from {0}
        """
        dfOreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_return_table", "o_return",
                            "o_unclose_amt"))
        dfOreturn.show()
        dfOLong = full_outer_join(df_o_long, df_o_long_1,
                                  ["trade_id", "secu_acc_id", "prd_no"])
        dfOLong.registerTempTable("temp_o_long_return_table")

        dfOlongreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_long_return_table", "o_return",
                            "o_unclose_amt"))
        dfOlongreturn.show()
        dfOShort = full_outer_join(df_o_short, df_o_short_1,
                                   ["trade_id", "secu_acc_id", "prd_no"])
        dfOShort.registerTempTable("temp_o_short_return_table")
        dfOShortreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_short_return_table", "o_return",
                            "o_unclose_amt"))

        df2 = full_outer_join(df_tbd, df_tbd_1,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df2.registerTempTable("temp_tbd_return_table")
        sqlTbdTemp = """
                  select trade_id,secu_acc_id,prd_no,
                        nvl(tbd_return,0.0)-nvl(tbd_return_1,0.0) {1},
                        nvl(tbd_unclose_amt,0.0)-nvl(tbd_unclose_amt_1,0.0) {2},
                        nvl(tbd_exception_label,tbd_exception_label_1) tbd_exception_label
            from {0}
        """
        dfTbdreturn = self.sparkSession.sql(
            sqlTbdTemp.format("temp_tbd_return_table", "tbd_return",
                              "tbd_unclose_amt"))
        dfTbdreturn.show()
        dfTbdLong = full_outer_join(df_tbd_long, df_tbd_long_1,
                                    ["trade_id", "secu_acc_id", "prd_no"])
        dfTbdLong.registerTempTable("temp_tbd_long_return_table")

        dfTbdlongreturn = self.sparkSession.sql(
            sqlTbdTemp.format("temp_tbd_long_return_table", "tbd_return",
                              "tbd_unclose_amt"))
        dfTbdshort = full_outer_join(df_tbd_short, df_tbd_short_1,
                                     ["trade_id", "secu_acc_id", "prd_no"])
        dfTbdshort.registerTempTable("temp_tbd_short_return_table")
        dfTbdShortreturn = self.sparkSession.sql(
            sqlTbdTemp.format("temp_tbd_short_return_table", "tbd_return",
                              "tbd_unclose_amt"))

        dfall = full_outer_join(dfOreturn, dfTbdreturn,
                                ["trade_id", "secu_acc_id", "prd_no"])
        dfall.registerTempTable("temp_all_return")
        sql_all_return = """
            select trade_id,secu_acc_id,prd_no,
                  nvl(o_return,0) o_return, nvl(tbd_return,0) tbd_return,
                  nvl(o_exception_label,0) o_exception_label,
                  nvl(tbd_exception_label,0) tbd_exception_label
            from {0}
        """.format("temp_all_return")
        dfall = self.sparkSession.sql(sql_all_return)
        print 'dfall'
        dfall.show()
        dfLong = full_outer_join(dfOlongreturn, dfTbdlongreturn,
                                 ["trade_id", "secu_acc_id", "prd_no"])
        dfLong.registerTempTable("temp_all_long_return")
        sqlOcTemp = """
            select trade_id,secu_acc_id,prd_no,
                   case when abs(nvl(o_return,0.0)-nvl(tbd_return,0.0))<=0.01
                        then 0 else 1 end {1},
                   case when abs(nvl(o_unclose_amt,0.0)-nvl(tbd_unclose_amt,0.0))<=0.01
                         then 0 else 1 end {2}
            from {0}
        """
        sqlOcLong = sqlOcTemp.format("temp_all_long_return",
                                     "lr_equal_exception",
                                     "lmv_euqal_exception")
        df_oc_long = self.sparkSession.sql(sqlOcLong)

        dfShort = full_outer_join(dfOShortreturn, dfTbdShortreturn,
                                  ["trade_id", "secu_acc_id", "prd_no"])

        dfShort.registerTempTable("temp_all_short_return")
        sqlOcShort = sqlOcTemp.format("temp_all_short_return",
                                      "sr_equal_exception",
                                      "smv_equal_exception")
        df_oc_short = self.sparkSession.sql(sqlOcShort)

        df2 = full_outer_join(df_oc_long, df_oc_short,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df3 = full_outer_join(dfall, df2,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df3.registerTempTable("temp_stockotbdcheckexception")
        sql = """select trade_id,
                    secu_acc_id,
                    '{0}' busi_date,
                    prd_no,
                    nvl(tbd_return,0) tbd_return,
                    nvl(tbd_exception_label,0) tbd_exception_label,
                    nvl(o_return,0) o_return,
                    nvl(o_exception_label,0) o_exception_label,
                    nvl(lr_equal_exception,0) lr_equal_exception,
                    nvl(lmv_euqal_exception,0) lmv_euqal_exception,
                    nvl(sr_equal_exception,0) sr_equal_exception,
                    nvl(smv_equal_exception,0) smv_equal_exception
                  from temp_stockotbdcheckexception
        """
        finalDf = self.sparkSession.sql(sql.format(enddate))
        self._cal_detail(finalDf, enddate)
        self._stat_report(finalDf, enddate)
Пример #10
0
    def daily_compute(self, startdate=None, enddate=None):
        # cal:o_return,o_exception_label,c_return,c_exception_label
        #获取T日的数据
        df_o, df_o_long, df_o_short = self._cal_return(enddate)
        startdate = get_date(self.date_order, self.order_date, enddate, -1)

        df_o_1, df_o_long_1, df_o_short_1\
            = self._cal_return(startdate,oreturn="o_return_1",
                    oept="o_exception_label_1",
                    ounclose="o_unclose_amt_1", type='unclose')
        df_dc_long = self._cal_dc(enddate, 'long_related')
        df_dc_short = self._cal_dc(enddate, 'short_related')
        df_dc = df_dc_long.union(df_dc_short).groupBy(
            ["trade_id", "secu_acc_id", "prd_no"]).agg(
                fn.sum("dc_return").alias("dc_return"),
                fn.max("dc_exception_label").alias("dc_exception_label"))
        # cal:long/
        df1 = full_outer_join(df_o, df_o_1,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df1.registerTempTable("temp_o_return_table")
        sqlOTemp = """
                          select trade_id,secu_acc_id,prd_no,
                                nvl(o_return,0.0)-nvl(o_return_1,0.0) {1},
                                nvl(o_unclose_amt,0.0) {2},
                                nvl(o_exception_label,o_exception_label_1) o_exception_label
                    from {0}
                """
        dfOreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_return_table", "o_return",
                            "o_unclose_amt"))
        dfOLong = full_outer_join(df_o_long, df_o_long_1,
                                  ["trade_id", "secu_acc_id", "prd_no"])
        dfOLong.registerTempTable("temp_o_long_return_table")
        dfOlongreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_long_return_table", "o_return",
                            "o_unclose_amt"))
        dfOShort = full_outer_join(df_o_short, df_o_short_1,
                                   ["trade_id", "secu_acc_id", "prd_no"])
        dfOShort.registerTempTable("temp_o_short_return_table")
        dfOShortreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_short_return_table", "o_return",
                            "o_unclose_amt"))

        dfall = full_outer_join(dfOreturn, df_dc,
                                ["trade_id", "secu_acc_id", "prd_no"])
        dfall.registerTempTable("temp_all_return")
        sql_all_return = """
                   select trade_id,secu_acc_id,prd_no,
                         nvl(o_return,0) o_return, nvl(dc_return,0) dc_return,
                         nvl(o_exception_label,0) o_exception_label,
                         nvl(dc_exception_label,0) dc_exception_label
                   from {0}
               """.format("temp_all_return")
        dfall = self.sparkSession.sql(sql_all_return)

        dfLong = full_outer_join(dfOlongreturn, df_dc_long,
                                 ["trade_id", "secu_acc_id", "prd_no"])
        dfLong.registerTempTable("temp_all_long_return")
        sqlOcTemp = """
            select trade_id,secu_acc_id,prd_no,
                   case when abs(nvl(o_return,0.0)-nvl(dc_return,0.0))<=0.01
                        then 0 else 1 end {1},
                   case when abs(nvl(o_unclose_amt,0.0)-nvl(dc_unclose_amt,0.0))<=0.01
                         then 0 else 1 end {2}
            from {0}
        """
        sqlOcLong = sqlOcTemp.format("temp_all_long_return",
                                     "lr_equal_exception",
                                     "lmv_euqal_exception")
        df_oc_long = self.sparkSession.sql(sqlOcLong)
        dfShort = full_outer_join(dfOShortreturn, df_dc_short,
                                  ["trade_id", "secu_acc_id", "prd_no"])

        dfShort.registerTempTable("temp_all_short_return")
        sqlOcShort = sqlOcTemp.format("temp_all_short_return",
                                      "sr_equal_exception",
                                      "smv_equal_exception")
        df_oc_short = self.sparkSession.sql(sqlOcShort)
        df2 = full_outer_join(df_oc_long, df_oc_short,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df3 = full_outer_join(dfall, df2,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df3.registerTempTable("temp_stockodccheckexception")
        sql = """select trade_id,
                    secu_acc_id,
                    '{0}' busi_date,
                    prd_no,
                    nvl(o_return,0) o_return,
                    nvl(o_exception_label,0) o_exception_label,
                    nvl(dc_return,0) dc_return,
                    nvl(dc_exception_label,0) dc_exception_label,
                    nvl(lr_equal_exception,0) lr_equal_exception,
                    nvl(lmv_euqal_exception,0) lmv_euqal_exception,
                    nvl(sr_equal_exception,0) sr_equal_exception,
                    nvl(smv_equal_exception,0) smv_equal_exception
                 from temp_stockodccheckexception
        """.format(enddate)
        finalDf = self.sparkSession.sql(sql)
        self._cal_detail(finalDf, enddate)
        self._stat_report(finalDf, enddate)