示例#1
0
 def daily_compute(self, busi_date):
     """
     """
     spark = self.spark
     yesterday = get_date(self.date_order, self.order_date, busi_date, -1)
     unclose = spark.sql("select * from %s.%s where busi_date='%s'" %
                         (self.fdata, self.unclose_cal, yesterday))
     trd = spark.sql("select * from %s.%s where busi_date='%s'" % (
         self.odata, self.cash_flow_table, busi_date))\
         .filter("trd_type='short_related' and prd_no!='0.0'")\
         .filter("trd_qty!=0 or amortize_label!=0")\
         .withColumn("now_trd", func.struct("timestamp", "trd_qty",
                                            "trd_cash_flow",
                                            "amortize_label"))\
         .groupBy("trade_id", "secu_acc_id", "prd_no").\
         agg(func.collect_list("now_trd").alias("now_trd")).\
         persist(StorageLevel.DISK_ONLY)
     trd.count()
     debt = spark.sql("select * from %s.%s where busi_date='%s'" % (
         self.odata, self.debt_table, busi_date))\
         .filter("prd_no!='0.0'")\
         .select("trade_id", "secu_acc_id", "prd_no", "liab_qty", "mkt_val")
     df = full_outer_join(unclose, trd,
                          ["trade_id", "secu_acc_id", "prd_no"])
     df = full_outer_join(df, debt, ["trade_id", "secu_acc_id", "prd_no"])
     df.persist(StorageLevel.DISK_ONLY).count()
     data = df.rdd.map(
         lambda x: _compute(x.asDict(recursive=True), busi_date, yesterday))
     if data.count() > 0:
         data.persist(StorageLevel.DISK_ONLY).count()
         self.save_trade_close_data(data, spark, busi_date)
         self.save_trade_unclose_data(data, spark, busi_date,
                                      lambda t: int(t["unclose_qty"]) < 0)
     else:
         print '没有做空开仓交易的数据'
示例#2
0
 def _get_base_data(self, spark, table, enddate, odataTable, type):
     """Assemble the joined base DataFrame for one trade direction.

     Full-outer-joins three inputs on (trade_id, secu_acc_id, prd_no):

     * the previous day's unclosed-position snapshot from ``table``,
     * today's cash-flow detail rows collapsed into per-position lists of
       structs (``trd_detail_list``),
     * today's quantity / market-value rows from ``odataTable``.

     :param spark: active Spark session.
     :param table: fdata-layer table holding the T-1 unclose snapshot.
     :param enddate: business date being computed.
     :param odataTable: odata-layer table with qty/liab_qty and mkt_val.
     :param type: ``'long'`` selects long_related trades and the ``qty``
         column; anything else selects short_related and ``liab_qty``.
         (Name shadows the builtin but is kept for interface
         compatibility.)
     :return: the joined DataFrame, persisted to disk and materialized.
     """
     sqlTemp = "select * from {0}.{1} where busi_date='{2}'"
     startdate = get_date(self.date_order, self.order_date, enddate, -1)
     unCloseCalDf = spark.sql(sqlTemp.format(self.fdata, table, startdate))
     trd_type = "long_related" if type == 'long' else "short_related"
     # Fix: the original chain also persisted the filtered intermediate to
     # DISK_ONLY, although it is consumed exactly once by the groupBy
     # below; only the aggregated result (persisted right after this
     # statement) is ever reused, so that cache was pure waste.
     cashDetailDf = spark.sql(
         sqlTemp.format(self.odata, self.cash_flow_detail, enddate)).filter(
         "trd_type='%s' and prd_no!='0.0'" % trd_type).filter(
         "trd_qty!=0 or amortize_label!=0 ").\
         withColumn("trd_detail",
                    sqlf.struct("timestamp", "trd_qty", "trd_cash_flow",
                                "amortize_label"))\
         .groupBy("trade_id", "secu_acc_id", "prd_no").agg(
         sqlf.collect_list("trd_detail").alias("trd_detail_list"))
     cashDetailDf.persist(StorageLevel.DISK_ONLY).count()
     # qty for the long side, liab_qty for the short side.
     moneyDf = spark.sql(sqlTemp.format(
         self.odata, odataTable, enddate)).filter("prd_no!='0.0'").select(
             "trade_id", "secu_acc_id", "prd_no",
             "qty" if type == 'long' else "liab_qty", "mkt_val")
     tempDf = full_outer_join(unCloseCalDf, cashDetailDf,
                              ["trade_id", "secu_acc_id", "prd_no"])
     baseDf = full_outer_join(tempDf, moneyDf,
                              ["trade_id", "secu_acc_id", "prd_no"])
     baseDf.persist(StorageLevel.DISK_ONLY).count()
     return baseDf
示例#3
0
 def daily_compute(self, busi_date):
     """Run the daily long-side computation for ``busi_date`` and save
     the close, unclose and unclose-cal outputs.
     """
     spark = self.spark
     prev_date = get_date(self.date_order, self.order_date, busi_date, -1)
     keys = ["trade_id", "secu_acc_id", "prd_no"]
     # T-1 unclosed-position calculation snapshot.
     unclose = spark.sql("select * from %s.%s where busi_date='%s'" %
                         (self.fdata, self.unclose_cal, prev_date))
     # Today's long-related trades.  When trd_qty is 0 the row always has
     # amortize_label != 0 (== 1), so the second filter keeps
     # amortize-only rows while dropping empty ones.  Per-position trades
     # are collected into a list of structs.
     trd = spark.sql("select * from %s.%s where busi_date='%s'" %
                     (self.odata, self.cash_flow_table, busi_date)) \
         .filter("trd_type='long_related' and prd_no!='0.0'") \
         .filter("trd_qty!=0 or amortize_label!=0") \
         .withColumn("now_trd",
                     func.struct("timestamp", "trd_qty", "trd_cash_flow",
                                 "amortize_label")) \
         .groupBy("trade_id", "secu_acc_id", "prd_no") \
         .agg(func.collect_list("now_trd").alias("now_trd")) \
         .persist(StorageLevel.DISK_ONLY)
     trd.count()
     # Today's asset quantities and market values.
     asset = spark.sql("select * from %s.%s where busi_date='%s'" %
                       (self.odata, self.asset_table, busi_date)) \
         .filter("prd_no!='0.0'") \
         .select("trade_id", "secu_acc_id", "prd_no", "qty", "mkt_val")
     merged = full_outer_join(unclose, trd, keys)
     merged = full_outer_join(merged, asset, keys)
     merged.persist(StorageLevel.DISK_ONLY)
     merged.count()
     data = merged.rdd.map(
         lambda row: _compute(row.asDict(recursive=True), busi_date,
                              prev_date))
     data.persist(StorageLevel.DISK_ONLY)
     if data.count() > 0:
         self.save_close_data(data, busi_date,
                              lambda t: int(t["unclose_qty"]) == 0,
                              sort_long_close_data)
         self.save_unclose_data(data, busi_date,
                                lambda t: int(t["unclose_qty"]) > 0,
                                sort_long_unclose_data)
         self.save_unclose_cal(data, busi_date,
                               lambda t: int(t["unclose_qty"]) > 0,
                               sort_long_unclose_cal)
示例#4
0
 def daily_compute(self, busi_date):
     """
     """
     spark = self.spark
     yesterday = get_date(self.date_order, self.order_date, busi_date, -1)
     unclose = spark.sql("select * from %s.%s where busi_date='%s'" %
                         (self.fdata, self.unclose_table, yesterday))
     trd = spark.sql("select * from %s.%s where busi_date='%s'" %
                     (self.odata, self.cash_flow_table, busi_date))
     trd = trd.filter("trd_type='short_related' and prd_no!='0.0'")
     trd = trd.withColumn(
         "now_trd", func.struct("trd_qty", "trd_cash_flow", "timestamp"))
     trd = trd.groupBy("trade_id", "secu_acc_id", "prd_no") \
         .agg(func.collect_list("now_trd").alias("now_trd"))
     trd.persist(StorageLevel.DISK_ONLY)
     trd.count()
     debt = spark.sql("select * from %s.%s where busi_date='%s'" %
                      (self.odata, self.debt_table, busi_date))
     debt = debt.select("trade_id", "secu_acc_id", "prd_no", "liab_qty",
                        "mkt_val")
     debt = debt.filter("prd_no!='0.0'")
     df = full_outer_join(unclose, trd,
                          ["trade_id", "secu_acc_id", "prd_no"])
     df = full_outer_join(df, debt, ["trade_id", "secu_acc_id", "prd_no"])
     df.persist(StorageLevel.DISK_ONLY)
     df.count()
     data = df.rdd.map(lambda x: Row(**short_compute(
         x.asDict(recursive=True), busi_date)))
     if data.count() > 0:
         data = data.toDF()
         close = data.filter("remain_liab_qty = 0")
         close = close.withColumn("close_date", close.busi_date)
         self.check_data(close)
         unclose = data.filter("remain_liab_qty != 0")
         self.save_close_data(close, busi_date)
         self.save_unclose_data(unclose, busi_date)
     else:
         print "清仓股票收益做空没有数据输出"
 def daily_compute(self, startdate=None, enddate=None):
     """Merge the rr-sum and r-sum exception checks for ``enddate`` into
     one dense report, then emit detail rows and the statistics report.
     """
     rr_exceptions = self._cal_rr_sum_exception(enddate)
     r_exceptions = self._cal_r_sum_exception(enddate)
     merged = full_outer_join(rr_exceptions, r_exceptions,
                              ["trade_id", "busi_date", "compute_term"])
     merged.registerTempTable("temp_stockinvplcheckexception")
     # Default absent exception flags to 0 so downstream consumers see a
     # dense schema.
     sql = """select trade_id,
                 busi_date,
                 compute_term,
                 nvl(r_sum_exception,0) pl_r_sum_exception,
                 nvl(trd_sum_exception,0) pl_trd_sum_exception,
                 nvl(rr_sum_exception,0) rr_sum_exception
              from temp_stockinvplcheckexception
         """
     finalDf = self.sparkSession.sql(sql)
     self._cal_detail(finalDf, enddate)
     self._stat_report(finalDf, enddate)
示例#6
0
 def daily_compute(self, startdate=None, enddate=None):
     """Cross-check open-trade (o_*) against close-trade (c_*) returns
     for ``enddate`` and emit per-position exception flags.

     :param startdate: unused here; kept for the shared daily_compute
         signature.
     :param enddate: business date whose data is checked.
     """
     # Aggregate per-position open-trade returns over long + short:
     # returns are summed, exception labels keep the max (worst) value.
     df_o_long = self._cal_data(enddate, self.fdata,
                                self.stock_close_o_trade_long_data,
                                self.stock_unclose_o_trade_long_data,
                                'o_return', 'o_exception_label',
                                'o_unclose_amt')
     df_o_short = self._cal_data(enddate, self.fdata,
                                 self.stock_close_o_trade_short_data,
                                 self.stock_unclose_o_trade_short_data,
                                 'o_return', 'o_exception_label',
                                 'o_unclose_amt')
     df_o = df_o_long.union(df_o_short).groupBy(
         ["trade_id", "secu_acc_id", "busi_date", "prd_no"]).agg(
             fn.sum("o_return").alias("o_return"),
             fn.max("o_exception_label").alias("o_exception_label"))
     if LOG_LEVEL == 'debug':
         df_o.show()
     # Same aggregation for the close-trade (c_*) figures.
     df_c_long = self._cal_data(enddate, self.fdata,
                                self.stock_close_c_trade_long_data,
                                self.stock_unclose_c_trade_long_data,
                                'c_return', 'c_exception_label',
                                'c_unclose_amt')
     df_c_short = self._cal_data(enddate, self.fdata,
                                 self.stock_close_c_trade_short_data,
                                 self.stock_unclose_c_trade_short_data,
                                 'c_return', 'c_exception_label',
                                 'c_unclose_amt')
     df_c = df_c_long.union(df_c_short).groupBy(
         ["trade_id", "secu_acc_id", "busi_date", "prd_no"]).agg(
             fn.sum("c_return").alias("c_return"),
             fn.max("c_exception_label").alias("c_exception_label"))
     # Long side: flag positions where o_* and c_* figures disagree by
     # more than the 0.01 tolerance (return and unclosed amount).
     ocLong = full_outer_join(
         df_o_long, df_c_long,
         ["trade_id", "secu_acc_id", "busi_date", "prd_no"])
     ocLong.registerTempTable("temp_oc_long_check_1")
     # {0}=source temp table, {1}=return-mismatch flag column,
     # {2}=unclosed-amount-mismatch flag column.
     sqlOcTemp = """
         select trade_id,secu_acc_id,busi_date,prd_no,
                case when abs(nvl(c_return,0.0)-nvl(o_return,0.0))<=0.01
                     then 0 else 1 end {1},
                case when abs(nvl(c_unclose_amt,0.0)-nvl(o_unclose_amt,0.0))<=0.01
                      then 0 else 1 end {2}
         from {0}
     """
     sqlOcLong = sqlOcTemp.format("temp_oc_long_check_1",
                                  "lr_equal_exception",
                                  "lmv_equal_exception")
     df_oc_long = self.sparkSession.sql(sqlOcLong)
     # Short side: same comparison with s*-prefixed flag columns.
     ocShort = full_outer_join(
         df_o_short, df_c_short,
         ["trade_id", "secu_acc_id", "busi_date", "prd_no"])
     ocShort.registerTempTable("temp_oc_short_check_2")
     sqlOcShort = sqlOcTemp.format("temp_oc_short_check_2",
                                   "sr_equal_exception",
                                   "smv_equal_exception")
     df_oc_short = self.sparkSession.sql(sqlOcShort)
     # Combine aggregate figures and both mismatch-flag sets, then
     # densify all nullable columns with nvl(...,0).
     df1 = full_outer_join(
         df_o, df_c, ["trade_id", "secu_acc_id", "busi_date", "prd_no"])
     df2 = full_outer_join(
         df_oc_long, df_oc_short,
         ["trade_id", "secu_acc_id", "busi_date", "prd_no"])
     df3 = full_outer_join(
         df1, df2, ["trade_id", "secu_acc_id", "busi_date", "prd_no"])
     df3.registerTempTable("temp_stockoccheckexception")
     sql = """
                 select trade_id,
                 secu_acc_id,
                 busi_date,
                 prd_no,
                 nvl(o_return,0) o_return,
                 nvl(o_exception_label,0) o_exception_label,
                 nvl(c_return,0) c_return,
                 nvl(c_exception_label,0) c_exception_label,
                 nvl(lr_equal_exception,0) lr_equal_exception,
                 nvl(lmv_equal_exception,0) lmv_equal_exception,
                 nvl(sr_equal_exception,0) sr_equal_exception,
                 nvl(smv_equal_exception,0) smv_equal_exception
                 from temp_stockoccheckexception
         """
     finalDf = self.sparkSession.sql(sql)
     if LOG_LEVEL == 'debug':
         finalDf.show()
     self._cal_detail(finalDf, enddate)
     self._stat_report(finalDf, enddate)
 def daily_compute(self, startdate=None, enddate=None):
     """Cross-check close-trade (c_*) returns against daily-check (dc_*)
     returns for ``enddate`` and emit per-position exception flags.

     :param startdate: unused here; kept for the shared daily_compute
         signature.
     :param enddate: business date whose data is checked.
     """
     # Fetch close-return data (overall, long and short views).
     df_c, df_c_long, df_c_short = self._cal_return(enddate)
     df_dc_long = self._cal_dc(enddate, 'long_related')
     df_dc_short = self._cal_dc(enddate, 'short_related')
     # Aggregate the daily_check_data (dc_*) figures across both
     # directions: returns summed, worst exception label kept.
     df_dc = df_dc_long.union(df_dc_short).groupBy(
         ["trade_id", "secu_acc_id", "prd_no"]).agg(
             fn.sum("dc_return").alias("dc_return"),
             fn.max("dc_exception_label").alias("dc_exception_label"))
     # Overall c vs dc comparison with nulls densified to 0.
     dfall = full_outer_join(df_c, df_dc,
                             ["trade_id", "secu_acc_id", "prd_no"])
     dfall.registerTempTable("temp_all_return")
     sql_all_return = """
                select trade_id,secu_acc_id,prd_no,
                      nvl(c_return,0) c_return, nvl(dc_return,0) dc_return,
                      nvl(c_exception_label,0) c_exception_label,
                      nvl(dc_exception_label,0) dc_exception_label
                from {0}
            """.format("temp_all_return")
     dfall = self.sparkSession.sql(sql_all_return)
     # Long side: flag mismatches beyond the 0.01 tolerance.
     dfLong = full_outer_join(df_c_long, df_dc_long,
                              ["trade_id", "secu_acc_id", "prd_no"])
     dfLong.registerTempTable("temp_all_long_return")
     # {0}=source temp table, {1}=return flag, {2}=unclosed-amount flag.
     sqlOcTemp = """
         select trade_id,secu_acc_id,prd_no,
                case when abs(nvl(c_return,0.0)-nvl(dc_return,0.0))<=0.01
                     then 0 else 1 end {1},
                case when abs(nvl(c_unclose_amt,0.0)-nvl(dc_unclose_amt,0.0))<=0.01
                      then 0 else 1 end {2}
         from {0}
     """
     # NOTE: 'lmv_euqal_exception' is misspelled but used consistently
     # here and in the final select below; renaming it would change the
     # output schema consumers rely on.
     sqlOcLong = sqlOcTemp.format("temp_all_long_return",
                                  "lr_equal_exception",
                                  "lmv_euqal_exception")
     df_cdc_long = self.sparkSession.sql(sqlOcLong)
     # Short side: same comparison with s*-prefixed flag columns.
     dfShort = full_outer_join(df_c_short, df_dc_short,
                               ["trade_id", "secu_acc_id", "prd_no"])
     dfShort.registerTempTable("temp_all_short_return")
     sqlOcShort = sqlOcTemp.format("temp_all_short_return",
                                   "sr_equal_exception",
                                   "smv_equal_exception")
     df_cdc_short = self.sparkSession.sql(sqlOcShort)
     # Combine flags with the aggregate comparison and densify nulls;
     # the literal enddate is injected as busi_date.
     df2 = full_outer_join(df_cdc_long, df_cdc_short,
                           ["trade_id", "secu_acc_id", "prd_no"])
     df3 = full_outer_join(dfall, df2,
                           ["trade_id", "secu_acc_id", "prd_no"])
     df3.registerTempTable("temp_stockodccheckexception")
     sql = """select trade_id,
                 secu_acc_id,
                 '{0}' busi_date,
                 prd_no,
                 nvl(c_return,0) c_return,
                 nvl(c_exception_label,0) c_exception_label,
                 nvl(dc_return,0) dc_return,
                 nvl(dc_exception_label,0) dc_exception_label,
                 nvl(lr_equal_exception,0) lr_equal_exception,
                 nvl(lmv_euqal_exception,0) lmv_euqal_exception,
                 nvl(sr_equal_exception,0) sr_equal_exception,
                 nvl(smv_equal_exception,0) smv_equal_exception
              from temp_stockodccheckexception
     """.format(enddate)
     finalDf = self.sparkSession.sql(sql)
     self._cal_detail(finalDf, enddate)
     self._stat_report(finalDf, enddate)
示例#8
0
    def daily_compute(self, startdate=None, enddate=None):
        """Cross-check day-over-day open-trade (o_*) deltas against
        trade-by-day (tbd_*) deltas for ``enddate`` and emit per-position
        exception flags.

        :param startdate: ignored on entry; recomputed below as the
            trading day before ``enddate``.
        :param enddate: business date whose data is checked.
        """

        # Compute T-day returns (overall / long / short for both the o_*
        # and tbd_* views).
        df_o, df_o_long, df_o_short, df_tbd, df_tbd_long, df_tbd_short = self._cal_return(
            enddate)
        startdate = get_date(self.date_order, self.order_date, enddate, -1)
        # T-1 figures, suffixed _1, restricted to unclosed positions.
        df_o_1, df_o_long_1, df_o_short_1, df_tbd_1, df_tbd_long_1, df_tbd_short_1\
            = self._cal_return(startdate,oreturn="o_return_1", tbdreturn="tbd_return_1",
                    oept="o_exception_label_1", tbdept="tbd_exception_label_1",
                    ounclose="o_unclose_amt_1", tbdunclose="tbd_unclose_amt_1", type='unclose')

        # o_* day-over-day deltas: T minus T-1 for return and unclosed
        # amount; exception label falls back to the T-1 value when the
        # T value is null.
        df1 = full_outer_join(df_o, df_o_1,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df1.registerTempTable("temp_o_return_table")
        # {0}=source temp table, {1}=delta return column, {2}=delta
        # unclosed-amount column.
        sqlOTemp = """
                  select trade_id,secu_acc_id,prd_no,
                        nvl(o_return,0.0)-nvl(o_return_1,0.0) {1},
                        nvl(o_unclose_amt,0.0)-nvl(o_unclose_amt_1,0.0) {2},
                        nvl(o_exception_label,o_exception_label_1) o_exception_label
            from {0}
        """
        dfOreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_return_table", "o_return",
                            "o_unclose_amt"))
        # NOTE(review): the unconditional .show() calls in this method
        # look like leftover debugging output — confirm before removing.
        dfOreturn.show()
        dfOLong = full_outer_join(df_o_long, df_o_long_1,
                                  ["trade_id", "secu_acc_id", "prd_no"])
        dfOLong.registerTempTable("temp_o_long_return_table")

        dfOlongreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_long_return_table", "o_return",
                            "o_unclose_amt"))
        dfOlongreturn.show()
        dfOShort = full_outer_join(df_o_short, df_o_short_1,
                                   ["trade_id", "secu_acc_id", "prd_no"])
        dfOShort.registerTempTable("temp_o_short_return_table")
        dfOShortreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_short_return_table", "o_return",
                            "o_unclose_amt"))

        # Same day-over-day deltas for the tbd_* view.
        df2 = full_outer_join(df_tbd, df_tbd_1,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df2.registerTempTable("temp_tbd_return_table")
        sqlTbdTemp = """
                  select trade_id,secu_acc_id,prd_no,
                        nvl(tbd_return,0.0)-nvl(tbd_return_1,0.0) {1},
                        nvl(tbd_unclose_amt,0.0)-nvl(tbd_unclose_amt_1,0.0) {2},
                        nvl(tbd_exception_label,tbd_exception_label_1) tbd_exception_label
            from {0}
        """
        dfTbdreturn = self.sparkSession.sql(
            sqlTbdTemp.format("temp_tbd_return_table", "tbd_return",
                              "tbd_unclose_amt"))
        dfTbdreturn.show()
        dfTbdLong = full_outer_join(df_tbd_long, df_tbd_long_1,
                                    ["trade_id", "secu_acc_id", "prd_no"])
        dfTbdLong.registerTempTable("temp_tbd_long_return_table")

        dfTbdlongreturn = self.sparkSession.sql(
            sqlTbdTemp.format("temp_tbd_long_return_table", "tbd_return",
                              "tbd_unclose_amt"))
        dfTbdshort = full_outer_join(df_tbd_short, df_tbd_short_1,
                                     ["trade_id", "secu_acc_id", "prd_no"])
        dfTbdshort.registerTempTable("temp_tbd_short_return_table")
        dfTbdShortreturn = self.sparkSession.sql(
            sqlTbdTemp.format("temp_tbd_short_return_table", "tbd_return",
                              "tbd_unclose_amt"))

        # Overall o vs tbd comparison with nulls densified to 0.
        dfall = full_outer_join(dfOreturn, dfTbdreturn,
                                ["trade_id", "secu_acc_id", "prd_no"])
        dfall.registerTempTable("temp_all_return")
        sql_all_return = """
            select trade_id,secu_acc_id,prd_no,
                  nvl(o_return,0) o_return, nvl(tbd_return,0) tbd_return,
                  nvl(o_exception_label,0) o_exception_label,
                  nvl(tbd_exception_label,0) tbd_exception_label
            from {0}
        """.format("temp_all_return")
        dfall = self.sparkSession.sql(sql_all_return)
        print 'dfall'
        dfall.show()
        # Long side: flag o/tbd mismatches beyond the 0.01 tolerance.
        # NOTE: 'lmv_euqal_exception' is misspelled but used consistently
        # here and in the final select — renaming it would change the
        # output schema.
        dfLong = full_outer_join(dfOlongreturn, dfTbdlongreturn,
                                 ["trade_id", "secu_acc_id", "prd_no"])
        dfLong.registerTempTable("temp_all_long_return")
        sqlOcTemp = """
            select trade_id,secu_acc_id,prd_no,
                   case when abs(nvl(o_return,0.0)-nvl(tbd_return,0.0))<=0.01
                        then 0 else 1 end {1},
                   case when abs(nvl(o_unclose_amt,0.0)-nvl(tbd_unclose_amt,0.0))<=0.01
                         then 0 else 1 end {2}
            from {0}
        """
        sqlOcLong = sqlOcTemp.format("temp_all_long_return",
                                     "lr_equal_exception",
                                     "lmv_euqal_exception")
        df_oc_long = self.sparkSession.sql(sqlOcLong)

        # Short side: same comparison with s*-prefixed flag columns.
        dfShort = full_outer_join(dfOShortreturn, dfTbdShortreturn,
                                  ["trade_id", "secu_acc_id", "prd_no"])

        dfShort.registerTempTable("temp_all_short_return")
        sqlOcShort = sqlOcTemp.format("temp_all_short_return",
                                      "sr_equal_exception",
                                      "smv_equal_exception")
        df_oc_short = self.sparkSession.sql(sqlOcShort)

        # Combine flags with the aggregate comparison, densify nulls,
        # inject the literal enddate as busi_date, then report.
        df2 = full_outer_join(df_oc_long, df_oc_short,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df3 = full_outer_join(dfall, df2,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df3.registerTempTable("temp_stockotbdcheckexception")
        sql = """select trade_id,
                    secu_acc_id,
                    '{0}' busi_date,
                    prd_no,
                    nvl(tbd_return,0) tbd_return,
                    nvl(tbd_exception_label,0) tbd_exception_label,
                    nvl(o_return,0) o_return,
                    nvl(o_exception_label,0) o_exception_label,
                    nvl(lr_equal_exception,0) lr_equal_exception,
                    nvl(lmv_euqal_exception,0) lmv_euqal_exception,
                    nvl(sr_equal_exception,0) sr_equal_exception,
                    nvl(smv_equal_exception,0) smv_equal_exception
                  from temp_stockotbdcheckexception
        """
        finalDf = self.sparkSession.sql(sql.format(enddate))
        self._cal_detail(finalDf, enddate)
        self._stat_report(finalDf, enddate)
    def daily_compute(self, startdate=None, enddate=None):
        """Cross-check day-over-day open-trade (o_*) deltas against
        daily-check (dc_*) figures for ``enddate`` and emit per-position
        exception flags.

        :param startdate: ignored on entry; recomputed below as the
            trading day before ``enddate``.
        :param enddate: business date whose data is checked.
        """
        # Fetch T-day data (overall / long / short o_* views).
        df_o, df_o_long, df_o_short = self._cal_return(enddate)
        startdate = get_date(self.date_order, self.order_date, enddate, -1)

        # T-1 figures, suffixed _1, restricted to unclosed positions.
        df_o_1, df_o_long_1, df_o_short_1\
            = self._cal_return(startdate,oreturn="o_return_1",
                    oept="o_exception_label_1",
                    ounclose="o_unclose_amt_1", type='unclose')
        df_dc_long = self._cal_dc(enddate, 'long_related')
        df_dc_short = self._cal_dc(enddate, 'short_related')
        # Aggregate dc_* across both directions: returns summed, worst
        # exception label kept.
        df_dc = df_dc_long.union(df_dc_short).groupBy(
            ["trade_id", "secu_acc_id", "prd_no"]).agg(
                fn.sum("dc_return").alias("dc_return"),
                fn.max("dc_exception_label").alias("dc_exception_label"))
        # o_* day-over-day delta for the return; exception label falls
        # back to the T-1 value when the T value is null.
        # NOTE(review): o_unclose_amt is NOT differenced against
        # o_unclose_amt_1 here, only nvl'd — confirm whether the
        # subtraction was intentionally dropped.
        df1 = full_outer_join(df_o, df_o_1,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df1.registerTempTable("temp_o_return_table")
        sqlOTemp = """
                          select trade_id,secu_acc_id,prd_no,
                                nvl(o_return,0.0)-nvl(o_return_1,0.0) {1},
                                nvl(o_unclose_amt,0.0) {2},
                                nvl(o_exception_label,o_exception_label_1) o_exception_label
                    from {0}
                """
        dfOreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_return_table", "o_return",
                            "o_unclose_amt"))
        dfOLong = full_outer_join(df_o_long, df_o_long_1,
                                  ["trade_id", "secu_acc_id", "prd_no"])
        dfOLong.registerTempTable("temp_o_long_return_table")
        dfOlongreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_long_return_table", "o_return",
                            "o_unclose_amt"))
        dfOShort = full_outer_join(df_o_short, df_o_short_1,
                                   ["trade_id", "secu_acc_id", "prd_no"])
        dfOShort.registerTempTable("temp_o_short_return_table")
        dfOShortreturn = self.sparkSession.sql(
            sqlOTemp.format("temp_o_short_return_table", "o_return",
                            "o_unclose_amt"))

        # Overall o vs dc comparison with nulls densified to 0.
        dfall = full_outer_join(dfOreturn, df_dc,
                                ["trade_id", "secu_acc_id", "prd_no"])
        dfall.registerTempTable("temp_all_return")
        sql_all_return = """
                   select trade_id,secu_acc_id,prd_no,
                         nvl(o_return,0) o_return, nvl(dc_return,0) dc_return,
                         nvl(o_exception_label,0) o_exception_label,
                         nvl(dc_exception_label,0) dc_exception_label
                   from {0}
               """.format("temp_all_return")
        dfall = self.sparkSession.sql(sql_all_return)

        # Long side: flag o/dc mismatches beyond the 0.01 tolerance.
        # NOTE: 'lmv_euqal_exception' is misspelled but used consistently
        # here and in the final select — renaming it would change the
        # output schema.
        dfLong = full_outer_join(dfOlongreturn, df_dc_long,
                                 ["trade_id", "secu_acc_id", "prd_no"])
        dfLong.registerTempTable("temp_all_long_return")
        sqlOcTemp = """
            select trade_id,secu_acc_id,prd_no,
                   case when abs(nvl(o_return,0.0)-nvl(dc_return,0.0))<=0.01
                        then 0 else 1 end {1},
                   case when abs(nvl(o_unclose_amt,0.0)-nvl(dc_unclose_amt,0.0))<=0.01
                         then 0 else 1 end {2}
            from {0}
        """
        sqlOcLong = sqlOcTemp.format("temp_all_long_return",
                                     "lr_equal_exception",
                                     "lmv_euqal_exception")
        df_oc_long = self.sparkSession.sql(sqlOcLong)
        # Short side: same comparison with s*-prefixed flag columns.
        dfShort = full_outer_join(dfOShortreturn, df_dc_short,
                                  ["trade_id", "secu_acc_id", "prd_no"])

        dfShort.registerTempTable("temp_all_short_return")
        sqlOcShort = sqlOcTemp.format("temp_all_short_return",
                                      "sr_equal_exception",
                                      "smv_equal_exception")
        df_oc_short = self.sparkSession.sql(sqlOcShort)
        # Combine flags with the aggregate comparison, densify nulls,
        # inject the literal enddate as busi_date, then report.
        df2 = full_outer_join(df_oc_long, df_oc_short,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df3 = full_outer_join(dfall, df2,
                              ["trade_id", "secu_acc_id", "prd_no"])
        df3.registerTempTable("temp_stockodccheckexception")
        sql = """select trade_id,
                    secu_acc_id,
                    '{0}' busi_date,
                    prd_no,
                    nvl(o_return,0) o_return,
                    nvl(o_exception_label,0) o_exception_label,
                    nvl(dc_return,0) dc_return,
                    nvl(dc_exception_label,0) dc_exception_label,
                    nvl(lr_equal_exception,0) lr_equal_exception,
                    nvl(lmv_euqal_exception,0) lmv_euqal_exception,
                    nvl(sr_equal_exception,0) sr_equal_exception,
                    nvl(smv_equal_exception,0) smv_equal_exception
                 from temp_stockodccheckexception
        """.format(enddate)
        finalDf = self.sparkSession.sql(sql)
        self._cal_detail(finalDf, enddate)
        self._stat_report(finalDf, enddate)
示例#10
0
 def daily_compute(self, startdate=None, enddate=None):
     """Collect min/max return-rate (rr) and weight/term (wt) statistics
     across the o / c / tbd / prd close and unclose datasets for
     ``enddate``, densify them into one row set and save it.

     :param startdate: unused here; kept for the shared daily_compute
         signature.
     :param enddate: business date whose data is summarized.
     """
     # Open-trade (o_*) close and unclose min/max statistics.
     df_o_close = self._cal_data(enddate, self.fdata, self.stock_close_o_trade_long_data,
                                 self.stock_close_o_trade_short_data, 'o_close_rr_max',
                                 'o_close_rr_min', 'o_close_wt_max', 'o_close_wt_min')
     df_o_unclose = self._cal_data(enddate, self.fdata, self.stock_unclose_o_trade_long_data,
                                   self.stock_unclose_o_trade_short_data,
                                   'o_unclose_rr_max', 'o_unclose_rr_min', 'o_unclose_wt_max',
                                   'o_unclose_wt_min')
     df1 = full_outer_join(df_o_close, df_o_unclose,
                           ["busi_date", "exception_label",
                            "trd_type"]).persist(StorageLevel.DISK_ONLY)
     # Close-trade (c_*) statistics.
     df_c_close = self._cal_data(enddate, self.fdata, self.stock_close_c_trade_long_data,
                                 self.stock_close_c_trade_short_data, 'c_close_rr_max',
                                 'c_close_rr_min', 'c_close_wt_max', 'c_close_wt_min')
     df_c_unclose = self._cal_data(enddate, self.fdata, self.stock_unclose_c_trade_long_data,
                                   self.stock_unclose_c_trade_short_data,
                                   'c_unclose_rr_max', 'c_unclose_rr_min', 'c_unclose_wt_max',
                                   'c_unclose_wt_min')
     df2 = full_outer_join(df_c_close, df_c_unclose,
                           ["busi_date", "exception_label",
                            "trd_type"]).persist(StorageLevel.DISK_ONLY)
     # Trade-by-day (tbd_*) statistics.
     df_day_close = self._cal_data(enddate, self.fdata, self.stock_close_tradebyday_long_data,
                                   self.stock_close_tradebyday_short_data,
                                   'tbd_close_rr_max', 'tbd_close_rr_min', 'tbd_close_wt_max',
                                   'tbd_close_wt_min')
     df_day_unclose = self._cal_data(enddate, self.fdata,
                                     self.stock_unclose_tradebyday_long_data,
                                     self.stock_unclose_tradebyday_short_data,
                                     'tbd_unclose_rr_max', 'tbd_unclose_rr_min',
                                     'tbd_unclose_wt_max', 'tbd_unclose_wt_min')
     df3 = full_outer_join(df_day_close, df_day_unclose,
                           ["busi_date", "exception_label",
                            "trd_type"]).persist(StorageLevel.DISK_ONLY)
     # Product-level (prd_*) statistics; weight column is holding_term.
     df_prd_close = self._cal_data(enddate, self.fdata, self.stock_close_prd_long_data,
                                   self.stock_close_prd_short_data, 'prd_close_rr_max',
                                   'prd_close_rr_min', 'prd_close_wt_max', 'prd_close_wt_min',
                                   wt='holding_term')
     df_prd_unclose = self._cal_data(enddate, self.fdata, self.stock_unclose_prd_long_data,
                                     self.stock_unclose_prd_short_data,
                                     'prd_unclose_rr_max', 'prd_unclose_rr_min',
                                     'prd_unclose_wt_max', 'prd_unclose_wt_min',
                                     wt='holding_term')
     df4 = full_outer_join(df_prd_close, df_prd_unclose,
                           ["busi_date", "exception_label",
                            "trd_type"]).persist(StorageLevel.DISK_ONLY)
     # Merge all four views on (busi_date, exception_label, trd_type).
     df12 = full_outer_join(df1, df2, ["busi_date", "exception_label", "trd_type"])
     df34 = full_outer_join(df3, df4, ["busi_date", "exception_label", "trd_type"])
     df = full_outer_join(df12, df34,
                          ["busi_date", "exception_label",
                           "trd_type"]).persist(StorageLevel.DISK_ONLY)
     df.registerTempTable("tmp_stocktrdrrcheckexception")
     # Densify every statistic with nvl(...,0) so the saved table has no
     # nulls.
     sql = """
         select exception_label,
         trd_type,
         busi_date,
         nvl(o_close_rr_max,0) o_close_rr_max,
         nvl(o_close_rr_min,0) o_close_rr_min,
         nvl(o_unclose_rr_max,0) o_unclose_rr_max,
         nvl(o_unclose_rr_min,0) o_unclose_rr_min,
         nvl(c_close_rr_max,0) c_close_rr_max,
         nvl(c_close_rr_min,0) c_close_rr_min,
         nvl(c_unclose_rr_max,0) c_unclose_rr_max,
         nvl(c_unclose_rr_min,0) c_unclose_rr_min,
         nvl(tbd_close_rr_max,0) tbd_close_rr_max,
         nvl(tbd_close_rr_min,0) tbd_close_rr_min,
         nvl(tbd_unclose_rr_max,0) tbd_unclose_rr_max,
         nvl(tbd_unclose_rr_min,0) tbd_unclose_rr_min,
         nvl(prd_close_rr_max,0) prd_close_rr_max,
         nvl(prd_close_rr_min,0) prd_close_rr_min,
         nvl(prd_unclose_rr_max,0) prd_unclose_rr_max,
         nvl(prd_unclose_rr_min,0) prd_unclose_rr_min,
         nvl(o_close_wt_max,0) o_close_wt_max,
         nvl(o_close_wt_min,0) o_close_wt_min,
         nvl(o_unclose_wt_max,0) o_unclose_wt_max,
         nvl(o_unclose_wt_min,0) o_unclose_wt_min,
         nvl(c_close_wt_max,0) c_close_wt_max,
         nvl(c_close_wt_min,0) c_close_wt_min,
         nvl(c_unclose_wt_max,0) c_unclose_wt_max,
         nvl(c_unclose_wt_min,0) c_unclose_wt_min,
         nvl(tbd_close_wt_max,0) tbd_close_wt_max,
         nvl(tbd_close_wt_min,0) tbd_close_wt_min,
         nvl(tbd_unclose_wt_max,0) tbd_unclose_wt_max,
         nvl(tbd_unclose_wt_min,0) tbd_unclose_wt_min,
         nvl(prd_close_wt_max,0) prd_close_wt_max,
         nvl(prd_close_wt_min,0) prd_close_wt_min,
         nvl(prd_unclose_wt_max,0) prd_unclose_wt_max,
         nvl(prd_unclose_wt_min,0) prd_unclose_wt_min
         from tmp_stocktrdrrcheckexception
     """
     # NOTE(review): 'taget_table' looks like a misspelling of
     # 'target_table', but it must match the attribute defined elsewhere
     # in this class — do not change here alone.
     finalDf = self.sparkSession.sql(sql).repartition(5)
     save_data(self.sparkSession, self.adata, self.taget_table, enddate, finalDf)