示例#1
0
def alpha34(df):
    """
    Alpha#34
    rank(((1 - rank((stddev(returns, 2) / stddev(returns, 5)))) + (1 - rank(delta(close, 1)))))

    Reads ``df.returns`` and ``df.close`` and combines them with the
    project's ``u`` helper functions exactly as written in the formula.
    """
    # Ratio of the 2-period to the 5-period stddev of returns.
    short_over_long = u.stddev(df.returns, 2) / u.stddev(df.returns, 5)
    volatility_term = 1 - u.rank(short_over_long)
    # One-period change in close.
    move_term = 1 - u.rank(u.delta(df.close, 1))
    return u.rank(volatility_term + move_term)
示例#2
0
def alpha21(df):
    """
    Alpha#21
    ((((sum(close, 8) / 8) + stddev(close, 8)) < (sum(close, 2) / 2)) ? (-1 * 1) :
    (((sum(close,2) / 2) < ((sum(close, 8) / 8) - stddev(close, 8))) ?
    1 : (((1 < (volume / adv20)) || ((volume /adv20) == 1)) ? 1 : (-1 * 1))))

    Reads ``df.close`` and ``df.volume`` (plus whatever ``u.adv(df, 20)``
    needs) and returns the result of nested ``np.where`` calls, i.e. an
    array holding -1 or 1 per element.
    """
    # Each building block of the formula is named once and reused below.
    mean_8 = u.ts_sum(df.close, 8) / 8
    spread_8 = u.stddev(df.close, 8)
    mean_2 = u.ts_sum(df.close, 2) / 2
    volume_ratio = df.volume / u.adv(df, 20)

    short_above_band = (mean_8 + spread_8) < mean_2
    short_below_band = mean_2 < (mean_8 - spread_8)
    volume_at_least_average = (1 < volume_ratio) | (volume_ratio == 1)

    # The nesting mirrors the ternary chain of the formula; the first
    # condition that holds decides the sign.
    volume_branch = np.where(volume_at_least_average, 1, (-1 * 1))
    lower_branch = np.where(short_below_band, 1, volume_branch)
    return np.where(short_above_band, (-1 * 1), lower_branch)
示例#3
0
def buildk(vals, ma, s=1.5):
   """
   Compute the CUSUM reference value k for a dataset.

   According to Kemp (1962), the expression for determining a target value
   k for cusum should be done via:

         k = mean_a + .5 delta
         (Where: delta is the mean shift we want to detect.
                 mean_a is an "acceptable process mean value."
                 mean_a is the mean of the original dataset.)

   Lucas et al. (1982) suggested it be close to .5 delta as well,
   and it should be chosen close to:

                    mean_d - mean_a
         k = ---------------------------
               ln (mean_d) - ln (mean_a)

   Mean_d is the "barely tolerable mean value".  This is the mean that
   CUSUM should quickly detect.  Mean_d is based on the declared needs
   of an experimental designer, the mean, and the std dev.

      mean_d = s * p + mean_a
      (Where: s is a value chosen by the experimental designers,
              p is the standard deviation, and mean_a is the mean
              of the dataset.)

   Parameters:
      vals -- the samples; only passed to utils.stddev().
      ma   -- mean_a, the acceptable process mean (must be positive).
      s    -- multiplier applied to the standard deviation (default 1.5).

   Returns:
      The value k from the Lucas et al. expression above.  When mean_d
      and mean_a coincide (zero spread or s == 0) the expression is 0/0;
      its limit, mean_a, is returned instead of raising
      ZeroDivisionError.

   Raises:
      ValueError -- from math.log() when mean_a or mean_d is not positive.
   """
   md = s * utils.stddev(vals) + ma
   # math.log(x) is the natural logarithm, same as math.log(x, math.e).
   log_gap = math.log(md) - math.log(ma)
   if log_gap == 0:
      # (md - ma) / (ln md - ln ma) tends to ma as md approaches ma.
      return float(ma)
   return (md - ma) / log_gap
示例#4
0
def alpha40(df):
    """
    Alpha#40
    ((-1 * rank(stddev(high, 10))) * correlation(high, volume, 10))

    Reads ``df.high`` and ``df.volume``; all window computations are
    delegated to the project's ``u`` helpers.
    """
    negated_rank = -1 * u.rank(u.stddev(df.high, 10))
    high_volume_corr = u.corr(df.high, df.volume, 10)
    return negated_rank * high_volume_corr
示例#5
0
def alpha22(df):
    """
    Alpha#22
    (-1 * (delta(correlation(high, volume, 5), 5) * rank(stddev(close, 20))))

    Reads ``df.high``, ``df.volume`` and ``df.close``; all window
    computations are delegated to the project's ``u`` helpers.
    """
    # 5-period change of the 5-period high/volume correlation.
    corr_change = u.delta(u.corr(df.high, df.volume, 5), 5)
    close_spread_rank = u.rank(u.stddev(df.close, 20))
    return -1 * (corr_change * close_spread_rank)
示例#6
0
	def print_cluster_separation(self):
		"""
		Print statistics on how similar each cluster is to its nearest peers.

		For every row of ``self.confirm.get_cluster_sim_mat()`` the values are
		ranked from highest to lowest.  Position 0 is skipped (presumably the
		cluster's similarity to itself -- confirm against the matrix
		builder) and the next 1, 3 and 5 values are pooled over all rows.
		The mean and standard deviation of each pool (via ``utils.avg`` and
		``utils.stddev``) are printed, followed by the ten highest "top 1"
		scores.

		Returns None.  When there are fewer than two clusters only a notice
		is printed.

		The similarity matrix is not modified: each row is ranked on a copy.
		Every print call takes exactly one argument, so the output is the
		same whether ``print`` is the Python 2 statement or the Python 3
		function.  The output text is kept byte-for-byte.
		"""
		print("CLUSTER SEPERATION")
		print("")
		print("Comparing each Cluster to it's most similar other clusters")

		if len(self.clusters) < 2:
			print("There are less than two clusters")
			return

		top_1 = []
		top_3 = []
		top_5 = []
		for row in self.confirm.get_cluster_sim_mat():
			# sorted() returns a new list, so the matrix owned by
			# self.confirm keeps its original column order.
			ranked = sorted(row, reverse=True)
			# Slices start at 1 to skip the row maximum; they simply come
			# up short when a row has fewer entries.
			top_1.extend(ranked[1:2])
			top_3.extend(ranked[1:4])
			top_5.extend(ranked[1:6])
		top_1.sort(reverse=True)

		top_1_mean = utils.avg(top_1)
		top_1_stddev = utils.stddev(top_1)
		top_3_mean = utils.avg(top_3)
		top_3_stddev = utils.stddev(top_3)
		top_5_mean = utils.avg(top_5)
		top_5_stddev = utils.stddev(top_5)
		print("\n        Mean\t   Std Dev")
		print("Top 1: %3.3f\t %3.3f" % (top_1_mean, top_1_stddev))
		print("Top 3: %3.3f\t %3.3f" % (top_3_mean, top_3_stddev))
		print("Top 5: %3.3f\t %3.3f" % (top_5_mean, top_5_stddev))
		print("")
		print("List of 10 most similar scores")
		print(", ".join(["%4.3f" % score for score in top_1[:10]]))

		print("")
		print("")
示例#7
0
    def print_cluster_separation(self):
        """
        Print statistics on how similar each cluster is to its nearest peers.

        For every row of ``self.confirm.get_cluster_sim_mat()`` the values are
        ranked from highest to lowest.  Position 0 is skipped (presumably the
        cluster's similarity to itself -- confirm against the matrix
        builder) and the next 1, 3 and 5 values are pooled over all rows.
        The mean and standard deviation of each pool (via ``utils.avg`` and
        ``utils.stddev``) are printed, followed by the ten highest "top 1"
        scores.

        Returns None.  When there are fewer than two clusters only a notice
        is printed.

        The similarity matrix is not modified: each row is ranked on a copy.
        Every print call takes exactly one argument, so the output is the
        same whether ``print`` is the Python 2 statement or the Python 3
        function.  The output text is kept byte-for-byte.
        """
        print("CLUSTER SEPERATION")
        print("")
        print("Comparing each Cluster to it's most similar other clusters")

        if len(self.clusters) < 2:
            print("There are less than two clusters")
            return

        top_1 = []
        top_3 = []
        top_5 = []
        for row in self.confirm.get_cluster_sim_mat():
            # sorted() returns a new list, so the matrix owned by
            # self.confirm keeps its original column order.
            ranked = sorted(row, reverse=True)
            # Slices start at 1 to skip the row maximum; they simply come
            # up short when a row has fewer entries.
            top_1.extend(ranked[1:2])
            top_3.extend(ranked[1:4])
            top_5.extend(ranked[1:6])
        top_1.sort(reverse=True)

        top_1_mean = utils.avg(top_1)
        top_1_stddev = utils.stddev(top_1)
        top_3_mean = utils.avg(top_3)
        top_3_stddev = utils.stddev(top_3)
        top_5_mean = utils.avg(top_5)
        top_5_stddev = utils.stddev(top_5)
        print("\n        Mean\t   Std Dev")
        print("Top 1: %3.3f\t %3.3f" % (top_1_mean, top_1_stddev))
        print("Top 3: %3.3f\t %3.3f" % (top_3_mean, top_3_stddev))
        print("Top 5: %3.3f\t %3.3f" % (top_5_mean, top_5_stddev))
        print("")
        print("List of 10 most similar scores")
        print(", ".join(["%4.3f" % score for score in top_1[:10]]))

        print("")
        print("")
示例#8
0
def alpha18(df):
    """
    Alpha#18
    (-1 * rank(((stddev(abs((close - open)), 5) + (close - open)) +
    correlation(close, open, 10))))

    Reads ``df.close`` and ``df.open``; all window computations are
    delegated to the project's ``u`` helpers.
    """
    # Close-minus-open appears twice in the formula; compute it once.
    body = df.close - df.open
    combined = (u.stddev(abs(body), 5) + body) + u.corr(df.close, df.open, 10)
    return -1 * u.rank(combined)
示例#9
0
	def feature_eval_metrics(self, sim_fun):
		"""
		Summarise document-to-cluster similarity under ``sim_fun``.

		``sim_fun(cluster, doc)`` is evaluated for every member of every
		cluster in ``self.clusters``.  ``utils.avg`` / ``utils.stddev`` are
		then applied to three collections: all values pooled together, the
		per-cluster averages, and the per-cluster standard deviations.

		Returns a 6-tuple:
		(global_mean, global_stddev, mean_of_means, stddev_of_means,
		mean_of_stddev, stddev_of_stddev).
		"""
		pooled = []
		per_cluster_avg = []
		per_cluster_dev = []
		for cluster in self.clusters:
			scores = [sim_fun(cluster, member) for member in cluster.members]
			pooled.extend(scores)
			per_cluster_avg.append(utils.avg(scores))
			per_cluster_dev.append(utils.stddev(scores))

		return (
			utils.avg(pooled),
			utils.stddev(pooled),
			utils.avg(per_cluster_avg),
			utils.stddev(per_cluster_avg),
			utils.avg(per_cluster_dev),
			utils.stddev(per_cluster_dev),
		)
示例#10
0
    def feature_eval_metrics(self, sim_fun):
        """
        Summarise document-to-cluster similarity under ``sim_fun``.

        ``sim_fun(cluster, doc)`` is evaluated for every member of every
        cluster in ``self.clusters``.  ``utils.avg`` / ``utils.stddev`` are
        then applied to three collections: all values pooled together, the
        per-cluster averages, and the per-cluster standard deviations.

        Returns a 6-tuple:
        (global_mean, global_stddev, mean_of_means, stddev_of_means,
        mean_of_stddev, stddev_of_stddev).
        """
        pooled = []
        per_cluster_avg = []
        per_cluster_dev = []
        for cluster in self.clusters:
            scores = [sim_fun(cluster, member) for member in cluster.members]
            pooled.extend(scores)
            per_cluster_avg.append(utils.avg(scores))
            per_cluster_dev.append(utils.stddev(scores))

        return (
            utils.avg(pooled),
            utils.stddev(pooled),
            utils.avg(per_cluster_avg),
            utils.stddev(per_cluster_avg),
            utils.avg(per_cluster_dev),
            utils.stddev(per_cluster_dev),
        )
def clock_gets(number=100):
    """
    Time ``number`` consecutive calls to ``get_message()``.

    Each call is bracketed by two ``clock()`` readings; the return value of
    ``get_message()`` itself is discarded.

    Returns a tuple ``(median, mean, stddev)`` of the per-call durations,
    computed with the ``median``, ``mean`` and ``stddev`` helpers in scope.
    """
    durations = []
    for _ in xrange(number):
        began = clock()
        get_message()
        durations.append(clock() - began)
    return (median(durations), mean(durations), stddev(durations))
示例#12
0
	def print_cluster_cohesion(self):
		"""
		Print, per cluster, how similar its members are to the cluster.

		The header lists the feature-set names of the first member of the
		first cluster plus a trailing "confirm" column.  For each cluster one
		line is printed holding, for every position of the sequences returned
		by ``member.global_sim(cluster.center)``, the ``utils.avg`` and
		``utils.stddev`` over all members; then the same two statistics for
		``self.confirm.cluster_doc_similarity(cluster, member)``; then the
		number of members.

		Returns None.  Indexing fails (as before) when ``self.clusters`` or a
		cluster's ``members`` is empty.

		List comprehensions and ``range`` replace ``map``/``xrange`` so the
		method does not depend on ``map`` returning a list.  Every print call
		takes exactly one argument, so the output is the same whether
		``print`` is the Python 2 statement or the Python 3 function.
		"""
		print("CLUSTER COHESION:")
		# Slice-copy so appending "confirm" cannot alter the list owned by
		# the document.
		sim_names = self.clusters[0].members[0].get_feature_set_names()[:]
		sim_names.append("confirm")
		print("\t\t%s     SIZE" % ("        ".join(sim_names)))
		for x, cluster in enumerate(self.clusters):
			# One sequence of similarity values per member document.
			similarities = [_doc.global_sim(cluster.center) for _doc in cluster.members]
			to_print = []
			# The first member's sequence fixes how many columns are read.
			for y in range(len(similarities[0])):
				values = [row[y] for row in similarities]
				to_print.append(utils.avg(values))
				to_print.append(utils.stddev(values))
			values = [self.confirm.cluster_doc_similarity(cluster, _doc) for _doc in cluster.members]
			to_print.append(utils.avg(values))
			to_print.append(utils.stddev(values))
			size = len(cluster.members)
			print("\t%s:  %s  %d" % (x, "  ".join(["%3.2f" % s for s in to_print]), size))
		print("")
		print("")
示例#13
0
def alpha1(df):
    """
    Alpha#1
    (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)

    :param df: dataframe providing ``returns`` and ``close`` columns
    :return: ``u.rank(...)`` of the 5-period arg-max, shifted down by 0.5
    """
    # Where returns are negative take the 20-period stddev of returns,
    # otherwise the close.
    picked = np.where(df.returns < 0, u.stddev(df.returns, 20), df.close)
    # np.where yields a bare array; restore the frame's index.
    picked = pd.Series(picked, index=df.index)
    # SignedPower(x, 2) is implemented as a plain square here.
    # NOTE(review): equivalent only while both branches are non-negative.
    squared = picked ** 2
    return u.rank(u.ts_argmax(squared, 5)) - 0.5
示例#14
0
 def print_cluster_cohesion(self):
     """
     Print, per cluster, how similar its members are to the cluster.

     The header lists the feature-set names of the first member of the
     first cluster plus a trailing "confirm" column.  For each cluster one
     line is printed holding, for every position of the sequences returned
     by ``member.global_sim(cluster.center)``, the ``utils.avg`` and
     ``utils.stddev`` over all members; then the same two statistics for
     ``self.confirm.cluster_doc_similarity(cluster, member)``; then the
     number of members.

     Returns None.  Indexing fails (as before) when ``self.clusters`` or a
     cluster's ``members`` is empty.

     List comprehensions and ``range`` replace ``map``/``xrange`` so the
     method does not depend on ``map`` returning a list.  Every print call
     takes exactly one argument, so the output is the same whether
     ``print`` is the Python 2 statement or the Python 3 function.
     """
     print("CLUSTER COHESION:")
     # Slice-copy so appending "confirm" cannot alter the list owned by
     # the document.
     sim_names = self.clusters[0].members[0].get_feature_set_names()[:]
     sim_names.append("confirm")
     print("\t\t%s     SIZE" % ("        ".join(sim_names)))
     for x, cluster in enumerate(self.clusters):
         # One sequence of similarity values per member document.
         similarities = [_doc.global_sim(cluster.center)
                         for _doc in cluster.members]
         to_print = []
         # The first member's sequence fixes how many columns are read.
         for y in range(len(similarities[0])):
             values = [row[y] for row in similarities]
             to_print.append(utils.avg(values))
             to_print.append(utils.stddev(values))
         values = [self.confirm.cluster_doc_similarity(cluster, _doc)
                   for _doc in cluster.members]
         to_print.append(utils.avg(values))
         to_print.append(utils.stddev(values))
         size = len(cluster.members)
         print("\t%s:  %s  %d" % (x, "  ".join(
             ["%3.2f" % s for s in to_print]), size))
     print("")
     print("")
示例#15
0
    def get_stat(self, max_cnt_label=None):
        """
        Aggregate ``self.err_log`` into ``err_stat`` and ``label_stat``.

        For every key of ``self.err_log`` the logged samples are reduced with
        ``utils.mean`` and ``utils.stddev``; each result is indexed at [0]
        and [1].  Entries are ordered by element [0] of the mean.

        :param max_cnt_label: when not None, only entries whose mean[0] is
            at most ``max_cnt_label * 2`` are counted, and that many leading
            entries are kept.

        Sets on ``self``:
          err_stat   -- (means of element 1, stddevs of element 1)
          label_stat -- (means of element 0, stddevs of element 0)
          last_max_cnt_label -- the ``max_cnt_label`` argument as given
        """
        sizes = sorted(self.err_log.keys())

        means = [utils.mean(self.err_log[size]) for size in sizes]
        spreads = [utils.stddev(self.err_log[size]) for size in sizes]

        # Reorder both lists together by the first component of the mean.
        order = sorted(range(len(means)), key=lambda pos: means[pos][0])
        means = [means[pos] for pos in order]
        spreads = [spreads[pos] for pos in order]

        if max_cnt_label is None:
            keep = len(means)
        else:
            keep = len([m for m in means if m[0] <= max_cnt_label * 2])

        kept_means = means[:keep]
        kept_spreads = spreads[:keep]
        self.err_stat = ([m[1] for m in kept_means],
                         [d[1] for d in kept_spreads])
        self.label_stat = ([m[0] for m in kept_means],
                           [d[0] for d in kept_spreads])
        self.last_max_cnt_label = max_cnt_label
示例#16
0
    def get_stat(self, max_cnt_label=-1, robust=False):
        """
        Aggregate ``self.err_log`` into ``err_stat`` and ``label_stat``.

        For every key of ``self.err_log`` the logged samples are reduced to
        a pair indexed at [0] and [1].  Entries are ordered by element [0]
        of the mean.

        :param max_cnt_label: when non-negative, only entries whose mean[0]
            is at most ``max_cnt_label * 2`` are counted, and that many
            leading entries are kept; a negative value keeps everything.
        :param robust: when true, element 0 and element 1 of the samples
            are reduced separately with ``utils.robust_mean`` /
            ``utils.robust_stddev``; otherwise ``utils.mean`` /
            ``utils.stddev`` are applied to the whole sample list.

        Sets on ``self``:
          err_stat   -- (means of element 1, stddevs of element 1)
          label_stat -- (means of element 0, stddevs of element 0)
          last_max_cnt_label -- the ``max_cnt_label`` argument as given

        The robust branch walks each size's own samples.  It no longer
        assumes every size logged as many samples as the first one, and an
        empty log yields empty statistics instead of an IndexError.
        """
        szs = sorted(self.err_log.keys())
        if robust:
            ave = []
            dev = []
            for sz in szs:
                samples = self.err_log[sz]
                # Split the samples into their element-0 and element-1 series.
                series = [[sample[tp] for sample in samples] for tp in (0, 1)]
                ave.append(np.array([utils.robust_mean(s) for s in series]))
                dev.append(np.array([utils.robust_stddev(s) for s in series]))
        else:
            ave = [utils.mean(self.err_log[sz]) for sz in szs]
            dev = [utils.stddev(self.err_log[sz]) for sz in szs]

        # Reorder both lists together by the first component of the mean.
        idx = sorted(range(len(ave)), key=lambda i: ave[i][0])
        ave = [ave[i] for i in idx]
        dev = [dev[i] for i in idx]

        if max_cnt_label < 0:
            length = len(ave)
        else:
            length = len([c for c in ave if c[0] <= max_cnt_label * 2])
        self.err_stat = ([c[1] for c in ave[:length]],
                         [c[1] for c in dev[:length]])
        self.label_stat = ([c[0] for c in ave[:length]],
                           [c[0] for c in dev[:length]])
        self.last_max_cnt_label = max_cnt_label
示例#17
0
def esth(vals):
   """
   Estimate the CUSUM parameter h for a set of samples.

   A reasonable estimate for h is approx. 5 * sigma, i.e. five times
   the value utils.stddev() reports for the samples.
   """
   sigma = utils.stddev(vals)
   return 5.0 * sigma