Пример #1
0
 def _write(instance_context, dest, child_conn):
     """Child-process loop: receive data chunks from *child_conn* and write
     them to the HDFS path *dest* in block_size-sized batches.

     *instance_context* is the HDFS filesystem wrapper providing .open();
     an EOF sentinel object received on the pipe terminates the loop.
     Exits the process with status 128 on any error so the parent can
     detect failure via the child's exit code.
     """
     self = instance_context
     try:
         with self.open(dest, 'wb') as f:
             fp = cStringIO.StringIO()
             buffer_len = 0
             while True:
                 data = child_conn.recv()
                 if isinstance(data, EOF):
                     # end of file, break
                     break
                 fp.write(data)
                 buffer_len += len(data)
                 # flush the in-memory buffer once a full block accumulated
                 if buffer_len >= block_size:
                     f.write(fp.getvalue())
                     fp.close()
                     buffer_len = 0
                     fp = cStringIO.StringIO()
             # write last (partial) segment
             if buffer_len:
                 f.write(fp.getvalue())
             # close the buffer even when the last segment was empty
             # (the original leaked it in that case)
             fp.close()
     except Exception:
         # narrowed from a bare except:, which would also swallow
         # SystemExit/KeyboardInterrupt
         trace_log()
         exit(128)
Пример #2
0
 def predict(self):
     """Rank stocks predicted to rise using a kNN classifier.

     For every stock code, trains classify0 on (volume -> up/down) samples,
     where a day is labeled "up" when its chg_p exceeds 5.0, then classifies
     the latest vma5 value.  Codes that fail to classify are logged and
     skipped.

     Returns a list of (code, percent) tuples sorted by percent descending.
     """
     # column indices are loop-invariant -- look them up once, not per code
     cols = Stock.get_col()
     volume, chg_p, vma5 = (cols.index("volume"), cols.index("chg_p"),
                            cols.index("vma5"))
     result = []
     for code in self.data:
         data_array = np.array(self.data[code])
         try:
             chg_p_array = np.float32(data_array[:, chg_p])
             vma5_array = np.float32(data_array[:, vma5])
             volume_array = np.float32(data_array[:, volume])
             # one feature per sample: the day's volume
             training_data = [[v] for v in volume_array]
             training_label = [["down", "up"][int(x > 5.0)]
                               for x in chg_p_array]
             trend, percent = classify0([vma5_array[-1]],
                                        np.array(training_data),
                                        training_label, 5)
             if trend == "up":
                 result.append((code, percent))
         except Exception:
             # best-effort per code: log the failure and continue
             trace_log()
     return sorted(result, key=operator.itemgetter(1), reverse=True)
Пример #3
0
 def predict(self):
     """Predict rebound targets using a golden-ratio (Fibonacci) retracement.

     For each stock whose last record is from today: if the close peaked two
     sessions ago and has pulled back to roughly 38.2% (+/-1%) of a
     (max - min) range whose amplitude exceeds 10%, the stock is predicted
     to rebound to 2*max - current close.  Tries a 5-day window first, then
     falls back to 10 days.

     Returns an iterable of (code, target_price) pairs.
     """
     # column index is loop-invariant -- look it up once, not per code
     close = Stock.get_col().index("close")
     result = {}
     today = time.strftime("%Y-%m-%d", time.localtime())
     for code in self.data:
         try:
             # skip stocks with no record for today
             if self.data[code][-1][0] != today:
                 continue
         except Exception:
             continue
         data_array = np.array(self.data[code])
         try:
             close_array = np.float32(data_array[:, close])

             def gold_predict(close_window):
                 # examine (up to) the last 10 closes of the window
                 close_window = close_window[-10:]
                 max_close = max(close_window)
                 min_close = min(close_window)
                 # the peak must be exactly two sessions back, not today
                 if close_window[-1] < max_close and max_close == close_window[-2]:
                     # require a swing of more than 10% from low to high
                     if (max_close - min_close) / min_close > 0.1:
                         ratio = (max_close - close_window[-1]) / (max_close - min_close)
                         # within +/-1% of the 38.2% Fibonacci retracement
                         if (0.382 - 0.010) < ratio < (0.382 + 0.010):
                             result[code] = 2 * max_close - close_window[-1]
                             return True
                 return False

             # 5-day window first, then fall back to the 10-day window
             if not gold_predict(close_array[-5:]):
                 gold_predict(close_array[-10:])
         except Exception:
             trace_log()
     return result.items()
Пример #4
0
 def save_all(self):
     """Pickle self.data to data/YYYYMMDD.pkl under the program directory.

     Returns the basename of the written file, or None on failure.
     """
     filename = os.path.join(mydir(), "data",
                             time.strftime("%Y%m%d", time.localtime()) + ".pkl")
     try:
         logging.info("dump all the data to file %s" % filename)
         # pickle data is binary: "w" (text mode) corrupts it on platforms
         # that translate newlines, so write with "wb"
         with open(filename, "wb") as f:
             cPickle.dump(self.data, f)
             return os.path.basename(filename)
     except Exception:
         trace_log()
         return None
Пример #5
0
def hdfs_ls(path):
    """Run a recursive `hdfs dfs -ls` on *path*, capturing the listing.

    The listing is written to a temporary file; the caller is responsible
    for reading and deleting it.  Errors are logged and the (possibly
    empty) file path is still returned.

    Returns the temporary file's path.
    """
    import os  # local import: used for fdopen/devnull only

    # mkstemp instead of the race-prone, deprecated tempfile.mktemp()
    fd, tmp = tempfile.mkstemp()
    with os.fdopen(fd, 'w') as f:
        try:
            # discard stderr via devnull: check_call never reads a PIPE,
            # so a chatty hdfs process could fill the pipe and deadlock
            with open(os.devnull, 'w') as devnull:
                subprocess.check_call(
                    ["hdfs", "dfs", "-ls", "-R", "-q", "-h", path],
                    stdout=f,
                    stderr=devnull)
        except Exception:
            trace_log()
        return tmp
Пример #6
0
def hdfs_rm(path, is_folder):
    """Delete an HDFS path.

    Uses `hdfs dfs -rmdir --ignore-fail-on-non-empty` for folders and
    `hdfs dfs -rm -f` for files.

    Returns 0 on success (check_call's return value), or 128 when the
    command failed or could not be run.
    """
    import os  # local import: used for devnull only

    try:
        # discard stderr via devnull: check_call never reads a PIPE, so a
        # chatty hdfs process could fill the pipe and deadlock
        with open(os.devnull, 'w') as devnull:
            if is_folder:
                return subprocess.check_call(
                    ["hdfs", "dfs", "-rmdir", "--ignore-fail-on-non-empty",
                     path],
                    stderr=devnull)
            else:
                return subprocess.check_call(
                    ["hdfs", "dfs", "-rm", "-f", path],
                    stderr=devnull)
    except Exception:
        # narrowed from a bare except:, which would also swallow
        # KeyboardInterrupt
        logging.error("failed to delete " + path + " due to:")
        trace_log()
        return 128
Пример #7
0
 def reload_all(self, name=None):
     """Load self.data from data/<name> (default: today's YYYYMMDD.pkl).

     If the file is missing or unreadable, rebuilds the data via
     fast_refresh_all() and saves it again.

     Note: the default file name is now computed per call.  The old
     signature evaluated time.strftime() once at import time, so a
     long-running process kept loading the start-up date's file.

     Returns self.data.
     """
     if name is None:
         name = time.strftime("%Y%m%d", time.localtime()) + ".pkl"
     filename = os.path.join(mydir(), "data", name)
     if not os.path.exists(filename):
         self.fast_refresh_all()
         self.save_all()
     else:
         try:
             logging.info("load all the data from file %s" % filename)
             # pickle data is binary; read in binary mode
             with open(filename, "rb") as f:
                 self.data = cPickle.load(f)
         except Exception:
             trace_log()
             self.fast_refresh_all()
             self.save_all()
     return self.data
Пример #8
0
 def predict(self):
     """Flag stocks that gapped recently and jumped more than 3% today.

     A stock qualifies when, within the last 9 sessions, some day's high
     stayed below the previous day's close (a gap) AND today's chg_p is
     above 3.0.  Its score is the number of down days among the last 10
     sessions.  Only stocks whose last record is from today are considered.

     Returns a list of (code, down_day_count) sorted by count descending.
     """
     # column indices are loop-invariant -- look them up once, not per code
     cols = Stock.get_col()
     chg_p, close, high = (cols.index("chg_p"), cols.index("close"),
                           cols.index("high"))
     result = {}
     today = time.strftime("%Y-%m-%d", time.localtime())
     for code in self.data:
         try:
             # skip stocks with no record for today
             if self.data[code][-1][0] != today:
                 continue
         except Exception:
             continue
         data_array = np.array(self.data[code])
         try:
             # only chg_p/close/high are used; the original also converted
             # volume/vma5/ma5 columns and never read them
             chg_p_array = np.float32(data_array[:, chg_p])
             close_array = np.float32(data_array[:, close])
             high_array = np.float32(data_array[:, high])
             # detect a gap: a day's high below the previous day's close
             jumped = any(high_array[i] < close_array[i - 1]
                          for i in range(-1, -10, -1))
             if jumped and chg_p_array[-1] > 3.0:
                 # score: number of down days in the last 10 sessions
                 result[code] = sum(1 for c in chg_p_array[-10:] if c < 0.0)
         except Exception:
             trace_log()
     return sorted(result.iteritems(),
                   key=operator.itemgetter(1),
                   reverse=True)
Пример #9
0
 def verify(self):
     up5 = 0
     up1 = 0
     up9 = 0
     up = []
     chg_p = Stock.get_col().index("chg_p")
     for code in self.up_code:
         data_array = np.array(self.data[code])
         try:
             chg_p_array = np.float32(data_array[:, chg_p])
             if chg_p_array[-1] >= 1.0:
                 up1 += 1
                 up.append((code, chg_p_array[-1]))
             if chg_p_array[-1] >= 5.0:
                 up5 += 1
             if chg_p_array[-1] >= 9.0:
                 up9 += 1
         except:
             logging.error("%s failed" % code)
             trace_log()
     print "up5 = %.2f, up1 = %.2f, up9 = %.2f" % (
         float(up5) / float(len(self.up_code)), float(up1) /
         float(len(self.up_code)), float(up9) / float(len(self.up_code)))
     print up
Пример #10
0
    def put_with_conversion(self, src, dest, from_encoding=None, to_encoding=None, regex=None):
        """Upload local file *src* to HDFS path *dest* with transcoding.

        Reads *src* line by line decoded as *from_encoding*, applies the
        optional *regex* substitution, transcodes each line to
        *to_encoding*, and streams the output through a Pipe to a child
        process that writes it to HDFS in 64 MiB blocks.

        Returns [src, dest, "succeed", total_lines, elapsed_seconds] on
        success; raises Exception when either the parent conversion loop or
        the child writer fails.
        """
        block_size = 64*2**20  # 64 MiB buffered per HDFS write

        # sub-process to write HDFS
        def _write(instance_context, dest, child_conn):
            # Child-process loop: receive chunks from the pipe, batch them in
            # an in-memory buffer, and flush to HDFS block by block.  Exits
            # with status 128 on error so the parent sees it via exitcode.
            self = instance_context
            try:
                with self.open(dest, 'wb') as f:
                    fp = cStringIO.StringIO()
                    buffer_len = 0
                    while True:
                        data = child_conn.recv()
                        if isinstance(data, EOF):
                            # end of file, break
                            break
                        fp.write(data)
                        buffer_len += len(data)
                        if buffer_len >= block_size:
                            f.write(fp.getvalue())
                            fp.close()
                            buffer_len = 0
                            fp = cStringIO.StringIO()
                    # write last segment
                    if buffer_len:
                        f.write(fp.getvalue())
                        fp.close()
            except:
                trace_log()
                exit(128)

        parent_conn, child_conn = Pipe()
        child = Process(target=_write, args=(self, dest, child_conn))
        child.start()

        time_start = time.time()
        # parent handle the file conversion and write to sub-process
        with codecs.open(src, 'r', from_encoding) as f2:
            fp = cStringIO.StringIO()
            buffer_len = 0
            total_lines = 0
            try:
                for line in f2:
                    # regex substitution first, then transcode the line
                    out = self._string_transcoding(from_encoding, to_encoding, self._regex_sub(regex, line))
                    # skip lines that the substitution emptied out
                    if len(out) == 0:
                        continue
                    fp.write(out)
                    buffer_len += len(out)
                    total_lines += 1
                    # forward a full block to the child writer
                    if buffer_len >= block_size:
                        parent_conn.send(fp.getvalue())
                        fp.close()
                        buffer_len = 0
                        fp = cStringIO.StringIO()
                # send last segment
                if buffer_len:
                    parent_conn.send(fp.getvalue())
                    fp.close()
                # EOF sentinel tells the child to finish and exit cleanly
                parent_conn.send(EOF())
            except:
                # on parent failure: signal EOF, then kill the child if it
                # keeps running, and re-raise for the caller
                parent_conn.send(EOF())
                while child.is_alive():
                    child.terminate()
                    time.sleep(1)
                trace_log()
                raise Exception("parent got exception")

            # wait for the child to drain the pipe and finish writing
            while child.is_alive():
                time.sleep(1)
            if child.exitcode != 0:
                raise Exception("child thread return non-zero value")
            else:
                time_end = time.time()
                return [src, dest, "succeed", total_lines, int(time_end - time_start)]
Пример #11
0
                dest_file = dest + os.path.basename(src) if dest.endswith(
                    '/') else dest + '/' + os.path.basename(src)
                files_list.append((src, dest_file))
            else:
                files_list.append((src, dest))
        hdfs.disconnect()

        # handle the src, dest in the files_list
        for src_file, dest_file in files_list:
            logging.info("hdfs put from src(%s) to hdfs(%s)" %
                         (src_file, dest_file))
            try:
                hdfs = HDFileSystemExt()
                succeed.append(
                    hdfs.put_with_conversion(src_file, dest_file,
                                             from_encoding, to_encoding,
                                             regex))
                hdfs.disconnect()
            except:
                failed.append([src_file, dest_file, "failed", None, None])
                logging.error("failed to put src(%s) to hdfs(%s) due to" %
                              (src_file, dest_file))
                trace_log()

        # print out the result summary
        headers = ["src", "dest", "result", "total lines", "time(second)"]
        logging.info(tabulate(succeed + failed, headers, tablefmt="grid"))
    except:
        trace_log()
        exit(128)