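# Imports assumed by the functions below, reconstructed from the calls
# they make (cStringIO/cPickle and the print statements mark this as
# Python 2 code; EOF, Stock, mydir, trace_log, HDFileSystemExt and
# classify0 are project-level names defined elsewhere):
import codecs
import cPickle
import cStringIO
import logging
import operator
import os
import subprocess
import sys
import tempfile
import time
from multiprocessing import Pipe, Process

import numpy as np
from tabulate import tabulate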
def predict(self):
    result = []
    for code in self.data:
        volume = Stock.get_col().index("volume")
        chg_p = Stock.get_col().index("chg_p")
        vma5 = Stock.get_col().index("vma5")
        data_array = np.array(self.data[code])
        try:
            chg_p_array = np.float32(data_array[:, chg_p])
            vma5_array = np.float32(data_array[:, vma5])
            volume_array = np.float32(data_array[:, volume])
            # one feature (daily volume) per sample; label a day "up" when
            # its change percentage exceeds 5%, "down" otherwise
            training_data = [[v] for v in volume_array]
            training_label = [["down", "up"][int(x > 5.0)] for x in chg_p_array]
            # kNN (k=5): classify the latest 5-day volume average against
            # the history of daily volumes
            trend, percent = classify0([vma5_array[-1]],
                                       np.array(training_data),
                                       training_label, 5)
            if trend == "up":
                result.append((code, percent))
        except Exception:
            trace_log()
    return sorted(result, key=operator.itemgetter(1), reverse=True)
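# classify0 is not defined in this section; it is called above as
# classify0(inX, dataSet, labels, k) and expected to return the winning
# label plus its vote share. A minimal kNN sketch consistent with that
# call site (the name and signature match the classic "Machine Learning
# in Action" helper; the vote-fraction return value is an assumption):
def classify0(in_x, data_set, labels, k):
    # Euclidean distance from the query point to every training sample
    diff = data_set - np.array(in_x)
    distances = np.sqrt((diff ** 2).sum(axis=1))
    # majority vote among the k nearest neighbours
    votes = {}
    for i in distances.argsort()[:k]:
        votes[labels[i]] = votes.get(labels[i], 0) + 1
    winner = max(votes, key=votes.get)
    return winner, float(votes[winner]) / k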
def predict(self):
    result = {}
    for code in self.data:
        close = Stock.get_col().index("close")
        chg_p = Stock.get_col().index("chg_p")
        # only consider stocks whose latest record (column 0 is the date)
        # is from today
        try:
            last_date = self.data[code][-1][0]
            if last_date != time.strftime("%Y-%m-%d", time.localtime()):
                continue
        except Exception:
            continue
        data_array = np.array(self.data[code])
        try:
            close_array = np.float32(data_array[:, close])
            chg_p_array = np.float32(data_array[:, chg_p])

            def gold_predict(close_array):
                # look for a pullback to the 0.382 Fibonacci retracement
                # of the recent high-low range
                close_array = close_array[-10:]
                max_close = max(close_array)
                min_close = min(close_array)
                # the high must be yesterday's close, and the range must
                # span more than 10% of the low
                if close_array[-1] < max_close and max_close == close_array[-2]:
                    if (max_close - min_close) / min_close > 0.1:
                        ratio = (max_close - close_array[-1]) / (max_close - min_close)
                        if (0.382 - 0.010) < ratio < (0.382 + 0.010):
                            # price target mirrors the pullback above the high
                            result[code] = 2 * max_close - close_array[-1]
                            return True
                return False

            # try the 5-day window first, then fall back to 10 days
            if not gold_predict(close_array[-5:]):
                gold_predict(close_array[-10:])
        except Exception:
            trace_log()
    return result.items()
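# A worked pass through gold_predict with made-up closes: min_close = 10.0,
# max_close = 11.5 (yesterday's close, and the range is 15% > 10%), and
# today's close at 10.93 gives
#   ratio = (11.5 - 10.93) / (11.5 - 10.0) = 0.38,
# inside the 0.382 +/- 0.010 band, so the predicted target would be
#   2 * 11.5 - 10.93 = 12.07.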
def save_all(self):
    filename = os.path.join(mydir(), "data",
                            time.strftime("%Y%m%d", time.localtime()) + ".pkl")
    try:
        logging.info("dump all the data to file %s" % filename)
        # open pickle files in binary mode
        with open(filename, "wb") as f:
            cPickle.dump(self.data, f)
        return os.path.basename(filename)
    except Exception:
        trace_log()
        return None
def hdfs_ls(path):
    # mkstemp avoids the race in tempfile.mktemp(); the caller is
    # responsible for removing the returned file
    fd, tmp = tempfile.mkstemp()
    with os.fdopen(fd, 'w') as f:
        try:
            # discard stderr instead of piping it: check_call never reads
            # the pipe, so a chatty stderr could block the child
            with open(os.devnull, 'w') as devnull:
                subprocess.check_call(
                    ["hdfs", "dfs", "-ls", "-R", "-q", "-h", path],
                    stdout=f, stderr=devnull)
        except Exception:
            trace_log()
    return tmp
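# Usage sketch for hdfs_ls: it returns a temp-file path, not a handle
# (the HDFS path here is hypothetical):
# listing = hdfs_ls("/user/hive/warehouse")
# with open(listing) as f:
#     for line in f:
#         print line.rstrip()
# os.remove(listing)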
def hdfs_rm(path, is_folder):
    try:
        with open(os.devnull, 'w') as devnull:
            if is_folder:
                # -rmdir only removes empty directories; the flag turns
                # "directory not empty" into a no-op instead of an error
                return subprocess.check_call(
                    ["hdfs", "dfs", "-rmdir", "--ignore-fail-on-non-empty", path],
                    stderr=devnull)
            return subprocess.check_call(
                ["hdfs", "dfs", "-rm", "-f", path], stderr=devnull)
    except Exception:
        logging.error("failed to delete " + path + " due to:")
        trace_log()
        return 128
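# Because hdfs_rm uses -rmdir for folders, a non-empty directory is left
# in place. A sketch of removing a whole tree bottom-up with the two
# helpers above; the hdfs_rm_tree name is hypothetical, and it assumes
# the "hdfs dfs -ls -R" output format (permissions first, path last,
# no spaces inside paths):
def hdfs_rm_tree(path):
    listing = hdfs_ls(path)
    entries = []
    with open(listing) as f:
        for line in f:
            parts = line.split()
            if len(parts) >= 8:
                # a leading 'd' in the permission string marks a directory
                entries.append((parts[-1], parts[0].startswith('d')))
    os.remove(listing)
    # delete the deepest paths first so directories empty out in order
    for entry_path, is_folder in sorted(entries, key=lambda e: -len(e[0])):
        hdfs_rm(entry_path, is_folder)
    hdfs_rm(path, True)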
def reload_all(self, name=None):
    # compute the default filename inside the function: a default argument
    # of time.strftime(...) is evaluated once at import time, so the date
    # would never advance past the day the module was loaded
    if name is None:
        name = time.strftime("%Y%m%d", time.localtime()) + ".pkl"
    filename = os.path.join(mydir(), "data", name)
    if not os.path.exists(filename):
        self.fast_refresh_all()
        self.save_all()
    else:
        try:
            logging.info("load all the data from file %s" % filename)
            with open(filename, "rb") as f:
                self.data = cPickle.load(f)
        except Exception:
            trace_log()
            self.fast_refresh_all()
            self.save_all()
    return self.data
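# Round-trip usage sketch for save_all/reload_all (the Market class name
# is hypothetical; any holder of these methods works the same way):
# market = Market()
# market.reload_all()   # loads today's pickle, or refreshes and saves one
# market.save_all()     # returns e.g. "20240102.pkl", or None on failure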
def predict(self):
    result = {}
    for code in self.data:
        volume = Stock.get_col().index("volume")
        chg_p = Stock.get_col().index("chg_p")
        vma5 = Stock.get_col().index("vma5")
        ma5 = Stock.get_col().index("ma5")
        close = Stock.get_col().index("close")
        high = Stock.get_col().index("high")
        # only consider stocks whose latest record is from today
        try:
            last_date = self.data[code][-1][0]
            if last_date != time.strftime("%Y-%m-%d", time.localtime()):
                continue
        except Exception:
            continue
        data_array = np.array(self.data[code])
        try:
            chg_p_array = np.float32(data_array[:, chg_p])
            vma5_array = np.float32(data_array[:, vma5])
            volume_array = np.float32(data_array[:, volume])
            ma5_array = np.float32(data_array[:, ma5])
            close_array = np.float32(data_array[:, close])
            high_array = np.float32(data_array[:, high])
            # detect a downward gap: some day in the last 9 sessions whose
            # high stayed below the previous day's close
            jumped = False
            for i in range(-1, -10, -1):
                if high_array[i] < close_array[i - 1]:
                    jumped = True
                    break
            # gapped down recently but rebounding strongly today
            if jumped and chg_p_array[-1] > 3.0:
                # score by how many of the last 10 sessions closed down
                down_number = 0
                for c in chg_p_array[-10:]:
                    if c < 0.0:
                        down_number += 1
                result[code] = down_number
        except Exception:
            trace_log()
    return sorted(result.iteritems(), key=operator.itemgetter(1), reverse=True)
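# A tiny trace of the gap test above with made-up prices: on the third
# day the high (9.4) stays below the previous close (9.8), so a downward
# gap is flagged.
# close_array = [10.0,  9.8, 9.0, 9.2]
# high_array  = [10.2, 10.0, 9.4, 9.3]
# i = -2: high_array[-2] = 9.4 < close_array[-3] = 9.8  ->  jumped = True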
def verify(self):
    # avoid a ZeroDivisionError when there is nothing to verify
    if not self.up_code:
        return
    up5 = up1 = up9 = 0
    up = []
    chg_p = Stock.get_col().index("chg_p")
    for code in self.up_code:
        data_array = np.array(self.data[code])
        try:
            chg_p_array = np.float32(data_array[:, chg_p])
            # hit rates at the 1%, 5% and 9% daily-change thresholds
            if chg_p_array[-1] >= 1.0:
                up1 += 1
                up.append((code, chg_p_array[-1]))
            if chg_p_array[-1] >= 5.0:
                up5 += 1
            if chg_p_array[-1] >= 9.0:
                up9 += 1
        except Exception:
            logging.error("%s failed" % code)
            trace_log()
    total = float(len(self.up_code))
    print "up5 = %.2f, up1 = %.2f, up9 = %.2f" % (
        up5 / total, up1 / total, up9 / total)
    print up
def put_with_conversion(self, src, dest, from_encoding=None,
                        to_encoding=None, regex=None):
    block_size = 64 * 2 ** 20  # flush in 64 MiB blocks

    # sub-process target: drain the pipe and write the blocks to HDFS
    def _write(instance_context, dest, child_conn):
        self = instance_context
        try:
            with self.open(dest, 'wb') as f:
                fp = cStringIO.StringIO()
                buffer_len = 0
                while True:
                    data = child_conn.recv()
                    if isinstance(data, EOF):
                        # end-of-file sentinel from the parent
                        break
                    fp.write(data)
                    buffer_len += len(data)
                    if buffer_len >= block_size:
                        f.write(fp.getvalue())
                        fp.close()
                        buffer_len = 0
                        fp = cStringIO.StringIO()
                # write the last partial block
                if buffer_len:
                    f.write(fp.getvalue())
                fp.close()
        except Exception:
            trace_log()
            sys.exit(128)

    parent_conn, child_conn = Pipe()
    child = Process(target=_write, args=(self, dest, child_conn))
    child.start()
    time_start = time.time()
    # the parent handles the transcoding and feeds the sub-process
    with codecs.open(src, 'r', from_encoding) as f2:
        fp = cStringIO.StringIO()
        buffer_len = 0
        total_lines = 0
        try:
            for line in f2:
                out = self._string_transcoding(
                    from_encoding, to_encoding, self._regex_sub(regex, line))
                if len(out) == 0:
                    continue
                fp.write(out)
                buffer_len += len(out)
                total_lines += 1
                if buffer_len >= block_size:
                    parent_conn.send(fp.getvalue())
                    fp.close()
                    buffer_len = 0
                    fp = cStringIO.StringIO()
            # send the last partial block, then the EOF sentinel
            if buffer_len:
                parent_conn.send(fp.getvalue())
            fp.close()
            parent_conn.send(EOF())
        except Exception:
            parent_conn.send(EOF())
            # kill the writer before re-raising
            child.terminate()
            child.join()
            trace_log()
            raise Exception("parent got exception")
    # wait for the writer to flush and exit
    child.join()
    if child.exitcode != 0:
        raise Exception("child process returned a non-zero exit code")
    time_end = time.time()
    return [src, dest, "succeed", total_lines, int(time_end - time_start)]
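# Usage sketch for put_with_conversion, mirroring the driver loop below
# (paths and encodings are hypothetical):
# hdfs = HDFileSystemExt()
# row = hdfs.put_with_conversion("/tmp/input.csv", "/user/etl/input.csv",
#                                from_encoding="gbk", to_encoding="utf-8")
# hdfs.disconnect()
# # row -> [src, dest, "succeed", total_lines, elapsed_seconds]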
            # dest refers to a directory: append the basename of src,
            # inserting a '/' separator when dest lacks a trailing one
            dest_file = (dest + os.path.basename(src) if dest.endswith('/')
                         else dest + '/' + os.path.basename(src))
            files_list.append((src, dest_file))
        else:
            files_list.append((src, dest))
    hdfs.disconnect()
    # handle each (src, dest) pair in files_list
    for src_file, dest_file in files_list:
        logging.info("hdfs put from src(%s) to hdfs(%s)"
                     % (src_file, dest_file))
        try:
            hdfs = HDFileSystemExt()
            succeed.append(
                hdfs.put_with_conversion(src_file, dest_file,
                                         from_encoding, to_encoding, regex))
            hdfs.disconnect()
        except Exception:
            failed.append([src_file, dest_file, "failed", None, None])
            logging.error("failed to put src(%s) to hdfs(%s) due to:"
                          % (src_file, dest_file))
            trace_log()
    # print the result summary
    headers = ["src", "dest", "result", "total lines", "time(second)"]
    logging.info(tabulate(succeed + failed, headers, tablefmt="grid"))
except Exception:
    trace_log()
    sys.exit(128)