def compute_zscore():
    """Recompute zscore30 and zscore90 for every row of t_daily_stockindices.

    Rows are read in ascending sequence_id order. For each row getZscore() is
    called with durations 30 and 90, and both results are written back with an
    UPDATE keyed on (stock_index, sequence_id). The work is committed after
    every 300 updates and once more at the end for the remainder.
    """
    con = common.getDBConnection()
    reader = con.cursor()
    reader.execute("select sequence_id,stock_index,date,one_day_change from t_daily_stockindices order by sequence_id asc ")
    allRows = reader.fetchall()

    writer = con.cursor()
    updateSql = "update t_daily_stockindices set zscore30 = ? ,zscore90 = ? where stock_index=? and sequence_id = ?"
    for processed, record in enumerate(allRows, 1):
        sequenceId, stockIndex, date, oneDayChange = record[0], record[1], record[2], record[3]
        scores = (
            getZscore(date, stockIndex, oneDayChange, 30),
            getZscore(date, stockIndex, oneDayChange, 90),
        )
        writer.execute(updateSql, scores + (stockIndex, sequenceId))
        # Commit in batches of 300 rows.
        if processed % 300 == 0:
            con.commit()
            print("commit")
    # Flush whatever is left after the last full batch.
    con.commit()
def init():
    """Set up the module-level database handles and initialise logging.

    Stores the connection returned by common.getDBConnection() in the global
    ``con``, a cursor created from it in the global ``cur``, and then calls
    logs.init().
    """
    global con, cur
    con = common.getDBConnection()
    cur = con.cursor()
    logs.init()
def insert_database(self):
    """Replace the contents of t_data_source with the rows read from Excel.

    The rows come from self.read_data_from_excel(). The table is emptied
    first, and that delete is committed on its own before any insert runs;
    the inserts are committed together after the loop. Each row's stock index
    is looked up from its country via self.conver_to_stock_index().

    Returns None. A lite.Error is reported on stdout and not re-raised.
    """
    try:
        con = common.getDBConnection()
        cur = con.cursor()
        dataList = self.read_data_from_excel()

        # Clear the old data first.
        cur.execute("delete from t_data_source")
        con.commit()

        sqlString = "insert into t_data_source(title,post_date,content,stock_index,source,update_time,country,eventCode,population,url) values(?,?,?,?,?,datetime('now','localtime'),?,?,?,?)"
        for item in dataList:
            params = (
                item["title"],
                item["post_date"],
                item["content"],
                self.conver_to_stock_index(item["country"]),
                item["source"],
                item["country"],
                item["eventCode"],
                item["population"],
                item["url"],
            )
            cur.execute(sqlString, params)
        con.commit()
        return None
    except lite.Error as e:
        print("Error: %s" % e.args[0])
def execute():
    """Copy t_daily_stockindices rows into t_daily_stockindex, adding an embers_id.

    Every source row is turned into a dict; the SHA1 hex digest of that dict's
    JSON dump becomes its embers_id. All rows are prepared before the first
    insert runs, and the inserts are committed once at the end. A lite.Error
    is reported on stdout and not re-raised.
    """
    # Dict keys, in the same order as the selected columns.
    fieldNames = ("sub_sequence", "date", "lastPrice", "oneDayChange",
                  "zscore30", "zscore90", "stockIndex")
    try:
        con = common.getDBConnection()
        cur = con.cursor()
        cur.execute("select sub_sequence,date,last_price,one_day_change,zscore30,zscore90,stock_index from t_daily_stockindices")

        stockIndexValues = []
        for row in cur.fetchall():
            indexValue = {}
            # Keys are added one by one, in this exact order, because the
            # digest below depends on how json.dumps orders the dict.
            for key, value in zip(fieldNames, row):
                indexValue[key] = value
            indexValue["embersId"] = hashlib.sha1(json.dumps(indexValue)).hexdigest()
            stockIndexValues.append(indexValue)

        insertSql = "insert into t_daily_stockindex(embers_id,sub_sequence,stock_index,date,last_price,one_day_change,zscore30,zscore90) values (?,?,?,?,?,?,?,?) "
        insertOrder = ("embersId", "sub_sequence", "stockIndex", "date",
                       "lastPrice", "oneDayChange", "zscore30", "zscore90")
        for stock in stockIndexValues:
            cur.execute(insertSql, tuple(stock[key] for key in insertOrder))
        con.commit()
    except lite.Error as e:
        print("Error: %s" % e.args[0])
def execute():
    """Populate t_daily_stockindex from t_daily_stockindices.

    Each selected row is mapped to a dict whose JSON dump is SHA1-hashed to
    produce the row's embers_id. The inserts start only after every row has
    been mapped, and they are committed together. A lite.Error is printed and
    swallowed.
    """
    try:
        con = common.getDBConnection()
        cur = con.cursor()
        selectSql = "select sub_sequence,date,last_price,one_day_change,zscore30,zscore90,stock_index from t_daily_stockindices"
        cur.execute(selectSql)
        fetched = cur.fetchall()

        stockIndexValues = []
        for record in fetched:
            # Build the dict incrementally in a fixed key order: the hash is
            # taken over json.dumps of it, so the ordering must not change.
            entry = {}
            entry["sub_sequence"] = record[0]
            entry["date"] = record[1]
            entry["lastPrice"] = record[2]
            entry["oneDayChange"] = record[3]
            entry["zscore30"] = record[4]
            entry["zscore90"] = record[5]
            entry["stockIndex"] = record[6]
            digest = hashlib.sha1(json.dumps(entry))
            entry["embersId"] = digest.hexdigest()
            stockIndexValues.append(entry)

        insertSql = "insert into t_daily_stockindex(embers_id,sub_sequence,stock_index,date,last_price,one_day_change,zscore30,zscore90) values (?,?,?,?,?,?,?,?) "
        for entry in stockIndexValues:
            params = (
                entry["embersId"],
                entry["sub_sequence"],
                entry["stockIndex"],
                entry["date"],
                entry["lastPrice"],
                entry["oneDayChange"],
                entry["zscore30"],
                entry["zscore90"],
            )
            cur.execute(insertSql, params)
        con.commit()
    except lite.Error as e:
        print("Error: %s" % e.args[0])
def main():
    """Entry point: run initiate() on the news file given on the command line.

    Parses the arguments with arg_parser(), opens a database connection,
    passes the ``news_file`` argument and the connection to initiate(), and
    closes the connection afterwards.
    """
    parsed = arg_parser()
    newsPath = parsed.news_file
    conn = common.getDBConnection()
    initiate(newsPath, conn)
    if conn:
        conn.close()
def insert_warningmessage(warningMessage):
    """Insert a warning message into t_warningmessage unless it already exists.

    The warning is identified by warningMessage["embersId"]; if a row with
    that embers_id is already present, nothing is written. The "derivedFrom"
    value is stored as a JSON string.

    Any exception is reported on stdout and not re-raised. The connection is
    always closed before returning.
    """
    # Bound before the try so the finally clause is safe even when opening
    # the connection itself fails.
    con = None
    try:
        con = common.getDBConnection()
        cur = con.cursor()

        # If the warning is already in the database there is nothing to insert.
        checkSql = "select count(*) from t_warningmessage where embers_id = ?"
        embersId = warningMessage["embersId"]
        cur.execute(checkSql, (embersId, ))
        count = cur.fetchone()[0]
        if count == 0:
            insertSql = ("insert into t_warningmessage (embers_id,derived_from,model,event_type,confidence,confidence_isprobability,"
                         "event_date,location,population) values (?,?,?,?,?,?,?,?,?)")
            derivedFrom = json.dumps(warningMessage["derivedFrom"])
            model = warningMessage["model"]
            eventType = warningMessage["eventType"]
            confidence = warningMessage["confidence"]
            confidenceIsProbability = warningMessage["confidenceIsProbability"]
            eventDate = warningMessage["eventDate"]
            population = warningMessage["population"]
            location = warningMessage["location"]
            # Positional parameters must follow the column list exactly:
            # ..., event_date, location, population.
            cur.execute(
                insertSql,
                (embersId, derivedFrom, model, eventType, confidence,
                 confidenceIsProbability, eventDate, location, population))
            con.commit()
    except Exception as e:
        print("Error: %s" % e.args[0])
    finally:
        if con:
            con.close()
def clusterSet(traingingStart,traningEndDate):
    """Collect training-period price rows for each stock and dump them to a file.

    For every stock index in the fixed list, the t_bloomberg_prices rows whose
    post_date lies in [traingingStart, traningEndDate] are read in ascending
    date order. Each row is converted to a list, passed to
    InitiateEnrichedData() and accumulated. The accumulated rows are finally
    written as JSON to the file configured under
    ("training", "TRAINING_TREND_RECORDS"), and the connection is closed.
    """
    con = common.getDBConnection()
    cur = con.cursor()
    finalClusterRecord = []
    priceSql = "select embers_id,post_date,current_value,previous_close_value,round(current_value-previous_close_value,4),round((current_value-previous_close_value)/previous_close_value,4),name from t_bloomberg_prices where name=? and post_date<=? and post_date>=? order by post_date asc"

    for stock in ("MERVAL", "MEXBOL", "CHILE65", "BVPSBVPS", "COLCAP",
                  "CRSMBCT", "IBOV", "IGBVL", "IBVC"):
        cur.execute(priceSql, (stock, traningEndDate, traingingStart))
        priceRows = cur.fetchall()

        # The number of rows to be committed for each interval.
        committedInterval = 0
        for priceRow in priceRows:
            record = list(priceRow)
            # Insert the pre-enriched stock index data into the database.
            InitiateEnrichedData(con, committedInterval, record)
            finalClusterRecord.append(record)
        con.commit()

    # Write the training data into a file.
    trendSetRecordFile = common.get_configuration("training", "TRAINING_TREND_RECORDS")
    with open(trendSetRecordFile, "w") as output:
        output.write(json.dumps(finalClusterRecord))

    if con:
        con.close()
def insert_warningmessage(warningMessage):
    """Store a warning message in t_warningmessage if it is not there yet.

    A row counts as already stored when t_warningmessage contains the same
    embers_id as warningMessage["embersId"]. The "derivedFrom" entry is
    serialised to JSON before being stored.

    Exceptions are printed and swallowed; the connection is closed in all
    cases.
    """
    # Pre-bind so the finally clause cannot fail when getDBConnection() raises.
    con = None
    try:
        con = common.getDBConnection()
        cur = con.cursor()

        # Skip the insert when the warning is already in the database.
        embersId = warningMessage["embersId"]
        cur.execute("select count(*) from t_warningmessage where embers_id = ?", (embersId,))
        alreadyStored = cur.fetchone()[0] != 0
        if not alreadyStored:
            insertSql = ("insert into t_warningmessage (embers_id,derived_from,model,event_type,confidence,confidence_isprobability,"
                         "event_date,location,population) values (?,?,?,?,?,?,?,?,?)")
            derivedFrom = json.dumps(warningMessage["derivedFrom"])
            model = warningMessage["model"]
            eventType = warningMessage["eventType"]
            confidence = warningMessage["confidence"]
            confidenceIsProbability = warningMessage["confidenceIsProbability"]
            eventDate = warningMessage["eventDate"]
            population = warningMessage["population"]
            location = warningMessage["location"]
            # The value order has to mirror the column order above, which
            # ends with location and then population.
            cur.execute(insertSql, (embersId, derivedFrom, model, eventType,
                                    confidence, confidenceIsProbability,
                                    eventDate, location, population))
            con.commit()
    except Exception as e:
        print("Error: %s" % e.args[0])
    finally:
        if con:
            con.close()
def main():
    """Entry point: clear existing data, then run initiate() on the given directories.

    Reads the ``r_dir`` and ``e_dir`` command-line arguments, opens a database
    connection, calls clear() on it, runs initiate() with both directories and
    the connection, and finally closes the connection.
    """
    parsed = arg_parser()
    rawDir, enrichedDir = parsed.r_dir, parsed.e_dir
    conn = common.getDBConnection()
    clear(conn)
    initiate(rawDir, enrichedDir, conn)
    if conn:
        conn.close()
def get_db_connection():
    """Open the module-level connection ``con`` and cursor ``cur``.

    The connection's text_factory is set to ``str``. A lite.Error raised
    while doing so is printed and not re-raised.
    """
    global con, cur
    try:
        con = common.getDBConnection()
        con.text_factory = str
        cur = con.cursor()
    except lite.Error as e:
        print("Error: %s" % e.args[0])
def clusterSet(traingingStart,traningEndDate,clu_num):
    """Cluster each stock's change_percent values and label the training rows.

    For every stock index in the fixed list:
      * read its t_enriched_bloomberg_prices rows with post_date in
        [traingingStart, traningEndDate], ascending by date;
      * cluster the distinct change_percent values into ``clu_num`` clusters
        with KMeansClustering;
      * number the clusters from 1 and record each cluster's [min, max] value;
      * for every row whose change_percent belongs to a cluster, append the
        cluster number to a copy of the row, pass it to UpdateEnrichedData()
        and accumulate it.

    The per-stock cluster ranges are written as JSON to the file configured
    under ("model", "TREND_RANGE_FILE"), and the labelled rows to the file
    configured under ("training", "TRAINING_TREND_RECORDS"). The connection is
    closed at the end.
    """
    con = common.getDBConnection()
    cur = con.cursor()
    finalClusterRecord = []
    finalOrderCluster = {}
    priceSql = "select embers_id,post_date,current_value,previous_close_value,one_day_change,change_percent,name from t_enriched_bloomberg_prices where name=? and post_date<=? and post_date>=? order by post_date asc"

    for stock in ("MERVAL", "MEXBOL", "CHILE65", "BVPSBVPS", "COLCAP",
                  "CRSMBCT", "IBOV", "IGBVL", "IBVC"):
        cur.execute(priceSql, (stock, traningEndDate, traingingStart))
        rows = cur.fetchall()

        # Cluster the distinct change_percent values as (0, value) points.
        distinctChanges = nltk.FreqDist([row[5] for row in rows]).keys()
        points = [(0, change) for change in distinctChanges]
        cluster = KMeansClustering(points).getclusters(clu_num)
        # Sample of what the KMeans step produces (abridged): a list of
        # clusters, each a list of (0, change_percent) tuples, e.g.
        # cluster = [[(0, 0.0862), (0, 0.088), (0, 0.0914), ...],
        #            [(0, -0.0001), (0, 0.0), (0, 0.0001), ...],
        #            ...]

        # Number the clusters from 1 and keep each one's [min, max] value.
        namedCluster = {}
        orderCluster = {}
        for number, members in enumerate(cluster, 1):
            namedCluster[number] = members
            orderCluster[number] = [min(members)[1], max(members)[1]]

        # The number of rows to be committed for each interval.
        committedInterval = 0
        for row in rows:
            point = (0, row[5])
            for number, members in namedCluster.items():
                if point not in members:
                    continue
                labelled = list(row)
                labelled.append(number)
                # Update the trend type in the database.
                UpdateEnrichedData(con, committedInterval, labelled)
                finalClusterRecord.append(labelled)
        con.commit()

        finalOrderCluster[stock] = orderCluster
        print("%s  Done" % stock)

    # Write the type range into a file.
    trendRangeFile = common.get_configuration("model", "TREND_RANGE_FILE")
    with open(trendRangeFile, "w") as output:
        output.write(json.dumps(finalOrderCluster))

    # Write the training data into a file.
    trendSetRecordFile = common.get_configuration("training", "TRAINING_TREND_RECORDS")
    with open(trendSetRecordFile, "w") as output:
        output.write(json.dumps(finalClusterRecord))

    if con:
        con.close()
def getZscore(curDate,stockIndex,curDiff,duration):
    """Return the z-score of ``curDiff`` against recent one-day changes.

    The reference sample is the one_day_change of up to ``duration`` rows of
    t_daily_stockindices for ``stockIndex`` dated strictly before ``curDate``,
    newest first. The score itself is computed by calculator.calZscore().

    NOTE(review): the connection obtained here is never closed. Confirm
    whether common.getDBConnection() hands out a shared handle before adding
    a close.
    """
    con = common.getDBConnection()
    cur = con.cursor()
    historySql = "select one_day_change from t_daily_stockindices where date<? and stock_index = ? order by date desc limit ?"
    cur.execute(historySql, (curDate, stockIndex, duration))
    scores = [record[0] for record in cur.fetchall()]
    return calculator.calZscore(scores, curDiff)
def conver_to_stock_index(self, country):
    """Return the stock_index that s_stock_country maps ``country`` to.

    NOTE(review): if no row matches, fetchone() returns None and the
    subscript below raises TypeError. Confirm that callers only pass
    countries present in the table.
    """
    con = common.getDBConnection()
    cur = con.cursor()
    lookupSql = "select stock_index from s_stock_country where country = ?"
    cur.execute(lookupSql, [country])
    match = cur.fetchone()
    return match[0]
def insert_prediction_into_database(self, predictionResults):
    """Insert every prediction result into t_prediction_result.

    Each item of ``predictionResults`` must provide the keys "embers_id",
    "eventCode", "stock_index" and "post_date". update_time is filled in by
    the database with the local current time. All inserts are committed
    together after the loop.
    """
    con = common.getDBConnection()
    cur = con.cursor()
    sqlString = "insert into t_prediction_result(embers_id, post_date, stock_index, update_time,eventCode) values(?,?,?,datetime('now','localtime'),?)"
    for result in predictionResults:
        embersId = result["embers_id"]
        eventCode = result["eventCode"]
        stockIndex = result["stock_index"]
        postDate = result["post_date"]
        cur.execute(sqlString, (embersId, postDate, stockIndex, eventCode))
    con.commit()
def main():
    """Compare every pair of stock indices using pre-2011 daily data.

    Loads, per stock index, the date, zscore30, zscore90 and the ratio
    one_day_change / (last_price - one_day_change) of every
    t_daily_stockindex row dated before 2011-01-01, then:

      * calls compare_stock() once for each unordered pair of distinct
        indices, using the date-keyed view of the data;
      * calls compare_stock_shift() with a shift of 0 for each ordered pair
        (including an index with itself), using the per-day list view.

    The loops cover exactly the indices found in the database, not an assumed
    count of nine.
    """
    # Get stock index values.
    cfgPath = "../Config/config.cfg"
    common.init(cfgPath)
    con = common.getDBConnection()
    cur = con.cursor()
    sql = "select stock_index,date,zscore30,zscore90,one_day_change/(last_price-one_day_change) from t_daily_stockindex where date<'2011-01-01' order by date asc"
    cur.execute(sql)
    rows = cur.fetchall()

    stockBasket = {}      # stock index -> list of per-day dicts, date-ascending
    tmpStockBasket = {}   # stock index -> {date: [zscore30, zscore90, lastPrice]}
    for row in rows:
        stockIndex = row[0]
        date = row[1]
        zscore30 = row[2]
        zscore90 = row[3]
        # The fifth column is the ratio computed in the SQL above; it is kept
        # under the historical "lastPrice" name that the comparison helpers
        # receive.
        lastPrice = row[4]
        if stockIndex not in stockBasket:
            stockBasket[stockIndex] = []
            tmpStockBasket[stockIndex] = {}
        stockBasket[stockIndex].append({
            "date": date,
            "zscore30": zscore30,
            "zscore90": zscore90,
            "lastPrice": lastPrice,
        })
        tmpStockBasket[stockIndex][date] = [zscore30, zscore90, lastPrice]
    con.close()

    # Compute the pearson for each two stocks.
    stockList = list(stockBasket)
    print(stockList)
    stockCount = len(stockList)

    for i in range(stockCount):
        for j in range(i + 1, stockCount):
            print("%s %s" % (stockList[i], stockList[j]))
            stock1 = tmpStockBasket[stockList[i]]
            stock2 = tmpStockBasket[stockList[j]]
            compare_stock(stock1, stock2)

    for i in range(stockCount):
        for j in range(stockCount):
            print("%s %s" % (stockList[i], stockList[j]))
            stock1 = stockBasket[stockList[i]]
            stock2 = stockBasket[stockList[j]]
            compare_stock_shift(stockList[i], stockList[j], stock1, stock2, 0)
def export_test_stock_data(estimationStart,estimationEnd):
    """Build and store enriched records for prices in the estimation window.

    Reads the t_bloomberg_prices rows with post_date in
    [estimationStart, estimationEnd]. For each row it derives the one-day
    change, the 30/90 z-scores (getZscore), the change percent and the trend
    type (get_trend_type), assembles an enriched record, hashes the record's
    JSON dump into its embersId and hands it to insert_enriched_data(). The
    inserts are committed after all rows have been handled.
    """
    con = common.getDBConnection()
    cur = con.cursor()
    sql = "select embers_id,type,name,current_value,previous_close_value,update_time,query_time,post_date,source from t_bloomberg_prices where post_date>=? and post_date<=?"
    cur.execute(sql, (estimationStart, estimationEnd,))

    for priceRow in cur.fetchall():
        embers_id, ty, name = priceRow[0], priceRow[1], priceRow[2]
        last_price = float(priceRow[3])
        pre_last_price = float(priceRow[4])
        one_day_change = round(last_price - pre_last_price, 4)
        post_date = priceRow[7]

        # Calculate zscore 30 and zscore 90.
        zscore30 = getZscore(con, post_date, name, one_day_change, 30)
        zscore90 = getZscore(con, post_date, name, one_day_change, 90)
        changePercent = round((last_price - pre_last_price) / pre_last_price, 4)
        trend_type = get_trend_type(name, changePercent)

        # Initiate the enriched data. Keys are inserted one at a time in this
        # fixed order because the embersId below is a hash of json.dumps of
        # the dict.
        orderedFields = [
            ("derivedFrom", "[" + embers_id + "]"),
            ("type", ty),
            ("name", name),
            ("postDate", post_date),
            ("currentValue", last_price),
            ("previousCloseValue", pre_last_price),
            ("oneDayChange", one_day_change),
            ("changePercent", changePercent),
            ("trendType", trend_type),
            ("zscore30", zscore30),
            ("zscore90", zscore90),
            ("operateTime", datetime.now().isoformat()),
        ]
        enrichedData = {}
        for key, value in orderedFields:
            enrichedData[key] = value
        enrichedData["embersId"] = hashlib.sha1(json.dumps(enrichedData)).hexdigest()

        insert_enriched_data(con, enrichedData)
    con.commit()
def import_history():
    """Load the historical stock JSON-lines file and process every record.

    The file path comes from the ("training", "HISTORICAL_STOCK_JSON")
    configuration entry. Each non-blank line is parsed as one JSON document.
    All lines are parsed before any record is processed, so a malformed line
    aborts the import before process() is called. Every parsed record is then
    passed to process() together with a database connection, and the
    connection is committed at the end.

    Blank or whitespace-only lines (such as a trailing newline at the end of
    the file) are skipped, not passed to json.loads().
    """
    hisFile = common.get_configuration("training", "HISTORICAL_STOCK_JSON")
    raw_price_list = []
    with open(hisFile, 'r') as raw_file:
        # Iterate the file lazily; there is no need to hold all raw lines.
        for line in raw_file:
            text = line.replace("\n", "").replace("\r", "")
            if not text.strip():
                continue
            raw_price_list.append(json.loads(text))

    conn = common.getDBConnection()
    # Process data one by one.
    for raw_data in raw_price_list:
        process(conn, raw_data)
    if conn:
        conn.commit()
def data_clear():
    """Delete all surrogate and warning rows.

    Empties t_surrogatedata and then t_warningmessage, committing after each
    delete, then sleeps for three seconds and closes the connection.
    """
    con = common.getDBConnection()
    cur = con.cursor()
    tablesToClear = (
        "t_surrogatedata",   # the surrogate data
        "t_warningmessage",  # the warning data
    )
    for table in tablesToClear:
        cur.execute("delete from " + table)
        con.commit()
    time.sleep(3)
    if con:
        con.close()
def check_if_tradingday(self,predictiveDate,stockIndex):
    """Return True when ``predictiveDate`` is a trading day for ``stockIndex``.

    ``predictiveDate`` is a "%Y-%m-%d" string. Saturdays and Sundays are
    rejected without touching the database. Otherwise the date is rejected
    when s_holiday lists it for the country that s_stock_country associates
    with ``stockIndex``. Every rejection is logged.
    """
    # Check if the day is a weekend (weekday() 5 = Saturday, 6 = Sunday).
    dayOfWeek = datetime.strptime(predictiveDate, "%Y-%m-%d").weekday()
    if dayOfWeek in (5, 6):
        log.info("%s For %s is Weekend, Just Skip!" % (predictiveDate, stockIndex))
        return False

    # Check if the day is a holiday.
    con = common.getDBConnection()
    cur = con.cursor()
    holidaySql = ("select count(*) from s_holiday a,s_stock_country b where a.country = b.country"
                  " and b.stock_index=? and a.date = ?")
    cur.execute(holidaySql, (stockIndex, predictiveDate))
    holidayCount = cur.fetchone()[0]
    if holidayCount != 0:
        log.info("%s For %s is Holiday, Just Skip!" % (predictiveDate, stockIndex))
        return False
    return True
def check_enrichedata_existed(embersId):
    """Return whether t_daily_enrichednews already has a row for ``embersId``.

    Returns False only when the count query succeeds and finds no row. If the
    lookup fails for any reason the error is logged and True is returned,
    which is the value the flag held before the query.

    KeyboardInterrupt and SystemExit are not intercepted.
    """
    # Bound before the try so a failure while opening the connection still
    # has a value to return.
    flag = True
    try:
        con = common.getDBConnection()
        cur = con.cursor()
        sql = "select count(*) count from t_daily_enrichednews where embers_id=?"
        cur.execute(sql, (embersId,))
        count = int(cur.fetchone()[0])
        flag = count != 0
    except lite.ProgrammingError as e:
        log.info(e)
    except Exception:
        log.info("Error: %s" % sys.exc_info()[0])
    return flag
def get_stock_index_cluster(self, predictiveDate, stockIndex):
    """Return the trend types and ids of the last three enriched index rows.

    Looks at t_daily_enrichedIndex rows for ``stockIndex`` dated strictly
    before ``predictiveDate``, newest first, at most three of them.

    Returns a pair (trendTypeList, derivedFrom): the rows' trend_type values
    and embers_id values, in the same order. If a sqlite.Error occurs, the
    traceback and error are printed and the function returns None implicitly.
    """
    con = None
    try:
        con = common.getDBConnection()
        cur = con.cursor()
        sqlquery = "select trend_type,embers_id from t_daily_enrichedIndex where date < ? and stock_index = ? order by date desc limit 3"
        cur.execute(sqlquery, (predictiveDate, stockIndex))
        recent = cur.fetchall()
        trendTypeList = [entry[0] for entry in recent]
        derivedFrom = [entry[1] for entry in recent]
        return trendTypeList, derivedFrom
    except sqlite.Error as e:
        print(traceback.format_exc())
        print("Error: %s" % e.args[0])
def get_stock_index_cluster(self, predictiveDate, stockIndex):
    """Fetch the three most recent trend types before ``predictiveDate``.

    Queries t_daily_enrichedIndex for ``stockIndex`` rows dated strictly
    before ``predictiveDate``, ordered newest first and limited to three.

    Returns (trendTypeList, derivedFrom), i.e. the trend_type and embers_id
    columns of those rows. On sqlite.Error the traceback and message are
    logged and None is returned implicitly.
    """
    con = None
    try:
        con = common.getDBConnection()
        cur = con.cursor()
        sqlquery = "select trend_type,embers_id from t_daily_enrichedIndex where date < ? and stock_index = ? order by date desc limit 3"
        cur.execute(sqlquery, (predictiveDate, stockIndex))
        trendTypeList, derivedFrom = [], []
        for trendType, embersId in ((found[0], found[1]) for found in cur.fetchall()):
            trendTypeList.append(trendType)
            derivedFrom.append(embersId)
        return trendTypeList, derivedFrom
    except sqlite.Error as e:
        log.info(traceback.format_exc())
        log.info("Error: %s" % e.args[0])
def testPhase(stock1,stock2,z30m,z30c,z90m,z90c):
    """Print the dates on which ``stock1`` predicts an extreme z-score for ``stock2``.

    For each t_daily_stockindex row of ``stock1`` dated 2011-01-01 or later
    (ascending), the linear predictions

        predictZ30 = z30m * zscore30 + z30c
        predictZ90 = z90m * zscore90 + z90c

    are computed. A line is printed when |predictZ30| >= 4 or
    |predictZ90| >= 3. The connection is closed afterwards.
    """
    cfgPath = "../Config/config.cfg"
    common.init(cfgPath)
    con = common.getDBConnection()
    cur = con.cursor()
    sql = "select stock_index,date,zscore30,zscore90,one_day_change/(last_price-one_day_change) from t_daily_stockindex where date>='2011-01-01' and stock_index=? order by date asc"
    cur.execute(sql, (stock1,))
    for record in cur.fetchall():
        date = record[1]
        predictZ30 = z30m * float(record[2]) + z30c
        predictZ90 = z90m * float(record[3]) + z90c
        isExtreme = abs(predictZ30) >= 4 or abs(predictZ90) >= 3
        if isExtreme:
            print("{} : using {} to predict {} z30: {} z90: {}".format(date, stock1, stock2, predictZ30, predictZ90))
    con.close()
def data_clear():
    """Delete all rows from the working tables, then close the connection.

    Each table is emptied with its own ``delete`` statement followed by a
    commit. After the last table the function sleeps three seconds and
    closes the connection.
    """
    tables = (
        "t_daily_stockindex",      # stock index raw data
        "t_daily_news",            # raw news data
        "t_daily_enrichedIndex",   # stock index enriched data
        "t_news_process_mission",  # mission table data
        "t_daily_enrichednews",    # news enriched data
        "t_surrogatedata",         # surrogate data
        "t_warningmessage",        # warning data
    )
    con = common.getDBConnection()
    cur = con.cursor()
    for table in tables:
        cur.execute("delete from " + table)
        con.commit()
    time.sleep(3)
    if con:
        con.close()
def data_clear():
    """Delete all rows from the working tables, then close the connection.

    Each table is emptied with its own ``delete`` statement followed by a
    commit. After the last table the function sleeps three seconds and
    closes the connection.
    """
    tables = (
        "t_bloomberg_prices",           # stock index raw data
        "t_daily_news",                 # raw news data
        "t_enriched_bloomberg_prices",  # stock index enriched data
        "t_news_process_mission",       # mission table data
        "t_daily_enrichednews",         # news enriched data
        "t_surrogatedata",              # surrogate data
        "t_warningmessage",             # warning data
    )
    con = common.getDBConnection()
    cur = con.cursor()
    for table in tables:
        cur.execute("delete from " + table)
        con.commit()
    time.sleep(3)
    if con:
        con.close()
def insert_surrogatedata(self, surrogateData):
    """Insert a surrogate record into t_surrogatedata unless already stored.

    Args:
        surrogateData: mapping read with the keys embersId, derivedFrom,
            shiftDate, shiftType, confidence, strength, location, date,
            model, valueSpectrum, confidenceIsProbability and population.
            derivedFrom is stored JSON-encoded.

    Any exception is printed instead of raised. The connection obtained
    here is closed before returning.
    """
    # Bound up front so the finally clause cannot hit an undefined name
    # when getDBConnection() itself fails.
    con = None
    try:
        con = common.getDBConnection()
        cur = con.cursor()
        embersId = surrogateData["embersId"]
        # Skip the insert when this embers_id is already in the table
        checkSql = "select count(*) from t_surrogatedata where embers_id = ?"
        cur.execute(checkSql, (embersId,))
        if cur.fetchone()[0] == 0:
            insertSql = (
                "insert into t_surrogatedata (embers_id,derived_from,shift_date,shift_type,confidence,"
                " strength,location,date,model,value_spectrum,confidence_isprobability,population) values"
                " (?,?,?,?,?,?,?,?,?,?,?,?)"
            )
            values = (
                embersId,
                json.dumps(surrogateData["derivedFrom"]),
                surrogateData["shiftDate"],
                surrogateData["shiftType"],
                surrogateData["confidence"],
                surrogateData["strength"],
                surrogateData["location"],
                surrogateData["date"],
                surrogateData["model"],
                surrogateData["valueSpectrum"],
                surrogateData["confidenceIsProbability"],
                surrogateData["population"],
            )
            cur.execute(insertSql, values)
            con.commit()
    except Exception as e:
        # e.args may be empty; fall back to the exception itself
        print("Error: %s" % (e.args[0] if e.args else e))
    finally:
        if con:
            con.close()
def execute():
    """Dump the titles of all stored news into the configured JSON file.

    Reads ../Config/config.cfg, takes the output path from option
    ``newsAlreadyDownload`` of section ``info``, selects every title from
    t_daily_news and writes the titles to that path as one JSON array.
    A lite.Error is printed instead of raised.
    """
    try:
        connection = common.getDBConnection()
        connection.text_factory = str
        cursor = connection.cursor()

        parser = ConfigParser.ConfigParser()
        with open('../Config/config.cfg', 'r') as cfgFile:
            parser.readfp(cfgFile)
        outputPath = parser.get("info", "newsAlreadyDownload")

        cursor.execute("select title from t_daily_news")
        titles = [record[0] for record in cursor.fetchall()]

        with open(outputPath, "w") as output:
            output.write(json.dumps(titles))
    except lite.Error as err:
        print("Error: %s" % err.args[0])
def get_stock_news_data(self, predictiveDate, stockIndex):
    """Aggregate vocabulary term counts from the news before a date.

    Enriched news of `stockIndex` posted from three days before to one day
    before `predictiveDate` (inclusive) is loaded. Each record's content is
    parsed as a JSON object mapping word -> count, and counts of words found
    in the vocabulary file are summed.

    Args:
        predictiveDate: date string in ``%Y-%m-%d`` format.
        stockIndex: stock index the news must belong to.

    Returns:
        (termList, newsDerived): a dict mapping every vocabulary term to its
        summed count (0 when absent), and the list of embers_id values of
        the news records used. A sqlite.Error is printed and None is
        returned implicitly.
    """
    try:
        con = common.getDBConnection()
        cur = con.cursor()

        # News window: the three days preceding the predictive day
        predictiveDay = datetime.strptime(predictiveDate, "%Y-%m-%d")
        startDay = (predictiveDay - timedelta(days=3)).strftime("%Y-%m-%d")
        endDay = (predictiveDay - timedelta(days=1)).strftime("%Y-%m-%d")
        sqlquery = "select content,embers_id from t_daily_enrichednews where post_date>=? and post_date<=? and stock_index=?"
        cur.execute(sqlquery, [startDay, endDay, stockIndex])
        articleRecords = cur.fetchall()

        # Start every vocabulary term at zero. The file is now closed
        # deterministically; it used to be opened and never closed.
        termList = {}
        with open(common.get_configuration("model", 'VOCABULARY_FILE')) as vocabularyFile:
            for line in vocabularyFile:
                termList[line.replace("\n", "").replace("\r", "")] = 0

        # Merge the term counts of every record
        newsDerived = []
        for record in articleRecords:
            jsonRecord = json.loads(record[0])
            newsDerived.append(record[1])
            for curWord in jsonRecord:
                if curWord in termList:
                    termList[curWord] = termList[curWord] + jsonRecord[curWord]
        return termList, newsDerived
    except sqlite.Error as e:
        print(traceback.format_exc())
        print("Error: %s" % e.args[0])
def check_if_tradingday(self, predictiveDate, stockIndex):
    """Tell whether `predictiveDate` is a trading day for `stockIndex`.

    Args:
        predictiveDate: date string in ``%Y-%m-%d`` format.
        stockIndex: stock index used to look up the country's holidays.

    Returns:
        False for Saturdays and Sundays (decided without a database access)
        and for dates listed in s_holiday for the index's country;
        True otherwise. A message is printed whenever False is returned.
    """
    # weekday() yields 5 for Saturday and 6 for Sunday
    if datetime.strptime(predictiveDate, "%Y-%m-%d").weekday() in (5, 6):
        print("%s For %s is Weekend, Just Skip!" % (predictiveDate, stockIndex))
        return False

    # Holiday lookup through the stock-index -> country mapping
    connection = common.getDBConnection()
    cursor = connection.cursor()
    holidayQuery = "select count(*) from s_holiday a,s_stock_country b where a.country = b.country and b.stock_index=? and a.date = ?"
    cursor.execute(holidayQuery, (stockIndex, predictiveDate))
    if cursor.fetchone()[0] != 0:
        print("%s For %s is Holiday, Just Skip!" % (predictiveDate, stockIndex))
        return False
    return True
def export_test_stock_data(startDate):
    """Derive enriched index rows from raw rows dated on or after startDate.

    For each selected t_daily_stockindex row the change percent
    ``one_day_change / (last_price - one_day_change)`` is rounded to four
    decimals, classified with get_trend_type, and the result is inserted
    into t_daily_enrichedindex under an id that is the SHA-1 of the
    JSON-serialized record. One commit is issued after all inserts.
    """
    selectSql = "select embers_id,sub_sequence,stock_index,date,last_price,one_day_change from t_daily_stockindex where date>=?"
    insertSql = "insert into t_daily_enrichedindex (embers_id,derived_from,sub_sequence,stock_index,date,last_price,one_day_change,change_percent,trend_type) values (?,?,?,?,?,?,?,?,?)"

    connection = common.getDBConnection()
    cursor = connection.cursor()
    cursor.execute(selectSql, (startDate, ))
    for rawId, subSequence, stockIndex, date, lastPrice, oneDayChange in cursor.fetchall():
        derivedFrom = "[" + rawId + "]"
        changePercent = round(oneDayChange / (lastPrice - oneDayChange), 4)
        trendType = get_trend_type(stockIndex, changePercent)

        # Keys are added one by one in this fixed order: the serialized
        # dict feeds the SHA-1 that becomes the row's embers id.
        record = {}
        record["derivedFrom"] = derivedFrom
        record["stockIndex"] = stockIndex
        record["date"] = date
        record["lastPrice"] = lastPrice
        record["oneDayChange"] = oneDayChange
        record["changePercent"] = changePercent
        record["trendType"] = trendType
        record["subsequenceId"] = subSequence
        recordId = hashlib.sha1(json.dumps(record)).hexdigest()

        cursor.execute(
            insertSql,
            (recordId, derivedFrom, subSequence, stockIndex, date,
             lastPrice, oneDayChange, changePercent, trendType))
    connection.commit()
def import_historical_stock():
    """Reload t_daily_stockindices from the historical stock files.

    The table is emptied first. Every file in the directory configured as
    ("training", "HISTORICAL_STOCK") is then read: the first two lines are
    skipped, and each remaining line is split on commas into date
    (``%m/%d/%Y``), last price and previous last price. Lines where either
    price is "#N/A N/A" are skipped. The stock name is the file name up to
    its first dot, and sub_sequence counts the inserted rows per file.
    Commits happen every 300 inserted rows of a file and once at the end.
    """
    stockFileDir = common.get_configuration("training", "HISTORICAL_STOCK")
    fileNames = os.listdir(stockFileDir)

    con = common.getDBConnection()
    cur = con.cursor()
    cur.execute("delete from t_daily_stockindices")
    con.commit()

    insertSql = "insert into t_daily_stockindices (sub_sequence,stock_index,date,last_price,one_day_change) values (?,?,?,?,?)"
    missing = "#N/A N/A"
    for filename in fileNames:
        stock = filename.split(".")[0]
        subSequence = 0
        with open(stockFileDir + "/" + filename, "r") as stockFile:
            dataLines = stockFile.readlines()[2:]
        for rawLine in dataLines:
            fields = rawLine.replace("\r", "").replace("\n", "").split(",")
            rawDate = fields[0]
            lastPrice = fields[1]
            previousLastPrice = fields[2]
            if missing in (lastPrice, previousLastPrice):
                continue
            lastPrice = float(lastPrice)
            previousLastPrice = float(previousLastPrice)
            isoDate = datetime.strptime(rawDate, "%m/%d/%Y").strftime("%Y-%m-%d")
            oneDayChange = round(lastPrice - previousLastPrice, 4)
            subSequence += 1
            cur.execute(insertSql, (subSequence, stock, isoDate, lastPrice, oneDayChange,))
            if subSequence % 300 == 0:
                con.commit()
    con.commit()
def export_test_stock_data(estimationStart, estimationEnd):
    """Derive enriched index rows from raw rows inside a date range.

    Rows of t_daily_stockindex with estimationStart <= date <= estimationEnd
    are read. For each one the change percent
    ``one_day_change / (last_price - one_day_change)`` is rounded to four
    decimals, classified with get_trend_type, and the result is inserted
    into t_daily_enrichedindex under an id that is the SHA-1 of the
    JSON-serialized record. One commit is issued after all inserts.
    """
    selectSql = "select embers_id,sub_sequence,stock_index,date,last_price,one_day_change from t_daily_stockindex where date>=? and date<=?"
    insertSql = "insert into t_daily_enrichedindex (embers_id,derived_from,sub_sequence,stock_index,date,last_price,one_day_change,change_percent,trend_type) values (?,?,?,?,?,?,?,?,?)"

    connection = common.getDBConnection()
    cursor = connection.cursor()
    cursor.execute(selectSql, (estimationStart, estimationEnd,))
    for rawId, subSequence, stockIndex, date, lastPrice, oneDayChange in cursor.fetchall():
        derivedFrom = "[" + rawId + "]"
        changePercent = round(oneDayChange / (lastPrice - oneDayChange), 4)
        trendType = get_trend_type(stockIndex, changePercent)

        # Keys are added one by one in this fixed order: the serialized
        # dict feeds the SHA-1 that becomes the row's embers id.
        record = {}
        record["derivedFrom"] = derivedFrom
        record["stockIndex"] = stockIndex
        record["date"] = date
        record["lastPrice"] = lastPrice
        record["oneDayChange"] = oneDayChange
        record["changePercent"] = changePercent
        record["trendType"] = trendType
        record["subsequenceId"] = subSequence
        recordId = hashlib.sha1(json.dumps(record)).hexdigest()

        cursor.execute(
            insertSql,
            (recordId, derivedFrom, subSequence, stockIndex, date,
             lastPrice, oneDayChange, changePercent, trendType))
    connection.commit()
def insert_surrogatedata(self, surrogateData):
    """Insert a surrogate record into t_surrogatedata unless already stored.

    Args:
        surrogateData: mapping read with the keys embersId, derivedFrom,
            shiftDate, shiftType, confidence, strength, location, date,
            model, valueSpectrum, confidenceIsProbability and population.
            derivedFrom is stored JSON-encoded.

    Any exception is logged through log.info instead of raised. The
    connection obtained here is closed before returning.
    """
    # Bound up front so the finally clause cannot hit an undefined name
    # when getDBConnection() itself fails.
    con = None
    try:
        con = common.getDBConnection()
        cur = con.cursor()
        embersId = surrogateData["embersId"]
        # Skip the insert when this embers_id is already in the table
        checkSql = "select count(*) from t_surrogatedata where embers_id = ?"
        cur.execute(checkSql, (embersId,))
        if cur.fetchone()[0] == 0:
            insertSql = (
                "insert into t_surrogatedata (embers_id,derived_from,shift_date,shift_type,confidence,"
                " strength,location,date,model,value_spectrum,confidence_isprobability,population) values"
                " (?,?,?,?,?,?,?,?,?,?,?,?)"
            )
            values = (
                embersId,
                json.dumps(surrogateData["derivedFrom"]),
                surrogateData["shiftDate"],
                surrogateData["shiftType"],
                surrogateData["confidence"],
                surrogateData["strength"],
                surrogateData["location"],
                surrogateData["date"],
                surrogateData["model"],
                surrogateData["valueSpectrum"],
                surrogateData["confidenceIsProbability"],
                surrogateData["population"],
            )
            cur.execute(insertSql, values)
            con.commit()
    except Exception as e:
        # e.args may be empty; fall back to the exception itself
        log.info("Error: %s" % (e.args[0] if e.args else e))
    finally:
        if con:
            con.close()
def get_stock_news_data(self, predictiveDate, stockIndex):
    """Aggregate vocabulary term counts from the news before a date.

    Enriched news of `stockIndex` posted from three days before to one day
    before `predictiveDate` (inclusive) is loaded. Each record's content is
    parsed as a JSON object mapping word -> count, and counts of words found
    in the vocabulary file are summed.

    Args:
        predictiveDate: date string in ``%Y-%m-%d`` format.
        stockIndex: stock index the news must belong to.

    Returns:
        (termList, newsDerived): a dict mapping every vocabulary term to its
        summed count (0 when absent), and the list of embers_id values of
        the news records used. A sqlite.Error is logged through log.info
        and None is returned implicitly.
    """
    try:
        con = common.getDBConnection()
        cur = con.cursor()

        # News window: the three days preceding the predictive day
        predictiveDay = datetime.strptime(predictiveDate, "%Y-%m-%d")
        startDay = (predictiveDay - timedelta(days=3)).strftime("%Y-%m-%d")
        endDay = (predictiveDay - timedelta(days=1)).strftime("%Y-%m-%d")
        sqlquery = "select content,embers_id from t_daily_enrichednews where post_date>=? and post_date<=? and stock_index=?"
        cur.execute(sqlquery, [startDay, endDay, stockIndex])
        articleRecords = cur.fetchall()

        # Start every vocabulary term at zero. The file is now closed
        # deterministically; it used to be opened and never closed.
        termList = {}
        with open(common.get_configuration("training", 'VOCABULARY_FILE')) as vocabularyFile:
            for line in vocabularyFile:
                termList[line.replace("\n", "").replace("\r", "")] = 0

        # Merge the term counts of every record
        newsDerived = []
        for record in articleRecords:
            jsonRecord = json.loads(record[0])
            newsDerived.append(record[1])
            for curWord in jsonRecord:
                if curWord in termList:
                    termList[curWord] = termList[curWord] + jsonRecord[curWord]
        return termList, newsDerived
    except sqlite.Error as e:
        log.info(traceback.format_exc())
        log.info("Error: %s" % e.args[0])
def warningCheck(surObj):
    """Build, store and return a warning message for a surrogate record.

    The latest t_daily_stockindex row of the index dated strictly before
    the shift date supplies the current value. Its 30- and 90-row trailing
    windows of one_day_change (by sub_sequence) supply means and
    calculator.calSD values, which are passed to dailySigmaTrends.

    Args:
        surObj: surrogate dict, e.g. {'embersId': ..., 'confidence': 0.13,
            'strength': '4', 'shiftDate': '2011-08-08',
            'location': u'Colombia', 'date': '2012-10-03',
            'model': 'Finance Stock Model', 'confidenceIsProbability': True,
            'population': 'COLCAP', ...}. ``population`` is used as the
            stock index and ``strength`` as the cluster label.

    Returns:
        The warning-message dict (already handed to insert_warningmessage)
        when the event type from dailySigmaTrends is not "0000".
        None when the event type is "0000", when no price row exists before
        the shift date, or after a lite.Error has been printed.
    """
    stockIndex = surObj["population"]
    trendType = surObj["strength"]
    shiftDate = surObj["shiftDate"]
    try:
        con = common.getDBConnection()
        cur = con.cursor()

        # Most recent record strictly before the shift date
        sql = "select sub_sequence,last_price from t_daily_stockindex where stock_index=? and date<? order by date desc limit 1"
        cur.execute(sql, (stockIndex, shiftDate))
        row = cur.fetchone()
        if row is None:
            # Without history there is nothing to evaluate; this used to
            # crash with an uncaught TypeError on row[0].
            print("Error: no t_daily_stockindex row for %s before %s" % (stockIndex, shiftDate))
            return None
        subSequence = row[0]
        currentVal = row[1]

        # Trailing windows of daily changes ending at that record
        querySql = "select one_day_change from t_daily_stockindex where stock_index=? and sub_sequence>=? and sub_sequence<=?"
        cur.execute(querySql, (stockIndex, subSequence - 29, subSequence))
        moving30 = [r[0] for r in cur.fetchall()]
        cur.execute(querySql, (stockIndex, subSequence - 89, subSequence))
        moving90 = [r[0] for r in cur.fetchall()]

        m30 = sum(moving30) / len(moving30)
        m90 = sum(moving90) / len(moving90)
        std30 = calculator.calSD(moving30)
        std90 = calculator.calSD(moving90)
        # Only the event type is used; the two other returned values are not
        eventType, _cBottom, _cUpper = dailySigmaTrends(stockIndex, str(trendType), m30, m90, std30, std90, currentVal)

        # Keys are added one by one in this fixed order: the serialized
        # dict feeds the SHA-1 that becomes the message's embers id.
        warningMessage = {}
        warningMessage["date"] = surObj["date"]
        warningMessage["derivedFrom"] = surObj["embersId"]
        warningMessage["model"] = surObj["model"]
        warningMessage["eventType"] = eventType
        warningMessage["confidence"] = surObj["confidence"]
        warningMessage["confidenceIsProbability"] = surObj["confidenceIsProbability"]
        warningMessage["eventDate"] = surObj["shiftDate"]
        warningMessage["population"] = surObj["population"]
        warningMessage["location"] = surObj["location"]
        warningMessage["embersId"] = hashlib.sha1(json.dumps(warningMessage)).hexdigest()

        if eventType == "0000":
            return None
        insert_warningmessage(warningMessage)
        return warningMessage
    except lite.Error as e:
        print("Error: %s" % e.args[0])
def warningCheck(surObj):
    """Build, store and return a warning message for a surrogate record.

    The latest t_daily_stockindex row of the index dated strictly before
    the shift date supplies the current value. Its 30- and 90-row trailing
    windows of one_day_change (by sub_sequence) supply means and
    calculator.calSD values, which are passed to dailySigmaTrends.

    Args:
        surObj: surrogate dict, e.g. {'embersId': ..., 'confidence': 0.13,
            'strength': '4', 'shiftDate': '2011-08-08',
            'location': u'Colombia', 'date': '2012-10-03',
            'model': 'Finance Stock Model', 'confidenceIsProbability': True,
            'population': 'COLCAP', ...}. ``population`` is used as the
            stock index and ``strength`` as the cluster label.

    Returns:
        The warning-message dict (already handed to insert_warningmessage)
        when the event type from dailySigmaTrends is not "0000".
        None when the event type is "0000", when no price row exists before
        the shift date, or after a lite.Error has been printed.
    """
    stockIndex = surObj["population"]
    trendType = surObj["strength"]
    shiftDate = surObj["shiftDate"]
    try:
        con = common.getDBConnection()
        cur = con.cursor()

        # Most recent record strictly before the shift date
        sql = "select sub_sequence,last_price from t_daily_stockindex where stock_index=? and date<? order by date desc limit 1"
        cur.execute(sql, (stockIndex, shiftDate))
        row = cur.fetchone()
        if row is None:
            # Without history there is nothing to evaluate; this used to
            # crash with an uncaught TypeError on row[0].
            print("Error: no t_daily_stockindex row for %s before %s" % (stockIndex, shiftDate))
            return None
        subSequence = row[0]
        currentVal = row[1]

        # Trailing windows of daily changes ending at that record
        querySql = "select one_day_change from t_daily_stockindex where stock_index=? and sub_sequence>=? and sub_sequence<=?"
        windows = {}
        for size in (30, 90):
            cur.execute(querySql, (stockIndex, subSequence - (size - 1), subSequence))
            windows[size] = [r[0] for r in cur.fetchall()]
        moving30 = windows[30]
        moving90 = windows[90]

        m30 = sum(moving30) / len(moving30)
        m90 = sum(moving90) / len(moving90)
        std30 = calculator.calSD(moving30)
        std90 = calculator.calSD(moving90)
        # Only the event type is used; the two other returned values are not
        eventType, _cBottom, _cUpper = dailySigmaTrends(stockIndex, str(trendType), m30, m90, std30, std90, currentVal)

        # Keys are added one by one in this fixed order: the serialized
        # dict feeds the SHA-1 that becomes the message's embers id.
        warningMessage = {}
        warningMessage["date"] = surObj["date"]
        warningMessage["derivedFrom"] = surObj["embersId"]
        warningMessage["model"] = surObj["model"]
        warningMessage["eventType"] = eventType
        warningMessage["confidence"] = surObj["confidence"]
        warningMessage["confidenceIsProbability"] = surObj["confidenceIsProbability"]
        warningMessage["eventDate"] = surObj["shiftDate"]
        warningMessage["population"] = surObj["population"]
        warningMessage["location"] = surObj["location"]
        warningMessage["embersId"] = hashlib.sha1(json.dumps(warningMessage)).hexdigest()

        if eventType == "0000":
            return None
        insert_warningmessage(warningMessage)
        return warningMessage
    except lite.Error as e:
        print("Error: %s" % e.args[0])
def get_uncompleted_mission():
    """Enrich the news of every pending mission and mark the mission done.

    For each embers_id in t_news_process_mission with mission_status '0',
    the matching t_daily_news rows are loaded. The content is tokenized with
    nltk, punctuation tokens and pure-digit tokens are dropped, the rest is
    lower-cased, stripped, filtered against the English stop-word list and
    stemmed with the English Snowball stemmer. The JSON-encoded frequency
    distribution of the stems is stored as the enriched content and its
    SHA-1 is the enriched embers id. The enriched row is inserted unless
    check_enrichedata_existed reports it as present, and the mission status
    is set to "1". Commits happen every 100 processed articles and once at
    the end.

    Errors raised while processing one article are logged and the next
    article is processed. Errors outside that per-article scope are logged
    and end the run.
    """
    con = common.getDBConnection()
    cur = con.cursor()
    selectNewsSql = "select embers_id,title,author,post_time,post_date,stock_index,content,source,update_time from t_daily_news where embers_id=?"
    insertSql = "insert into t_daily_enrichednews (embers_id,derived_from,title,author,post_time,post_date,content,stock_index,source,raw_update_time,update_time) values (?,?,?,?,?,?,?,?,?,?,?)"
    updateSql = "update t_news_process_mission set mission_status=? where embers_id=?"
    ignoredTokens = [",",".",")","]","(","[","*",";","...",":","&",'"',"'","’"]
    try:
        sql = "select embers_id from t_news_process_mission where mission_status = '0'"
        cur.execute(sql)
        rows = cur.fetchall()
        i = 0
        for row in rows:
            cur2 = con.cursor()
            cur2.execute(selectNewsSql, (row[0],))
            for row2 in cur2.fetchall():
                derivedFrom = "[" + row2[0] + "]"
                title = row2[1]
                author = row2[2]
                postTime = row2[3]
                postDate = row2[4]
                stockIndex = row2[5]
                content = row2[6]
                source = row2[7]
                rawUpdateTime = row2[8]
                # Bound before the try so the generic handler below can
                # always report it; it used to be undefined (or stale) when
                # tokenizing failed, and the resulting NameError aborted
                # the whole run.
                currentWord = ""
                try:
                    tokens = nltk.word_tokenize(content)
                    stemmer = nltk.stem.snowball.SnowballStemmer('english')
                    # Loaded once per article instead of once per word
                    stopWords = nltk.corpus.stopwords.words('english')
                    words = [w.lower().strip() for w in tokens if w not in ignoredTokens and not w.isdigit()]
                    words = [w for w in words if w.encode("utf8") not in stopWords]
                    stemmedWords = []
                    for w in words:
                        currentWord = w
                        stemmedWords.append(stemmer.stem(w))
                    fdist = nltk.FreqDist(stemmedWords)
                    jsonStr = json.dumps(fdist)
                    embersId = hashlib.sha1(jsonStr).hexdigest()
                    updateTime = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")

                    cur3 = con.cursor()
                    if not check_enrichedata_existed(embersId):
                        cur3.execute(insertSql, (embersId, derivedFrom, title, author, postTime, postDate, jsonStr, stockIndex, source, rawUpdateTime, updateTime))
                    # NOTE(review): the mission is marked done whether or
                    # not a new row was inserted - confirm this nesting
                    # against the original layout.
                    cur3.execute(updateSql, ("1", row2[0],))
                    i = i + 1
                    if i % 100 == 0:
                        con.commit()
                except lite.ProgrammingError as e:
                    log.info("Error: %s" % e)
                except Exception:
                    log.info("Error-----:[%s]++%s" % (currentWord, sys.exc_info()))
        con.commit()
    except lite.OperationalError as e:
        log.info(e)
    except Exception:
        log.info("Error****: %s" % sys.exc_info()[0])
def clusterSet(traningEndDate): con = common.getDBConnection() cur = con.cursor() finalClusterRecord = [] stockList = ["MERVAL","MEXBOL","CHILE65","BVPSBVPS","COLCAP","CRSMBCT","IBOV","IGBVL"] finalOrderCluster = {} for stock in stockList: sql = "select embers_id,sub_sequence,date,last_price,one_day_change,round(one_day_change/(last_price-one_day_change),4),stock_index from t_daily_stockindex where stock_index=? and date<=?" cur.execute(sql,(stock,traningEndDate)) rows = cur.fetchall() changes = [row[5] for row in rows] fdist = nltk.FreqDist(changes) clusterS = [(0,x) for x in fdist.keys()] print "StartTime: ",datetime.strftime(datetime.now(),"%Y-%m-%d %H:%M:%S") c1 = KMeansClustering(clusterS) print "MiddleTime: ",datetime.strftime(datetime.now(),"%Y-%m-%d %H:%M:%S") cluster = c1.getclusters(20) # cluster = [[(0, 0.0862), (0, 0.088), (0, 0.0914), (0, 0.094), (0, 0.0957), (0, 0.097), (0, 0.1017), (0, 0.1024), (0, 0.0774), (0, 0.0882), (0, 0.0783), (0, 0.11), (0, 0.0807), (0, 0.0813), (0, 0.1367), (0, 0.0831), (0, 0.0836), (0, 0.0855), (0, 0.0879), (0, 0.0912), (0, 0.0763), (0, 0.1046), (0, 0.0784), (0, 0.0815), (0, 0.1464), (0, 0.1987), (0, 0.1053), (0, 0.1101), (0, 0.1176), (0, 0.0868), (0, 0.1342), (0, 0.1466), (0, 0.0761), (0, 0.0772)], [(0, -0.0001), (0, 0.0), (0, 0.0001), (0, -0.0002), (0, -0.0003), (0, -0.0004), (0, -0.0005), (0, -0.0006), (0, 0.0002), (0, 0.0003), (0, 0.0004), (0, 0.0005), (0, 0.0006), (0, 0.0007), (0, 0.0008), (0, 0.0009), (0, 0.001), (0, 0.0011), (0, 0.0012), (0, 0.0013), (0, 0.0014), (0, 0.0015), (0, 0.0016), (0, 0.0017), (0, 0.0018), (0, 0.0019), (0, 0.002), (0, 0.0021), (0, 0.0022), (0, 0.0023), (0, 0.0024), (0, 0.0025), (0, 0.0026), (0, 0.0027), (0, 0.0028), (0, 0.0029), (0, 0.003), (0, 0.0031), (0, 0.0032), (0, 0.0033), (0, 0.0034), (0, 0.0035), (0, 0.0036), (0, 0.0037), (0, 0.0038), (0, 0.0039), (0, 0.004), (0, 0.0041), (0, 0.0042), (0, 0.0043), (0, 0.0044), (0, 0.0045), (0, 0.0046), (0, 0.0047), (0, 0.0048), (0, 0.0049), (0, 0.005), 
(0, -0.0007), (0, -0.0008)], [(0, 0.0297), (0, 0.0296), (0, 0.0298), (0, 0.0299), (0, 0.0301), (0, 0.03), (0, 0.0303), (0, 0.0302), (0, 0.0304), (0, 0.0305), (0, 0.0306), (0, 0.0308), (0, 0.0307), (0, 0.0309), (0, 0.031), (0, 0.0311), (0, 0.0313), (0, 0.0314), (0, 0.0312), (0, 0.0316), (0, 0.0315), (0, 0.0317), (0, 0.0318), (0, 0.032), (0, 0.0319), (0, 0.0322), (0, 0.0321), (0, 0.0324), (0, 0.0323), (0, 0.0326), (0, 0.0325), (0, 0.0328), (0, 0.033), (0, 0.0327), (0, 0.0332), (0, 0.0331), (0, 0.0333), (0, 0.0329), (0, 0.0335), (0, 0.0336), (0, 0.0334), (0, 0.0337), (0, 0.0338), (0, 0.0339), (0, 0.034), (0, 0.0341), (0, 0.0342), (0, 0.0343), (0, 0.0344), (0, 0.0345), (0, 0.0346), (0, 0.0348), (0, 0.0349), (0, 0.035), (0, 0.0351), (0, 0.0352), (0, 0.0355), (0, 0.0356), (0, 0.0358), (0, 0.0357), (0, 0.0359), (0, 0.036), (0, 0.0361), (0, 0.0362), (0, 0.0363), (0, 0.0365)], [(0, 0.0559), (0, 0.0564), (0, 0.0568), (0, 0.0571), (0, 0.0573), (0, 0.0579), (0, 0.0578), (0, 0.0581), (0, 0.0587), (0, 0.0589), (0, 0.0595), (0, 0.0591), (0, 0.0594), (0, 0.0604), (0, 0.0598), (0, 0.06), (0, 0.0602), (0, 0.0609), (0, 0.0612), (0, 0.059), (0, 0.0606), (0, 0.0614), (0, 0.0619), (0, 0.0625), (0, 0.0628), (0, 0.0615), (0, 0.0637), (0, 0.0633), (0, 0.0634), (0, 0.0636), (0, 0.0654), (0, 0.0658), (0, 0.0659), (0, 0.0669), (0, 0.0667), (0, 0.0664), (0, 0.067), (0, 0.0675), (0, 0.0673), (0, 0.0676), (0, 0.0686), (0, 0.07), (0, 0.0697), (0, 0.0709), (0, 0.0716), (0, 0.0717), (0, 0.0738), (0, 0.0747)], [(0, -0.0133), (0, -0.0132), (0, -0.0135), (0, -0.0134), (0, -0.0137), (0, -0.0138), (0, -0.0136), (0, -0.014), (0, -0.0139), (0, -0.0142), (0, -0.0143), (0, -0.0144), (0, -0.0141), (0, -0.0145), (0, -0.0146), (0, -0.0147), (0, -0.0148), (0, -0.0149), (0, -0.015), (0, -0.0151), (0, -0.0152), (0, -0.0153), (0, -0.0154), (0, -0.0155), (0, -0.0156), (0, -0.0157), (0, -0.0158), (0, -0.0159), (0, -0.016), (0, -0.0161), (0, -0.0162), (0, -0.0163), (0, -0.0164), (0, -0.0165), (0, -0.0166), (0, 
-0.0167), (0, -0.0168), (0, -0.0169), (0, -0.017), (0, -0.0171), (0, -0.0172), (0, -0.0173), (0, -0.0174), (0, -0.0175), (0, -0.0176), (0, -0.0177), (0, -0.0178), (0, -0.0179), (0, -0.018), (0, -0.0181), (0, -0.0182), (0, -0.0183), (0, -0.0184), (0, -0.0185), (0, -0.0186), (0, -0.0187), (0, -0.0188), (0, -0.0189), (0, -0.019), (0, -0.0191), (0, -0.0192), (0, -0.0193), (0, -0.0194), (0, -0.0195)], [(0, 0.0448), (0, 0.0451), (0, 0.0452), (0, 0.0446), (0, 0.0447), (0, 0.0456), (0, 0.045), (0, 0.0455), (0, 0.0462), (0, 0.0459), (0, 0.0461), (0, 0.0466), (0, 0.046), (0, 0.0467), (0, 0.0445), (0, 0.0458), (0, 0.0464), (0, 0.0477), (0, 0.0463), (0, 0.0472), (0, 0.0478), (0, 0.0457), (0, 0.0476), (0, 0.0481), (0, 0.0484), (0, 0.0488), (0, 0.0483), (0, 0.0487), (0, 0.0471), (0, 0.0482), (0, 0.0496), (0, 0.0474), (0, 0.0495), (0, 0.0485), (0, 0.0504), (0, 0.0505), (0, 0.0506), (0, 0.0501), (0, 0.0509), (0, 0.0508), (0, 0.051), (0, 0.0515), (0, 0.0516), (0, 0.052), (0, 0.0522), (0, 0.0524), (0, 0.053), (0, 0.0531), (0, 0.0534), (0, 0.0535), (0, 0.0536), (0, 0.0537), (0, 0.0538), (0, 0.0541), (0, 0.0542), (0, 0.0545), (0, 0.0546), (0, 0.0548), (0, 0.055)], [(0, 0.0172), (0, 0.017), (0, 0.0173), (0, 0.0174), (0, 0.0171), (0, 0.0177), (0, 0.0175), (0, 0.0178), (0, 0.0179), (0, 0.0176), (0, 0.0181), (0, 0.018), (0, 0.0183), (0, 0.0182), (0, 0.0186), (0, 0.0185), (0, 0.0187), (0, 0.0184), (0, 0.0189), (0, 0.0188), (0, 0.019), (0, 0.0191), (0, 0.0192), (0, 0.0194), (0, 0.0193), (0, 0.0196), (0, 0.0195), (0, 0.0197), (0, 0.0199), (0, 0.0198), (0, 0.02), (0, 0.0201), (0, 0.0202), (0, 0.0204), (0, 0.0205), (0, 0.0206), (0, 0.0203), (0, 0.0208), (0, 0.0207), (0, 0.021), (0, 0.0209), (0, 0.0211), (0, 0.0213), (0, 0.0212), (0, 0.0214), (0, 0.0215), (0, 0.0216), (0, 0.0217), (0, 0.0218), (0, 0.0219), (0, 0.022), (0, 0.0221), (0, 0.0222), (0, 0.0223), (0, 0.0224), (0, 0.0225), (0, 0.0226), (0, 0.0227), (0, 0.0228), (0, 0.0229), (0, 0.023), (0, 0.0231)], [(0, -0.0408), (0, -0.041), (0, 
-0.0411), (0, -0.0412), (0, -0.0413), (0, -0.0415), (0, -0.0416), (0, -0.0417), (0, -0.0419), (0, -0.042), (0, -0.0423), (0, -0.0424), (0, -0.0418), (0, -0.0425), (0, -0.0428), (0, -0.043), (0, -0.0431), (0, -0.0432), (0, -0.0433), (0, -0.0434), (0, -0.0436), (0, -0.0438), (0, -0.0439), (0, -0.044), (0, -0.0442), (0, -0.0441), (0, -0.0446), (0, -0.0443), (0, -0.0448), (0, -0.0447), (0, -0.045), (0, -0.0449), (0, -0.0453), (0, -0.0451), (0, -0.0454), (0, -0.0455), (0, -0.0458), (0, -0.0456), (0, -0.0459), (0, -0.0463), (0, -0.0461), (0, -0.046), (0, -0.0464), (0, -0.0465), (0, -0.0467), (0, -0.0462), (0, -0.0466), (0, -0.0472), (0, -0.0469), (0, -0.0475), (0, -0.0473), (0, -0.0478), (0, -0.0477), (0, -0.0476), (0, -0.0482), (0, -0.0481), (0, -0.0483), (0, -0.0487), (0, -0.0488), (0, -0.049), (0, -0.0492), (0, -0.0494)], [(0, -0.0261), (0, -0.0262), (0, -0.0263), (0, -0.0264), (0, -0.0266), (0, -0.0265), (0, -0.0267), (0, -0.0268), (0, -0.0269), (0, -0.0271), (0, -0.027), (0, -0.0273), (0, -0.0272), (0, -0.0275), (0, -0.0274), (0, -0.0277), (0, -0.0278), (0, -0.0276), (0, -0.0279), (0, -0.0281), (0, -0.028), (0, -0.0283), (0, -0.0282), (0, -0.0284), (0, -0.0285), (0, -0.0286), (0, -0.0287), (0, -0.0288), (0, -0.0289), (0, -0.0291), (0, -0.0292), (0, -0.0293), (0, -0.029), (0, -0.0294), (0, -0.0295), (0, -0.0297), (0, -0.0296), (0, -0.0299), (0, -0.03), (0, -0.0301), (0, -0.0302), (0, -0.0298), (0, -0.0303), (0, -0.0304), (0, -0.0307), (0, -0.0305), (0, -0.0308), (0, -0.031), (0, -0.0309), (0, -0.0312), (0, -0.0311), (0, -0.0313), (0, -0.0315), (0, -0.0314), (0, -0.0316), (0, -0.0317), (0, -0.0319), (0, -0.0318), (0, -0.032), (0, -0.0321), (0, -0.0322), (0, -0.0323), (0, -0.0325), (0, -0.0326), (0, -0.0327), (0, -0.0328), (0, -0.0329)], [(0, -0.0619), (0, -0.0622), (0, -0.0627), (0, -0.064), (0, -0.0645), (0, -0.065), (0, -0.0653), (0, -0.0651), (0, -0.0659), (0, -0.0663), (0, -0.0665), (0, -0.066), (0, -0.0666), (0, -0.0674), (0, -0.0671), (0, -0.0684), (0, -0.0672), 
(0, -0.0691), (0, -0.0689), (0, -0.0692), (0, -0.0701), (0, -0.0698), (0, -0.0709), (0, -0.0715), (0, -0.0717), (0, -0.0722), (0, -0.0734), (0, -0.0741), (0, -0.0749), (0, -0.0763), (0, -0.0772), (0, -0.0758), (0, -0.0762), (0, -0.0787), (0, -0.0788), (0, -0.0759), (0, -0.0775), (0, -0.0808)], [(0, -0.0905), (0, -0.1081), (0, -0.1018), (0, -0.094), (0, -0.0937), (0, -0.0936), (0, -0.0927), (0, -0.0919), (0, -0.0863), (0, -0.1593), (0, -0.1245), (0, -0.0847), (0, -0.1215), (0, -0.1139), (0, -0.1099), (0, -0.1068), (0, -0.0868), (0, -0.0856), (0, -0.0854), (0, -0.0837), (0, -0.0822), (0, -0.0877), (0, -0.1241), (0, -0.1073), (0, -0.1065), (0, -0.1011), (0, -0.0835)], [(0, -0.0196), (0, -0.0198), (0, -0.0197), (0, -0.0199), (0, -0.02), (0, -0.0201), (0, -0.0202), (0, -0.0204), (0, -0.0203), (0, -0.0205), (0, -0.0206), (0, -0.0208), (0, -0.0207), (0, -0.021), (0, -0.0209), (0, -0.0212), (0, -0.0211), (0, -0.0214), (0, -0.0215), (0, -0.0213), (0, -0.0217), (0, -0.0216), (0, -0.0219), (0, -0.0218), (0, -0.0221), (0, -0.022), (0, -0.0223), (0, -0.0222), (0, -0.0225), (0, -0.0224), (0, -0.0227), (0, -0.0226), (0, -0.0229), (0, -0.0228), (0, -0.023), (0, -0.0231), (0, -0.0232), (0, -0.0234), (0, -0.0233), (0, -0.0236), (0, -0.0235), (0, -0.0238), (0, -0.0237), (0, -0.024), (0, -0.0239), (0, -0.0242), (0, -0.0241), (0, -0.0244), (0, -0.0243), (0, -0.0245), (0, -0.0246), (0, -0.0247), (0, -0.0248), (0, -0.0249), (0, -0.025), (0, -0.0251), (0, -0.0252), (0, -0.0253), (0, -0.0254), (0, -0.0255), (0, -0.0256), (0, -0.0257), (0, -0.0258), (0, -0.0259), (0, -0.026)], [(0, -0.05), (0, -0.0504), (0, -0.0499), (0, -0.0507), (0, -0.0501), (0, -0.0509), (0, -0.0513), (0, -0.0505), (0, -0.051), (0, -0.0508), (0, -0.0517), (0, -0.0519), (0, -0.0516), (0, -0.052), (0, -0.0524), (0, -0.0525), (0, -0.0526), (0, -0.0528), (0, -0.0529), (0, -0.0533), (0, -0.0538), (0, -0.0535), (0, -0.0532), (0, -0.0542), (0, -0.0543), (0, -0.0546), (0, -0.054), (0, -0.055), (0, -0.0556), (0, -0.0545), (0, 
-0.056), (0, -0.0554), (0, -0.0567), (0, -0.0563), (0, -0.0571), (0, -0.0572), (0, -0.0576), (0, -0.0579), (0, -0.058), (0, -0.0584), (0, -0.0581), (0, -0.0588), (0, -0.0589), (0, -0.0591), (0, -0.0593), (0, -0.0596), (0, -0.0595), (0, -0.0601), (0, -0.0613), (0, -0.0614)], [(0, -0.001), (0, -0.0012), (0, -0.0017), (0, -0.0016), (0, -0.0013), (0, -0.0011), (0, -0.002), (0, -0.0018), (0, -0.0015), (0, -0.0014), (0, -0.0019), (0, -0.0021), (0, -0.0022), (0, -0.0023), (0, -0.0009), (0, -0.0024), (0, -0.0025), (0, -0.0026), (0, -0.0027), (0, -0.0028), (0, -0.0029), (0, -0.003), (0, -0.0031), (0, -0.0032), (0, -0.0033), (0, -0.0034), (0, -0.0035), (0, -0.0036), (0, -0.0037), (0, -0.0038), (0, -0.0039), (0, -0.004), (0, -0.0041), (0, -0.0042), (0, -0.0043), (0, -0.0044), (0, -0.0045), (0, -0.0046), (0, -0.0047), (0, -0.0048), (0, -0.0049), (0, -0.005), (0, -0.0051), (0, -0.0052), (0, -0.0053), (0, -0.0054), (0, -0.0055), (0, -0.0056), (0, -0.0057), (0, -0.0058), (0, -0.0059), (0, -0.006), (0, -0.0061), (0, -0.0062), (0, -0.0063), (0, -0.0064), (0, -0.0065), (0, -0.0066), (0, -0.0067), (0, -0.0068), (0, -0.0069)], [(0, -0.033), (0, -0.0332), (0, -0.0331), (0, -0.0334), (0, -0.0333), (0, -0.0336), (0, -0.0337), (0, -0.0335), (0, -0.0338), (0, -0.034), (0, -0.0339), (0, -0.0342), (0, -0.0343), (0, -0.0341), (0, -0.0344), (0, -0.0345), (0, -0.0346), (0, -0.0347), (0, -0.0348), (0, -0.035), (0, -0.0349), (0, -0.0351), (0, -0.0352), (0, -0.0353), (0, -0.0354), (0, -0.0355), (0, -0.0357), (0, -0.0356), (0, -0.0358), (0, -0.0359), (0, -0.0361), (0, -0.036), (0, -0.0363), (0, -0.0362), (0, -0.0365), (0, -0.0366), (0, -0.0364), (0, -0.0368), (0, -0.0369), (0, -0.0372), (0, -0.0371), (0, -0.0367), (0, -0.0375), (0, -0.0373), (0, -0.0376), (0, -0.0374), (0, -0.0378), (0, -0.038), (0, -0.0379), (0, -0.0377), (0, -0.0382), (0, -0.0384), (0, -0.0383), (0, -0.0386), (0, -0.0381), (0, -0.0387), (0, -0.0389), (0, -0.0385), (0, -0.039), (0, -0.0391), (0, -0.0388), (0, -0.0392), (0, 
-0.0395), (0, -0.0393), (0, -0.0397), (0, -0.0398), (0, -0.0396), (0, -0.0399), (0, -0.0402), (0, -0.0401), (0, -0.0403), (0, -0.0406), (0, -0.0407)], [(0, 0.0232), (0, 0.0233), (0, 0.0234), (0, 0.0235), (0, 0.0237), (0, 0.0236), (0, 0.0238), (0, 0.0239), (0, 0.024), (0, 0.0241), (0, 0.0242), (0, 0.0243), (0, 0.0244), (0, 0.0245), (0, 0.0247), (0, 0.0248), (0, 0.0246), (0, 0.0249), (0, 0.025), (0, 0.0251), (0, 0.0253), (0, 0.0252), (0, 0.0255), (0, 0.0254), (0, 0.0257), (0, 0.0256), (0, 0.0259), (0, 0.026), (0, 0.0258), (0, 0.0261), (0, 0.0262), (0, 0.0264), (0, 0.0265), (0, 0.0263), (0, 0.0267), (0, 0.0268), (0, 0.0266), (0, 0.027), (0, 0.0269), (0, 0.0271), (0, 0.0272), (0, 0.0274), (0, 0.0273), (0, 0.0276), (0, 0.0275), (0, 0.0277), (0, 0.0278), (0, 0.0279), (0, 0.0281), (0, 0.0282), (0, 0.0283), (0, 0.0284), (0, 0.0285), (0, 0.0286), (0, 0.0287), (0, 0.0288), (0, 0.0289), (0, 0.029), (0, 0.0291), (0, 0.0292), (0, 0.0293), (0, 0.0294)], [(0, 0.011), (0, 0.0112), (0, 0.0113), (0, 0.0111), (0, 0.0115), (0, 0.0114), (0, 0.0117), (0, 0.0116), (0, 0.0118), (0, 0.0119), (0, 0.0121), (0, 0.0122), (0, 0.0123), (0, 0.0124), (0, 0.012), (0, 0.0126), (0, 0.0125), (0, 0.0128), (0, 0.0127), (0, 0.013), (0, 0.0129), (0, 0.0131), (0, 0.0133), (0, 0.0132), (0, 0.0135), (0, 0.0134), (0, 0.0136), (0, 0.0137), (0, 0.0138), (0, 0.014), (0, 0.0139), (0, 0.0142), (0, 0.0141), (0, 0.0143), (0, 0.0144), (0, 0.0145), (0, 0.0146), (0, 0.0147), (0, 0.0148), (0, 0.0149), (0, 0.015), (0, 0.0151), (0, 0.0153), (0, 0.0152), (0, 0.0154), (0, 0.0155), (0, 0.0156), (0, 0.0157), (0, 0.0158), (0, 0.0159), (0, 0.016), (0, 0.0161), (0, 0.0162), (0, 0.0163), (0, 0.0164), (0, 0.0165), (0, 0.0166), (0, 0.0167), (0, 0.0168), (0, 0.0169)], [(0, -0.007), (0, -0.0071), (0, -0.0072), (0, -0.0073), (0, -0.0074), (0, -0.0075), (0, -0.0076), (0, -0.0077), (0, -0.0078), (0, -0.0079), (0, -0.0081), (0, -0.008), (0, -0.0082), (0, -0.0083), (0, -0.0084), (0, -0.0085), (0, -0.0086), (0, -0.0087), (0, -0.0088), (0, 
-0.0089), (0, -0.009), (0, -0.0091), (0, -0.0092), (0, -0.0093), (0, -0.0094), (0, -0.0095), (0, -0.0096), (0, -0.0097), (0, -0.0098), (0, -0.0099), (0, -0.01), (0, -0.0101), (0, -0.0102), (0, -0.0103), (0, -0.0104), (0, -0.0105), (0, -0.0106), (0, -0.0107), (0, -0.0108), (0, -0.0109), (0, -0.011), (0, -0.0111), (0, -0.0112), (0, -0.0113), (0, -0.0114), (0, -0.0115), (0, -0.0116), (0, -0.0117), (0, -0.0118), (0, -0.0119), (0, -0.012), (0, -0.0121), (0, -0.0122), (0, -0.0123), (0, -0.0124), (0, -0.0125), (0, -0.0126), (0, -0.0127), (0, -0.0128), (0, -0.0129), (0, -0.013), (0, -0.0131)], [(0, 0.0051), (0, 0.0052), (0, 0.0053), (0, 0.0055), (0, 0.0054), (0, 0.0057), (0, 0.0056), (0, 0.0059), (0, 0.0058), (0, 0.0061), (0, 0.006), (0, 0.0062), (0, 0.0063), (0, 0.0064), (0, 0.0065), (0, 0.0066), (0, 0.0068), (0, 0.0069), (0, 0.0067), (0, 0.007), (0, 0.0072), (0, 0.0071), (0, 0.0073), (0, 0.0074), (0, 0.0075), (0, 0.0076), (0, 0.0077), (0, 0.0078), (0, 0.0079), (0, 0.008), (0, 0.0081), (0, 0.0082), (0, 0.0083), (0, 0.0084), (0, 0.0085), (0, 0.0086), (0, 0.0087), (0, 0.0088), (0, 0.0089), (0, 0.009), (0, 0.0091), (0, 0.0092), (0, 0.0093), (0, 0.0094), (0, 0.0095), (0, 0.0096), (0, 0.0097), (0, 0.0098), (0, 0.0099), (0, 0.01), (0, 0.0101), (0, 0.0102), (0, 0.0103), (0, 0.0104), (0, 0.0105), (0, 0.0106), (0, 0.0107), (0, 0.0108), (0, 0.0109)], [(0, 0.0369), (0, 0.0371), (0, 0.0367), (0, 0.037), (0, 0.0375), (0, 0.0373), (0, 0.0376), (0, 0.0372), (0, 0.0377), (0, 0.038), (0, 0.0379), (0, 0.0374), (0, 0.0381), (0, 0.0382), (0, 0.0378), (0, 0.0384), (0, 0.0386), (0, 0.0387), (0, 0.0385), (0, 0.0389), (0, 0.0391), (0, 0.039), (0, 0.0392), (0, 0.0394), (0, 0.0395), (0, 0.0396), (0, 0.0398), (0, 0.0399), (0, 0.04), (0, 0.0401), (0, 0.0404), (0, 0.0405), (0, 0.0406), (0, 0.0407), (0, 0.0408), (0, 0.0409), (0, 0.041), (0, 0.0411), (0, 0.0412), (0, 0.0414), (0, 0.0415), (0, 0.0416), (0, 0.0417), (0, 0.0419), (0, 0.042), (0, 0.0421), (0, 0.0422), (0, 0.0426), (0, 0.0428), (0, 0.0427), 
(0, 0.043), (0, 0.0429), (0, 0.0431), (0, 0.0433), (0, 0.0434), (0, 0.0435), (0, 0.0436), (0, 0.0438), (0, 0.0437), (0, 0.044), (0, 0.0442), (0, 0.0444)]] print "EndTime: ",datetime.strftime(datetime.now(),"%Y-%m-%d %H:%M:%S") namedCluster = {} i = 0 orderCluster = {} for clu in cluster: i = i + 1 namedCluster[i] = clu orderCluster[i] = [min(clu)[1],max(clu)[1]] for m in orderCluster: min1 = orderCluster[m][0] max1 = orderCluster[m][1] for n in orderCluster: min2 = orderCluster[n][0] max2 = orderCluster[n][1] if (min1 > min2 and min1 < max2) or (max1 > min2 and max1 < max2): print m," intersect with ", n, " values: ",min1,max1,min2,max2 clusterR = [] for row in rows: for nc in namedCluster: if (0,row[5]) in namedCluster[nc]: newRow = list(row) newRow.append(nc) clusterR.append(newRow) finalClusterRecord.append(newRow) #insert the clusterR into Database insertSql = "insert into t_daily_enrichedIndex (embers_id,derived_from,sub_sequence,stock_index,date,last_price,one_day_change,change_percent,trend_type)values (?,?,?,?,?,?,?,?,?)" m = 0 for j in clusterR: contentStr = json.dumps(j) embersId = hashlib.sha1(contentStr).hexdigest() derivedFrom = "[" + str(j[0]) + "]" subsequenceId = j[1] postDate = j[2] lastPrice = j[3] oneDayChange = j[4] changePercent = j[5] stockIndex = j[6] trendType = j[7] cur.execute(insertSql,(embersId,derivedFrom,subsequenceId,stockIndex,postDate,lastPrice,oneDayChange,changePercent,trendType)) m = m + 1 if m%1000 == 0: con.commit() con.commit() finalOrderCluster[stock] = orderCluster "Write the type range into a file" trendRangeFile = common.get_configuration("model", "TREND_RANGE_FILE") dataStr = json.dumps(finalOrderCluster) with open(trendRangeFile,"w") as output: output.write(dataStr) "Write the training data into file" trendSetRecordFile = common.get_configuration("model", "TRAINING_TREND_RECORDS") dataStr = json.dumps(finalClusterRecord) with open(trendSetRecordFile,"w") as output: output.write(dataStr) if con: con.close()
def clusterSet(traningEndDate): con = common.getDBConnection() cur = con.cursor() finalClusterRecord = [] stockList = [ "MERVAL", "MEXBOL", "CHILE65", "BVPSBVPS", "COLCAP", "CRSMBCT", "IBOV", "IGBVL" ] finalOrderCluster = {} for stock in stockList: sql = "select embers_id,sub_sequence,date,last_price,one_day_change,round(one_day_change/(last_price-one_day_change),4),stock_index from t_daily_stockindex where stock_index=? and date<=?" cur.execute(sql, (stock, traningEndDate)) rows = cur.fetchall() changes = [row[5] for row in rows] fdist = nltk.FreqDist(changes) clusterS = [(0, x) for x in fdist.keys()] print "StartTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S") c1 = KMeansClustering(clusterS) print "MiddleTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S") cluster = c1.getclusters(20) # cluster = [[(0, 0.0862), (0, 0.088), (0, 0.0914), (0, 0.094), (0, 0.0957), (0, 0.097), (0, 0.1017), (0, 0.1024), (0, 0.0774), (0, 0.0882), (0, 0.0783), (0, 0.11), (0, 0.0807), (0, 0.0813), (0, 0.1367), (0, 0.0831), (0, 0.0836), (0, 0.0855), (0, 0.0879), (0, 0.0912), (0, 0.0763), (0, 0.1046), (0, 0.0784), (0, 0.0815), (0, 0.1464), (0, 0.1987), (0, 0.1053), (0, 0.1101), (0, 0.1176), (0, 0.0868), (0, 0.1342), (0, 0.1466), (0, 0.0761), (0, 0.0772)], [(0, -0.0001), (0, 0.0), (0, 0.0001), (0, -0.0002), (0, -0.0003), (0, -0.0004), (0, -0.0005), (0, -0.0006), (0, 0.0002), (0, 0.0003), (0, 0.0004), (0, 0.0005), (0, 0.0006), (0, 0.0007), (0, 0.0008), (0, 0.0009), (0, 0.001), (0, 0.0011), (0, 0.0012), (0, 0.0013), (0, 0.0014), (0, 0.0015), (0, 0.0016), (0, 0.0017), (0, 0.0018), (0, 0.0019), (0, 0.002), (0, 0.0021), (0, 0.0022), (0, 0.0023), (0, 0.0024), (0, 0.0025), (0, 0.0026), (0, 0.0027), (0, 0.0028), (0, 0.0029), (0, 0.003), (0, 0.0031), (0, 0.0032), (0, 0.0033), (0, 0.0034), (0, 0.0035), (0, 0.0036), (0, 0.0037), (0, 0.0038), (0, 0.0039), (0, 0.004), (0, 0.0041), (0, 0.0042), (0, 0.0043), (0, 0.0044), (0, 0.0045), (0, 0.0046), (0, 0.0047), (0, 0.0048), (0, 
0.0049), (0, 0.005), (0, -0.0007), (0, -0.0008)], [(0, 0.0297), (0, 0.0296), (0, 0.0298), (0, 0.0299), (0, 0.0301), (0, 0.03), (0, 0.0303), (0, 0.0302), (0, 0.0304), (0, 0.0305), (0, 0.0306), (0, 0.0308), (0, 0.0307), (0, 0.0309), (0, 0.031), (0, 0.0311), (0, 0.0313), (0, 0.0314), (0, 0.0312), (0, 0.0316), (0, 0.0315), (0, 0.0317), (0, 0.0318), (0, 0.032), (0, 0.0319), (0, 0.0322), (0, 0.0321), (0, 0.0324), (0, 0.0323), (0, 0.0326), (0, 0.0325), (0, 0.0328), (0, 0.033), (0, 0.0327), (0, 0.0332), (0, 0.0331), (0, 0.0333), (0, 0.0329), (0, 0.0335), (0, 0.0336), (0, 0.0334), (0, 0.0337), (0, 0.0338), (0, 0.0339), (0, 0.034), (0, 0.0341), (0, 0.0342), (0, 0.0343), (0, 0.0344), (0, 0.0345), (0, 0.0346), (0, 0.0348), (0, 0.0349), (0, 0.035), (0, 0.0351), (0, 0.0352), (0, 0.0355), (0, 0.0356), (0, 0.0358), (0, 0.0357), (0, 0.0359), (0, 0.036), (0, 0.0361), (0, 0.0362), (0, 0.0363), (0, 0.0365)], [(0, 0.0559), (0, 0.0564), (0, 0.0568), (0, 0.0571), (0, 0.0573), (0, 0.0579), (0, 0.0578), (0, 0.0581), (0, 0.0587), (0, 0.0589), (0, 0.0595), (0, 0.0591), (0, 0.0594), (0, 0.0604), (0, 0.0598), (0, 0.06), (0, 0.0602), (0, 0.0609), (0, 0.0612), (0, 0.059), (0, 0.0606), (0, 0.0614), (0, 0.0619), (0, 0.0625), (0, 0.0628), (0, 0.0615), (0, 0.0637), (0, 0.0633), (0, 0.0634), (0, 0.0636), (0, 0.0654), (0, 0.0658), (0, 0.0659), (0, 0.0669), (0, 0.0667), (0, 0.0664), (0, 0.067), (0, 0.0675), (0, 0.0673), (0, 0.0676), (0, 0.0686), (0, 0.07), (0, 0.0697), (0, 0.0709), (0, 0.0716), (0, 0.0717), (0, 0.0738), (0, 0.0747)], [(0, -0.0133), (0, -0.0132), (0, -0.0135), (0, -0.0134), (0, -0.0137), (0, -0.0138), (0, -0.0136), (0, -0.014), (0, -0.0139), (0, -0.0142), (0, -0.0143), (0, -0.0144), (0, -0.0141), (0, -0.0145), (0, -0.0146), (0, -0.0147), (0, -0.0148), (0, -0.0149), (0, -0.015), (0, -0.0151), (0, -0.0152), (0, -0.0153), (0, -0.0154), (0, -0.0155), (0, -0.0156), (0, -0.0157), (0, -0.0158), (0, -0.0159), (0, -0.016), (0, -0.0161), (0, -0.0162), (0, -0.0163), (0, -0.0164), (0, -0.0165), (0, 
-0.0166), (0, -0.0167), (0, -0.0168), (0, -0.0169), (0, -0.017), (0, -0.0171), (0, -0.0172), (0, -0.0173), (0, -0.0174), (0, -0.0175), (0, -0.0176), (0, -0.0177), (0, -0.0178), (0, -0.0179), (0, -0.018), (0, -0.0181), (0, -0.0182), (0, -0.0183), (0, -0.0184), (0, -0.0185), (0, -0.0186), (0, -0.0187), (0, -0.0188), (0, -0.0189), (0, -0.019), (0, -0.0191), (0, -0.0192), (0, -0.0193), (0, -0.0194), (0, -0.0195)], [(0, 0.0448), (0, 0.0451), (0, 0.0452), (0, 0.0446), (0, 0.0447), (0, 0.0456), (0, 0.045), (0, 0.0455), (0, 0.0462), (0, 0.0459), (0, 0.0461), (0, 0.0466), (0, 0.046), (0, 0.0467), (0, 0.0445), (0, 0.0458), (0, 0.0464), (0, 0.0477), (0, 0.0463), (0, 0.0472), (0, 0.0478), (0, 0.0457), (0, 0.0476), (0, 0.0481), (0, 0.0484), (0, 0.0488), (0, 0.0483), (0, 0.0487), (0, 0.0471), (0, 0.0482), (0, 0.0496), (0, 0.0474), (0, 0.0495), (0, 0.0485), (0, 0.0504), (0, 0.0505), (0, 0.0506), (0, 0.0501), (0, 0.0509), (0, 0.0508), (0, 0.051), (0, 0.0515), (0, 0.0516), (0, 0.052), (0, 0.0522), (0, 0.0524), (0, 0.053), (0, 0.0531), (0, 0.0534), (0, 0.0535), (0, 0.0536), (0, 0.0537), (0, 0.0538), (0, 0.0541), (0, 0.0542), (0, 0.0545), (0, 0.0546), (0, 0.0548), (0, 0.055)], [(0, 0.0172), (0, 0.017), (0, 0.0173), (0, 0.0174), (0, 0.0171), (0, 0.0177), (0, 0.0175), (0, 0.0178), (0, 0.0179), (0, 0.0176), (0, 0.0181), (0, 0.018), (0, 0.0183), (0, 0.0182), (0, 0.0186), (0, 0.0185), (0, 0.0187), (0, 0.0184), (0, 0.0189), (0, 0.0188), (0, 0.019), (0, 0.0191), (0, 0.0192), (0, 0.0194), (0, 0.0193), (0, 0.0196), (0, 0.0195), (0, 0.0197), (0, 0.0199), (0, 0.0198), (0, 0.02), (0, 0.0201), (0, 0.0202), (0, 0.0204), (0, 0.0205), (0, 0.0206), (0, 0.0203), (0, 0.0208), (0, 0.0207), (0, 0.021), (0, 0.0209), (0, 0.0211), (0, 0.0213), (0, 0.0212), (0, 0.0214), (0, 0.0215), (0, 0.0216), (0, 0.0217), (0, 0.0218), (0, 0.0219), (0, 0.022), (0, 0.0221), (0, 0.0222), (0, 0.0223), (0, 0.0224), (0, 0.0225), (0, 0.0226), (0, 0.0227), (0, 0.0228), (0, 0.0229), (0, 0.023), (0, 0.0231)], [(0, -0.0408), (0, 
-0.041), (0, -0.0411), (0, -0.0412), (0, -0.0413), (0, -0.0415), (0, -0.0416), (0, -0.0417), (0, -0.0419), (0, -0.042), (0, -0.0423), (0, -0.0424), (0, -0.0418), (0, -0.0425), (0, -0.0428), (0, -0.043), (0, -0.0431), (0, -0.0432), (0, -0.0433), (0, -0.0434), (0, -0.0436), (0, -0.0438), (0, -0.0439), (0, -0.044), (0, -0.0442), (0, -0.0441), (0, -0.0446), (0, -0.0443), (0, -0.0448), (0, -0.0447), (0, -0.045), (0, -0.0449), (0, -0.0453), (0, -0.0451), (0, -0.0454), (0, -0.0455), (0, -0.0458), (0, -0.0456), (0, -0.0459), (0, -0.0463), (0, -0.0461), (0, -0.046), (0, -0.0464), (0, -0.0465), (0, -0.0467), (0, -0.0462), (0, -0.0466), (0, -0.0472), (0, -0.0469), (0, -0.0475), (0, -0.0473), (0, -0.0478), (0, -0.0477), (0, -0.0476), (0, -0.0482), (0, -0.0481), (0, -0.0483), (0, -0.0487), (0, -0.0488), (0, -0.049), (0, -0.0492), (0, -0.0494)], [(0, -0.0261), (0, -0.0262), (0, -0.0263), (0, -0.0264), (0, -0.0266), (0, -0.0265), (0, -0.0267), (0, -0.0268), (0, -0.0269), (0, -0.0271), (0, -0.027), (0, -0.0273), (0, -0.0272), (0, -0.0275), (0, -0.0274), (0, -0.0277), (0, -0.0278), (0, -0.0276), (0, -0.0279), (0, -0.0281), (0, -0.028), (0, -0.0283), (0, -0.0282), (0, -0.0284), (0, -0.0285), (0, -0.0286), (0, -0.0287), (0, -0.0288), (0, -0.0289), (0, -0.0291), (0, -0.0292), (0, -0.0293), (0, -0.029), (0, -0.0294), (0, -0.0295), (0, -0.0297), (0, -0.0296), (0, -0.0299), (0, -0.03), (0, -0.0301), (0, -0.0302), (0, -0.0298), (0, -0.0303), (0, -0.0304), (0, -0.0307), (0, -0.0305), (0, -0.0308), (0, -0.031), (0, -0.0309), (0, -0.0312), (0, -0.0311), (0, -0.0313), (0, -0.0315), (0, -0.0314), (0, -0.0316), (0, -0.0317), (0, -0.0319), (0, -0.0318), (0, -0.032), (0, -0.0321), (0, -0.0322), (0, -0.0323), (0, -0.0325), (0, -0.0326), (0, -0.0327), (0, -0.0328), (0, -0.0329)], [(0, -0.0619), (0, -0.0622), (0, -0.0627), (0, -0.064), (0, -0.0645), (0, -0.065), (0, -0.0653), (0, -0.0651), (0, -0.0659), (0, -0.0663), (0, -0.0665), (0, -0.066), (0, -0.0666), (0, -0.0674), (0, -0.0671), (0, -0.0684), 
(0, -0.0672), (0, -0.0691), (0, -0.0689), (0, -0.0692), (0, -0.0701), (0, -0.0698), (0, -0.0709), (0, -0.0715), (0, -0.0717), (0, -0.0722), (0, -0.0734), (0, -0.0741), (0, -0.0749), (0, -0.0763), (0, -0.0772), (0, -0.0758), (0, -0.0762), (0, -0.0787), (0, -0.0788), (0, -0.0759), (0, -0.0775), (0, -0.0808)], [(0, -0.0905), (0, -0.1081), (0, -0.1018), (0, -0.094), (0, -0.0937), (0, -0.0936), (0, -0.0927), (0, -0.0919), (0, -0.0863), (0, -0.1593), (0, -0.1245), (0, -0.0847), (0, -0.1215), (0, -0.1139), (0, -0.1099), (0, -0.1068), (0, -0.0868), (0, -0.0856), (0, -0.0854), (0, -0.0837), (0, -0.0822), (0, -0.0877), (0, -0.1241), (0, -0.1073), (0, -0.1065), (0, -0.1011), (0, -0.0835)], [(0, -0.0196), (0, -0.0198), (0, -0.0197), (0, -0.0199), (0, -0.02), (0, -0.0201), (0, -0.0202), (0, -0.0204), (0, -0.0203), (0, -0.0205), (0, -0.0206), (0, -0.0208), (0, -0.0207), (0, -0.021), (0, -0.0209), (0, -0.0212), (0, -0.0211), (0, -0.0214), (0, -0.0215), (0, -0.0213), (0, -0.0217), (0, -0.0216), (0, -0.0219), (0, -0.0218), (0, -0.0221), (0, -0.022), (0, -0.0223), (0, -0.0222), (0, -0.0225), (0, -0.0224), (0, -0.0227), (0, -0.0226), (0, -0.0229), (0, -0.0228), (0, -0.023), (0, -0.0231), (0, -0.0232), (0, -0.0234), (0, -0.0233), (0, -0.0236), (0, -0.0235), (0, -0.0238), (0, -0.0237), (0, -0.024), (0, -0.0239), (0, -0.0242), (0, -0.0241), (0, -0.0244), (0, -0.0243), (0, -0.0245), (0, -0.0246), (0, -0.0247), (0, -0.0248), (0, -0.0249), (0, -0.025), (0, -0.0251), (0, -0.0252), (0, -0.0253), (0, -0.0254), (0, -0.0255), (0, -0.0256), (0, -0.0257), (0, -0.0258), (0, -0.0259), (0, -0.026)], [(0, -0.05), (0, -0.0504), (0, -0.0499), (0, -0.0507), (0, -0.0501), (0, -0.0509), (0, -0.0513), (0, -0.0505), (0, -0.051), (0, -0.0508), (0, -0.0517), (0, -0.0519), (0, -0.0516), (0, -0.052), (0, -0.0524), (0, -0.0525), (0, -0.0526), (0, -0.0528), (0, -0.0529), (0, -0.0533), (0, -0.0538), (0, -0.0535), (0, -0.0532), (0, -0.0542), (0, -0.0543), (0, -0.0546), (0, -0.054), (0, -0.055), (0, -0.0556), (0, 
-0.0545), (0, -0.056), (0, -0.0554), (0, -0.0567), (0, -0.0563), (0, -0.0571), (0, -0.0572), (0, -0.0576), (0, -0.0579), (0, -0.058), (0, -0.0584), (0, -0.0581), (0, -0.0588), (0, -0.0589), (0, -0.0591), (0, -0.0593), (0, -0.0596), (0, -0.0595), (0, -0.0601), (0, -0.0613), (0, -0.0614)], [(0, -0.001), (0, -0.0012), (0, -0.0017), (0, -0.0016), (0, -0.0013), (0, -0.0011), (0, -0.002), (0, -0.0018), (0, -0.0015), (0, -0.0014), (0, -0.0019), (0, -0.0021), (0, -0.0022), (0, -0.0023), (0, -0.0009), (0, -0.0024), (0, -0.0025), (0, -0.0026), (0, -0.0027), (0, -0.0028), (0, -0.0029), (0, -0.003), (0, -0.0031), (0, -0.0032), (0, -0.0033), (0, -0.0034), (0, -0.0035), (0, -0.0036), (0, -0.0037), (0, -0.0038), (0, -0.0039), (0, -0.004), (0, -0.0041), (0, -0.0042), (0, -0.0043), (0, -0.0044), (0, -0.0045), (0, -0.0046), (0, -0.0047), (0, -0.0048), (0, -0.0049), (0, -0.005), (0, -0.0051), (0, -0.0052), (0, -0.0053), (0, -0.0054), (0, -0.0055), (0, -0.0056), (0, -0.0057), (0, -0.0058), (0, -0.0059), (0, -0.006), (0, -0.0061), (0, -0.0062), (0, -0.0063), (0, -0.0064), (0, -0.0065), (0, -0.0066), (0, -0.0067), (0, -0.0068), (0, -0.0069)], [(0, -0.033), (0, -0.0332), (0, -0.0331), (0, -0.0334), (0, -0.0333), (0, -0.0336), (0, -0.0337), (0, -0.0335), (0, -0.0338), (0, -0.034), (0, -0.0339), (0, -0.0342), (0, -0.0343), (0, -0.0341), (0, -0.0344), (0, -0.0345), (0, -0.0346), (0, -0.0347), (0, -0.0348), (0, -0.035), (0, -0.0349), (0, -0.0351), (0, -0.0352), (0, -0.0353), (0, -0.0354), (0, -0.0355), (0, -0.0357), (0, -0.0356), (0, -0.0358), (0, -0.0359), (0, -0.0361), (0, -0.036), (0, -0.0363), (0, -0.0362), (0, -0.0365), (0, -0.0366), (0, -0.0364), (0, -0.0368), (0, -0.0369), (0, -0.0372), (0, -0.0371), (0, -0.0367), (0, -0.0375), (0, -0.0373), (0, -0.0376), (0, -0.0374), (0, -0.0378), (0, -0.038), (0, -0.0379), (0, -0.0377), (0, -0.0382), (0, -0.0384), (0, -0.0383), (0, -0.0386), (0, -0.0381), (0, -0.0387), (0, -0.0389), (0, -0.0385), (0, -0.039), (0, -0.0391), (0, -0.0388), (0, 
-0.0392), (0, -0.0395), (0, -0.0393), (0, -0.0397), (0, -0.0398), (0, -0.0396), (0, -0.0399), (0, -0.0402), (0, -0.0401), (0, -0.0403), (0, -0.0406), (0, -0.0407)], [(0, 0.0232), (0, 0.0233), (0, 0.0234), (0, 0.0235), (0, 0.0237), (0, 0.0236), (0, 0.0238), (0, 0.0239), (0, 0.024), (0, 0.0241), (0, 0.0242), (0, 0.0243), (0, 0.0244), (0, 0.0245), (0, 0.0247), (0, 0.0248), (0, 0.0246), (0, 0.0249), (0, 0.025), (0, 0.0251), (0, 0.0253), (0, 0.0252), (0, 0.0255), (0, 0.0254), (0, 0.0257), (0, 0.0256), (0, 0.0259), (0, 0.026), (0, 0.0258), (0, 0.0261), (0, 0.0262), (0, 0.0264), (0, 0.0265), (0, 0.0263), (0, 0.0267), (0, 0.0268), (0, 0.0266), (0, 0.027), (0, 0.0269), (0, 0.0271), (0, 0.0272), (0, 0.0274), (0, 0.0273), (0, 0.0276), (0, 0.0275), (0, 0.0277), (0, 0.0278), (0, 0.0279), (0, 0.0281), (0, 0.0282), (0, 0.0283), (0, 0.0284), (0, 0.0285), (0, 0.0286), (0, 0.0287), (0, 0.0288), (0, 0.0289), (0, 0.029), (0, 0.0291), (0, 0.0292), (0, 0.0293), (0, 0.0294)], [(0, 0.011), (0, 0.0112), (0, 0.0113), (0, 0.0111), (0, 0.0115), (0, 0.0114), (0, 0.0117), (0, 0.0116), (0, 0.0118), (0, 0.0119), (0, 0.0121), (0, 0.0122), (0, 0.0123), (0, 0.0124), (0, 0.012), (0, 0.0126), (0, 0.0125), (0, 0.0128), (0, 0.0127), (0, 0.013), (0, 0.0129), (0, 0.0131), (0, 0.0133), (0, 0.0132), (0, 0.0135), (0, 0.0134), (0, 0.0136), (0, 0.0137), (0, 0.0138), (0, 0.014), (0, 0.0139), (0, 0.0142), (0, 0.0141), (0, 0.0143), (0, 0.0144), (0, 0.0145), (0, 0.0146), (0, 0.0147), (0, 0.0148), (0, 0.0149), (0, 0.015), (0, 0.0151), (0, 0.0153), (0, 0.0152), (0, 0.0154), (0, 0.0155), (0, 0.0156), (0, 0.0157), (0, 0.0158), (0, 0.0159), (0, 0.016), (0, 0.0161), (0, 0.0162), (0, 0.0163), (0, 0.0164), (0, 0.0165), (0, 0.0166), (0, 0.0167), (0, 0.0168), (0, 0.0169)], [(0, -0.007), (0, -0.0071), (0, -0.0072), (0, -0.0073), (0, -0.0074), (0, -0.0075), (0, -0.0076), (0, -0.0077), (0, -0.0078), (0, -0.0079), (0, -0.0081), (0, -0.008), (0, -0.0082), (0, -0.0083), (0, -0.0084), (0, -0.0085), (0, -0.0086), (0, -0.0087), (0, 
-0.0088), (0, -0.0089), (0, -0.009), (0, -0.0091), (0, -0.0092), (0, -0.0093), (0, -0.0094), (0, -0.0095), (0, -0.0096), (0, -0.0097), (0, -0.0098), (0, -0.0099), (0, -0.01), (0, -0.0101), (0, -0.0102), (0, -0.0103), (0, -0.0104), (0, -0.0105), (0, -0.0106), (0, -0.0107), (0, -0.0108), (0, -0.0109), (0, -0.011), (0, -0.0111), (0, -0.0112), (0, -0.0113), (0, -0.0114), (0, -0.0115), (0, -0.0116), (0, -0.0117), (0, -0.0118), (0, -0.0119), (0, -0.012), (0, -0.0121), (0, -0.0122), (0, -0.0123), (0, -0.0124), (0, -0.0125), (0, -0.0126), (0, -0.0127), (0, -0.0128), (0, -0.0129), (0, -0.013), (0, -0.0131)], [(0, 0.0051), (0, 0.0052), (0, 0.0053), (0, 0.0055), (0, 0.0054), (0, 0.0057), (0, 0.0056), (0, 0.0059), (0, 0.0058), (0, 0.0061), (0, 0.006), (0, 0.0062), (0, 0.0063), (0, 0.0064), (0, 0.0065), (0, 0.0066), (0, 0.0068), (0, 0.0069), (0, 0.0067), (0, 0.007), (0, 0.0072), (0, 0.0071), (0, 0.0073), (0, 0.0074), (0, 0.0075), (0, 0.0076), (0, 0.0077), (0, 0.0078), (0, 0.0079), (0, 0.008), (0, 0.0081), (0, 0.0082), (0, 0.0083), (0, 0.0084), (0, 0.0085), (0, 0.0086), (0, 0.0087), (0, 0.0088), (0, 0.0089), (0, 0.009), (0, 0.0091), (0, 0.0092), (0, 0.0093), (0, 0.0094), (0, 0.0095), (0, 0.0096), (0, 0.0097), (0, 0.0098), (0, 0.0099), (0, 0.01), (0, 0.0101), (0, 0.0102), (0, 0.0103), (0, 0.0104), (0, 0.0105), (0, 0.0106), (0, 0.0107), (0, 0.0108), (0, 0.0109)], [(0, 0.0369), (0, 0.0371), (0, 0.0367), (0, 0.037), (0, 0.0375), (0, 0.0373), (0, 0.0376), (0, 0.0372), (0, 0.0377), (0, 0.038), (0, 0.0379), (0, 0.0374), (0, 0.0381), (0, 0.0382), (0, 0.0378), (0, 0.0384), (0, 0.0386), (0, 0.0387), (0, 0.0385), (0, 0.0389), (0, 0.0391), (0, 0.039), (0, 0.0392), (0, 0.0394), (0, 0.0395), (0, 0.0396), (0, 0.0398), (0, 0.0399), (0, 0.04), (0, 0.0401), (0, 0.0404), (0, 0.0405), (0, 0.0406), (0, 0.0407), (0, 0.0408), (0, 0.0409), (0, 0.041), (0, 0.0411), (0, 0.0412), (0, 0.0414), (0, 0.0415), (0, 0.0416), (0, 0.0417), (0, 0.0419), (0, 0.042), (0, 0.0421), (0, 0.0422), (0, 0.0426), (0, 
0.0428), (0, 0.0427), (0, 0.043), (0, 0.0429), (0, 0.0431), (0, 0.0433), (0, 0.0434), (0, 0.0435), (0, 0.0436), (0, 0.0438), (0, 0.0437), (0, 0.044), (0, 0.0442), (0, 0.0444)]] print "EndTime: ", datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S") namedCluster = {} i = 0 orderCluster = {} for clu in cluster: i = i + 1 namedCluster[i] = clu orderCluster[i] = [min(clu)[1], max(clu)[1]] for m in orderCluster: min1 = orderCluster[m][0] max1 = orderCluster[m][1] for n in orderCluster: min2 = orderCluster[n][0] max2 = orderCluster[n][1] if (min1 > min2 and min1 < max2) or (max1 > min2 and max1 < max2): print m, " intersect with ", n, " values: ", min1, max1, min2, max2 clusterR = [] for row in rows: for nc in namedCluster: if (0, row[5]) in namedCluster[nc]: newRow = list(row) newRow.append(nc) clusterR.append(newRow) finalClusterRecord.append(newRow) #insert the clusterR into Database insertSql = "insert into t_daily_enrichedIndex (embers_id,derived_from,sub_sequence,stock_index,date,last_price,one_day_change,change_percent,trend_type)values (?,?,?,?,?,?,?,?,?)" m = 0 for j in clusterR: contentStr = json.dumps(j) embersId = hashlib.sha1(contentStr).hexdigest() derivedFrom = "[" + str(j[0]) + "]" subsequenceId = j[1] postDate = j[2] lastPrice = j[3] oneDayChange = j[4] changePercent = j[5] stockIndex = j[6] trendType = j[7] cur.execute( insertSql, (embersId, derivedFrom, subsequenceId, stockIndex, postDate, lastPrice, oneDayChange, changePercent, trendType)) m = m + 1 if m % 1000 == 0: con.commit() con.commit() finalOrderCluster[stock] = orderCluster "Write the type range into a file" trendRangeFile = common.get_configuration("model", "TREND_RANGE_FILE") dataStr = json.dumps(finalOrderCluster) with open(trendRangeFile, "w") as output: output.write(dataStr) "Write the training data into file" trendSetRecordFile = common.get_configuration("model", "TRAINING_TREND_RECORDS") dataStr = json.dumps(finalClusterRecord) with open(trendSetRecordFile, "w") as output: 
output.write(dataStr) if con: con.close()