def copy_Data_from_Raw_to_Result():
    """Copy every raw news document into the result database.

    Each document read from ``RawPOA.news`` is tagged with the placeholder
    values ``classification="undefined"`` and ``sentiment="99"``, echoed to
    stdout, and finally bulk-inserted into ``ResultPOA.news``.
    """
    # Read connection settings from the config file.
    host = mod_config.getConfig("database", "db_Host")
    port = mod_config.getConfig("database", "db_Port")
    # The university list is not used below; the call is kept so that any
    # effect of loading it is unchanged.
    _unused_universities = mod_config.get_University_list()

    # Open the database connection.
    client = MongoClient(host, int(port))
    source_db = client.RawPOA
    target_db = client.ResultPOA

    pending = []
    for record in source_db["news"].find():
        record.update({"classification": "undefined", "sentiment": "99"})
        pending.append(record)
        print(record)

    target_db["news"].insert(pending)
    print("复制成功")
def insert_university_list():
    """Store the configured university list in ``ResultPOA.universitylist``."""
    # Read connection settings and the university list from the config file.
    host = mod_config.getConfig("database", "db_Host")
    port = mod_config.getConfig("database", "db_Port")
    universities = mod_config.get_University_list()

    # Open the database connection and write the list in one call.
    client = MongoClient(host, int(port))
    result_db = client.ResultPOA
    result_db["universitylist"].insert(universities)
    print("插入成功")
def check():
    """Run all configured directory, file and config checks.

    The three target lists are read from the ``check`` section of the
    configuration. Every check is executed even after an earlier one has
    failed.

    Returns:
        True when every check passed, otherwise False.
    """
    # NOTE(review): eval() executes whatever expression the config holds;
    # consider ast.literal_eval if the values are plain literals.
    dir_targets = eval(getConfig('check','dirlist'))
    file_targets = eval(getConfig('check','filelist'))
    conf_targets = eval(getConfig('check','conflist'))

    # Real lists (not generators) so that no check is skipped.
    dir_results = [checkdir(target) for target in dir_targets]
    file_results = [checkfile(target) for target in file_targets]
    conf_results = [checkconfig(pair[0], pair[1]) for pair in conf_targets]

    return all(dir_results + file_results + conf_results)
def check():
    """Validate the directories, files and config entries listed in the config.

    Targets come from the ``dirlist``, ``filelist`` and ``conflist`` options
    of the ``check`` section. All checks run regardless of earlier failures.

    Returns:
        True if no check failed, otherwise False.
    """
    # NOTE(review): eval() executes whatever expression the config holds;
    # consider ast.literal_eval if the values are plain literals.
    dirlist = eval(getConfig('check', 'dirlist'))
    filelist = eval(getConfig('check', 'filelist'))
    conflist = eval(getConfig('check', 'conflist'))

    failures = 0
    for path in dirlist:
        failures += 0 if checkdir(path) else 1
    for path in filelist:
        failures += 0 if checkfile(path) else 1
    for entry in conflist:
        failures += 0 if checkconfig(entry[0], entry[1]) else 1

    return failures == 0
def check():
    """Run the configured checks phase by phase, printing progress banners.

    Phases run in the order directories, files, config entries. Every check
    in every phase is executed, even after a failure.

    Returns:
        True when all checks passed, otherwise False.
    """
    # NOTE(review): eval() executes whatever expression the config holds;
    # consider ast.literal_eval if the values are plain literals.
    dir_entries = eval(getConfig('check','dirlist'))
    file_entries = eval(getConfig('check','filelist'))
    conf_entries = eval(getConfig('check','conflist'))

    # all() receives a fully built list so that each phase runs to the end.
    print("check dir staring .......................")
    dirs_ok = all([checkdir(entry) for entry in dir_entries])

    print("check file starting ......................")
    files_ok = all([checkfile(entry) for entry in file_entries])

    print("check conf starting ..........................")
    confs_ok = all([checkconfig(entry[0], entry[1]) for entry in conf_entries])

    print("all check done.........................")
    return dirs_ok and files_ok and confs_ok
def check():
    """Check configured directories, files and config entries with progress output.

    A banner is printed before each phase and once at the end. No check is
    skipped because of an earlier failure.

    Returns:
        True if nothing failed, otherwise False.
    """
    # NOTE(review): eval() executes whatever expression the config holds;
    # consider ast.literal_eval if the values are plain literals.
    dirlist = eval(getConfig('check', 'dirlist'))
    filelist = eval(getConfig('check', 'filelist'))
    conflist = eval(getConfig('check', 'conflist'))

    # Collect the targets that failed; extend() drains each generator fully.
    failed = []

    print("check dir staring .......................")
    failed.extend(item for item in dirlist if not checkdir(item))

    print("check file starting ......................")
    failed.extend(item for item in filelist if not checkfile(item))

    print("check conf starting ..........................")
    failed.extend(
        item for item in conflist if not checkconfig(item[0], item[1]))

    print("all check done.........................")
    return not failed
def create_TrainData(): # 获得配置文件参数 MongoDB_Host = mod_config.getConfig("database", "db_Host") MongoDB_Port = mod_config.getConfig("database", "db_Port") UniversityList = mod_config.get_University_list() #建立数据库连接 conn = MongoClient(MongoDB_Host, int(MongoDB_Port)) RawPOA = conn.RawPOA newsCollection = RawPOA["news"] for uni in UniversityList: path = './rawdata/' + uni["chn_name"] + "/" if not os.path.exists(path): os.makedirs(path) # 调取每个学校前50篇新闻作为训练数据集 for iterm in newsCollection.find({"Uname": uni["chn_name"]}).limit(50): filePath = path + str( iterm["title"].encode("utf8")).strip() + ".txt" fileBody = iterm["body"] try: f = codecs.open(filePath, 'w', "utf-8") f.write(fileBody) print filePath + " 写入成功" except Exception, e: print e print filePath + " 写入失败" finally:
# Author: wsh # Time: 2015-09-22 """ urlres ------- 向web服务器发送server状态。 """ import os,sys import httplib, urllib from mod_config import getConfig webip = getConfig('webserver','webip') webport = int(getConfig('webserver','webport')) servername = getConfig('servername','name') class URLRes(object): def __init__(self,servername=servername,domain = webip, port = webport): self.domain = domain self.port = port self.httpClient = None self.data = None self.uri = None self.headers = None self.servername = "ND" + servername def setpostparams(self,level,astr):
进行xml解析,同时生成domai_config 文件。主要是针对区服的xml解析模块 """ import sys import getopt import xml.etree.ElementTree as ET from getConfigClient import getres,getresbynum from mod_config import getConfig from CSLogging import write_logger import re filepath = getConfig('domai','filepath') filetopath = getConfig('domai','filetopath') csip = getConfig('csserver','CSip') gameversion=getConfig('version','gameversion') #############base xml 函数 ############### def read_xml(filename): """读取并解析xml文件,返回句柄 """ tree = ET.ElementTree() tree.parse(filename) return tree
def __init__(self, db):
    """Connect to MongoDB and keep a handle to the database named ``db``.

    Host and port are read from the ``database`` section of ``db.conf``.
    """
    host = mod_config.getConfig('db.conf','database','dbhost')
    port = int(mod_config.getConfig('db.conf','database','dbport'))
    connection = pymongo.MongoClient(host, port)
    self.db = connection[db]
from csdaemon import Daemon from mod_config import getConfig from encrypt import getmd5 from check import check from initilize import g,initlize from urlres import URLRes basedir = os.path.abspath(os.path.dirname(__file__)) g.path = basedir g.name = 'CSserver' host = getConfig('csserver','CSip') port = int(getConfig('csserver','CSport')) addr = (host,port) mainpid = getConfig('pid','mainpid') logpid = getConfig('pid','logpid') logfile = getConfig('log','logfile') log2file = getConfig('log','log2file') log2enable=getConfig('log','enablelog') class Servers(SRH): """Sockertserver handler的封装,记住,这个不是socketserver,是request处理的封装 """ #Don't call the base class finish() method as it does the above #return SocketServer.StreamRequestHandler.finish(self) def finish(self,*args,**kw):
from commands import dataanalyse from CSLogging import write_logger from csdaemon import Daemon from mod_config import getConfig from encrypt import getmd5 from check import check from initilize import g,initlize from urlres import URLRes basedir = os.path.abspath(os.path.dirname(__file__)) g.path = basedir g.name = 'NDserver' host = getConfig('nodeserver','NDip') port = int(getConfig('nodeserver','NDport')) addr = (host,port) mainpid = getConfig('pid','mainpid') logpid = getConfig('pid','logpid') logfile = getConfig('log','logfile') log2file = getConfig('log','log2file') log2enable=getConfig('log','enablelog') class Servers(SRH): """Sockertserver handler的封装,记住,这个不是socketserver,是request处理的封装。 """ def finish(self,*args,**kw): """因为存在bug,所以重写了这个函数 """
def create_NewsNumbersInfo():
    """Rebuild the per-university news statistics in ``ResultPOA.newsNumber``.

    For every configured university and each classification (study,
    activity, entrance, social) the news documents are counted per sentiment
    value "-1", "0" and "1". Each stored list has four entries: the three
    counts in that order followed by their sum.

    The ``newsNumber`` collection is dropped and rewritten on every call.
    """
    classifications = ("study", "activity", "entrance", "social")
    sentiments = ("-1", "0", "1")

    # Read connection settings and the university list from the config file.
    MongoDB_Host = mod_config.getConfig("database", "db_Host")
    MongoDB_Port = mod_config.getConfig("database", "db_Port")
    UniversityList = mod_config.get_University_list()

    # Open the database connection.
    conn = MongoClient(MongoDB_Host, int(MongoDB_Port))
    ResultPOA = conn.ResultPOA
    news = ResultPOA["news"]

    ResultList = []
    for uni in UniversityList:
        summary = {"Uname": uni["zh_name"], "abbr": uni["en_name"]}
        for classification in classifications:
            counts = [
                news.find({
                    "Uname": uni["zh_name"],
                    "classification": classification,
                    "sentiment": sentiment
                }).count()
                for sentiment in sentiments
            ]
            # Fourth slot is the total over the three sentiment values.
            counts.append(sum(counts))
            # Produces the keys studyNumber, activityNumber, entranceNumber
            # and socialNumber.
            summary[classification + "Number"] = counts
        ResultList.append(summary)

    ResultPOA["newsNumber"].drop()
    # NOTE(review): legacy pymongo is believed to reject an empty bulk
    # insert, so skip the call when there is nothing to write.
    if ResultList:
        ResultPOA["newsNumber"].insert(ResultList)
    print("保存成功")
def report(self, signal):
    """Upload new lines of the local ``thread*`` log files to the web API.

    Loops for as long as ``signal.is_set()`` is true. On each pass, if a
    device id is configured:

    1. Ask ``<website>/api/devicelog.php`` (``enews=get_info``) for the
       date of the most recent log entry the server already holds; fall
       back to ``'2017-01-01 08:00:00'`` when the reply status is not 1.
    2. For every file in ``log`` whose name starts with ``thread``, compute
       its MD5 and skip it if the digest matches the one cached in
       ``olddirlist``.
    3. Otherwise collect the lines whose first 19 characters compare
       greater than that date and POST them (``enews=post_info``) as JSON.
       The digest is cached only when the server answers with status 1.

    NOTE(review): no delay between loop passes is visible in this block.
    NOTE(review): the original indentation was lost; the nesting below,
    in particular the position of the final ``f.close()``, is reconstructed
    from syntax and should be confirmed.

    :param signal: object with ``is_set()``; the loop stops once it returns
        a false value.
    """
    # Maps log file name -> MD5 of the content that was last uploaded.
    olddirlist = {}
    while True:
        if not signal.is_set():
            break
        if mod_config.getConfig("device", "deviceid"):
            values = {}
            values['enews'] = "get_info"
            values['deviceid'] = mod_config.getConfig(
                "device", "deviceid")  # device id
            data = urllib.urlencode(values)
            # URL of the business system that receives the logs
            url = mod_config.getConfig("network",
                                       "website") + "/api/devicelog.php"
            geturl = url + "?" + data
            try:
                # First fetch the most recent date already uploaded to the server
                request = urllib2.Request(geturl)
                response = urllib2.urlopen(request)
                res = json.loads(response.read())
                if res['status'] == 1:
                    date = res['date']
                else:
                    date = '2017-01-01 08:00:00'
                try:
                    for i in os.listdir('log'):
                        if i[0:6] == 'thread':
                            try:
                                # Hash the file in chunks to detect changes.
                                f = open('./log/' + i, 'rb')
                                md5 = hashlib.md5()
                                while True:
                                    b = f.read(8096)
                                    if not b:
                                        break
                                    md5.update(b)
                                filemd5 = md5.hexdigest()
                                f.close()
                                # Upload only if the file is new or changed
                                # since the last successful upload.
                                if not olddirlist.has_key(
                                        i) or olddirlist[i] != filemd5:
                                    try:
                                        f = open('./log/' + i, 'rb')
                                        if f:
                                            key = 0
                                            values = {}
                                            values['enews'] = "post_info"
                                            values[
                                                'deviceid'] = mod_config.getConfig(
                                                    "device", "deviceid")
                                            logs = {}
                                            for a in f:
                                                # String comparison of the
                                                # 19-character line prefix
                                                # against the server date.
                                                if a[0:19] > date:
                                                    logs[key] = {}
                                                    logs[key]['date'] = a[
                                                        0:19]
                                                    # Third ' - '-separated field
                                                    logs[key][
                                                        'info'] = a.split(
                                                            ' - ')[2]
                                                    key += 1
                                            values['logs'] = json.dumps(
                                                logs)
                                            if logs:
                                                data = urllib.urlencode(
                                                    values)
                                                # URL of the business system that receives the logs
                                                url = mod_config.getConfig(
                                                    "network", "website"
                                                ) + "/api/devicelog.php"
                                                try:
                                                    request = urllib2.Request(
                                                        url, data)
                                                    response = urllib2.urlopen(
                                                        request)
                                                    res = json.loads(
                                                        response.read())
                                                    if res['status'] == 1:
                                                        # Remember the digest so an
                                                        # unchanged file is skipped.
                                                        olddirlist[
                                                            i] = filemd5
                                                        print '日志上报成功'
                                                    else:
                                                        logging.error(
                                                            '日志上报失败')
                                                except Exception, e:
                                                    logging.error('日志提交失败')
                                            f.close()
                                    except Exception, e:
                                        print '失败'
                            except IOError:
                                print '打开文件失败'
                except Exception, e:
                    print '失败'
            except IOError:
                print '打开文件失败'
def mainTask():
    """Crawl fresh news, de-duplicate, classify and persist it.

    Steps:

    1. Load up to 20 of the most recent cached items per university from
       ``RawPOA.tempNews`` and ``ResultPOA.tempNews``, skipping entries
       whose date is the "no date" placeholder.
    2. Run ``spider()`` to obtain the newly crawled items per university.
    3. Every crawled item whose URL is in neither cache is classified with
       ``predict()``, pushed to the front of both caches, and written to
       ``ResultPOA.news``. For each new item the oldest cache entry is
       dropped from both caches.
    4. Both ``tempNews`` collections are dropped and rewritten from the
       caches, then ``update_NewsNumbersInfo()`` is called.
    """
    raw_fields = ("body", "title", "url", "Uname", "abbr", "date")
    result_fields = raw_fields + ("sentiment", "classification")

    # Read connection settings and the university list from the config file.
    MongoDB_Host = mod_config.getConfig("database", "db_Host")
    MongoDB_Port = mod_config.getConfig("database", "db_Port")
    UniversityList = mod_config.get_University_list()

    # Open the database connection.
    conn = MongoClient(MongoDB_Host, int(MongoDB_Port))
    RawPOA = conn.RawPOA
    ResultPOA = conn.ResultPOA

    raw_tempList = {}
    result_tempList = {}
    for uni in UniversityList:
        raw_tempList[uni["zh_name"]] = []
        result_tempList[uni["zh_name"]] = []

    print("从缓存新闻表中取出数据...")
    for uni in UniversityList:
        name = uni["zh_name"]
        # Queries return newest first; insert(0, ...) reverses that, so each
        # cache ends up oldest first and pop() later removes the newest of
        # the loaded items unless new ones were pushed in front -- this
        # mirrors the original ordering exactly.
        for iterm in RawPOA["tempNews"].find({
                "Uname": name
        }).sort("date", pym.DESCENDING).limit(20):
            if iterm["date"].encode("utf-8") != "暂无日期":
                doc = dict((field, iterm[field]) for field in raw_fields)
                raw_tempList[name].insert(0, doc)
        for iterm in ResultPOA["tempNews"].find({
                "Uname": name
        }).sort("date", pym.DESCENDING).limit(20):
            if iterm["date"].encode("utf-8") != "暂无日期":
                doc = dict((field, iterm[field]) for field in result_fields)
                result_tempList[name].insert(0, doc)
    print("从缓存新闻表中取出数据成功\n\n")

    spiderList = spider()

    print("开始查重去重操作...")
    for uni in UniversityList:
        name = uni["zh_name"]
        raw_cache = raw_tempList[name]
        result_cache = result_tempList[name]
        predictedList = []
        for iterm in spiderList[name]:
            # An item is new only if its URL is missing from BOTH caches.
            # The original compared only the first lookup with -1 and used
            # the raw return value of the second one as a boolean.
            # NOTE(review): assumes findUrlInList returns -1 for "not
            # found", as the first comparison implies -- confirm.
            if (findUrlInList(iterm["url"], raw_cache) != -1
                    or findUrlInList(iterm["url"], result_cache) != -1):
                continue
            raw_cache.insert(0, iterm)
            predictResult = predict(iterm["body"])
            tempIterm = {
                "body": iterm["body"],
                "title": iterm["title"],
                "url": iterm["url"],
                "Uname": iterm["Uname"],
                "abbr": iterm["abbr"],
                "date": iterm["date"],
                "sentiment": predictResult["sentiment"],
                "classification": predictResult["classification"]
            }
            result_cache.insert(0, tempIterm)
            predictedList.append(tempIterm)
        # Drop one tail entry per newly added item so the caches do not grow.
        for _ in predictedList:
            raw_cache.pop()
            result_cache.pop()
        # NOTE(review): legacy pymongo is believed to reject an empty bulk
        # insert; a university without new items must not abort the run.
        if predictedList:
            ResultPOA["news"].insert(predictedList)
        print("结果数据表的数据保存成功")
    print("去重操作完成\n\n")

    RawPOA["tempNews"].drop()
    ResultPOA["tempNews"].drop()
    for uni in UniversityList:
        name = uni["zh_name"]
        # Same empty-insert guard: the collections were just dropped, so a
        # failure here would lose the caches of all remaining universities.
        if raw_tempList[name]:
            RawPOA["tempNews"].insert(raw_tempList[name])
        if result_tempList[name]:
            ResultPOA["tempNews"].insert(result_tempList[name])
    print("数据库存入数据成功\n\n")
    update_NewsNumbersInfo()
# Version: v0.1
# reference url: http://www.cnblogs.com/hongten/p/hongten_python_sqlite3.html#3192737
import sys
sys.path.append('libbase')
import sqlite3
import os
# libbase
from CSLogging import write_logger
from mod_config import getConfig

# global var
dbFilePath = getConfig('csserver','sqlitepath')
tableName = ''


def getConn():
    """Open a new SQLite connection to the database file at ``dbFilePath``."""
    return sqlite3.connect(dbFilePath)


def getCursor(conn):
    """Create a cursor on ``conn`` and return it."""
    db_cursor = conn.cursor()
    return db_cursor
#!/bin/env python
#coding=utf-8
# Encryption
# Author: wsh
# Time: 2015-06-15
"""
encrypt
----------
Encryption helper; for now it is simply an MD5 digest.
"""
import hashlib,time
from datetime import date
from mod_config import getConfig

entrystr = getConfig('entry','str')


def getmd5():
    """Return the MD5 hex digest of the configured string plus a day counter.

    The counter is the current Unix time in whole seconds divided by 86400
    with integer division, so the digest stays the same for 86400 seconds
    at a time.
    """
    day_counter = int(time.time()) // 86400
    digest_input = entrystr + str(day_counter)
    return hashlib.md5(digest_input).hexdigest()


if __name__ == '__main__':
    print(getmd5())
---------------- 用来连接 **Sockertserver** 包括( *CSserver* 和 *NDserver* )。主体函数 **getres** """ from socket import * import sys import time from CSLogging import write_logger from encrypt import getmd5 from mod_config import getConfig csip = getConfig('csserver','CSip') csport = int(getConfig('csserver','CSport')) def getres(astr,host=csip,port=csport): """向 socketserver发送一个 *astr* 。 server采用密码验证,所以需要线进行密码判断,然后再传送 * astr * 字符串。 收到服务器传送的数据之后,进行长度验证,如果不正确,重新传送。 判断成功或者失败次数超出限制之后端开连接,返回结果。 :param astr: 字符串命令 :param host: 发送命令的server ip。 默认是给CSserver发送, :param port: server port。 默认是CSserver 的port。
LOG_COLORS = { 'DEBUG': COLOR_GREEN + '%s' + COLOR_RESET, 'INFO': COLOR_GRAY + '%s' + COLOR_RESET, 'WARNING': COLOR_YELLOW + '%s' + COLOR_RESET, 'ERROR': COLOR_RED + '%s' + COLOR_RESET, 'CRITICAL': COLOR_RED + '%s' + COLOR_RESET, 'EXCEPTION': COLOR_RED + '%s' + COLOR_RESET, } # Global logger g_logger = None # get some configs from config.conf filename=mod_config.getConfig('log','logfile') loglevels=mod_config.getConfig('log','streamloglevel') + ":" + mod_config.getConfig('log','fileloglevel') class ColorFormatter(logging.Formatter): '''A colorful formatter''' def __init__(self,fmt = None, datefmt = None): """ Arguments: - `self`: - `fmt`: - `datefmt`: """ logging.Formatter.__init__(self,fmt,datefmt)