import time from urlparse import urlparse from urlparse import parse_qsl from urllib import urlencode import os import sys import lzo from decimal import Decimal abspath = os.path.abspath(sys.argv[0]) abspath = os.path.dirname(abspath) sys.path.append(abspath) from util import initLogger from util import ParseHeader logger = initLogger('ec', abspath) from pybloom import BloomFilter from extractor import Extractor from seeder import seed import boto3 class MyBloomFilter(object): @staticmethod def normalize_url(url): # http://www.xiami.com/artist/album-135?spm=0.0.0.0.NUyK9b # ParseResult(scheme='http', netloc='www.xiami.com', path='/artist/album-1198', params='', query='spm=0.0.0.0.AuRnGD&d=&p=&page=10' normalized = url
#-*-coding:utf-8 -*- #__autor__='jufu' from util import initLogger from util import initDB logger = initLogger('log.conf', 'dlmLogger') #logger.info('dlm logger error') if __name__ == '__main__': table = initDB('fintech', 'ygdai_detail') newTable = initDB('fintech', 'new_ygdai_detail') for user in table.find(): data = {} for userString in user[u'信息']: for s in userString.split('\n'): #print s.split(u'\uff1a') key = s.split(u'\uff1a')[0].strip() value = s.split(u'\uff1a')[1].strip() #data[str(count)]=key+':'+value data[key] = value #print key,value #print data #newTable.insert_one(data) data[u'抓取时间'] = user[u'抓取时间'] data[u'总逾期借款笔数'] = user[u'总逾期借款笔数']
def letsGoMates():
    """Run the daily pipeline stages in order, then start the graph strategy.

    The loggers, globals and settings are initialised first.  Each stage is
    then called in turn; a stage that returns ``False`` aborts the run: its
    error message is logged and the function returns without starting the
    graph strategy.  Any exception raised along the way is caught and
    reported on stdout together with its traceback; nothing is re-raised.

    Returns:
        None
    """
    try:
        graphUtils.initLogger()
        util.initLogger()
        util.initGlobal()
        util.initSettings()

        # NOTE: the GoogleNews capture stage (Google News and its links) used
        # to run here, before RemoveBoiler, and is currently disabled.

        # (stage callable, error logged on failure, info logged on success)
        stages = (
            # Remove boiler data from Google News & Html
            (RemoveBoiler,
             "Existing application because of Boiler not being updated for today",
             "Boiler done for today"),
            # Do some relevance test
            (Relevance,
             "Existing application because of Relevance not being updated for today",
             "Relevance done for today"),
            # Do some smoothness task
            (Smoothness,
             "Existing application because of Smoothness not being updated for today",
             "Smoothness done for today"),
            # Do some connection clarity check
            (ConnectionClarity,
             "Existing application because of ConnectionClarity not being updated for today",
             "Connection clarity done for today"),
            # Finally recommend something
            (RecommendationMetric,
             "Existing application google recommendation for today",
             "Recommended links done for today"),
            (RecommendationSuggMetric,
             "Existing application googleSugg recommendation for today",
             "Recommended links done for today"),
        )

        for stage, failureMessage, successMessage in stages:
            # Compare with False explicitly (not truthiness): a stage that
            # returns None keeps counting as success, exactly as before.
            if stage() == False:  # noqa: E712
                util.logger.error(failureMessage)
                return
            util.logger.info(successMessage)

        util.logger.info("Graph strategy to begin now")
        graphMain.graphStart()
    except Exception:
        # format_exc() returns the traceback text.  print_exc() returns None,
        # which made this message always end in "None".
        print("Exception at NeoRedPrediction : %s" % traceback.format_exc())
#!/usr/bin/env python3 """ main loop starter for the ftpusbwatch program """ import os,sys,subprocess,time os.chdir(os.path.dirname(__file__)) # ensure correct working direcory try: import util,usbwait util.ntpTimeWait() config=util.loadConfig("config.json") log=util.initLogger(config) except KeyboardInterrupt: raise except Exception as e: """ if all else fails, do a git pull, hoping that will fix it """ import traceback traceback.print_exc() util.waitForNetwork(timeout=None) gitlog = subprocess.check_output("git pull -f",shell=True).decode('utf-8').strip() subprocess.call("sync") print(gitlog) if gitlog == "Already up-to-date.": print("Could not start. Waiting and git-pulling") time.sleep(3600) subprocess.Popen("./main.py",shell=True) sys.exit(0) try: log.info("boot|Waiting for network and updating") util.waitForNetwork(timeout=None)
from GoogleNews import GoogleNews
from RemoveBoiler import RemoveBoiler
from Relevance import Relevance
from Smoothness import Smoothness
from Clarity import ConnectionClarity
from NER import NER


def downloadGoogleNewsSched():
    """Call ``GoogleNews()`` once.

    Written as a scheduler job callback; the scheduling code that would
    register it is currently commented out in the entry point below.
    """
    GoogleNews()


def _runConnectionClarity():
    """Initialise logging, globals and settings, then run ConnectionClarity."""
    # NOTE(review): `util` is not imported in the visible part of this file;
    # confirm it is brought into scope elsewhere.
    util.initLogger()
    util.initGlobal()
    util.initSettings()
    ConnectionClarity()


if __name__ == '__main__':
    _runConnectionClarity()

    # Disabled alternatives kept for reference:
    # downloadGoogleNewsSched()
    # RemoveBoiler()
    # sched = BackgroundScheduler()
    # job = sched.add_job(downloadGoogleNewsSched, 'interval', seconds=2)
    # sched.start()
    # while True:
    #     time.sleep(10)
    # sched.shutdown()
Language: Python File: main.py ------------------------------------------------------------------------ Description: Main loop starter for the USBMonitor program. ------------------------------------------------------------------------ License: Beerware License; if you find the code useful, and we happen to cross paths, you're encouraged to buy us a beer. The code is distributed hoping that you in fact find it useful, but without warranty of any kind. """ import os, sys, subprocess, time cwd = os.path.dirname(os.path.abspath(__file__)) os.chdir(cwd) # ensure correct working direcory import util, usbmonitor config = util.loadConfig("config.json") log = util.initLogger(config) try: ignoreAlreadyMounted = len(sys.argv) > 1 and sys.argv[1] == "skip" app = usbmonitor.USBMonitor(config) app.main_loop(not ignoreAlreadyMounted) # rerun this and exit #subprocess.Popen("./main.py",shell=True) except KeyboardInterrupt: app.unmount_partition() log.info("|Killed by Keyboard") except: log.exception("Error in main loop|")