Example #1
    def __enter__(self):
        '''Needed in with-as statements.

        :return: This instance
        '''
        if not os.path.exists(self.directory):
            try:
                logging.debug('Directory %s does not exist, creating one' %
                              self.directory)
                os.makedirs(self.directory)
            except OSError as err:  # os.makedirs failures raise OSError
                logging.error('Could not create directory %s; reason: %s' %
                              (self.directory, err))
                raise ValueError

        try:
            logging.debug('Binding the socket to %s:%d' % self.addr_port)
            self.socket.bind(self.addr_port)
        except socket.error as err:
            logging.error('Can\'t bind the socket to %s:%d; reason: %s' %
                          (self.addr_port + (err, )))
            raise ValueError

        logging.debug('Socket bound to %s:%d' % self.socket.getsockname())

        self.db = db_manager(os.path.join(self.directory, 'db.json'))

        return self
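Since __enter__ creates the directory, binds the socket, and opens the db_manager, the object is meant to be driven by a with statement. The matching __exit__ is not shown; a minimal sketch (assuming that closing the socket is the only cleanup needed) could be:

    def __exit__(self, exc_type, exc_value, traceback):
        '''Counterpart to __enter__: release the socket when leaving the with-block'''
        logging.debug('Closing socket bound to %s:%d' % self.socket.getsockname())
        self.socket.close()
        return False  # do not suppress exceptions raised inside the block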
Example #2
	def __init__( _self ):
		"""hook up database, framer, and bind UDP"""
		_self.db = db_manager()

		settings = settings_manager()

		_self.my_uuid = settings.get( "uuid" )
		_self.my_last_seq = _self.db.get_seq_from_uuid( _self.my_uuid )

		_self.handlers={
			dbframe.framer.typeDbUpsert:_self.handle_frame_upsert,
			dbframe.framer.typeDbDelete:_self.handle_frame_delete,
			dbframe.framer.typeNetHello:_self.handle_frame_hello,
			dbframe.framer.typeNetReqClientList:_self.handle_frame_req_client_list,
			dbframe.framer.typeNetClientList:_self.handle_frame_client_list,
			dbframe.framer.typeNetReqClientUpdates:_self.handle_frame_req_client_updates,
			}

		_self.framer = dbframe.framer()

		# Broadcast to everybody. We should be able to compute the local
		# broadcast address programmatically, but most routers seem to drop
		# those anyway, so the limited broadcast address works fine here.
		_self.UDP_IP = "255.255.255.255"
		_self.UDP_PORT = 32250

		_self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
		_self.sock.bind((_self.UDP_IP, _self.UDP_PORT))
		if hasattr(socket,'SO_BROADCAST'):
			#add broadcast abilities
			_self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
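Because the socket above is created with SO_BROADCAST enabled, the same address/port pair can also be used for sending. A minimal send helper is sketched below (the frame_bytes argument, an already-encoded frame from dbframe.framer, and the method itself are assumptions, not part of the original example):

	def broadcast( _self, frame_bytes ):
		# send one encoded frame to every peer listening on the local segment
		_self.sock.sendto( frame_bytes, (_self.UDP_IP, _self.UDP_PORT) )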
Example #3
    def setup_db_manager(self):
        """
        Set up the db manager based on a dictionary of options supplied by get_configuration
        """
        db = db_manager()

        if(self.config.db_file):
            db.init_sb_db(self.config.db_file)
        else:
            db.init_sb_db('spiderbro.db')

        if(self.config.mysql):
            db.xbmc_init_mysql(self.config.host, self.config.user, self.config.pwd, self.config.schema) 
        else: 
            db.xbmc_init_sqlite(self.config.xbmc_sqlite_db) 

        if(self.config.clear_cache and self.config.show):
            self.logger.info("Clearing db cache for show %s" % (self.config.show))
            db.clear_cache(self.config.show)
        
        if(self.config.high_quality and self.config.show):
            self.logger.info("Changing quality to high for show %s" % (self.config.show))
            db.set_quality(self.config.show, 1)
        
        if(self.config.low_quality and self.config.show):
            self.logger.info("Changing quality to low for show %s" % (self.config.show))
            db.set_quality(self.config.show, 0)
        
        if(self.config.force_id and self.config.show):
            self.logger.info("Forcing new id %s for show %s" % (self.config.force_id, self.config.show))
            db.update_series_id(self.config.show, self.config.force_id)
        return db
Example #4
    def __init__(self, parent, title):
        super(Example, self).__init__(parent, title=title, size=(790, 350))
        self.db = db_manager()
        self.filter = self.db.filter()
        self.InitUI()
        self.Centre()
        self.Show()
Example #5
File: img_bot.py  Project: Erfre/ig_bot
    def db_connect(self, subreddit):
        path, img_path = get_settings(subreddit)

        self.db = db_manager(path)
        self.db.table = subreddit
        self.conn = self.db.create_connect()
        self.db.count_row(self.conn)
        return img_path
Example #6
	def __init__(self, parent, title):
		super(Example, self).__init__(parent, title=title, 
			size=(790, 350))
		self.db = db_manager()
		self.filter = self.db.filter()
		self.InitUI()
		self.Centre()
		self.Show()
Example #7
    def __init__(self, parent, title):
        super(Example, self).__init__(parent, title=title, size=(790, 200))
        self.db = db_manager()

        s = settings_manager()
        self.uuid = s.get("uuid")

        self.InitUI()
        self.Centre()
        self.Show()
Example #8
	def __init__(self, parent, title):
		super(Example, self).__init__(parent, title=title, 
			size=(790, 300))
		self.db = db_manager()

		settings = settings_manager()
		self.uuid = settings.get( "uuid" )

		self.InitUI()
		self.Centre()
		self.Show()
Example #9
    def setup_db_manager(self):
        """
        Set up the db manager based on a dictionary of options supplied by get_configuration
        """
        db = db_manager()

        if (self.config.db_file):
            db.init_sb_db(self.config.db_file)
        else:
            db.init_sb_db('spiderbro.db')

        if (self.config.mysql):
            db.xbmc_init_mysql(self.config.host, self.config.user,
                               self.config.pwd, self.config.schema)
        else:
            db.xbmc_init_sqlite(self.config.xbmc_sqlite_db)

        if (self.config.clear_cache and self.config.show):
            self.logger.info("Clearing db cache for show %s" %
                             (self.config.show))
            db.clear_cache(self.config.show)

        if (self.config.high_quality and self.config.show):
            self.logger.info("Changing quality to high for show %s" %
                             (self.config.show))
            db.set_quality(self.config.show, 1)

        if (self.config.low_quality and self.config.show):
            self.logger.info("Changing quality to low for show %s" %
                             (self.config.show))
            db.set_quality(self.config.show, 0)

        if (self.config.force_id and self.config.show):
            self.logger.info("Forcing new id %s for show %s" %
                             (self.config.force_id, self.config.show))
            db.update_series_id(self.config.show, self.config.force_id)
        return db
Example #10
    'https://script.googleusercontent.com/macros/echo?user_content_key=m6tk5dBDTsx5CUYHpHzKjS-GG4sZTTqmlYpLs174ECmpZi8gyB70GF_GmKO6AT0ASwbHiJOhTH8Q47_CNmtGHt5jWKjJF5H7OJmA1Yb3SEsKFZqtv3DaNYcMrmhZHmUMWojr9NvTBuBLhyHCd5hHa3djn4kcaeuceAVPTcb_IKOQizEdNNom8Sk6pZs7_CDNMUWiDq7n5DCWZiujjnbT-IrTlwrp2DSWLgANXR6ofjDf5WHNTOrgsudrZcxZzA24N6hwApyDVcMAPWBcdI_E2UMraeaYJFFKNiV4qZl1oW8CU6cvb-wj4e0BQ2CJeOqTEMNlqVLIbS8G8d9eiTjLOMGD6mYBOprp&lib=MkTrT-GFjciLZr9a0QLCCFly6XnJGsUf7':
    'ecology_twitter'
    #'https://news.yandex.ru/incident.rss':'accidents'
    #'https://news.google.com/news/feeds?q=%D0%B4%D0%BE%D0%BD%D0%BE%D1%80%D1%81%D0%BA%D0%B0%D1%8F+%D0%BA%D1%80%D0%BE%D0%B2%D1%8C&output=rss':'blood'
    #'https://news.google.com/news/feeds?q=%D0%BF%D1%80%D0%BE%D1%82%D0%B5%D1%81%D1%82%D1%8B+%D0%B4%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D0%B1%D0%BE%D0%B9%D1%89%D0%B8%D0%BA%D0%B8&output=rss':'antiplaton',
    #'https://news.google.com/news/feeds?q=%D0%BF%D0%BB%D0%B0%D1%82%D0%BE%D0%BD+%D0%B4%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D0%B1%D0%BE%D0%B9%D1%89%D0%B8%D0%BA%D0%B8&output=rss':'antiplaton',
    #'https://news.google.com/news/feeds?q=%D0%B0%D0%BD%D1%82%D0%B8%D0%BF%D0%BB%D0%B0%D1%82%D0%BE%D0%BD&output=rss':'antiplaton',
    #'https://script.googleusercontent.com/macros/echo?user_content_key=TZsyA5Fq2q6HGLuDWq23ixHb8GfsSZj1_JsqVpggKTsVY7Cuzu-12LGxQfuqCj3KFI2QQt-XcV8Zm62l0aqJS1a1OoCyjxEXm5_BxDlH2jW0nuo2oDemN9CCS2h10ox_1xSncGQajx_ryfhECjZEnNrJjcn5tMnJozsIHLgb4WDmzEc3V7pY_qbmxUTSbtzxx0nSp4K8Rg-TeO0TF_iSt4Tt-flz1lUkWh6nmMXDfIYlXmOAssj6TcGD6mYBOprp&lib=Msp86L4y290o5xM5axajKI1y6XnJGsUf7':'antiplaton_tw'
    #'https://news.google.com/news/feeds?q=%D0%B2%D1%80%D0%B5%D0%BC%D0%B5%D0%BD%D0%BD%D1%8B%D0%B5%20%D0%BB%D0%B0%D0%B3%D0%B5%D1%80%D1%8F%20%D0%B1%D0%B5%D0%B6%D0%B5%D0%BD%D1%86%D0%B5%D0%B2&output=rss':'ukraine'
    #'http://blogs.yandex.ru/search.rss?text=%D1%83%D0%BA%D1%80%D0%B0%D0%B8%D0%BD%D1%81%D0%BA%D0%B8%D0%B5+%D0%B1%D0%B5%D0%B6%D0%B5%D0%BD%D1%86%D1%8B':'ukraine'
    #,'http://news.google.com/news?gl=us&pz=1&ned=us&hl=en&q=%D0%B7%D0%B5%D0%BC%D0%BB%D0%B5%D1%82%D1%80%D1%8F%D1%81%D0%B5%D0%BD%D0%B8%D0%B5&output=rss':'earthquakes'
    #,'http://news.google.com/news?hl=en&gl=us&q=%D0%BD%D0%B0%D0%B2%D0%BE%D0%B4%D0%BD%D0%B5%D0%BD%D0%B8%D1%8F&um=1&ie=UTF-8&output=rss':'floods'
})

#try:
db = db_manager()

db.connect()
print "connect"
for url in news_sources.keys():
    try:
        fires_lenta = feedparser.parse(url)

        for entry in fires_lenta.entries:
            str_date = time.strftime('%d/%m/%Y %H:%M:%S', entry.updated_parsed)
            print str_date, entry.guid

            if hasattr(entry, 'georss_point'):
                coords = entry.georss_point.split(' ')
                entry.geo_long = coords[0]
                entry.geo_lat = coords[1]
Example #11
#!/usr/bin/python
# -*- coding: utf-8 -*-

from db_manager import db_manager
from settings_manager import settings_manager
from dbframe import framer
from localtimeutil import local8601

db = db_manager()
s = settings_manager()
uuid = s.get("uuid")
db.insert_local_contact(uuid, local8601(), "KD0LIX", "KC5YTI", "80m", "testmode", "1A", "KS")
Example #12
def Crawling(URL, db):
	driver = None
	info_name = URL['info'].split('_')
	crawling_name = info_name[0]	# select which board crawler to use
	page = 1
	main_url = URL['url']	# extract the board url (used when switching pages)
	page_url = eval(crawling_name + '.Change_page(main_url, page)')	# url of the current page of posts
	end_date = date_cut(URL['info'])	# extract end_date
	if crawling_name in ["sj34"]:		# special case: dynamic board
		sj34.everytime_all_board(URL, end_date, db)
		return
	if crawling_name in ["sj20"]:		# excluded board
		return

	# print info of the board currently being crawled
	print("Target : ", URL['info'])
	continue_handler(URL['info'], URL, page_url)

	# decide whether this board should be crawled at all
	if is_crawling(db, URL['info']) == False:
		return

	while True:
		if crawling_name in ["sj23", "sj26", "sj27", "sj28", "sj30", "sj44"]:
			lastly_post = get_lastly_post(URL, db)
		try:
			print("\npage_url :::: ", page_url)	# print the current url
			print("Page : ", page)				# print the current page number
			# create driver_page---------------------------
			if crawling_name in ['sj10']:
				driver_page = URLparser_EUCKR(page_url)
			elif crawling_name in ['sj12']:
				driver_page = URLparser_UTF8(page_url)
			else:
				driver_page = URLparser(page_url)
			#-------------------------------------------
			# boards that use Selenium----------------------------------------------------------------------------------------------
			if crawling_name in ["sj23", "sj26", "sj27", "sj28", "sj29", "sj38", "sj44"]:
				data = eval(crawling_name + '.Parsing_list_url(URL, page_url)')
				driver = data[0]
				post_urls = data[1]
			elif crawling_name in ["sj30"]:#---------------------------special handling for the Sejongdae Station board
				data = eval(crawling_name + '.Parsing_list_url(URL, page_url, lastly_post, db, driver)')
				driver = data[0]
				post_urls = data[1]
			# boards that use Requests----------------------------------------------------------------------------------------------
			else:
				# boards that require login-------------------------------------------------------------------------------
				if URL['login'] == 1:
					post_urls = eval(crawling_name + '.Parsing_list_url(URL, page_url)')
				# boards that do not require login---------------------------------------------------------------------------
				else:
					if driver_page is None:		# break if the connection failed
						error_handler("driver_none", URL, page_url, db)
						break
					else:
						# choose the parser--------------------------------------------------
						if crawling_name in ['sj10']:
							bs_page = BeautifulSoup(driver_page, 'lxml')
						else:
							bs_page = BeautifulSoup(driver_page, 'html.parser')
						#--------------------------------------------------------------
					post_urls = eval(crawling_name + '.Parsing_list_url(URL, bs_page)')
				#-----------------------------------------------------------------------------------------------
			#-----------------------------------------------------------------------------------------------------------------
			# get_post_data format: [post-info dictionary, title, date]-------------------------------------------------------------------------------------------------------
			# the date format is "0000-00-00 00:00:00"
			post_data_prepare = []
			for post_url in post_urls:
				# Selenium case--------------------------------------------------------------------------------------------------------------------
				if crawling_name in ['sj29', 'sj30']:#------------------boards with the standard layout
					get_post_data = eval(crawling_name + '.Parsing_post_data(driver, post_url, URL)')
				#---------------------------------------------------------------------------------------------------boards without the standard layout
				elif crawling_name in ['sj23', 'sj26', 'sj27', 'sj28', 'sj44']:
					data = eval(crawling_name + '.Parsing_post_data(driver, post_url, URL, lastly_post)')
					post_data_prepare = data[0]
					lastly_post = data[1]
					if lastly_post is None:
						pass
					else:
						push_lastly_post(URL, lastly_post, db)
				# Requests case--------------------------------------------------------------------------------------------------------------------
				else:
					# create driver_post--------------------------------
					if crawling_name in ["sj21", "sj4", "sj5", "sj8", "sj16"]: #---driver_post is not needed
						pass
					elif crawling_name in ['sj10', 'sj33']:
						driver_post = URLparser_EUCKR(post_url)
					elif crawling_name in ['sj12']:
						driver_post = URLparser_UTF8(post_url)
					else:
						driver_post = URLparser(post_url)
					#------------------------------------------------
					#-----------------------------------------------------------------------------------------------Wikipedia-style structure
					if crawling_name in ['sj21']:
						get_post_data = eval(crawling_name + '.Parsing_post_data(post_url, URL)')
					#-----------------------------------------------------------------------------------------------structure that does not follow the board standard
					elif crawling_name in ["sj4", "sj5", "sj8", "sj16"]:
						post_data_prepare = eval(crawling_name + '.Parsing_post_data(post_url, URL)')
						break
					#-----------------------------------------------------------------------------------------------structure that follows the board standard
					else:
						if driver_post is None:		# break if the connection failed
							error_handler("driver_none", URL, page_url, db)
							break
						else:
							# choose the parser-------------------------------------------
							if crawling_name in ['sj10']:
								bs_post = BeautifulSoup(driver_post, 'lxml')
							elif crawling_name in ['sj12']:
								bs_post = driver_post
							else:
								bs_post = BeautifulSoup(driver_post, 'html.parser')
							#-------------------------------------------------------
						get_post_data = eval(crawling_name + '.Parsing_post_data(bs_post, post_url, URL)')
				#-----------------------------------------------------------------------------------------------------------------------------------
				
				# post_data_prepare is already complete-----------------------------------------------------------------------
				if crawling_name in ["sj4", "sj5", "sj8", "sj16", "sj23", "sj26", "sj27", "sj28", "sj44"]:
					pass
				# post_data_prepare is not complete yet---------------------------------------------------------------------
				else:
					if get_post_data is None:	# invalid post data
						continue
					title = get_post_data[1]
					date = get_post_data[2]
		
					print(date, "::::", title)	# print the date and title of the post just crawled
		
					# skip posts older than end_date, append newer ones
					if str(date) <= end_date:
						continue
					else:
						post_data_prepare.append(get_post_data[0])
			#----------------------------------------------------------------------------------------------------------
			#--------------------------------------------------------------------------------------------------------------------------------------------------------------
			add_cnt = db_manager(URL, post_data_prepare, db)
			print("add_OK : ", add_cnt)	# print the number of posts saved to the DB
		
			# quit the driver [when Selenium was used]
			if crawling_name in ["sj23", "sj26", "sj27", "sj28", "sj29", "sj30", "sj38", "sj44"]:
				driver.quit()
			
			# if no posts were added to the DB, break; otherwise move to the next page
			if add_cnt == 0:
				break
			else:
				page += 1
				page_url = eval(crawling_name + '.Change_page(main_url, page)')
		# Error handler: if crawling fails, log the error and stop crawling.
		except Exception as e:
			error_handler(e, URL, page_url, db)
			break
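Note that this project calls db_manager as a plain function: it takes the URL descriptor, the list of prepared posts, and an open db handle, stores the posts, and returns how many were added (a return value of 0 ends the page loop). The real implementation is not shown; a rough, purely illustrative sketch of that call shape might be:

def db_manager(URL, post_data_prepare, db):
	# store each prepared post under the board named by URL['info'] and
	# report how many were newly added; insert_post() is a hypothetical helper
	added = 0
	for post in post_data_prepare:
		if insert_post(db, URL['info'], post):
			added += 1
	return added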
Example #13
	def handle_frame_delete( _self, frame ):
		print "delete"
		db = db_manager()  # note: this local instance is never used; the insert below goes through _self.db
		_self.db.insert_frames( [frame] )
Example #14
    def __init__(self):
        self.parser = get_rest_args_parser()
        self.dbmanager = db_manager()
        return
Example #15
def everytime_all_board(URL, end_date, db):
    main_url = URL['url']
    board_search_url = "https://everytime.kr/community/search?keyword="
    board_search_word = ['게시판', '갤러리']  # search keywords: "board", "gallery"
    board_list = []
    # connect the driver
    try:
        driver = chromedriver()
        driver = everytime.login(driver)
    except Exception as e:
        error_handler(e, URL, main_url, db)
        return
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "a.article")))
    html = driver.page_source
    bs = BeautifulSoup(html, 'html.parser')
    # scrape the dynamic boards from the Everytime top menu =============================================================================
    board_group_list = bs.find("div", {
        "id": "submenu"
    }).findAll('div', {"class": "group"})
    for board_group in board_group_list:
        try:
            board_li_list = board_group.find("ul").findAll("li")
            for board_li in board_li_list:
                board_li_dic = {}
                board_li_dic['tag'] = board_li.find("a").text
                if board_li.find("a").text.strip() == "더 보기":  # skip the "show more" link
                    continue
                else:
                    board_li_dic['url'] = main_url + board_li.find("a")['href']
                if (board_li_dic['tag'].find("찾기") != -1):  # skip boards whose name contains "찾기" ("find")
                    continue
                board_list.append(board_li_dic)
        except:
            continue
    # scrape additional dynamic boards found via search
    for search_word in board_search_word:
        try:
            board_search_url_done = board_search_url + search_word
            driver.get(board_search_url_done)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "a.result")))
            html = driver.page_source
            bs = BeautifulSoup(html, 'html.parser')
            board_a_list = bs.find("div", {
                "class": "searchresults"
            }).findAll('a')
            for board_a in board_a_list:
                board_li_dic = {}
                board_li_dic['tag'] = board_a.find("h3").text
                board_li_dic['url'] = main_url + board_a.get('href')
                board_list.append(board_li_dic)
        except:
            continue
    #===========================================================================================================
    # loop over the dynamic boards
    for board in board_list:
        page = 1
        page_flag = 0
        board_url = board['url']
        page_url = Change_page(board_url, page)  # url of the current page of posts
        print("\nTarget : ", URL['info'], " :: ", board['tag'])
        continue_handler(URL['info'] + " :: " + board['tag'], URL, page_url)
        # page loop
        while True:
            if page_flag == 50:
                page_flag = 0
                driver.quit()
                time.sleep(3)
                driver = chromedriver()
                driver = everytime.login(driver)
            try:
                print("page_url :::: ", page_url)  # print the current url
                print("Page : ", page)  # print the current page number
                post_urls = Parsing_list_url(main_url, page_url, driver, db)
                # retry once to work around Everytime's recurring load issue
                if len(post_urls) == 0:
                    time.sleep(2)
                    post_urls = Parsing_list_url(main_url, page_url, driver,
                                                 db)
                post_data_prepare = []
                # post loop
                for post_url in post_urls:
                    get_post_data = Parsing_post_data(driver, post_url, URL,
                                                      board['tag'], db)
                    if get_post_data == "error":
                        break
                    title = get_post_data[1]
                    date = get_post_data[2]
                    print(date, "::::", title)  # print the date and title of the post just crawled
                    # skip posts older than end_date, append newer ones
                    if str(date) <= end_date:
                        continue
                    else:
                        post_data_prepare.append(get_post_data[0])
                add_cnt = db_manager(URL, post_data_prepare, db)
                print("add_OK : ", add_cnt)  # print the number of posts saved to the DB
                # if no posts were added to the DB, break; otherwise move to the next page
                if add_cnt == 0:
                    page_flag = 0
                    break
                else:
                    page_flag += 1
                    page += 1
                    page_url = Change_page(board_url, page)
            except Exception as e:
                error_handler(e, URL, page_url, db)
                driver.quit()
                time.sleep(3)
                driver = chromedriver()
                driver = everytime.login(driver)
                break
    # release the driver
    driver.quit()
Example #16
    def load_db_params(self):
        dbm = db_manager.db_manager()
        pprint(dir(dbm))
        dbm.setup_db(db_name='swapmeetdb', db_user='******', db_pass='******', db_host='mysql.haoliu.net')
        return dbm