def content_parse(url):
    """Fetch one gallery post page and return {"url", "title", "date", "post"}.

    "post" falls back to the sentinel 1 when the body cannot be extracted.
    """
    html = URLparser(url)
    # Read the response exactly once: the stream is exhausted after the
    # first .read(), so the original retry that called .read() again in the
    # except branch always parsed an empty document.
    data = html.read()
    try:
        bs0bj = BeautifulSoup(data, "lxml")
    except Exception:
        print("connect error")
        # Fall back to the stdlib parser on the same bytes instead of
        # re-parsing an exhausted stream with the same failing parser.
        bs0bj = BeautifulSoup(data, "html.parser")
    bs0bj = bs0bj.find("div", {"class": "view_content_wrap"})

    db_record = {}
    db_record.update({"url": url})

    obj = bs0bj.find("h3", {"class": "title ub-word"})\
        .find("span", {"class": "title_subject"}).get_text().strip()
    db_record.update({"title": obj})

    # NOTE(review): the original also looked up the writer block
    # ("gall_writer ub-writer") but immediately overwrote the result, so
    # that dead lookup is dropped here.
    obj = bs0bj.find("span", {"class": "gall_date"}).attrs['title']
    db_record.update({"date": obj.strip()})

    try:
        obj = bs0bj.find("div", {"class": "gallview_contents"})\
            .find("div", {"style": "overflow:hidden;"})
        db_record.update({"post": post_wash(obj.get_text().strip())})
    except AttributeError:  # element missing -> find() returned None
        db_record.update({"post": 1})
    return db_record
def content_parse(url):
    """Parse one board post page and return {"url", "title", "date", "post"}."""
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")\
        .find("td", {"class": "text12graylight"})

    db_record = {}
    db_record.update({"url": url})

    obj = bs0bj.find("td", {"class": "title12"}).get_text().strip()
    db_record.update({"title": obj})

    # Normalize dot-separated dates ("YYYY.MM.DD" style) to dashes.
    obj = bs0bj.find("td", {"class": "text11darkgray"}).get_text().strip()
    db_record.update({"date": obj.replace(".", "-")})

    try:
        obj = bs0bj.find("td", {
            "class": "text12graylight",
            "align": "left",
            "valign": "top"
        }).get_text().strip()
        db_record.update({"post": post_wash(obj)})
    except AttributeError:  # body cell missing -> find() returned None
        db_record.update({"post": 1})
    return db_record
def content_parse(domain, url):
    """Parse one article page.

    Returns {"url", "title", "date", "post"} plus an optional "class" key
    when the category label span is present and non-empty.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")
    bs0bj = bs0bj.find("div", {"class": "view-wrap"})\
        .find("article", {"itemprop": "articleBody"})

    db_record = {}
    db_record.update({"url": url})

    obj = bs0bj.find("h1", {"itemprop": "headline"})
    db_record.update({"title": obj.get_text().strip()})

    # Optional category label; stored only when present and non-empty.
    obj = bs0bj.find("span", {"class": "hidden-xs"})
    if obj is not None:
        if obj.get_text().strip() != "":
            db_record.update({"class": obj.get_text().strip()})

    # The datePublished content attribute embeds a "KST" marker between the
    # date and time halves; split once and rejoin them with a space.
    obj = bs0bj.find("span", {"itemprop": "datePublished"})
    parts = obj.attrs["content"].split("KST")
    db_record.update({"date": parts[0] + " " + parts[1]})

    try:
        obj = bs0bj.find("div", {"itemprop": "description"})
        db_record.update({"post": post_wash(str(obj.get_text().strip()))})
    except AttributeError:  # description block missing -> find() returned None
        db_record.update({"post": 1})
    return db_record
def content_parse(url):
    """Scrape one post page into a {"url", "title", "date", "post"} record."""
    soup = BeautifulSoup(URLparser(url).read(), "html.parser")

    title = soup.find("div", {"class": "read_header"}).h1.get_text().strip()
    # Dates use dots as separators on this site; swap them for dashes.
    date = soup.find("p", {"class": "time"}).get_text().strip().replace(".", "-")
    body = soup.find("div", {"class": "read_body"}).get_text().strip()

    return {
        "url": url,
        "title": title,
        "date": date,
        "post": post_wash(body),
    }
def content_parse(domain, url):
    """Parse one bbs view page and return {"url", "title", "date", "post"}."""
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")

    db_record = {}
    db_record.update({"url": url})

    obj = bs0bj.find("table", {"class": "bbs-view-info"})
    # First row of the info table holds the title cell.
    obj2 = obj.find("tr").find("td")
    db_record.update({"title": obj2.get_text().strip()})
    # The following row holds the date cell.  find_next is the modern
    # spelling (findNext is a deprecated alias) and matches the other
    # parsers in this file.
    obj2 = obj.find("tr").find_next("tr").find("td")
    db_record.update({"date": obj2.get_text().strip()})

    obj = bs0bj.find("table", {"class": "bbs-view"})
    db_record.update({"post": post_wash(str(obj.get_text().strip()))})
    return db_record
def content_parse(url):
    """Scrape one list-style post page into {"url", "title", "date", "post"}."""
    page = BeautifulSoup(URLparser(url).read(), "html.parser")

    record = {"url": url}

    title_cell = page.find("td", {"class": "list_loop_left"})
    record["title"] = title_cell.get_text().strip()

    # The next cell of the same class carries the date: keep the text after
    # the first "(" up to the first space, with dots swapped for dashes.
    raw = title_cell.findNext("td", {"class": "list_loop_left"}).get_text().strip()
    record["date"] = raw.replace(".", "-").split("(")[1].split(" ")[0]

    body = page.find("td", {"class": "view_content"}).get_text().strip()
    record["post"] = post_wash(body)
    return record
def content_parse(url):
    """Scrape one board post into {"url", "title", "date", "post"}."""
    soup = BeautifulSoup(URLparser(url).read(), "html.parser")

    result = {"url": url}

    subject = soup.find("td", {"class": "boardSub"})
    result["title"] = subject.get_text().strip()

    # The cell two positions after the subject holds the dot-separated
    # date; normalize the separators to dashes.
    date_text = subject.findNext("td").findNext("td").get_text().strip()
    result["date"] = date_text.replace(".", "-")

    # NOTE(review): "contens" is the class name the site actually uses.
    body = soup.find("td", {"class": "contens"}).get_text().strip()
    result["post"] = post_wash(body)
    return result
def content_parse(domain, url):
    """Parse a label/value table page into {"url", "title", "date", "post"}.

    Fields are located by their Korean label text; the value sits in the
    <td> that follows each label node.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")

    db_record = {}
    db_record.update({"url": url})

    obj = bs0bj.find(text="제목")
    db_record.update({"title": obj.findNext('td').get_text().strip()})

    obj = bs0bj.find(text="작성일")
    db_record.update({"date": obj.findNext('td').get_text().strip()})

    try:
        obj = bs0bj.find("div", {'class': "bbs-body"})
        db_record.update({"post": post_wash(str(obj.get_text().strip()))})
    except AttributeError:  # body div missing -> find() returned None
        db_record.update({"post": 1})
    return db_record
def content_parse(url):
    """Parse a table-based post page into {"url", "title", "date", "post"}."""
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")

    db_record = {}
    db_record.update({"url": url})

    obj = bs0bj.find("td", {"class": "title"})
    db_record.update({"title": obj.get_text().strip()})

    # The date sits two cells after the title cell.
    obj = obj.findNext("td").findNext("td")
    db_record.update({"date": obj.get_text().strip()})

    try:
        obj = bs0bj.find("td", {"class": "tdc"}).get_text().strip()
        db_record.update({"post": post_wash(obj)})
    except AttributeError:  # content cell missing -> find() returned None
        db_record.update({"post": 1})
    return db_record
def content_parse(url):
    """Parse a board_view page into {"url", "title", "date", "post"}."""
    db_record = {}
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")
    bs0bj = bs0bj.find("div", {"id": "board_view"})

    db_record.update({"url": url})

    obj = bs0bj.find("h3").get_text().strip()
    db_record.update({"title": obj})

    # The <strong> inside the writer line carries the date text.
    obj = bs0bj.find("p", {"class": "writer"}).find("strong").get_text().strip()
    db_record.update({"date": obj})

    try:
        obj = bs0bj.find("div", {"class": "board_stance"}).get_text().strip()
        db_record.update({"post": post_wash(obj)})
    except AttributeError:  # body div missing -> find() returned None
        db_record.update({"post": 1})
    return db_record
def content_parse(url):
    """Parse a post page into {"url", "title", "date", "post"}."""
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")

    db_record = {}
    db_record.update({"url": url})

    obj = bs0bj.find("h3", {"class": "title"}).get_text().strip()
    db_record.update({"title": obj})

    # Rejoin the first three dot-separated date fields with dashes
    # (split once instead of three times as the original did).
    obj = bs0bj.find("span", {"class": "date"}).get_text().strip()
    parts = obj.split('.')
    db_record.update({"date": parts[0] + "-" + parts[1] + "-" + parts[2]})

    try:
        obj = bs0bj.find("div", {"class": "boardReadBody"}).get_text().strip()
        db_record.update({"post": post_wash(obj)})
    except AttributeError:  # body div missing -> find() returned None
        db_record.update({"post": 1})
    return db_record
def content_parse(domain, url):
    """Parse a page whose "head" table row holds the metadata.

    Returns {"url", "title", "date", "post"}; "post" falls back to 1 when
    the body row cannot be located.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")

    db_record = {}
    db_record.update({"url": url})

    obj = bs0bj.find("tr", {"class": "head"}).find("td", {"class": "first txt-l"})
    db_record.update({"title": obj.get_text().strip()})

    # The date sits two cells after the title cell in the head row.
    obj = obj.find_next("td").find_next("td")
    db_record.update({"date": obj.get_text().strip()})

    try:
        # The row following the head row carries the post body.
        obj = bs0bj.find("tr", {"class": "head"}).find_next("tr")
        db_record.update({"post": post_wash(str(obj.get_text().strip()))})
    except AttributeError:  # head/body row missing -> find() returned None
        db_record.update({"post": 1})
    return db_record
def content_parse(url):
    """Parse an article page (#bo_v) into {"url", "title", "date", "post"}."""
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser").find("article", {"id": "bo_v"})

    db_record = {}
    db_record.update({"url": url})

    obj = bs0bj.find("h1", {"id": "bo_v_title"}).get_text().strip()
    db_record.update({"title": obj})

    # The second <strong> in the info section holds the date; the site
    # presumably emits a two-digit year, so prepend the century.
    obj = bs0bj.find("section", {"id": "bo_v_info"}).find("strong").find_next("strong")
    db_record.update({"date": "20" + obj.get_text().strip()})

    try:
        obj = bs0bj.find("div", {"id": "bo_v_con"}).get_text().strip()
        db_record.update({"post": post_wash(obj)})
    except AttributeError:  # content div missing -> find() returned None
        db_record.update({"post": 1})
    return db_record
def content_parse(url):
    """Parse a view_subj-style page into {"url", "title", "date", "post"}."""
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")

    db_record = {}
    db_record.update({"url": url})

    obj = bs0bj.find("span", {"class": "view_subj_core"})
    db_record.update({"title": obj.get_text().strip()})

    obj = bs0bj.find("span", {"class": "view_subj_date"})
    db_record.update({"date": obj.get_text().strip()})

    try:
        obj = bs0bj.find("div", {"class": "view_txt_container"})
        db_record.update({"post": post_wash(str(obj.get_text().strip()))})
    except AttributeError:  # container missing -> find() returned None
        db_record.update({"post": 1})
    return db_record
def content_parse(domain, url):
    """Parse a board_view table page into {"url", "title", "date", "post"}."""
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")

    db_record = {}
    db_record.update({"url": url})

    bs0bj = bs0bj.find("table", {"class": "board_view"})

    obj = bs0bj.find("thead").get_text().strip()
    db_record.update({"title": obj})

    # Third cell of the first body row holds the date line; keep its third
    # whitespace-separated token.
    obj = bs0bj.find("tbody").find("tr").find("td").find_next("td").find_next("td")
    db_record.update({"date": obj.get_text().strip().split(" ")[2]})

    try:
        obj = bs0bj.find("tbody").find("td", {"class": "tdc"})
        db_record.update({"post": post_wash(str(obj.get_text().strip()))})
    except AttributeError:  # content cell missing -> find() returned None
        db_record.update({"post": 1})
    return db_record