Exemplo n.º 1
0
def content_parse(url):
    """Scrape one post page into a record dict.

    The page is fetched with ``URLparser`` (defined elsewhere; its result
    must expose ``read()``), parsed with BeautifulSoup using the ``lxml``
    parser, and narrowed to the ``div.view_content_wrap`` container.

    Args:
        url: Address of the post page.

    Returns:
        dict with the keys ``url``, ``title``, ``date`` and ``post``.
        ``post`` holds the ``post_wash``-ed body text, or the integer ``1``
        when the body could not be extracted.

    Raises:
        AttributeError: if the container, title or date element is not
            found (``find`` returned ``None``).
        KeyError: if the date span carries no ``title`` attribute.
    """
    html = URLparser(url)
    try:
        bs0bj = BeautifulSoup(html.read(), "lxml")
    except Exception:
        # Reading the same response a second time cannot recover from a
        # failed/exhausted read, so fetch the page again for the one retry.
        print("connect error")
        html = URLparser(url)
        bs0bj = BeautifulSoup(html.read(), "lxml")

    bs0bj = bs0bj.find("div", {"class": "view_content_wrap"})
    db_record = {"url": url}

    title_tag = bs0bj.find("h3", {"class": "title ub-word"}).find(
        "span", {"class": "title_subject"})
    db_record["title"] = title_tag.get_text().strip()

    # The date is taken from the span's ``title`` attribute, not its text.
    date_tag = bs0bj.find("span", {"class": "gall_date"})
    db_record["date"] = date_tag.attrs["title"].strip()

    try:
        body_tag = bs0bj.find("div", {"class": "gallview_contents"}).find(
            "div", {"style": "overflow:hidden;"})
        db_record["post"] = post_wash(body_tag.get_text().strip())
    except Exception:
        # Best effort: a missing or unparsable body is recorded as 1.
        db_record["post"] = 1

    return db_record
Exemplo n.º 2
0
def content_parse(url):
    """Scrape one post page into a record dict.

    The page is fetched with ``URLparser`` (defined elsewhere), parsed with
    BeautifulSoup, and narrowed to the first ``td.text12graylight`` cell.

    Args:
        url: Address of the post page.

    Returns:
        dict with the keys ``url``, ``title``, ``date`` and ``post``.
        ``date`` has every ``.`` replaced by ``-``. ``post`` holds the
        ``post_wash``-ed body text, or the integer ``1`` when the body could
        not be extracted.

    Raises:
        AttributeError: if the container, title or date cell is not found.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser").find(
        "td", {"class": "text12graylight"})
    db_record = {"url": url}

    title = bs0bj.find("td", {"class": "title12"}).get_text().strip()
    db_record["title"] = title

    date = bs0bj.find("td", {"class": "text11darkgray"}).get_text().strip()
    db_record["date"] = date.replace(".", "-")

    try:
        body_cell = bs0bj.find("td", {
            "class": "text12graylight",
            "align": "left",
            "valign": "top",
        })
        db_record["post"] = post_wash(body_cell.get_text().strip())
    except Exception:
        # Best effort: a missing or unparsable body is recorded as 1.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        db_record["post"] = 1

    return db_record
Exemplo n.º 3
0
def content_parse(domain, url):
    """Scrape one article page into a record dict.

    The page is fetched with ``URLparser`` (defined elsewhere), parsed with
    BeautifulSoup, and narrowed to the ``article[itemprop=articleBody]``
    element inside ``div.view-wrap``.

    Args:
        domain: Not used by this function; kept for the caller's signature.
        url: Address of the article page.

    Returns:
        dict with the keys ``url``, ``title``, ``date`` and ``post``, plus
        ``class`` when a non-empty ``span.hidden-xs`` is present. ``date`` is
        the ``content`` attribute of the ``datePublished`` span with the
        ``KST`` marker replaced by a single space. ``post`` holds the
        ``post_wash``-ed description text, or the integer ``1`` when it could
        not be extracted.

    Raises:
        AttributeError: if the wrapper, headline or date element is missing.
        KeyError: if the date span has no ``content`` attribute.
        IndexError: if that attribute does not contain ``KST``.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")
    bs0bj = bs0bj.find("div", {"class": "view-wrap"}).find(
        "article", {"itemprop": "articleBody"})

    db_record = {"url": url}

    headline = bs0bj.find("h1", {"itemprop": "headline"})
    db_record["title"] = headline.get_text().strip()

    # The category span is optional; only record it when it has text.
    category_tag = bs0bj.find("span", {"class": "hidden-xs"})
    if category_tag is not None:
        category = category_tag.get_text().strip()
        if category != "":
            db_record["class"] = category

    published = bs0bj.find("span", {"itemprop": "datePublished"})
    parts = published.attrs["content"].split("KST")
    db_record["date"] = parts[0] + " " + parts[1]

    try:
        body = bs0bj.find("div", {"itemprop": "description"})
        db_record["post"] = post_wash(str(body.get_text().strip()))
    except Exception:
        # Best effort: a missing or unparsable body is recorded as 1.
        db_record["post"] = 1
    return db_record
Exemplo n.º 4
0
def content_parse(url):
    """Scrape one post page into a record dict.

    Fetches ``url`` through ``URLparser`` (defined elsewhere), parses it with
    BeautifulSoup and returns a dict with the keys ``url``, ``title``,
    ``date`` (dots replaced by dashes) and ``post`` (body text passed through
    ``post_wash``).
    """
    response = URLparser(url)
    soup = BeautifulSoup(response.read(), "html.parser")

    # Title lives in the <h1> of the header block.
    heading = soup.find("div", {"class": "read_header"}).h1
    title = heading.get_text().strip()

    raw_date = soup.find("p", {"class": "time"}).get_text().strip()
    date = raw_date.replace(".", "-")

    body_text = soup.find("div", {"class": "read_body"}).get_text().strip()
    post = post_wash(body_text)

    return {"url": url, "title": title, "date": date, "post": post}
Exemplo n.º 5
0
def content_parse(domain, url):
    """Scrape one board post into a record dict.

    Fetches ``url`` through ``URLparser`` (defined elsewhere) and parses it
    with BeautifulSoup. ``domain`` is not used here. The returned dict has
    the keys ``url``, ``title``, ``date`` and ``post``.
    """
    response = URLparser(url)
    soup = BeautifulSoup(response.read(), "html.parser")
    record = {"url": url}

    # Title is the first cell of the first info row; the date is the first
    # cell of the row that follows it.
    info_table = soup.find("table", {"class": "bbs-view-info"})
    title_cell = info_table.find("tr").find("td")
    record["title"] = title_cell.get_text().strip()
    date_cell = info_table.find("tr").findNext("tr").find("td")
    record["date"] = date_cell.get_text().strip()

    body_table = soup.find("table", {"class": "bbs-view"})
    body_text = str(body_table.get_text().strip())
    record["post"] = post_wash(body_text)

    return record
Exemplo n.º 6
0
def content_parse(url):
    """Scrape one post page into a record dict.

    Fetches ``url`` through ``URLparser`` (defined elsewhere) and parses it
    with BeautifulSoup. The returned dict has the keys ``url``, ``title``,
    ``date`` and ``post`` (body text passed through ``post_wash``).
    """
    response = URLparser(url)
    soup = BeautifulSoup(response.read(), "html.parser")
    record = {"url": url}

    title_cell = soup.find("td", {"class": "list_loop_left"})
    record["title"] = title_cell.get_text().strip()

    # The next cell of the same class carries the date: dots become dashes,
    # then the text after the first "(" up to the next space is kept.
    info_cell = title_cell.findNext("td", {"class": "list_loop_left"})
    info_text = info_cell.get_text().strip().replace(".", "-")
    after_paren = info_text.split("(")[1]
    record["date"] = after_paren.split(" ")[0]

    body_text = soup.find("td", {"class": "view_content"}).get_text().strip()
    record["post"] = post_wash(body_text)

    return record
Exemplo n.º 7
0
def content_parse(url):
    """Scrape one board post into a record dict.

    Fetches ``url`` through ``URLparser`` (defined elsewhere) and parses it
    with BeautifulSoup. The returned dict has the keys ``url``, ``title``,
    ``date`` (dots replaced by dashes) and ``post`` (body text passed
    through ``post_wash``).
    """
    response = URLparser(url)
    soup = BeautifulSoup(response.read(), "html.parser")
    record = {"url": url}

    subject_cell = soup.find("td", {"class": "boardSub"})
    record["title"] = subject_cell.get_text().strip()

    # The date sits two <td> elements after the subject cell.
    date_cell = subject_cell.findNext("td").findNext("td")
    record["date"] = date_cell.get_text().strip().replace(".", "-")

    body_cell = soup.find("td", {"class": "contens"})
    record["post"] = post_wash(body_cell.get_text().strip())

    return record
	
Exemplo n.º 8
0
def content_parse(domain, url):
    """Scrape one board post into a record dict.

    The page is fetched with ``URLparser`` (defined elsewhere) and parsed
    with BeautifulSoup. Title and date are read from the ``<td>`` that
    follows the label text nodes "제목" and "작성일" respectively.

    Args:
        domain: Not used by this function; kept for the caller's signature.
        url: Address of the post page.

    Returns:
        dict with the keys ``url``, ``title``, ``date`` and ``post``.
        ``post`` holds the ``post_wash``-ed text of ``div.bbs-body``, or the
        integer ``1`` when it could not be extracted.

    Raises:
        AttributeError: if a label or the cell following it is not found.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")
    db_record = {"url": url}

    title_label = bs0bj.find(text="제목")
    db_record["title"] = title_label.findNext('td').get_text().strip()
    date_label = bs0bj.find(text="작성일")
    db_record["date"] = date_label.findNext('td').get_text().strip()

    try:
        body = bs0bj.find("div", {'class': "bbs-body"})
        db_record["post"] = post_wash(str(body.get_text().strip()))
    except Exception:
        # Best effort: a missing or unparsable body is recorded as 1.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        db_record["post"] = 1

    return db_record
Exemplo n.º 9
0
def content_parse(url):
    """Scrape one board post into a record dict.

    The page is fetched with ``URLparser`` (defined elsewhere) and parsed
    with BeautifulSoup.

    Args:
        url: Address of the post page.

    Returns:
        dict with the keys ``url``, ``title``, ``date`` and ``post``.
        ``post`` holds the ``post_wash``-ed text of ``td.tdc``, or the
        integer ``1`` when it could not be extracted.

    Raises:
        AttributeError: if the title cell or the date cell is not found.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")
    db_record = {"url": url}

    title_cell = bs0bj.find("td", {"class": "title"})
    db_record["title"] = title_cell.get_text().strip()

    # The date sits two <td> elements after the title cell.
    date_cell = title_cell.findNext("td").findNext("td")
    db_record["date"] = date_cell.get_text().strip()

    try:
        body = bs0bj.find("td", {"class": "tdc"}).get_text().strip()
        db_record["post"] = post_wash(body)
    except Exception:
        # Best effort: a missing or unparsable body is recorded as 1.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        db_record["post"] = 1

    return db_record
Exemplo n.º 10
0
def content_parse(url):
    """Scrape one board post into a record dict.

    The page is fetched with ``URLparser`` (defined elsewhere), parsed with
    BeautifulSoup, and narrowed to ``div#board_view``.

    Args:
        url: Address of the post page.

    Returns:
        dict with the keys ``url``, ``title``, ``date`` and ``post``.
        ``date`` is the text of the ``<strong>`` inside ``p.writer``.
        ``post`` holds the ``post_wash``-ed text of ``div.board_stance``, or
        the integer ``1`` when it could not be extracted.

    Raises:
        AttributeError: if the container, title or date element is missing.
    """
    db_record = {}
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")
    bs0bj = bs0bj.find("div", {"id": "board_view"})
    db_record["url"] = url

    db_record["title"] = bs0bj.find("h3").get_text().strip()

    date_tag = bs0bj.find("p", {"class": "writer"}).find("strong")
    db_record["date"] = date_tag.get_text().strip()

    try:
        body = bs0bj.find("div", {"class": "board_stance"}).get_text().strip()
        db_record["post"] = post_wash(body)
    except Exception:
        # Best effort: a missing or unparsable body is recorded as 1.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        db_record["post"] = 1

    return db_record
Exemplo n.º 11
0
def content_parse(url):
    """Scrape one board post into a record dict.

    The page is fetched with ``URLparser`` (defined elsewhere) and parsed
    with BeautifulSoup.

    Args:
        url: Address of the post page.

    Returns:
        dict with the keys ``url``, ``title``, ``date`` and ``post``.
        ``date`` is built from the first three dot-separated fields of
        ``span.date`` joined with ``-``. ``post`` holds the ``post_wash``-ed
        text of ``div.boardReadBody``, or the integer ``1`` when it could not
        be extracted.

    Raises:
        AttributeError: if the title or date element is not found.
        IndexError: if the date text has fewer than three dot-separated
            fields.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")
    db_record = {"url": url}

    title = bs0bj.find("h3", {"class": "title"}).get_text().strip()
    db_record["title"] = title

    # Split once; anything after the third field is dropped.
    fields = bs0bj.find("span", {"class": "date"}).get_text().strip().split('.')
    db_record["date"] = fields[0] + "-" + fields[1] + "-" + fields[2]

    try:
        body = bs0bj.find("div", {"class": "boardReadBody"}).get_text().strip()
        db_record["post"] = post_wash(body)
    except Exception:
        # Best effort: a missing or unparsable body is recorded as 1.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        db_record["post"] = 1

    return db_record
Exemplo n.º 12
0
def content_parse(domain, url):
    """Scrape one board post into a record dict.

    The page is fetched with ``URLparser`` (defined elsewhere) and parsed
    with BeautifulSoup. Title and date come from the ``tr.head`` row; the
    body is the text of the ``<tr>`` that follows it.

    Args:
        domain: Not used by this function; kept for the caller's signature.
        url: Address of the post page.

    Returns:
        dict with the keys ``url``, ``title``, ``date`` and ``post``.
        ``post`` holds the ``post_wash``-ed body text, or the integer ``1``
        when it could not be extracted.

    Raises:
        AttributeError: if the head row, title cell or date cell is missing.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")
    db_record = {"url": url}

    head_row = bs0bj.find("tr", {"class": "head"})
    title_cell = head_row.find("td", {"class": "first txt-l"})
    db_record["title"] = title_cell.get_text().strip()
    # The date sits two <td> elements after the title cell.
    date_cell = title_cell.find_next("td").find_next("td")
    db_record["date"] = date_cell.get_text().strip()

    try:
        body_row = head_row.find_next("tr")
        db_record["post"] = post_wash(str(body_row.get_text().strip()))
    except Exception:
        # Best effort: a missing or unparsable body is recorded as 1.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        db_record["post"] = 1

    return db_record
Exemplo n.º 13
0
def content_parse(url):
    """Scrape one board post into a record dict.

    The page is fetched with ``URLparser`` (defined elsewhere), parsed with
    BeautifulSoup, and narrowed to ``article#bo_v``.

    Args:
        url: Address of the post page.

    Returns:
        dict with the keys ``url``, ``title``, ``date`` and ``post``.
        ``date`` is ``"20"`` followed by the text of the second ``<strong>``
        reached from ``section#bo_v_info``. ``post`` holds the
        ``post_wash``-ed text of ``div#bo_v_con``, or the integer ``1`` when
        it could not be extracted.

    Raises:
        AttributeError: if the article, title or date element is missing.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser").find(
        "article", {"id": "bo_v"})
    db_record = {"url": url}

    title = bs0bj.find("h1", {"id": "bo_v_title"}).get_text().strip()
    db_record["title"] = title

    date_tag = bs0bj.find("section", {"id": "bo_v_info"}).find(
        "strong").find_next("strong")
    # NOTE(review): the "20" prefix presumably expands a two-digit year
    # shown on the page - confirm against real markup.
    db_record["date"] = "20" + date_tag.get_text().strip()

    try:
        body = bs0bj.find("div", {"id": "bo_v_con"}).get_text().strip()
        db_record["post"] = post_wash(body)
    except Exception:
        # Best effort: a missing or unparsable body is recorded as 1.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        db_record["post"] = 1

    return db_record
Exemplo n.º 14
0
def content_parse(url):
    """Scrape one post page into a record dict.

    The page is fetched with ``URLparser`` (defined elsewhere) and parsed
    with BeautifulSoup.

    Args:
        url: Address of the post page.

    Returns:
        dict with the keys ``url``, ``title``, ``date`` and ``post``.
        ``post`` holds the ``post_wash``-ed text of
        ``div.view_txt_container``, or the integer ``1`` when it could not be
        extracted.

    Raises:
        AttributeError: if the title or date span is not found.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")
    db_record = {"url": url}

    title_tag = bs0bj.find("span", {"class": "view_subj_core"})
    db_record["title"] = title_tag.get_text().strip()

    date_tag = bs0bj.find("span", {"class": "view_subj_date"})
    db_record["date"] = date_tag.get_text().strip()

    try:
        body_tag = bs0bj.find("div", {"class": "view_txt_container"})
        body = body_tag.get_text().strip()
        db_record["post"] = post_wash(str(body))
    except Exception:
        # Best effort: a missing or unparsable body is recorded as 1.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        db_record["post"] = 1
    return db_record
Exemplo n.º 15
0
def content_parse(domain, url):
    """Scrape one board post into a record dict.

    The page is fetched with ``URLparser`` (defined elsewhere), parsed with
    BeautifulSoup, and narrowed to ``table.board_view``.

    Args:
        domain: Not used by this function; kept for the caller's signature.
        url: Address of the post page.

    Returns:
        dict with the keys ``url``, ``title``, ``date`` and ``post``.
        ``title`` is the stripped text of the table's ``<thead>``. ``date``
        is the third space-separated token of the third ``<td>`` reached from
        the first body row. ``post`` holds the ``post_wash``-ed text of
        ``td.tdc``, or the integer ``1`` when it could not be extracted.

    Raises:
        AttributeError: if the table, header or date cell is missing.
        IndexError: if the date cell text has fewer than three tokens.
    """
    html = URLparser(url)
    bs0bj = BeautifulSoup(html.read(), "html.parser")
    db_record = {"url": url}

    bs0bj = bs0bj.find("table", {"class": "board_view"})
    db_record["title"] = bs0bj.find("thead").get_text().strip()

    first_cell = bs0bj.find("tbody").find("tr").find("td")
    date_cell = first_cell.find_next("td").find_next("td")
    db_record["date"] = date_cell.get_text().strip().split(" ")[2]

    try:
        body_cell = bs0bj.find("tbody").find("td", {"class": "tdc"})
        body = body_cell.get_text().strip()
        db_record["post"] = post_wash(str(body))
    except Exception:
        # Best effort: a missing or unparsable body is recorded as 1.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        db_record["post"] = 1

    return db_record