Example #1
    def run(self):
        global conn
        print "mlbpark_start"
        db = model.model(conn)

        for i in self.data:
            for j in i.findAll('div', {'class': 'page'}):  # pagination <div class="page"> blocks
                max_page = 0
                if (j.text):
                    max_page = len(j.findAll("a")) + 1
                    try:
                        if (max_page > 3):
                            max_page = 3
                        now_page = 1
                        else_page = 1
                        while (now_page <= max_page):
                            if (now_page == 1):
                                mlbpark_paging_find(db, self.url,
                                                    now_page).start()
                            else:
                                else_page += 30
                                mlbpark_paging_find(db, self.url,
                                                    else_page).start()
                            now_page += 1
                    except:
                        pass
                else:
                    break
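Most of the run() methods in these examples follow the same pattern: count the pagination links, cap the page count (here at 3), and start one crawler thread per page. A minimal standalone sketch of that cap-and-fan-out idea, using only the standard library and a hypothetical fetch_page(url, page_no) callable in place of the project's mlbpark_paging_find worker threads:

import threading

MAX_PAGES = 3  # same cap the crawlers above use

def crawl_pages(fetch_page, url, page_link_count):
    # fetch_page(url, page_no) is a hypothetical stand-in for the
    # project's *_paging_find(...).start() worker threads.
    max_page = min(page_link_count + 1, MAX_PAGES)
    workers = []
    for page_no in range(1, max_page + 1):
        t = threading.Thread(target=fetch_page, args=(url, page_no))
        t.start()
        workers.append(t)
    for t in workers:
        t.join()  # wait for every page to finish before returning
    return max_page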
Example #2
def bing_find():
    global query
    url = "https://www.bing.com/search?&q=" + urllib.quote("\"" + query + "\"")

    req_queue = Queue.Queue()
    header = {
        "user-agent":
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
    }
    YPIMcrawler_nonlogin(req_queue, url, header, "span", "class",
                         "sb_count").start()  # class
    data = req_queue.get()
    db = model.model(conn)

    max_page = 0
    try:

        for i in data:

            max_page = math.ceil(
                int(i.text.encode("utf-8").replace(',', "").replace('결과', ""))
                / 10.0)  # strip commas and the '결과' ("results") label, then 10 results per page

            if (max_page > 7):
                max_page = 7
        now_page = 1
        first = 1
        while (now_page < max_page):
            bing_paging_find(db, url, first).start()
            first += 10
            now_page += 1

    except Exception as e:
        print e
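The page arithmetic here (and in the Yahoo example, #7) is: parse the result count out of the count element, divide by 10 results per page, round up with math.ceil, and cap at 7 pages; the "first" URL parameter then advances by 10 per page. Note that the loop condition now_page < max_page stops one page short of that cap. A small sketch of the offset calculation on its own, independent of the crawler classes:

import math

RESULTS_PER_PAGE = 10
PAGE_CAP = 7

def page_offsets(result_count):
    # result_count is the integer parsed from the results-count element,
    # e.g. 1234 results -> 7 pages -> offsets 1, 11, 21, ..., 61.
    max_page = min(int(math.ceil(result_count / float(RESULTS_PER_PAGE))), PAGE_CAP)
    return [1 + RESULTS_PER_PAGE * (page - 1) for page in range(1, max_page + 1)]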
Example #3
    def run(self):
        print "chosun_find"
        # req_class, url, header, search_tag, search_attr, search_key
        return_list = []

        for i in self.data:

            try:
                count_article = i.h3.em.text.strip('(').encode('utf-8')
                spl = count_article.index('건')  # '건' marks the end of the article count - wikidocs.net/13
                n_cnt_article = int(count_article[:spl])
                total_page_no = int(math.ceil(n_cnt_article / 10.0))  # casting

                now_page = 1
                end_page_no = 1
                db = model.model(conn)

                while (True):
                    if (now_page > total_page_no):
                        break
                    else:
                        #print "chosun_find_threading..."
                        chosun_paging_find(self.url, now_page, db).start()

                    now_page += 1
                    end_page_no += 1

                    if (end_page_no == 4):
                        end_page_no = total_page_no
                        break

            except:
                print "chosun Error"
                pass
Example #4
    def run(self):
        db = model.model(conn)
        total_page_no = -1
        for i in self.data:
            for j in i.findAll('td'):
                try:
                    if (j['align'] == "center"):
                        if (j['height'] == str(40)):
                            try:
                                if (len(j.findAll('font')) > 0):
                                    total_page_no += len(j.findAll('font'))

                            except Exception as e:
                                print e

                            if (total_page_no >= 3):
                                total_page_no = 3

                            now_page = 1

                            while (True):
                                if (now_page > total_page_no):
                                    break
                                else:
                                    todayhumor_paging_find(
                                        db, self.url[:-1], now_page).start()

                                now_page += 1
                except:
                    pass
Example #5
    def run(self):
        global conn
        print "ddanzi_start"
        db = model.model(conn)
        for i in self.data:
            try:
                total_page_no = 0  # avoid an unbound total_page_no when no count is found
                for j in i.findAll('h3', {'class': 'subTitle'}):
                    count_article = j.text.encode('utf-8')
                    if (count_article.find('문서') >= 0):  # '문서' means "documents" (the result-count label)
                        n_cnt_article = int(
                            j.span.text.encode('utf-8').strip('(|)'))
                        total_page_no = int(math.ceil(n_cnt_article / 30.0))

                    if (total_page_no >= 3):
                        total_page_no = 3

                    now_page = 1
                    while (True):
                        if (now_page > total_page_no):
                            break
                        else:
                            ddanzi_paging_find(db, self.url, now_page).start()
                        now_page += 1
            except Exception as e:

                print e
                pass
Example #6
    def run(self):
        global conn
        print "ppomppu_start"
        db = model.model(conn)

        for i in self.data:
            try:
                total_page_no = len(i.findAll('a'))

                total_page_no = min(total_page_no + 1, 3)  # link count + current page, capped at 3

                now_page = 1
                while (True):
                    if (now_page > total_page_no):
                        break
                    else:
                        ppomppu_paging_find(db, self.url, now_page).start()
                    now_page += 1
            except Exception as e:
                print e
                pass
Example #7
def yahoo_find():
    global query
    url = "https://search.yahoo.com/search?fr=yfp-t&fp=1&toggle=1&cop=mss&ei=UTF-8&p=" + "\"" + query + "\""

    req_queue = Queue.Queue()
    header = {
        "user-agent":
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
    }
    YPIMcrawler_nonlogin(req_queue, url, header, "div", "class",
                         "compPagination").start()  # class
    data = req_queue.get()
    db = model.model(conn)

    max_page = 0
    try:
        for i in data:

            max_page = math.ceil(
                int(i.span.text.replace(" results", "").replace(",", "")) /
                10.0)

            if (max_page > 7):
                max_page = 7
        now_page = 1
        first = 1
        while (now_page < max_page):

            yahoo_paging_find(db, url, first).start()
            first += 10
            now_page += 1

    except Exception as e:
        print e
Example #8
    def __init__(self, query_, page):
        self.page = page
        self.query = query_.encode("utf-8")

        self.conn = db_conn.db_conn()
        self.db = model.model(self.conn)
        self.cnt = self.db.tb_detail_count(self.query)
Example #9
    def __init__(self, board):
        threading.Thread.__init__(self)
        global query, conn  # added
        self.url = "http://cafe984.daum.net/_c21_/cafesearch?grpid=mEr9&listnum=50&head=&viewtype=all&searchPeriod=all&item=" + board
        #html = urllib2.urlopen(self.url + board[1][1] + unicode(query, 'utf-8').encode('euc-kr')).read()

        req_queue = Queue.Queue()
        self.db = model.model(conn)  # added

        YPIMcrawler_nonlogin(
            req_queue, self.url + unicode(query, 'utf-8').encode('euc-kr'),
            None, "tr", "class", "list_row_info").start()  # class
        self.data = req_queue.get()
Example #10
def ypim_crawler_inven3():
    conn = db_conn.db_conn()

    model_ = model.model(conn)
    return_list = model_.tb_url_detail_select("inven", 3)
    url_list = []
    for i in return_list:
        url_list.append(i['href'])

    name_list = ["subjcont", "nicname"]

    for i in name_list:
        for j in url_list:
            inven_find(model_, j, i).start()
Example #11
    def run(self):
        global conn
        print "dcinside_start"
        db = model.model(conn)

        for i in self.data:
            max_page = 0  # 0 pages when no paging div is found
            for j in i.findAll('div', {'id': 'dgn_btn_paging'}):
                max_page = len(j.findAll('a'))

            if (max_page > 3):
                max_page = 3
            now_page = 1
            while (now_page <= max_page):
                dcinside_paging_find(db, self.url, now_page).start()
                now_page += 1
Example #12
def ypim_crawler_inven2():
    conn = db_conn.db_conn()

    model_ = model.model(conn)
    return_list = model_.tb_url_detail_select("inven", 2)

    #url_list = ["http://www.inven.co.kr/board/powerbbs.php?come_idx=2730"]
    url_list = []
    for i in return_list:
        url_list.append(i['href'])

    name_list = ["subjcont", "nicname"]

    for i in name_list:
        for j in url_list:
            inven_find(model_, j, i).start()
Example #13
    def run(self):
        global conn
        db = model.model(conn)
        max_page = 0
        try:
            for i in self.data:
                max_page = len(i.findAll('a'))
                print max_page
            if (max_page > 3):
                max_page = 3

            now_page = 1
            while (now_page <= max_page):
                jobkorea_paging_find(db, self.url + "&page=", now_page).start()
                now_page += 1

        except Exception as e:
            print e
Example #14
    def run(self):
        global conn
        print "clien_start"
        db = model.model(conn)

        for i in self.data:
            try:
                total_page_no = len(i.findAll("a"))
                print total_page_no
                if (total_page_no >= 3):
                    total_page_no = 3
                now_page = 1
                while (now_page <= total_page_no):
                    clien_paging_find(db, self.url, now_page).start()
                    now_page += 1

            except Exception as e:
                print e
Example #15
    def run(self):
        global query, conn, lock

        url = "https://www.facebook.com/search/str/" + query + "/keywords_users"
        resp = self.broswer.open(
            url)  # data = set_cookie_broswer (cookie values are stored)
        html = str(resp.read()).replace("<!--", "").replace("-->", "")
        Soup = BeautifulSoup(html, "html.parser")
        db = model.model(conn)
        # print Soup.prettify()
        data_list = Soup.find_all('div', {"class", "_glj"})
        return_facebook_list = []

        for i in data_list:
            each_data = i.find('div', {"class", "_gll"})
            each_name = each_data.text
            each_url = each_data.a['href']

            each_data = i.find('div', {"class", "_glm"})
            job = each_data.text

            each_data = i.find('div', {"class", "_glo"})
            school = each_data.text

            # title = each_name + ", " +  + ", " + job + ", " + school

            title = each_name + "/ " + job + "/ " + school
            img = "facebook.png"
            lock.acquire()
            face_data = {
                "web_site": "facebook",
                "href": each_url,
                "title": title,
                "img": img
            }
            db.tb_detail_insert(query, face_data)
            lock.release()

        print "facebook_end"
Example #16
def inven_get_come_idx():
    result_list = []  # avoids shadowing the built-in 'list'
    non_overlap_list = []
    """
    url = ["http://lovelive.inven.co.kr", "http://durango.inven.co.kr/"]

    for i in url:
        queue = Queue.Queue()
        t = YPIMcrawler_nonlogin(queue,  i, None, "li", "class", "firstMenuItem")
        t.start()
        list.append(queue.get())
    """
    try:

        for i in inven():
            for j in i.findAll('a'):
                queue = Queue.Queue()
                print j['href']
                t = YPIMcrawler_nonlogin(queue, j['href'], None, "li", "class",
                                         "firstMenuItem")
                t.start()
                result_list.append(queue.get())

        for i in result_list:
            for j in i:
                for x in j.parent:
                    try:
                        if (x.a['href'].find("come_idx") >= 0):
                            if (x.a['href'].find("category") >= 0):
                                non_overlap_list.append(x.a['href'])
                            else:
                                index = x.a['href'].find("come_idx")
                                try:
                                    href = int(x.a['href'][index + 9:index + 13])  # 4-digit come_idx value

                                    non_overlap_list.append(
                                        "http://www.inven.co.kr/board/powerbbs.php?come_idx="
                                        + str(href))
                                except Exception as e:
                                    print e
                                    pass
                    except:
                        pass
        db = db_conn.db_conn()
        conn = db.db_conn()

        model_ = model.model(conn)

        model_.tb_url_insert("inven")
        url_seqno = model_.tb_url_select("inven")[0]['url_seqno']

        part_num = 0
        lock = threading.Lock()
        for n, i in enumerate(set(non_overlap_list)):
            if (n % 100 == 0):
                part_num += 1

            lock.acquire()
            model_.tb_url_detail_insert(url_seqno, part_num, i)
            lock.release()

    except Exception as e:
        print e
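Example #16 deduplicates the collected board URLs with set() and bumps part_num once per 100 inserted rows. A standalone sketch of that dedupe-and-batch step, with store_url standing in for model_.tb_url_detail_insert (note that set() does not preserve the original URL order):

def store_unique_urls(store_url, url_seqno, urls, batch_size=100):
    # store_url(url_seqno, part_num, url) is a hypothetical stand-in for
    # model_.tb_url_detail_insert; each distinct URL is inserted once.
    part_num = 0
    for n, url in enumerate(set(urls)):
        if n % batch_size == 0:
            part_num += 1
        store_url(url_seqno, part_num, url)
    return part_num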
Example #17
File: main.py, Project: eunseokOh/ypim
def groups(query):
    conn = db_conn.db_conn()
    model_ = model.model(conn)
    return jsonify(model_.tb_detail_groups_cnt(query))
Example #18
File: main.py, Project: eunseokOh/ypim
def web_site(query, web_site):
    conn = db_conn.db_conn()
    model_ = model.model(conn)
    print query, web_site
    return jsonify(model_.tb_detail_group(query, web_site))
Example #19
File: main.py, Project: eunseokOh/ypim
def isfirst(query):
    conn = db_conn.db_conn()
    model_ = model.model(conn)
    return jsonify(
        str(model_.tb_query_select(query.encode("utf-8"))[0]['que_seqno']))
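Examples #17-#19 are view functions from main.py that wrap a model query in jsonify, Flask's JSON response helper, so they are presumably registered as Flask routes. The excerpts do not show the route decorators, so the wiring below is an assumption; the URL rule /groups/<query> is hypothetical:

from flask import Flask, jsonify
import db_conn  # project module, assumed importable
import model    # project module, assumed importable

app = Flask(__name__)

@app.route("/groups/<query>")  # hypothetical URL rule
def groups(query):
    conn = db_conn.db_conn()
    model_ = model.model(conn)
    return jsonify(model_.tb_detail_groups_cnt(query))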