def __init__(self, query_, page):
    """Store the page and query, open a DB connection and cache the count.

    Args:
        query_: Query text; it is stored UTF-8 encoded in ``self.query``.
        page: Stored unchanged in ``self.page``.

    After construction ``self.conn`` holds the result of
    ``db_conn.db_conn()``, ``self.db`` the ``model.model`` built on it and
    ``self.cnt`` whatever ``tb_detail_count`` returns for the encoded query.
    """
    self.page = page
    # Each attribute is set as soon as its value exists, so a failure in a
    # later step leaves the earlier attributes populated.
    self.query = encoded_query = query_.encode("utf-8")
    self.conn = connection = db_conn.db_conn()
    self.db = store = model.model(connection)
    self.cnt = store.tb_detail_count(encoded_query)
def ypim_crawler_inven3():
    """Start an ``inven_find`` for every (field name, URL) pair of group 3.

    The URLs are the ``href`` values of the rows returned by
    ``tb_url_detail_select("inven", 3)``. For each of the field names
    "subjcont" and "nicname", in that order, one ``inven_find`` is built per
    URL and its ``start()`` method is called. Nothing is returned.
    """
    connection = db_conn.db_conn()
    model_ = model.model(connection)
    # NOTE(review): the 3 is presumably a part number of the stored URL
    # list -- confirm against tb_url_detail_select.
    hrefs = [row['href'] for row in model_.tb_url_detail_select("inven", 3)]
    for field_name in ("subjcont", "nicname"):
        for href in hrefs:
            inven_find(model_, href, field_name).start()
def ypim_crawler_inven2():
    """Start an ``inven_find`` for every (field name, URL) pair of group 2.

    The URLs are the ``href`` values of the rows returned by
    ``tb_url_detail_select("inven", 2)``. For each of the field names
    "subjcont" and "nicname", in that order, one ``inven_find`` is built per
    URL and its ``start()`` method is called. Nothing is returned.
    """
    connection = db_conn.db_conn()
    model_ = model.model(connection)
    # NOTE(review): the 2 is presumably a part number of the stored URL
    # list -- confirm against tb_url_detail_select.
    hrefs = [row['href'] for row in model_.tb_url_detail_select("inven", 2)]
    for field_name in ("subjcont", "nicname"):
        for href in hrefs:
            inven_find(model_, href, field_name).start()
def init(query_):
    """Populate the module-level ``conn``, ``query`` and ``lock`` globals.

    Args:
        query_: Value stored unchanged in the global ``query``.

    ``conn`` receives a fresh ``db_conn.db_conn()`` and ``lock`` a new
    ``threading.Lock``. Calling this again rebinds all three globals.
    """
    global conn
    global query
    global lock

    # Order is kept: the connection is opened before the other globals are
    # rebound, so a failure here leaves ``query`` and ``lock`` untouched.
    conn = db_conn.db_conn()
    query = query_
    lock = threading.Lock()
def web_site(query, web_site):
    """Return the JSON response for ``tb_detail_group(query, web_site)``.

    Args:
        query: Passed through unchanged to ``tb_detail_group``.
        web_site: Passed through unchanged to ``tb_detail_group``.

    Returns:
        The result of ``jsonify`` applied to the model's return value.
    """
    conn = db_conn.db_conn()
    model_ = model.model(conn)
    # Debug trace of the incoming arguments. repr() keeps the line pure
    # ASCII on Python 2, so a non-ASCII unicode query can no longer raise
    # UnicodeEncodeError when stdout is not UTF-8 (piped output, WSGI).
    print("%r %r" % (query, web_site))
    return jsonify(model_.tb_detail_group(query, web_site))
def groups(query):
    """Return the JSON response for ``tb_detail_groups_cnt(query)``.

    Args:
        query: Passed through unchanged to the model.

    Returns:
        The result of ``jsonify`` applied to the model's return value.
    """
    model_ = model.model(db_conn.db_conn())
    payload = model_.tb_detail_groups_cnt(query)
    return jsonify(payload)
def isfirst(query):
    """Return, as JSON, the ``que_seqno`` of the first row stored for *query*.

    Args:
        query: Query text; it is UTF-8 encoded before the lookup.

    Returns:
        ``jsonify`` applied to ``str(que_seqno)`` of the first row returned
        by ``tb_query_select``.

    Raises:
        Whatever indexing ``[0]`` raises when ``tb_query_select`` returns no
        rows (``IndexError`` for a list); the empty case is not handled.
    """
    model_ = model.model(db_conn.db_conn())
    rows = model_.tb_query_select(query.encode("utf-8"))
    first_seqno = rows[0]['que_seqno']
    return jsonify(str(first_seqno))
def _inven_board_url_from_href(href):
    """Return the board URL to store for *href*, or None to skip it.

    * No "come_idx" in the href: None.
    * Href also contains "category": the href itself, unchanged.
    * Otherwise: the canonical powerbbs.php URL built from the numeric
      ``come_idx`` value, or None (with a diagnostic line) when no number
      follows ``come_idx=``.
    """
    import re

    if "come_idx" not in href:
        return None
    if "category" in href:
        return href
    # Read every digit of the id. A fixed four-character slice would cut
    # longer ids short and choke on shorter ids followed by "&...".
    match = re.search(r"come_idx=(\d+)", href)
    if match is None:
        print("skipping href without numeric come_idx: %r" % (href,))
        return None
    # int() round-trip normalises the number (e.g. drops leading zeros).
    return ("http://www.inven.co.kr/board/powerbbs.php?come_idx="
            + str(int(match.group(1))))


def inven_get_come_idx():
    """Collect inven board URLs and store them in numbered parts of 100.

    Steps:
      1. For every ``<a>`` found in the items yielded by ``inven()``, run a
         ``YPIMcrawler_nonlogin`` on its href and take the result from the
         crawler's queue.
      2. Walk the siblings of each returned element and turn every link
         that mentions ``come_idx`` into a board URL.
      3. Register "inven" via ``tb_url_insert`` and insert the distinct
         URLs with ``tb_url_detail_insert``. The part number starts at 1
         and goes up by one after every 100 URLs.

    Any exception is caught, printed and swallowed; nothing is returned.
    """
    crawl_results = []
    non_overlap_list = []
    try:
        for menu in inven():
            for link in menu.findAll('a'):
                queue = Queue.Queue()
                print(link['href'])
                crawler = YPIMcrawler_nonlogin(
                    queue, link['href'], None, "li", "class", "firstMenuItem")
                crawler.start()
                # Blocks until this crawler has put its result on the queue.
                crawl_results.append(queue.get())

        for elements in crawl_results:
            for element in elements:
                for sibling in element.parent:
                    try:
                        url = _inven_board_url_from_href(sibling.a['href'])
                    except Exception:
                        # Best effort: skip children whose link cannot be
                        # read (e.g. no ``.a`` or no ``href``), but let
                        # KeyboardInterrupt/SystemExit through.
                        continue
                    if url is not None:
                        non_overlap_list.append(url)

        # NOTE(review): other call sites pass ``db_conn.db_conn()`` straight
        # to ``model.model``; here ``.db_conn()`` is called on that result
        # as well. Left unchanged -- confirm which form is intended.
        db = db_conn.db_conn()
        conn = db.db_conn()
        model_ = model.model(conn)
        model_.tb_url_insert("inven")
        url_seqno = model_.tb_url_select("inven")[0]['url_seqno']

        # This lock is created here and never shared, so it only serialises
        # the calls below. ``with`` guarantees it is released on error.
        lock = threading.Lock()
        for n, url in enumerate(set(non_overlap_list)):
            part_num = n // 100 + 1
            with lock:
                model_.tb_url_detail_insert(url_seqno, part_num, url)
    except Exception as e:
        print(e)