def find_href(self, tbname, colval):
    '''Look up the rows whose ``href`` column equals ``colval``.

    ``tbname`` is converted to a table name with ``comm.site2db`` before
    the lookup is delegated to ``self.find_col``.

    Returns the result of ``self.find_col``, or an empty list when the
    lookup raises ``ValueError``.
    '''
    try:
        return self.find_col(comm.site2db(tbname), 'href', colval)
    except ValueError as exc:
        # Log the caught instance (and therefore its message) instead of
        # the bare ``ValueError`` class, which says nothing about the cause.
        log.add_log(log.g_logger.error(exc))
        return []
def writeList(self): f = open('tmp/tmp.tr','w') f.writelines(self.list) log.add_log(log.g_logger.info('完成目录树,保存至tmp/tmp.tr文件中')) f.close() for line in self.list: print line,
def show_tree(self, dbfile):
    '''Generate the directory tree for ``dbfile`` and write it out.

    Opens the database named after ``dbfile`` with its extension replaced
    by ``_tr.db``, runs ``getDirList`` from parent id 0 for every table
    reported by ``all_table()[0]``, closes the database and finally calls
    ``writeList``.
    '''
    log.add_log(log.g_logger.info('正在生成目录树...'))
    tr_db = os.path.splitext(dbfile)[0] + '_tr.db'
    self.ld = LocalData(tr_db)
    try:
        for tb in self.ld.all_table()[0]:
            self.getDirList(0, tb)
    finally:
        # Close the tree database even if the traversal fails part-way.
        self.ld.close_db()
    self.writeList()
def add_data(self, ctbox, **data):
    '''Store one page record and forward its href to ``ctbox``.

    ``data`` must contain at least ``site`` and ``href``. The table for the
    site is created when ``find_table`` does not report it; the record is
    inserted when no row with the same href exists and updated otherwise.
    Afterwards ``ctbox.add_data`` is called with the href only.
    '''
    log.add_log(log.g_logger.info('页面数据加入数据库'))
    table = comm.site2db(data['site'])
    if not self.find_table(table):
        self.__create__(table)

    matches = self.find_col(table, 'href', data['href'])
    if len(matches) == 0:
        self.__insert__(**data)
    else:
        self.__update__(**data)

    # Only the href is handed on to the container.
    ctbox.add_data(href=data['href'])
def end_data(self):
    '''Save the in-memory database to the local database file at shutdown.

    Takes the script text from ``self.get_men_script()``, closes the
    in-memory cursor and connection, removes any existing file at
    ``self.dbfile`` and executes the script against a fresh connection to
    that path.
    '''
    log.add_log(log.g_logger.info('将内存中的数据保存到本地数据库中'))
    str_sql = self.get_men_script().getvalue()
    self.cur_mem.close()
    self.__close__(self.conn_mem)

    # Local database: start from a clean file.
    try:
        os.remove(self.dbfile)
    except OSError:
        # Removal is best-effort (e.g. the file does not exist yet). Only
        # OS-level failures are ignored; anything else now propagates.
        pass

    conn_file = sqlite3.connect(self.dbfile)
    cur_file = conn_file.cursor()
    cur_file.executescript(str_sql)
    cur_file.close()
    self.__close__(conn_file)
def save_local(self, **select):
    '''Export database content to the local directory.

    Expected keyword arguments, for example:
    select = {'dbfile': 'test.db', 'tbname': 'www_baidu_com',
              'href': 'http://www.baidu.com/1'}

    The first selector that is not None decides what is exported:
    ``href``   -> the single page returned by ``show_href(tbname, href)``
    ``tbname`` -> one table via ``save_table``
    ``dbfile`` -> every table from ``all_table()[0]``, after which the
                  database is closed and the target path is logged.
    A missing key is treated like None.
    '''
    log.add_log(log.g_logger.info('正在将数据库中数据转换到本地目录下'))
    href = select.get('href')
    tbname = select.get('tbname')
    dbfile = select.get('dbfile')

    if href is not None:
        self.save_file(self.show_href(tbname, href))
        return
    if tbname is not None:
        self.save_table(tbname)
        return
    if dbfile is not None:
        try:
            for tb in self.all_table()[0]:
                self.save_table(tb)
        except Exception as exc:
            # Still best-effort (the database is closed and the method
            # returns normally), but the failure is now recorded instead
            # of being silently discarded.
            log.add_log(log.g_logger.error(exc))
        self.close_db()
        log.add_log(log.g_logger.info('完成转换:%s' % os.path.abspath('local/'+''.join(dbfile.split('.')[:-1]))))
        return
def add_data(self, **data):
    '''Add each part of ``data['href']`` to the directory tree.

    The href is split with ``self.split_data``. For each part, in order, a
    new node is added under the current parent when the part's hash is not
    yet in ``self.list_data``; otherwise the existing node id is looked up.
    Either way the result becomes ``self.par_id`` for the next part.
    '''
    try:
        site = self.get_scheme_netloc_path_(data['href']).netloc
        self.create_tree(site)
    except Exception:
        # Best-effort setup. Narrowed from a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        # NOTE(review): if this fails before ``site`` is assigned, every
        # use of ``site`` below raises NameError, which the handler inside
        # the loop swallows as well.
        pass

    href = self.split_data(data['href'])
    log.add_log(log.g_logger.debug('分割url '+str(href)))
    for i, part in enumerate(href):
        if i == 0:
            # The first part hangs directly under the root.
            self.par_id = 0
        log.add_log(log.g_logger.debug('加入路径'+str(i)+str(part)))
        d_hash = comm.get_hash(part)
        try:
            if d_hash not in self.list_data:
                # Path not added yet.
                self.par_id = self.add_node(site, part, i, self.par_id)
            else:
                self.par_id = self.dbTree.find_id(comm.site2db(site), part)
        except Exception:
            # Failures for one part leave ``self.par_id`` unchanged and do
            # not stop the remaining parts from being processed.
            pass
        log.add_log(log.g_logger.debug(str(self.par_id)+'父节点'))
def getDirList(self, row_id, tbname):
    '''Append a text-tree rendering of the entries under ``row_id`` to ``self.list``.

    Entries come from ``self.list_dir(row_id, tbname)``. An entry for which
    ``getCount`` reports nothing is written as a leaf; any other entry is
    written and then expanded recursively with a deeper ``self.SPACE``
    prefix.

    Returns ``self.list``.
    '''
    entries = self.list_dir(row_id, tbname)
    entry_total = self.getCount(row_id, tbname)
    leaves_seen = 0
    log.add_log(log.g_logger.debug(entries))
    for entry in entries:
        path = entry['path']
        entry_id = self.find_id(tbname, path)
        child_count = self.getCount(entry_id, tbname)
        if not child_count:
            leaves_seen += 1
            # "`--" is used once the leaf counter reaches the entry total.
            branch = "|--" if leaves_seen != entry_total else "`--"
            self.list.append(str(self.SPACE) + branch + path + "\n")
        else:
            self.list.append(str(self.SPACE) + "|--" + path + "\n")
            # Descend with a deeper prefix, then restore the saved one.
            # Slicing a fixed 4 characters off the end did not match the
            # width of the unit that was added and corrupted the prefix
            # for later siblings once the tree was more than one level deep.
            outer_prefix = self.SPACE
            self.SPACE = outer_prefix + "| "
            self.getDirList(entry_id, tbname)
            self.SPACE = outer_prefix
    return self.list
def test(self):
    '''Debug helper: run three fixed statements and log each result at debug level.'''
    tbname = '172_4_16_168'
    statements = (
        # List all table names.
        '''SELECT name FROM sqlite_master WHERE type='table' order by name''',
        # Show every row of the sample table.
        '''SELECT * FROM '%s' ''' % (tbname),
        # Show the column information of the sample table.
        "PRAGMA table_info('%s')" % (tbname),
    )
    for str_sql in statements:
        log.add_log(log.g_logger.debug(self.__cmd__(str_sql)))
def add_node(self,site, data, deep, par_id):
    '''Insert one tree node and remember the hash of its data.

    Delegates to ``self.insert_data(site, data, deep, par_id)``, appends
    ``comm.get_hash(data)`` to ``self.list_data`` and returns the value
    produced by ``insert_data``.
    '''
    log.add_log(log.g_logger.info('加入目录树结点'))
    node_id = self.insert_data(site, data, deep, par_id)
    seen_hashes = self.list_data
    seen_hashes.append(comm.get_hash(data))
    return node_id