def open(self, path):
    '''
    open storage or create it if not exist

    While holding ``self.lock`` the path is stored on the instance, the
    storage is created when ``path`` does not exist or loaded when it
    does, and the table index is rebuilt.

    :param path: location of the storage to open
    :return: self
    :raises Exception: any error raised while opening is logged and
        re-raised unchanged
    '''
    try:
        with clock(self.lock):
            # init storage path
            self.path = path

            if not path_exists(self.path):
                # create database
                self._create()
            else:
                # load database
                self._load()

            self._rebuild_tindex()

        # Log before returning; with the return placed first the success
        # message could never be emitted.
        logger.info("open storage %s...success. %d tables.",
                    self.path, len(self.tables))
        return self
    except Exception as e:
        # Use the argument rather than self.path: the attribute may not
        # have been assigned yet if the failure happened early.
        logger.error("open storage %s...failed. error: %s", path, str(e))
        # bare raise keeps the original traceback
        raise
def open(self, host, user, pwd, dbn, port=3306):
    '''
    open database or create it if not exist

    Stores the connection parameters on the instance, connects with
    ``MySQLdb.connect``, then either creates and selects the database or
    selects and loads it, and finally rebuilds the table index.

    :param host: server host passed to ``MySQLdb.connect``
    :param user: user name passed to ``MySQLdb.connect``
    :param pwd: password passed to ``MySQLdb.connect``
    :param dbn: database name, stored as ``self.dbn``
    :param port: server port, 3306 by default
    :return: self
    :raises Exception: any error raised while opening is logged and
        re-raised unchanged
    '''
    # Never write the real password to the log; a fixed mask is passed to
    # the (unchanged) format strings instead.
    masked_pwd = "***"
    try:
        # init connection parameters
        self.host, self.port, self.user, self.pwd = host, port, user, pwd
        self.dbn = dbn
        self.dbc = MySQLdb.connect(host=host, user=user, passwd=pwd, port=port)

        if not self._exists():
            # create database
            self._create()
            self._use()
        else:
            # load database
            self._use()
            self._load()

        self._rebuild_tindex()

        # Log before returning; with the return placed first the success
        # message could never be emitted.
        logger.info(
            "open storage mysql://%s:%s@%s:%d/%s...success. %d tables.",
            user, masked_pwd, host, port, dbn, len(self.tables))
        return self
    except Exception as e:
        logger.error(
            "open storage mysql://%s:%s@%s:%d/%s...failed. error: %s",
            user, masked_pwd, host, port, dbn, str(e))
        # bare raise keeps the original traceback
        raise
def update(self, uri, extras):
    '''
    forward an update request to the registered linker

    :param uri: passed through to the linker's ``update``
    :param extras: passed through to the linker's ``update``
    :return: the linker's result, or None when no linker is registered
    '''
    linker = self.__linker
    if linker is not None:
        return linker.update(uri, extras)

    # nothing registered: report it and give the caller None
    logger.error(
        "linker manager: there is no linker registered. invoke update failed."
    )
    return None
def pull(self):
    '''
    forward a pull request to the registered linker

    :return: the linker's result, or None when no linker is registered
    '''
    linker = self.__linker
    if linker is not None:
        return linker.pull()

    # nothing registered: report it and give the caller None
    logger.error(
        "linker manager: there is no linker registered. invoke pull failed."
    )
    return None
def drop(self):
    '''
    drop table

    Hands ``self.path`` to ``remove_dir``.

    :return: None
    :raises Exception: any error from ``remove_dir`` is logged and
        re-raised unchanged
    '''
    try:
        remove_dir(self.path)
    except Exception as e:
        logger.error("drop table %s...failed. error %s", self.name, str(e))
        # bare raise keeps the original traceback (``raise e`` resets it
        # in Python 2)
        raise
def truncate(self):
    '''
    truncate table

    While holding ``self.lock``, passes ``self.data_file`` to
    ``remove_files`` and then creates a new data file.

    :return: None
    :raises Exception: any error is logged and re-raised unchanged
    '''
    try:
        with clock(self.lock):
            remove_files(self.data_file)
            self._create_data_file()
    except Exception as e:
        logger.error("truncate table %s...failed. error %s", self.name, str(e))
        # bare raise keeps the original traceback (``raise e`` resets it
        # in Python 2)
        raise
def push(self, uri):
    '''
    push a new uri to linker

    :param uri: passed through to the linker's ``push``
    :return: None; when no linker is registered an error is logged and
        nothing is pushed
    '''
    if self.__linker is None:
        logger.error(
            "linker manager: there is no linker registered. invoke push failed."
        )
        # Stop here, as update/pull do; falling through would call
        # ``push`` on None and raise AttributeError.
        return None

    self.__linker.push(uri)
def filter(self, *cond):
    '''
    add accept condition for linker

    :param cond: object, filter accept condition(s), forwarded unchanged
    :return: the linker's result, or None when no linker is registered
    '''
    if self.__linker is None:
        logger.error(
            "linker manager: there is no linker registered. invoke filter failed."
        )
        # Stop here, as update/pull do; falling through would call
        # ``filter`` on None and raise AttributeError.
        return None

    return self.__linker.filter(*cond)
def create(self, dbpath, table):
    '''
    create table

    Records the table definition and its file locations on the instance,
    makes sure the table directory exists, then brings the table file and
    the data file in line with ``table``.

    :param dbpath: base path; joined with ``table.name`` to form the
        table directory
    :param table: table definition; ``name`` and ``nfields()`` are read
        from it
    :return: self
    :raises Exception: any error is logged and re-raised unchanged
    '''
    try:
        # initialize table parameters
        self.table = table
        self.name = table.name
        self.path = join_paths(dbpath, table.name)
        self.table_file = join_paths(self.path, "table")
        self.data_file = join_paths(self.path, "data")

        # create table directory if it does not exist
        make_dirs(self.path)

        # create or replace table file
        if is_file(self.table_file):
            # rewrite the table file only when the stored definition
            # differs from the requested one
            old_table = self.desc()
            if self.table != old_table:
                self._replace_table_file()
        else:
            # create new table file
            self._create_table_file()

        # create or upgrade or replace data file
        if is_file(self.data_file):
            # the first line of the data file is split on "," to get the
            # stored field names
            with open(self.data_file) as fdata:
                nfields = strips(fdata.readline().split(","))

            # The header handle is closed before the data file is
            # touched, so no open handle is held while it is rewritten.
            if self.table.nfields() != nfields:
                if is_subset(nfields, self.table.nfields()):
                    # NOTE(review): presumably the stored fields are a
                    # subset of the new ones here - confirm is_subset's
                    # argument order
                    self._upgrade_data_file()
                else:
                    self._replace_data_file()
        else:
            # create new data file
            self._create_data_file()

        logger.info("create table %s...success.", self.name)
        return self
    except Exception as e:
        logger.error("create table %s...failed. error: %s", self.name, str(e))
        # bare raise keeps the original traceback (``raise e`` resets it
        # in Python 2)
        raise
def config(self, pattern, config):
    '''
    add configure for url pattern for crawling

    :param pattern: passed through to the linker's ``config``
    :param config: passed through to the linker's ``config``
    :return: the linker's result, or None when no linker is registered
    '''
    if self.__linker is None:
        logger.error(
            "linker manager: there is no linker registered. invoke config failed."
        )
        # Stop here, as update/pull do; falling through would call
        # ``config`` on None and raise AttributeError.
        return None

    return self.__linker.config(pattern, config)