Exemplo n.º 1
0
    def open(self, path):
        '''
            open storage or create it if not exist

            Inside the clock(self.lock) context (presumably a lock guard --
            confirm against clock's definition) the storage is created with
            self._create() when path does not exist, otherwise loaded with
            self._load(); afterwards the table index is rebuilt.
        :param path: storage location, stored on self.path
        :return: self
        :raise Exception: any failure is logged and re-raised unchanged
        '''
        try:
            with clock(self.lock):
                #init storage path
                self.path = path

                if path_exists(self.path):
                    # load database
                    self._load()
                else:
                    #create database
                    self._create()

                self._rebuild_tindex()

            # log before returning: the original returned first, which made
            # this success message unreachable
            logger.info("open storage %s...success. %d tables.", self.path,
                        len(self.tables))
            return self
        except Exception as e:
            # report the argument rather than self.path, which may not be set
            # yet if entering the clock context itself failed
            logger.error("open storage %s...failed. error: %s", path,
                         str(e))
            # bare raise keeps the original traceback (``raise e`` restarts
            # it at this line on Python 2)
            raise
Exemplo n.º 2
0
    def open(self, host, user, pwd, dbn, port=3306):
        '''
            open database or create it if not exist

            Connects with MySQLdb.connect(), then either creates the database
            (self._create() followed by self._use()) when self._exists() is
            false, or selects and loads it (self._use() then self._load()).
            The table index is rebuilt afterwards.
        :param host: server host, stored on self.host
        :param user: login user, stored on self.user
        :param pwd: login password, stored on self.pwd (never logged)
        :param dbn: database name, stored on self.dbn
        :param port: server port, stored on self.port
        :return: self
        :raise Exception: any failure is logged and re-raised unchanged
        '''
        # the password is masked in log output so credentials do not end up
        # in log files
        masked = "******"
        try:
            #init connection parameters
            self.host, self.port, self.user, self.pwd = host, port, user, pwd
            self.dbn = dbn
            self.dbc = MySQLdb.connect(host=host,
                                       user=user,
                                       passwd=pwd,
                                       port=port)

            if self._exists():
                # load database
                self._use()
                self._load()
            else:
                #create database
                self._create()
                self._use()

            self._rebuild_tindex()

            # log before returning: the original returned first, which made
            # this success message unreachable
            logger.info(
                "open storage mysql://%s:%s@%s:%d/%s...success. %d tables.",
                user, masked, host, port, self.dbn, len(self.tables))
            return self
        except Exception as e:
            logger.error(
                "open storage mysql://%s:%s@%s:%d/%s...failed. error: %s",
                user, masked, host, port, dbn, str(e))
            # bare raise keeps the original traceback (``raise e`` restarts
            # it at this line on Python 2)
            raise
Exemplo n.º 3
0
    def update(self, uri, extras):
        '''
            forward an update request to the registered linker
        :param uri: passed through to the linker's update()
        :param extras: passed through to the linker's update()
        :return: the result of the linker's update(), or None when no linker
                 is registered (an error is logged in that case)
        '''
        linker = self.__linker
        if linker is not None:
            return linker.update(uri, extras)

        logger.error(
            "linker manager: there is no linker registered. invoke update failed."
        )
        return None
Exemplo n.º 4
0
    def pull(self):
        '''
            pull from the registered linker
        :return: the result of the linker's pull(), or None when no linker
                 is registered (an error is logged in that case)
        '''
        linker = self.__linker
        if linker is not None:
            return linker.pull()

        logger.error(
            "linker manager: there is no linker registered. invoke pull failed."
        )
        return None
Exemplo n.º 5
0
 def drop(self):
     '''
         drop table by passing self.path to remove_dir()
     :return: None
     :raise Exception: any failure is logged and re-raised unchanged
     '''
     try:
         remove_dir(self.path)
     except Exception as e:
         logger.error("drop table %s...failed. error %s", self.name, str(e))
         # bare raise keeps the original traceback (``raise e`` restarts it
         # at this line on Python 2)
         raise
Exemplo n.º 6
0
 def truncate(self):
     '''
         truncate table

         Inside the clock(self.lock) context (presumably a lock guard --
         confirm against clock's definition) the current data file is
         removed with remove_files() and recreated with
         self._create_data_file().
     :return: None
     :raise Exception: any failure is logged and re-raised unchanged
     '''
     try:
         with clock(self.lock):
             remove_files(self.data_file)
             self._create_data_file()
     except Exception as e:
         logger.error("truncate table %s...failed. error %s", self.name, str(e))
         # bare raise keeps the original traceback (``raise e`` restarts it
         # at this line on Python 2)
         raise
Exemplo n.º 7
0
    def push(self, uri):
        '''
            push a new uri to linker
        :param uri: passed through to the linker's push()
        :return: None; when no linker is registered an error is logged and
                 nothing is pushed
        '''
        if self.__linker is None:
            logger.error(
                "linker manager: there is no linker registered. invoke push failed."
            )
            # stop here: falling through would call push() on None and raise
            # AttributeError right after logging the error
            return

        self.__linker.push(uri)
Exemplo n.º 8
0
    def filter(self, *cond):
        '''
            hand accept conditions over to the registered linker
        :param cond: filter accept conditions, forwarded unchanged to the
                     linker's filter()
        :return: None; when no linker is registered an error is logged and
                 nothing is forwarded
        '''
        linker = self.__linker
        if linker is not None:
            linker.filter(*cond)
            return

        logger.error(
            "linker manager: there is no linker registered. invoke filter failed."
        )
Exemplo n.º 9
0
    def create(self, dbpath, table):
        '''
            create table

            Prepares the table directory under dbpath and brings the table
            file and the data file in line with the given table definition:
            a missing file is created, a table file whose stored definition
            (self.desc()) differs from table is replaced, and a data file
            whose header fields differ from table.nfields() is upgraded when
            is_subset(old fields, new fields) holds, otherwise replaced.
        :param dbpath: directory that contains the table directories
        :param table: table definition; its name and nfields() are used
        :return self
        :raise Exception: any failure is logged and re-raised unchanged
        '''
        try:
            #initialize table parameters
            self.table = table
            self.name = table.name

            self.path = join_paths(dbpath, table.name)
            self.table_file = join_paths(self.path, "table")
            self.data_file = join_paths(self.path, "data")

            #create table directory if it is not exists
            make_dirs(self.path)

            #create or replace table file
            if not is_file(self.table_file):
                #create new table file
                self._create_table_file()
            elif self.table != self.desc():
                #stored definition differs, replace table file
                self._replace_table_file()

            #create or upgrade or replace data file
            if not is_file(self.data_file):
                #create new data file
                self._create_data_file()
            else:
                # read only the header line and close the file before any
                # helper runs, so the helpers never work on a data file this
                # method still holds open
                with open(self.data_file) as fdata:
                    old_fields = strips(fdata.readline().split(","))

                new_fields = self.table.nfields()
                if new_fields != old_fields:
                    if is_subset(old_fields, new_fields):
                        self._upgrade_data_file()
                    else:
                        self._replace_data_file()

            logger.info("create table %s...success.", self.name)
            return self
        except Exception as e:
            logger.error("create table %s...failed. error: %s", self.name, str(e))
            # bare raise keeps the original traceback (``raise e`` restarts
            # it at this line on Python 2)
            raise
Exemplo n.º 10
0
    def config(self, pattern, config):
        '''
            hand a crawl configuration for a url pattern to the registered
            linker
        :param pattern: forwarded unchanged to the linker's config()
        :param config: forwarded unchanged to the linker's config()
        :return: None; when no linker is registered an error is logged and
                 nothing is forwarded
        '''
        linker = self.__linker
        if linker is not None:
            linker.config(pattern, config)
            return

        logger.error(
            "linker manager: there is no linker registered. invoke config failed."
        )