def __main__(): logger = Logger() logger.set_debug_level('info') config= Config() u_base_path = config.base_path hddb = HoerdatDB(config.hoerdat_db_path, {'debug': 0}) dupmerge = DupMerge( config.md5_db_path, u_base_path, {'logger': logger}, ) hd = Hoerdat(hddb) for file, md5 in dupmerge.all_files_md5(): if not hd.set_filename(file, md5): continue if DONT_RESCAN_PRESENT and hd.data['hoerdat'].get_data('hoerdat_id'): continue print file, hd.data['hoerdat'].get_data('hoerdat_id') fetch_result = hd.fetch_data() best_match = hd.get_best_match(fetch_result) if best_match: hd.merge_data(best_match[0], best_match[1]) hd.update() print "Best rank:", best_match[1] print "\t", '"' + hd.data['hoerdat'].get_data('title') + '"'
def __main__():
    """Connect to the FTP mirror and hard-link all duplicate files
    (forward pass with cleanup of stale entries enabled)."""
    config = Config()
    u_base_path = config.base_path
    logger = Logger(config.ftp_log_path)
    logger.set_debug_level('info')
    dup_merge = DupMerge(
        config.md5_db_path,
        u_base_path,
        {'logger': logger},
    )
    # forward run (reverse=0) with old-entry cleanup switched on
    fetch_params = {
        'dstdir': config.ftp_base_path,
        'db': config.ftp_db_path,
        'ignore': config.ftp_ignore_list,
        'possible_hidden_dirs': config.ftp_possible_hidden_dirs,
        'dupmerge': dup_merge,
        'reverse': 0,
        'old_cleanup': 1,
        'logger': logger,
    }
    ftp = FTPFetch(
        host=config.ftp_hostname,
        port=config.ftp_port,
        credentials=config.ftp_credentials,
        params=fetch_params,
    )
    ftp.link_all_duplicates()
def __main__():
    """Scan the local tree under config.base_path: register every
    non-ignored file in the md5 database, remove empty directories,
    and finally re-check db entries whose files no longer exist."""
    logger = Logger()
    logger.set_debug_level('info')
    config = Config()
    u_base_path = config.base_path
    dupmerge = DupMerge(
        config.md5_db_path,
        u_base_path,
        {'logger': logger},
    )
    # paths (and suffixes) that must never be scanned or hashed;
    # raw strings so the regex escapes survive modern Python
    re_ignore = [re.compile(_) for _ in [
        '^' + u_base_path + '/dbs',
        '^' + u_base_path + r'/lost\+found',
        '^' + config.ftp_log_path,
        '^' + u_base_path + '/scripts',
        '^' + u_base_path + '/temp',
        '^' + u_base_path + '/txt',
        '^' + u_base_path + '/semaphores',
        r'\.tmp$',
        r'\.tmp\.\d+$',
        r'\.db$',
    ]]
    for root, dirs, files in os.walk(u_base_path):
        full_path = lambda u_name: os.path.join(root, u_name)
        if not dirs and not files:
            logger.warning("Removing empty dir '" + root + "'...")
            os.removedirs(root)
            continue
        dirs.sort()
        files.sort()
        # BUG FIX: the original called dirs.remove() while iterating dirs,
        # which skips the element following each removal, so some ignored
        # dirs escaped pruning. Filter via slice assignment instead so
        # os.walk sees the updated list and does not descend into them.
        dirs[:] = [
            u_dir for u_dir in dirs
            if not any(_.search(full_path(u_dir)) for _ in re_ignore)
        ]
        logger.info("Scanning '" + root + "'...")
        for u_file in files:
            if any(_.search(full_path(u_file)) for _ in re_ignore):
                continue
            dupmerge.file_exists(full_path(u_file))
    # reverse pass: prod the db about entries whose files vanished
    logger.info("Doing reverse scan...")
    for u_file in [_ for _ in dupmerge.all_files() if not os.path.isfile(_)]:
        dupmerge.file_exists(u_file)
def __main__():
    """Fetch from the FTP mirror in reverse order (reverse=1, no
    old-entry cleanup), walking the tree via the fetch iterator."""
    config = Config()
    u_base_path = config.base_path
    logger = Logger(config.ftp_log_path)
    logger.set_debug_level('info')
    dup_merge = DupMerge(
        config.md5_db_path,
        u_base_path,
        {'logger': logger},
    )
    fetch_params = {
        'dstdir': config.ftp_base_path,
        'db': config.ftp_db_path,
        'ignore': config.ftp_ignore_list,
        'possible_hidden_dirs': config.ftp_possible_hidden_dirs,
        'dupmerge': dup_merge,
        'reverse': 1,
        'old_cleanup': 0,
        'logger': logger,
    }
    ftp = FTPFetch(
        host=config.ftp_hostname,
        port=config.ftp_port,
        credentials=config.ftp_credentials,
        params=fetch_params,
    )
    walker = ftp.iterator([
        # u'/download',
        u'/',
    ])
    # drive the iterator until it reports completion
    while walker():
        pass
    ftp.close()
class DBBase(object):
    """Base class for sqlite-backed db handling.

    dbname:  name of the database file (resolved to a real path; its
             directory is created on demand)
    basedir: base dir for path parsing (stripped from file paths before
             inserting into the db and prepended again on the way out)
    params['logger']: logger object to use instead of a fresh Logger
    params['debug']:  set True for db debug messages (if logger is unset)
    """

    def __init__(self, dbname, basedir='./', params=None):
        self.dbname = os.path.realpath(dbname)
        self.basedir = os.path.realpath(basedir)
        if params is None:  # no mutable default argument
            params = {}
        if params.get('logger'):
            self._logger = params['logger']
        else:
            self._logger = Logger()
        if params.get('debug'):
            self._logger.set_debug_level('debug')
        # create dir for db file if it does not exist already
        u_db_dir_name = os.path.dirname(self.dbname)
        if not os.path.isdir(u_db_dir_name):
            os.makedirs(u_db_dir_name)
        self.dbh = sqlite3.connect(self.dbname, timeout=DB_TIMEOUT)
        # self.dbh.text_factory = str
        self.__db_lock = 0  # nesting depth of explicit transactions
        self._init_db()

    def _init_db(self):
        """dummy function to initialize db; subclasses override this"""
        pass

    def _db_update(self, s_sql, t_data=()):
        """Executes a modifying statement and commits the change unless
        an explicit transaction is open. Rolls back, logs and re-raises
        on sqlite3.OperationalError. Returns the result cursor."""
        t_data = tuple([TO_UNICODE(_) for _ in t_data])
        self._logger.debug(
            "Executing '" + s_sql + "' with data '" +
            "', '".join(t_data) + "'"
        )
        try:
            result = self.dbh.execute(s_sql, t_data)
            if not self.__db_lock:
                self.dbh.commit()
        except sqlite3.OperationalError:
            self.dbh.rollback()
            self._logger.error(
                "Error executing '" + s_sql + "' with data '" +
                "', '".join(t_data) + "'"
            )
            raise
        return result

    def _db_select(self, s_sql, t_data=()):
        """Executes a read-only query and returns the result cursor.
        (Original docstring wrongly claimed it commits changes.)"""
        t_data = tuple([TO_UNICODE(_) for _ in t_data])
        self._logger.debug(
            "Executing '" + s_sql + "' with data '" +
            "', '".join(t_data) + "'"
        )
        result = self.dbh.execute(s_sql, t_data)
        return result

    def db_begin_transaction(self):
        """begins db transaction (nestable; only the outermost commit
        actually hits the database)"""
        self.__db_lock += 1

    def db_commit(self):
        """commits db transaction once the outermost level unwinds"""
        if self.__db_lock:
            self.__db_lock -= 1
            if not self.__db_lock:
                self.dbh.commit()

    def db_rollback(self):
        """rolls back db transaction"""
        if self.__db_lock:
            self.__db_lock -= 1
            self.dbh.rollback()

    def parse_file_param(self, t_file):
        """Returns the file parameter as a (dir, name) tuple.

        Accepts either a path string or an already-split tuple and
        strips self.basedir from the directory part when present; the
        returned directory always carries a trailing slash.
        """
        if not isinstance(t_file, tuple):
            t_file = os.path.split(t_file)
        if t_file[0].startswith(self.basedir):
            t_file = (t_file[0][len(self.basedir):], t_file[1])
        return (os.path.normpath(t_file[0]) + '/', t_file[1])

    def local_file_name(self, p_file):
        """returns real file name on local devices (basedir re-applied)"""
        t_file = self.parse_file_param(p_file)
        return os.path.realpath(
            self.basedir + os.path.join('/' + t_file[0], t_file[1])
        )