Пример #1
0
 def __init__(self, dbpath, dir_list=None, file_regex=None,
              preview_path=None, watch_directory=None, init_sql_path=None,
              video_cmd=None, zip_cmd=None):
     self.dir_list = dir_list
     self.file_regex = file_regex
     self.preview_path = preview_path
     self.preview_creation_threads = 4
     self.watch_directory = watch_directory
     self.video_cmd = video_cmd
     self.zip_cmd = zip_cmd
     self.q = queue.Queue()
     self.work_cv = threading.Condition()
     self.sql_runner = SQLRunner(dbpath, self.work_cv, self.q)
     self.sql_runner.daemon = True
     self.sql_runner.start()
     self.preview_creator = PreviewCreator()
     self.update_running = False
Пример #2
0
class DBtool:
    '''
    This class contains the core functions for interacting with the database
    and processing the files from directories specified during initialization.
    '''

    def __init__(self, dbpath, dir_list=None, file_regex=None,
                 preview_path=None, watch_directory=None, init_sql_path=None,
                 video_cmd=None, zip_cmd=None):
        self.dir_list = dir_list
        self.file_regex = file_regex
        self.preview_path = preview_path
        self.preview_creation_threads = 4
        self.watch_directory = watch_directory
        self.video_cmd = video_cmd
        self.zip_cmd = zip_cmd
        self.q = queue.Queue()
        self.work_cv = threading.Condition()
        self.sql_runner = SQLRunner(dbpath, self.work_cv, self.q)
        self.sql_runner.daemon = True
        self.sql_runner.start()
        self.preview_creator = PreviewCreator()
        self.update_running = False

    # There probably is a somewhat rare bug happening here, which causes the
    # execution of queries lock down.
    def run_sql(self, sql, params=None, commit=True):
        if not self.sql_runner.is_alive():
            return
        cond = threading.Condition()
        res = list()
        job = [sql, params, res, cond, commit]
        with cond:
            with self.work_cv:
                self.q.put(job)
                self.work_cv.notify()
            cond.wait()
        return job[2]

    # TODO: proper error reporting
    def add_to_playlist(self, file_id_list, pl_name):
        '''Add files to a playlist. Upon completion returns a list of id's that
        were inserted.

        Arguments:
        file_id_list -- list of id's of files to be added to the playlist
        pl_name -- name of the playlist'''
        self.run_sql('INSERT OR IGNORE Into Playlist Values(NULL, ?)', (pl_name,), True)
        inserted = list()
        for i, e in enumerate(file_id_list):
            commit = False
            if i == len(file_id_list)-1:
                commit = True
            query = ('''INSERT INTO FilePlaylist (file_id, playlist_id)
                    SELECT ?, Playlist.id FROM Playlist WHERE Playlist.name=?''')
            res = self.run_sql(query, (e, pl_name), commit)
            if res != None:
                inserted.append(e)
        return inserted

    def change_paths(self, id_path_pairs):
        '''Change paths of files in the database. id_path_pairs should be a list of
        dicts of format {'id':file_id, 'old_path':old_path, 'new_path':new_path}'''
        res = list()
        for p in id_path_pairs:
            file_id = int(p['id'])
            destpath = p['new_path']
            u_res = self.run_sql('UPDATE File SET Name=? WHERE id=?',
                                 (destpath, file_id))
            if u_res:
                res.append({'id':p['id'], 'path':'new_path'})
            else:
                logging.error('Error updating file name withd id %d to %s',
                               file_id, destpath)
        return res


    def create_preview(self, file_id, path=None, outpath=None, force=False):
        if not path:
            entry = self.run_sql('''SELECT File.name,Preview.path FROM File
                                 LEFT JOIN Preview ON File.id=Preview.file_id
                                 WHERE File.id=?''', (file_id,))
            path = entry[0][0]
            if entry[0][1] and not force:
                logging.info('Preview already exists for file with id %s. Skipping overwrite', file_id)
                return entry[0][1]

        if path.find('.zip') != -1:
            return
        fname = os.path.basename(path)
        m = hashlib.md5()
        m.update(bytes(fname, 'utf-8'))
        outpath = os.path.join(self.preview_path, '{0}.{1}'.format(m.hexdigest(), 'mp4'))
        res = self.preview_creator.create_preview(path, outpath)
        print("PREVIEW CREATED", res)
        if res:
            self.run_sql('INSERT OR IGNORE INTO Preview VALUES(?, ?)', (file_id, outpath), False)
            self.run_sql('UPDATE Preview SET Path=? WHERE file_id=?', (outpath, file_id), True)
        return res

    def delete_files(self, files_ids):
        '''Permanently delete files from hard disk and all data associated to
        them from the database

        Arguments:
        files_ids -- list of dicts of format {'id':file_id, 'name':file_name}
        '''
        res = list()
        for i in files_ids:
            p = (i,)
            path = None
            try:
                path = self.run_sql('SELECT name FROM File WHERE id=?', (i,), False)[0][0]
            except:
                pass
            self.run_sql('DELETE FROM File WHERE id=?', p, False)
            self.run_sql('DELETE FROM Viewcount WHERE file_id=?', p, False)
            self.run_sql('DELETE FROM Preview WHERE file_id=?', p, False)
            self.run_sql('DELETE FROM FilePlaylist WHERE file_id=?', p, True)
            try:
                os.remove(path)
            except:
                print('Error when attempting to remove file "{}"'.format(path))
            res.append(i)
        return res

    def __get_join_type(self, join_str):
        allowed_types = ['LEFT JOIN', 'JOIN', 'RIGHT JOIN', 'FULL JOIN']
        if join_str in allowed_types:
            return join_str
        return 'JOIN'

    def search_files(self, file_terms=None, file_terms_not=None,
            view_count_min=None, view_count_max=None, size_min=None,
            size_max=None, added_min=None, added_max=None, playlist=None,
            playlist_not=None):
        query_params = list()
        query_str = ""
        clause_list = list()
        if file_terms:
            clause_list.extend(['File.name LIKE ?' for f in file_terms])
            query_params.extend(['%{}%'.format(f) for f in file_terms])
        if file_terms_not:
            clause_list.extend(['File.name NOT LIKE ?' for f in file_terms_not])
            query_params.extend(['%{}%'.format(f) for f in file_terms_not])
        if view_count_min:
            clause_list.append('Viewcount.views > ?')
            query_params.append(view_count_min)
        if view_count_max:
            clause_list.append('Viewcount.views < ?')
            query_params.append(view_count_max)
        if size_min:
            clause_list.append('File.fsize > ?')
            query_params.append(size_min)
        if size_max:
            clause_list.append('File.fsize < ?')
            query_params.append(size_max)
        if added_min:
            clause_list.append('File.added > ?')
            query_params.append(added_min)
        if added_max:
            clause_list.append('File.added < ?')
            query_params.append(added_max)
        if not any([playlist, playlist_not]):
            query_str = ('''SELECT File.id,File.name,File.fsize,Preview.path,Viewcount.views
                    FROM File
                    LEFT JOIN Preview ON File.id=Preview.file_id
                    LEFT JOIN Viewcount ON File.id=Viewcount.file_id
                    WHERE {}
                    '''.format(' AND '.join(clause_list)))
        else:
            playlist_q = list()
            playlist_params = list()
            if playlist:
                playlist_q.append('''File.id IN (SELECT FilePlaylist.file_id
                    FROM Playlist,FilePlaylist
                    WHERE Playlist.id=FilePlaylist.playlist_id AND
                    Playlist.name=?)''')
                playlist_params.append(playlist)
            if playlist_not:
                playlist_q.append('''File.id NOT IN (SELECT FilePlaylist.file_id
                    FROM Playlist,FilePlaylist
                    WHERE Playlist.id=FilePlaylist.playlist_id AND
                    Playlist.name=?)''')
                playlist_params.append(playlist_not)
            query_params = playlist_params + query_params
            query_str = ('''SELECT File.id,File.name,File.fsize,Preview.path,Viewcount.views
                    FROM File
                    LEFT JOIN Preview ON File.id=Preview.file_id
                    LEFT JOIN Viewcount ON File.id=Viewcount.file_id
                    WHERE {}'''.format(' AND '.join(playlist_q)))
            print("DEBUG QUERY LEN, PLAYLIST_Q_LEN", len(query_params), len(playlist_q))
            if len(query_params)-len(playlist_q) > 0:
                query_str += ' AND {}'.format(' AND '.join(clause_list))
        res = self.run_sql(query_str, query_params)
        return res

    def insert_files(self, flist):
        '''
        Insert files to the database. Files should be passed as a list of
        3-tuples of format (path, size_bytes, date_added)'''
        commit = False
        for i, e in enumerate(flist):
            if i == len(flist)-1:
                commit = True
            res_q = self.run_sql('INSERT INTO File Values(NULL, ?, ?, datetime(?))', (e[0], e[1], e[2],), commit)
            if res_q != None:
                vc = self.run_sql('''INSERT INTO Viewcount (file_id, views)
                                  SELECT id, 0 FROM File
                                  WHERE File.name=?''', (e[0],), commit)

    def open_file(self, fileId):
        res = self.run_sql('SELECT Name FROM File where id=?', (fileId,))
        for row in res:
            # TODO: Fix hard-coding and open the file based on mime-type
            logging.debug('Opening file %s', row[0])
            if row[0].lower().find('.zip') != -1:
                subprocess.Popen([self.zip_cmd, row[0]], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            else:
                subprocess.Popen([self.video_cmd, row[0]], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            break

        self.run_sql('UPDATE Viewcount SET views=views+1 WHERE file_id=?',(fileId,))
        self.run_sql('INSERT OR IGNORE INTO Viewcount Values(?, ?)', (fileId, 1))

    # TODO: perform this using batch insertion
    def update_database(self):
        '''Add new files to the database'''
        if self.update_running:
            return None
        self.update_running = True
        res = dict()
        res['inserted_bytes'] = 0
        res['inserted_files'] = 0
        for d in self.dir_list:
            print('Updating from "{0}"'.format(d))
            dw = DirWalker(d, self.file_regex)
            dw.walk()

            for i, e in enumerate(dw.get_path_list()):
                commit = False
                if i == len(dw.get_path_list())-1:
                    commit = True
                res_q = self.run_sql('INSERT INTO File Values(NULL, ?, ?, datetime(?))', (e[0], e[1], e[2]), commit)
                if res_q != None:
                    self.run_sql('''INSERT OR IGNORE INTO Viewcount (file_id, views)
                            SELECT id, 0 FROM File WHERE File.name=?''', (e[0],),
                            False)
                    res['inserted_bytes'] += e[1]
                    res['inserted_files'] += 1
        self.update_running = False
        return res

    def clean_preview_entries(self):
        '''Remove paths to nonexisting preview files'''
        res = [x for x in self.conn.execute('SELECT * From Preview')]
        for e in res:
            if not os.path.exists(e[1]):
                self.conn.execute('DELETE FROM Preview WHERE fileId=?', (e[0], ))
                logging.debug('Clearing preview %d %s', e[0], e[1])
        self.conn.commit()

    def get_disk_files(self):
        '''Recursively get all files from directories specified in the list
        self.dir_list returns a dictionary in the format:
        {file_name:file_size_bytes}'''
        path_entries = dict()
        for d in self.dir_list:
            print('Scanning files from directory "{}"'.format(d))
            dw = DirWalker(d, self.file_regex)
            dw.walk()
            path_entries.update({ x[0]:x[1] for x in dw.get_path_list()})
        return path_entries

    def get_disk_files2(self):
        '''Recursively get all files from directories specified in the list
        self.dir_list. Returns a list of tuples in the format:
        [(path1,size1), (path2,size2), ... ]'''
        print(self.dir_list)
        for d in self.dir_list:
            dw = DirWalker(d, self.file_regex)
            dw.walk()
        return dw.get_path_list()

    def get_duplicates(self):
        '''Return list of database entries of files that are similar in size but
        differ in path name'''
        get_all_q = 'SELECT id, name, fsize FROM File ORDER BY fsize'
        db_entries = [x for x in self.conn.execute(get_all_q)]
        record = False
        res = dict()
        key = None
        for i in range(0, len(db_entries)-2):
            e1 = db_entries[i]
            e2 = db_entries[i+1]
            diff = abs(e1[2]-e2[2])
            if diff < 100 and e1[1] != e2[1]:
                record = True
                if not key:
                    key = e1
            else:
                record = False
                key = None

            if record:
                if key not in res:
                    res[key] = list()
                res[key].append(e2)
        return res

    def get_stats(self):
        stat_q = 'SELECT fileId, Name, SUM(fsize), COUNT(fileId) FROM File'
        res = [x for x in self.conn.execute(stat_q)]
        return {'file_sizes': res[0][2], 'file_count': res[0][3]}


    def find_missing_entries(self):
        '''Return missing entries from database and find files that are similar
        in size from hard disk..

        Upon completion returns a dict in the format:
        {db_entry: [(path1, size1), (path2, size2) ...]}'''
        logging.info('Scanning directories...')
        path_entries = self.get_disk_files()
        db_entries = self.run_sql('SELECT id, fsize, name FROM File')

        missing_entries = list()
        for d in db_entries:
            if not d[2] in path_entries:
                missing_entries.append(d)
            else:
                del path_entries[d[2]]

        def size_check(*args):
            return args[1] == args[0]

        path_entries = [(x, path_entries[x]) for x in path_entries]
        res = get_dupes_bsearch(missing_entries, path_entries,
                                is_dupe=size_check, cmpr_func=lambda x: x[1],
                                get_attr=lambda x: x[1])
        return res

    def repair(self):
        if self.update_running:
            return {}
        self.update_running = True
        missing = self.find_missing_entries()
        self.update_running = False
        return missing

    def mark_as_viewed(self, file_id_list):
        '''
        Set the view counts of files, with id's given in file_id_list to 1.
        If a file already has a view count greater than 1, then that view count
        is let unchanged.

        Returns the list of id's that were marked.

        NOTICE: currently the files are correctly marked, but UPDATE statement
        is considered incorrectly to fail, because it doesn't return anything
        upon succesful execution.
        '''
        if len(file_id_list) == 0:
            return list()
        ids = tuple([int(i) for i in file_id_list])
        query = 'SELECT file_id FROM Viewcount WHERE file_id IN ({}) AND views=0'
        query = query.format(','.join(['?' for i in file_id_list]))
        files = [x[0] for x in self.run_sql(query, ids)]
        update = 'UPDATE Viewcount SET views=1 WHERE file_id IN ({})'
        update = update.format(','.join(['?' for i in files]))
        res = self.run_sql(update, tuple(files))
        return files

    def get_playlist(self, name):
        '''Get all files from a playlist'''
        query = ('''SELECT File.id, File.name, File.fsize, Preview.path, Viewcount.views
                From File
                LEFT JOIN Preview ON File.id=Preview.file_id
                LEFT JOIN Viewcount ON File.id=Viewcount.file_id
                WHERE File.id in (SELECT FilePlaylist.file_id FROM FilePlaylist,Playlist
                WHERE Playlist.id=FilePlaylist.playlist_id AND Playlist.name=?)
                ''')
        res = self.run_sql(query, (name,))
        return res