def _common_purge_filpeaths_test(self):
    """Shared body for the purge_filepaths tests.

    Inserts two filepath rows that nothing references, runs
    purge_filepaths(), and verifies that exactly those two rows (the ids
    returned by the INSERT) and their on-disk files are removed while
    every other filepath row/file is left untouched.
    """
    # Get all the filepaths so we can test if they've been removed or not
    sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
    fps = [join(get_mountpoint_path_by_id(dd_id), fp)
           for fp, dd_id in self.conn_handler.execute_fetchall(sql_fp)]

    # Make sure that the files exist - specially for travis
    for fp in fps:
        if not exists(fp):
            with open(fp, "w") as f:
                f.write("\n")
            # Only files created here are registered for teardown cleanup;
            # the two orphans below are expected to be deleted by the purge.
            self.files_to_remove.append(fp)

    _, raw_data_mp = get_mountpoint("raw_data")[0]

    # Two files that will be registered in the DB but never referenced by
    # any artifact, so purge_filepaths should remove them.
    removed_fps = [
        join(raw_data_mp, "2_sequences_barcodes.fastq.gz"),
        join(raw_data_mp, "2_sequences.fastq.gz")]

    for fp in removed_fps:
        with open(fp, "w") as f:
            f.write("\n")

    # Register the orphan files; RETURNING gives us the new row ids so we
    # can later assert that exactly these rows were purged.
    sql = """INSERT INTO qiita.filepath
                (filepath, filepath_type_id, checksum,
                 checksum_algorithm_id, data_directory_id)
             VALUES ('2_sequences_barcodes.fastq.gz', 3, '852952723', 1, 5),
                    ('2_sequences.fastq.gz', 1, '852952723', 1, 5)
             RETURNING filepath_id"""
    fp_ids = self.conn_handler.execute_fetchall(sql)

    # The set of filepaths that must survive the purge.
    fps = set(fps).difference(removed_fps)

    # Check that the files exist
    for fp in fps:
        self.assertTrue(exists(fp))
    for fp in removed_fps:
        self.assertTrue(exists(fp))

    exp_count = get_count("qiita.filepath") - 2

    purge_filepaths()

    obs_count = get_count("qiita.filepath")
    # Check that only 2 rows have been removed
    self.assertEqual(obs_count, exp_count)

    # Check that the 2 rows that have been removed are the correct ones
    sql = """SELECT EXISTS(
                SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
    obs = self.conn_handler.execute_fetchone(sql, (fp_ids[0][0],))[0]
    self.assertFalse(obs)
    obs = self.conn_handler.execute_fetchone(sql, (fp_ids[1][0],))[0]
    self.assertFalse(obs)

    # Check that the files have been successfully removed
    for fp in removed_fps:
        self.assertFalse(exists(fp))

    # Check that all the other files still exist
    for fp in fps:
        self.assertTrue(exists(fp))
def _common_purge_filpeaths_test(self):
    """Shared body for the purge_filepaths tests.

    Runs purge_filepaths(self.conn_handler) and verifies that exactly two
    filepath rows and their two raw_data files are removed while every
    other filepath row/file is left untouched.
    """
    # Get all the filepaths so we can test if they've been removed or not
    sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
    fps = [
        join(get_mountpoint_path_by_id(dd_id), fp)
        for fp, dd_id in self.conn_handler.execute_fetchall(sql_fp)
    ]

    # Make sure that the files exist - specially for travis
    for fp in fps:
        if not exists(fp):
            with open(fp, 'w') as f:
                f.write('\n')
            # Created files are queued for teardown cleanup.
            self.files_to_remove.append(fp)

    _, raw_data_mp = get_mountpoint('raw_data')[0]

    # The two files that purge_filepaths is expected to delete; they are
    # assumed to already exist in the test environment (asserted below).
    removed_fps = [
        join(raw_data_mp, '2_sequences_barcodes.fastq.gz'),
        join(raw_data_mp, '2_sequences.fastq.gz')
    ]

    # The set of filepaths that must survive the purge.
    fps = set(fps).difference(removed_fps)

    # Check that the files exist
    for fp in fps:
        self.assertTrue(exists(fp))
    for fp in removed_fps:
        self.assertTrue(exists(fp))

    exp_count = get_count("qiita.filepath") - 2

    purge_filepaths(self.conn_handler)

    obs_count = get_count("qiita.filepath")
    # Check that only 2 rows have been removed
    self.assertEqual(obs_count, exp_count)

    # Check that the 2 rows that have been removed are the correct ones.
    # NOTE(review): filepath ids 3 and 4 are assumed to be the rows for
    # the two removed_fps files in the test DB fixture — confirm if the
    # fixture changes.
    sql = """SELECT EXISTS(
                SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
    obs = self.conn_handler.execute_fetchone(sql, (3, ))[0]
    self.assertFalse(obs)
    obs = self.conn_handler.execute_fetchone(sql, (4, ))[0]
    self.assertFalse(obs)

    # Check that the files have been successfully removed
    for fp in removed_fps:
        self.assertFalse(exists(fp))

    # Check that all the other files still exist
    for fp in fps:
        self.assertTrue(exists(fp))
def test_get_mountpoint_path_by_id(self):
    """get_mountpoint_path_by_id returns the full path for a data directory id."""
    # Data directories shipped with the test database.
    for dd_id, mountpoint, subdir in [(5, "raw_data", ""),
                                      (1, "analysis", ""),
                                      (2, "job", "")]:
        self.assertEqual(get_mountpoint_path_by_id(dd_id),
                         join(get_db_files_base_dir(), mountpoint, subdir))

    # inserting new ones so we can test that it retrieves these and
    # doesn't alter other ones
    self.conn_handler.execute("UPDATE qiita.data_directory SET active=false WHERE "
                              "data_directory_id=1")
    self.conn_handler.execute(
        "INSERT INTO qiita.data_directory (data_type, mountpoint, "
        "subdirectory, active) VALUES ('analysis', 'analysis', 'tmp', "
        "true), ('raw_data', 'raw_data', 'tmp', false)"
    )

    # The new active analysis row (id 10) should now be returned, while
    # the pre-existing raw_data and job directories are unaffected.
    for dd_id, mountpoint, subdir in [(10, "analysis", "tmp"),
                                      (5, "raw_data", ""),
                                      (2, "job", "")]:
        self.assertEqual(get_mountpoint_path_by_id(dd_id),
                         join(get_db_files_base_dir(), mountpoint, subdir))
def _common_purge_filpeaths_test(self):
    """Shared body for the purge_filepaths tests.

    Runs purge_filepaths(self.conn_handler) and checks that exactly the
    two orphaned raw_data files (filepath ids 3 and 4) are removed, both
    from the database and from disk, while everything else survives.
    """
    # Snapshot every registered filepath so we can later tell which ones
    # survived the purge.
    query = "SELECT filepath, data_directory_id FROM qiita.filepath"
    all_fps = [join(get_mountpoint_path_by_id(dd_id), name)
               for name, dd_id in self.conn_handler.execute_fetchall(query)]

    # Make sure that the files exist - specially for travis
    for path in all_fps:
        if exists(path):
            continue
        with open(path, 'w') as fh:
            fh.write('\n')
        self.files_to_remove.append(path)

    _, raw_data_mp = get_mountpoint('raw_data')[0]
    # The two files that the purge is expected to delete.
    orphan_fps = [join(raw_data_mp, '2_sequences_barcodes.fastq.gz'),
                  join(raw_data_mp, '2_sequences.fastq.gz')]
    surviving_fps = set(all_fps) - set(orphan_fps)

    # Sanity check: everything is on disk before purging.
    for path in surviving_fps:
        self.assertTrue(exists(path))
    for path in orphan_fps:
        self.assertTrue(exists(path))

    exp_count = get_count("qiita.filepath") - 2
    purge_filepaths(self.conn_handler)
    obs_count = get_count("qiita.filepath")

    # Exactly two rows were dropped...
    self.assertEqual(obs_count, exp_count)

    # ...and they were the expected ones (ids 3 and 4).
    sql = """SELECT EXISTS(
                SELECT * FROM qiita.filepath WHERE filepath_id = %s)"""
    for fp_id in (3, 4):
        self.assertFalse(
            self.conn_handler.execute_fetchone(sql, (fp_id,))[0])

    # The orphaned files are gone from disk...
    for path in orphan_fps:
        self.assertFalse(exists(path))
    # ...and all the other files still exist.
    for path in surviving_fps:
        self.assertTrue(exists(path))
def test_get_mountpoint_path_by_id(self):
    """get_mountpoint_path_by_id returns the full path for a data directory id."""
    # Data directories shipped with the test database.
    exp = join(get_db_files_base_dir(), 'raw_data', '')
    obs = get_mountpoint_path_by_id(5)
    self.assertEqual(obs, exp)

    exp = join(get_db_files_base_dir(), 'analysis', '')
    obs = get_mountpoint_path_by_id(1)
    self.assertEqual(obs, exp)

    exp = join(get_db_files_base_dir(), 'job', '')
    obs = get_mountpoint_path_by_id(2)
    self.assertEqual(obs, exp)

    # inserting new ones so we can test that it retrieves these and
    # doesn't alter other ones
    self.conn_handler.execute(
        "UPDATE qiita.data_directory SET active=false WHERE "
        "data_directory_id=1")
    self.conn_handler.execute(
        "INSERT INTO qiita.data_directory (data_type, mountpoint, "
        "subdirectory, active) VALUES ('analysis', 'analysis', 'tmp', "
        "true), ('raw_data', 'raw_data', 'tmp', false)")

    # this should have been updated
    # NOTE(review): id 10 is assumed to be the id assigned to the new
    # active 'analysis' row by the INSERT above — fixture dependent.
    exp = join(get_db_files_base_dir(), 'analysis', 'tmp')
    obs = get_mountpoint_path_by_id(10)
    self.assertEqual(obs, exp)

    # these 2 shouldn't
    exp = join(get_db_files_base_dir(), 'raw_data', '')
    obs = get_mountpoint_path_by_id(5)
    self.assertEqual(obs, exp)

    exp = join(get_db_files_base_dir(), 'job', '')
    obs = get_mountpoint_path_by_id(2)
    self.assertEqual(obs, exp)
# NOTE(review): fragment of a per-analysis loop body — the enclosing
# `for`/transaction context that defines `analysis_id`, `fps`,
# `analysis_mp` and `tgz_id` lies outside this view (the `continue`
# below requires it).

# Collect the job-result filepaths attached to this analysis.
sql = """SELECT filepath, data_directory_id
         FROM qiita.analysis_job
            JOIN qiita.job USING (job_id)
            JOIN qiita.job_results_filepath USING (job_id)
            JOIN qiita.filepath USING (filepath_id)
         WHERE analysis_id = %s"""
TRN.add(sql, [analysis_id])
fps = fps.union([tuple(r) for r in TRN.execute_fetchindex()])

# no filepaths in the analysis
if not fps:
    continue

# Bundle all the analysis files into a single gzip'd tarball, unless one
# was already created on a previous run.
tgz = join(analysis_mp, '%d_files.tgz' % analysis_id)
if not exists(tgz):
    full_fps = [join(get_mountpoint_path_by_id(mid), f)
                for f, mid in fps]
    with taropen(tgz, "w:gz") as tar:
        for f in full_fps:
            # Flatten the archive: store each file by basename only.
            tar.add(f, arcname=basename(f))

# Add the new tgz file to the analysis.
fp_ids = insert_filepaths([(tgz, tgz_id)], analysis_id, 'analysis',
                          move_files=False)
sql = """INSERT INTO qiita.analysis_filepath (analysis_id, filepath_id)
         VALUES (%s, %s)"""
sql_args = [[analysis_id, fp_id] for fp_id in fp_ids]
TRN.add(sql, sql_args, many=True)
TRN.execute()
FROM qiita.analysis_job JOIN qiita.job USING (job_id) JOIN qiita.job_results_filepath USING (job_id) JOIN qiita.filepath USING (filepath_id) WHERE analysis_id = %s"""
# NOTE(review): the line above is the tail of a triple-quoted SQL string
# (`sql = """SELECT ...`) whose opening lies outside this view; the
# enclosing per-analysis loop that defines `analysis_id`, `fps`,
# `analysis_mp` and `tgz_id` is also outside this view (the `continue`
# below requires it).
TRN.add(sql, [analysis_id])
fps = fps.union([tuple(r) for r in TRN.execute_fetchindex()])

# no filepaths in the analysis
if not fps:
    continue

# Bundle all the analysis files into a single gzip'd tarball, unless one
# was already created on a previous run.
tgz = join(analysis_mp, '%d_files.tgz' % analysis_id)
if not exists(tgz):
    full_fps = [
        join(get_mountpoint_path_by_id(mid), f) for f, mid in fps
    ]
    with taropen(tgz, "w:gz") as tar:
        for f in full_fps:
            # Flatten the archive: store each file by basename only.
            tar.add(f, arcname=basename(f))

# Add the new tgz file to the analysis.
fp_ids = insert_filepaths([(tgz, tgz_id)], analysis_id, 'analysis',
                          move_files=False)
sql = """INSERT INTO qiita.analysis_filepath (analysis_id, filepath_id)
         VALUES (%s, %s)"""
sql_args = [[analysis_id, fp_id] for fp_id in fp_ids]
# NOTE(review): unlike the sibling revisions of this fragment, no
# TRN.execute() follows here — presumably executed later by the caller.
TRN.add(sql, sql_args, many=True)
# NOTE(review): fragment of a per-analysis loop body — the enclosing
# `for`/transaction context that defines `analysis_id`, `fps`,
# `analysis_mp` and `tgz_id` lies outside this view (the `continue`
# below requires it).

# Collect the job-result filepaths attached to this analysis.
sql = """SELECT filepath, data_directory_id
         FROM qiita.analysis_job
            JOIN qiita.job USING (job_id)
            JOIN qiita.job_results_filepath USING (job_id)
            JOIN qiita.filepath USING (filepath_id)
         WHERE analysis_id = %s"""
TRN.add(sql, [analysis_id])
fps = fps.union([tuple(r) for r in TRN.execute_fetchindex()])

# no filepaths in the analysis
if not fps:
    continue

# Bundle all the analysis files into a single gzip'd tarball, unless one
# was already created on a previous run.
tgz = join(analysis_mp, '%d_files.tgz' % analysis_id)
if not exists(tgz):
    full_fps = [join(get_mountpoint_path_by_id(mid), f)
                for f, mid in fps]
    with taropen(tgz, "w:gz") as tar:
        for f in full_fps:
            # Flatten the archive: store each file by basename only.
            tar.add(f, arcname=basename(f))

# Add the new tgz file to the analysis.
# NOTE(review): this revision passes an extra "filepath" positional
# argument to insert_filepaths that the sibling revisions omit —
# presumably a table-name parameter of an older API; confirm against the
# insert_filepaths signature in use.
fp_ids = insert_filepaths([(tgz, tgz_id)], analysis_id, 'analysis',
                          "filepath", move_files=False)
sql = """INSERT INTO qiita.analysis_filepath (analysis_id, filepath_id)
         VALUES (%s, %s)"""
sql_args = [[analysis_id, fp_id] for fp_id in fp_ids]
TRN.add(sql, sql_args, many=True)
TRN.execute()