Exemplo n.º 1
0
    def list_raw(self, sample_names=None, city_name=None, project_name=None, grouped=False):
        """List raw read files, optionally restricted to a set of samples.

        Candidate files are the bucket object keys under the ``data`` prefix
        that end in ``.fastq.gz``. Each file is assigned to a sample name: the
        part of its basename before the first ``_1.fastq.gz`` /
        ``_2.fastq.gz`` marker.

        Args:
            sample_names: Optional iterable of sample names to keep.
            city_name: Optional city passed to ``get_samples_from_city``.
            project_name: Optional project passed to ``get_samples_from_city``.
            grouped: When true, return one list of files per sample instead
                of a single flat list.

        Returns:
            A flat list of keys, or a list of per-sample key lists when
            ``grouped`` is true. Keys are in sorted order, both within a
            sample and across samples. If no sample filter is given, every
            matching file is returned.
        """
        samples = set()
        if city_name or project_name:
            samples |= set(get_samples_from_city(city_name, project_name=project_name))
        if sample_names:
            samples |= set(sample_names)

        # De-duplicate, then sort once: iterating the bare set would make the
        # order of the result depend on string hashing and vary between runs.
        raw_reads = sorted({
            obj.key
            for obj in self.bucket.objects.filter(Prefix='data')
            if obj.key.endswith('.fastq.gz')
        })

        raw_read_files = {}
        for raw_read in raw_reads:
            sname = basename(raw_read).split('_1.fastq.gz')[0].split('_2.fastq.gz')[0]
            # An empty filter set means "keep everything".
            if samples and sname not in samples:
                continue
            # Keys arrive in sorted order, so each per-sample list stays sorted.
            raw_read_files.setdefault(sname, []).append(raw_read)

        if grouped:
            return list(raw_read_files.values())
        return [read for reads in raw_read_files.values() for read in reads]
Exemplo n.º 2
0
 def list_contigs(self,
                  sample_names=None, city_name=None, project_name=None,
                  contig_file='scaffolds.fasta'):
     """Return the bucket keys of contig files, optionally filtered by sample.

     A key qualifies when it contains ``assemblies`` and its basename equals
     ``contig_file``. When a sample filter is in effect, the name of the
     key's parent directory (up to its first ``.``) must be one of the
     requested samples. The filter is the union of ``sample_names`` and
     whatever ``get_samples_from_city`` returns for the given city/project.
     """
     wanted = set()
     if city_name or project_name:
         wanted.update(get_samples_from_city(city_name, project_name=project_name))
     if sample_names:
         wanted.update(sample_names)

     matches = []
     for obj in self.bucket.objects.all():
         path = obj.key
         if 'assemblies' not in path or contig_file != basename(path):
             continue
         # An empty filter set means no sample restriction at all.
         if wanted and basename(dirname(path)).split('.')[0] not in wanted:
             continue
         matches.append(path)
     return matches
Exemplo n.º 3
0
def nonhuman_reads(sample_names=None, city_name=None, project_name=None):
    """Map each sample name to the sorted list of its non-human read files.

    Files come from ``list_public_files()`` and are kept when their path
    contains both ``nonhuman_read`` and ``/human_filtered_data/``. The sample
    name is the file's basename up to its first ``.``. If ``sample_names``
    and/or a city/project are given, only samples in their union are kept;
    with no filter, every sample is included.
    """
    wanted = set()
    if city_name or project_name:
        wanted.update(get_samples_from_city(city_name, project_name=project_name))
    if sample_names:
        wanted.update(sample_names)

    candidates = [
        path for path in list_public_files()
        if '/human_filtered_data/' in path and 'nonhuman_read' in path
    ]

    files_by_sample = {}
    for path in candidates:
        sample = basename(path).split('.')[0]
        if wanted and sample not in wanted:
            continue
        files_by_sample.setdefault(sample, []).append(path)

    # Sort each sample's files once, after grouping.
    for paths in files_by_sample.values():
        paths.sort()
    return files_by_sample
Exemplo n.º 4
0
 def test_get_samples_from_city_project(self):
     """Test that we can filter sample names by city and project."""
     sample_names = get_samples_from_city('swansea', project_name='tigress')
     # assertEqual reports the actual count on failure, unlike assertTrue(a == b).
     self.assertEqual(len(sample_names), 6)
Exemplo n.º 5
0
 def test_get_samples_from_project(self):
     """Test that we can filter sample names by project alone (no city)."""
     sample_names = get_samples_from_city(None, project_name='tigress')
     # assertEqual reports the actual count on failure, unlike assertTrue(a == b).
     self.assertEqual(len(sample_names), 83)
Exemplo n.º 6
0
 def test_get_samples_from_city(self):
     """Check that looking up a city yields a non-empty result.

     Only truthiness is asserted; the names themselves are assumed correct.
     """
     paris_samples = get_samples_from_city('paris')
     self.assertTrue(paris_samples)