Пример #1
0
    def handle(self, **options):
        """Switch on reference-URL flags for visible species whose URLs resolve.

        For every ``Species`` with ``is_visible=True`` the Cornell, Wikipedia
        and MN Bird Atlas URLs are resolved, in that order.  When a URL
        resolves and the matching ``has_*`` flag is falsy, the flag is set to
        ``True`` and the bird is saved.  When a URL does not resolve but the
        flag is truthy, a warning is written to stderr and the flag is left
        unchanged.
        """
        require_db_write_acknowledgement()

        for bird in Species.objects.filter(is_visible=True):
            self.stdout.write('Processing bird {}'.format(bird))

            # One entry per external site:
            # (flag attribute, resolver, "added" message, "failed" message)
            sites = (
                ('has_cornell_all_about_birds_url',
                 bird.get_resolved_cornell_all_about_birds_url,
                 '\tAdded Cornell url for {}',
                 '\tWARNING: Cornell lookup failed for {}'),
                ('has_wikipedia_url',
                 bird.get_resolved_wikipedia_url,
                 '\tAdded Wikipedia url for {}',
                 '\tWARNING: Wikipedia lookup failed for {}'),
                ('has_mn_bird_atlas_url',
                 bird.get_resolved_mn_bird_atlas_url,
                 '\tAdded MN Bird Atlas url for {}',
                 '\tWARNING: MN Bird Atlas lookup failed for {}'),
            )

            for flag_name, resolve, added_msg, failed_msg in sites:
                if resolve():
                    # Only write to the database when the flag changes.
                    if not getattr(bird, flag_name):
                        setattr(bird, flag_name, True)
                        bird.save()
                        self.stdout.write(added_msg.format(bird))
                elif getattr(bird, flag_name):
                    # Previously flagged as available, but no longer resolves.
                    self.stderr.write(failed_msg.format(bird))
Пример #2
0
    def handle(self, **options):
        """Sync gene descriptions from a WormBase file into the database.

        Every existing ``Gene`` must appear in the parsed file; its
        functional and gene-class descriptions are overwritten from the
        file.  Disagreements between the WormBase names and the stored
        cosmid/locus are reported on stderr but do not abort the run.  File
        entries that match no existing ``Gene`` are then created, and
        ``_genes_to_json()`` is called last.

        Raises:
            CommandError: if an existing gene is missing from the file.
        """
        f = options['file']

        require_db_write_acknowledgement()

        descriptions = _parse_wormbase_file(f)

        num_mismatches = 0
        for gene in Gene.objects.all():
            if gene.id not in descriptions:
                raise CommandError(
                    '{} not found in WormBase file'.format(gene))
            info = descriptions[gene.id]

            # Sanity check: Does WormBase molecular_name match the
            # cosmid_id in Firoz's database?
            wb_molecular = info['molecular_name']
            firoz_cosmid = gene.cosmid_id
            if not wb_molecular.startswith(firoz_cosmid):
                num_mismatches += 1
                self.stderr.write('Molecular/cosmid mismatch for {}: '
                                  'WormBase says {}, Firoz says {}'.format(
                                      gene, wb_molecular, firoz_cosmid))

            # Sanity check: Does WormBase public_name match the
            # locus in Firoz's database?  An empty locus is accepted when
            # WormBase's public name is simply the cosmid id.
            wb_public = info['public_name']
            firoz_locus = gene.locus
            if (wb_public != firoz_locus
                    and not (firoz_locus == '' and wb_public == firoz_cosmid)):
                num_mismatches += 1
                self.stderr.write('Public/locus mismatch for {}: '
                                  'WormBase says {}, Firoz says {}'.format(
                                      gene, wb_public, firoz_locus))

            gene.functional_description = info['concise_description']
            gene.gene_class_description = info['gene_class_description']
            # Drop handled entries so that only genes absent from the
            # database are left in ``descriptions`` after this loop.
            del descriptions[gene.id]
            gene.save()

        if num_mismatches:
            self.stderr.write(
                'Total number mismatches: {}'.format(num_mismatches))

        # Whatever is left was not in the database yet.  Look up by primary
        # key only and pass the remaining fields as ``defaults``;
        # get_or_create() already saves a newly created row, so no extra
        # save() is needed.
        for gene_id, info in descriptions.items():
            Gene.objects.get_or_create(
                id=gene_id,
                defaults={
                    'cosmid_id': info['molecular_name'],
                    'locus': info['public_name'],
                    'gene_type': '',
                    'gene_class_description': info['gene_class_description'],
                    'functional_description': info['concise_description'],
                })

        _genes_to_json()
Пример #3
0
    def handle(self, **options):
        """Import BLAT results from a tab-delimited file.

        Each row must provide the columns ``sequencing_id``, ``clone_hit``,
        ``e_value``, ``bit_score`` and ``hit_rank``.  One
        ``LibrarySequencingBlatResult`` is saved per row, as soon as the
        row has been validated.

        Raises:
            CommandError: if a row references an unknown
                ``LibrarySequencing`` or ``Clone``, or if a numeric column
                is missing or cannot be converted.
        """
        require_db_write_acknowledgement()

        f = options['file']

        reader = csv.DictReader(f, delimiter='\t')

        for row in reader:
            sequencing_id = row['sequencing_id']
            clone_hit = row['clone_hit']
            e_value = row['e_value']
            bit_score = row['bit_score']
            hit_rank = row['hit_rank']

            try:
                sequencing = LibrarySequencing.objects.get(id=sequencing_id)
            except ObjectDoesNotExist:
                raise CommandError('ID {} not found in LibrarySequencing'
                                   .format(sequencing_id))

            try:
                clone = Clone.objects.get(pk=clone_hit)
            except ObjectDoesNotExist:
                raise CommandError('clone_hit {} not present in database'
                                   .format(clone_hit))

            # csv.DictReader fills the missing trailing columns of a short
            # row with None, and float(None)/int(None) raise TypeError
            # rather than ValueError, so both are reported the same way.
            try:
                e_value = float(e_value)
            except (TypeError, ValueError):
                raise CommandError('e_value {} not convertible to float'
                                   .format(e_value))

            try:
                # Scores may be written as floats ("123.0"); truncate them.
                # int(float('inf')) raises OverflowError.
                bit_score = int(float(bit_score))
            except (TypeError, ValueError, OverflowError):
                raise CommandError('bit_score {} not convertible to int'
                                   .format(bit_score))

            try:
                hit_rank = int(hit_rank)
            except (TypeError, ValueError):
                raise CommandError('hit_rank {} not convertible to int'
                                   .format(hit_rank))

            result = LibrarySequencingBlatResult(
                sequencing=sequencing,
                clone_hit=clone,
                e_value=e_value,
                bit_score=bit_score,
                hit_rank=hit_rank
            )

            result.save()
Пример #4
0
    def handle(self, **options):
        """Import sequencing results described by two CSV inputs.

        ``cherrypick_list`` maps each sequencing well (destination plate and
        well) to the ``LibraryStock`` it was picked from; ``tracking_numbers``
        lists the tracking numbers and order dates that are handed, one by
        one, to ``process_tracking_number`` together with that mapping.

        Raises:
            CommandError: if ``genewiz_output_root`` is not a directory.
        """
        require_db_write_acknowledgement()

        cherrypick_list = options['cherrypick_list']
        tracking_numbers = options['tracking_numbers']
        genewiz_root = options['genewiz_output_root']

        if not os.path.isdir(genewiz_root):
            raise CommandError('genewiz_root directory not found')

        # Stage 1: map "<destination_plate>_<destination_well>" to the
        # LibraryStock named by the cherrypick list.
        seq_to_source = {}

        for entry in csv.DictReader(cherrypick_list):
            plate_id = entry['source_plate'].strip()

            # Rows without a source plate are treated as empty lines.
            if plate_id:
                stock = LibraryStock.objects.get(
                    plate_id=plate_id,
                    well=entry['source_well'].strip())

                destination = (entry['destination_plate'].strip(),
                               entry['destination_well'].strip())
                seq_to_source['_'.join(destination)] = stock

        # Stage 2: process every tracking number, using the stage 1 mapping
        # to point the results at their LibraryStock.
        for entry in csv.DictReader(tracking_numbers):
            process_tracking_number(entry['tracking_number'].strip(),
                                    entry['order_date'].strip(),
                                    genewiz_root,
                                    seq_to_source)
Пример #5
0
    def handle(self, **options):
        """Import BLAT results from a tab-delimited file.

        Each row must provide the columns ``sequencing_id``, ``clone_hit``,
        ``e_value``, ``bit_score`` and ``hit_rank``.  One
        ``LibrarySequencingBlatResult`` is saved per row, as soon as the
        row has been validated.

        Raises:
            CommandError: if a row references an unknown
                ``LibrarySequencing`` or ``Clone``, or if a numeric column
                is missing or cannot be converted.
        """
        require_db_write_acknowledgement()

        f = options['file']

        def to_number(raw, convert, message):
            """Return ``convert(raw)`` or abort with ``message``."""
            try:
                return convert(raw)
            # TypeError: csv.DictReader fills the missing columns of a short
            # row with None.  OverflowError: int(float('inf')).
            except (TypeError, ValueError, OverflowError):
                raise CommandError(message.format(raw))

        reader = csv.DictReader(f, delimiter='\t')

        for row in reader:
            # Read every column up front so that a missing header fails
            # before any database access.
            sequencing_id = row['sequencing_id']
            clone_hit = row['clone_hit']
            e_value = row['e_value']
            bit_score = row['bit_score']
            hit_rank = row['hit_rank']

            try:
                sequencing = LibrarySequencing.objects.get(id=sequencing_id)
            except ObjectDoesNotExist:
                raise CommandError(
                    'ID {} not found in LibrarySequencing'.format(
                        sequencing_id))

            try:
                clone = Clone.objects.get(pk=clone_hit)
            except ObjectDoesNotExist:
                raise CommandError(
                    'clone_hit {} not present in database'.format(clone_hit))

            e_value = to_number(
                e_value, float, 'e_value {} not convertible to float')
            # Scores may be written as floats ("123.0"); truncate them.
            bit_score = to_number(
                bit_score, lambda raw: int(float(raw)),
                'bit_score {} not convertible to int')
            hit_rank = to_number(
                hit_rank, int, 'hit_rank {} not convertible to int')

            result = LibrarySequencingBlatResult(sequencing=sequencing,
                                                 clone_hit=clone,
                                                 e_value=e_value,
                                                 bit_score=bit_score,
                                                 hit_rank=hit_rank)

            result.save()
Пример #6
0
    def handle(self, **options):
        """Load sequencing results and link them to their library stocks.

        The ``cherrypick_list`` CSV is read first to build a lookup from
        sequencing plate/well to ``LibraryStock``.  Each row of the
        ``tracking_numbers`` CSV is then passed to
        ``process_tracking_number`` along with that lookup.

        Raises:
            CommandError: if ``genewiz_output_root`` is not a directory.
        """
        require_db_write_acknowledgement()

        cherrypick_list = options["cherrypick_list"]
        tracking_numbers = options["tracking_numbers"]
        genewiz_root = options["genewiz_output_root"]

        genewiz_root_exists = os.path.isdir(genewiz_root)
        if not genewiz_root_exists:
            raise CommandError("genewiz_root directory not found")

        # ---- First stage -------------------------------------------------
        # Build {"<destination_plate>_<destination_well>": LibraryStock}
        # from the cherrypick list.
        seq_to_source = {}

        cherrypick_rows = csv.DictReader(cherrypick_list)
        for cherrypick_row in cherrypick_rows:
            source_plate = cherrypick_row["source_plate"].strip()
            if source_plate == "":
                # Blank line in the input file; nothing to map.
                continue

            source_well = cherrypick_row["source_well"].strip()
            picked_stock = LibraryStock.objects.get(
                plate_id=source_plate, well=source_well)

            destination_plate = cherrypick_row["destination_plate"].strip()
            destination_well = cherrypick_row["destination_well"].strip()

            seq_to_source[destination_plate + "_" + destination_well] = (
                picked_stock)

        # ---- Second stage ------------------------------------------------
        # Hand every listed tracking number to process_tracking_number,
        # which receives the first-stage mapping to resolve LibraryStock
        # pointers.
        tracking_rows = csv.DictReader(tracking_numbers)
        for tracking_row in tracking_rows:
            number = tracking_row["tracking_number"].strip()
            ordered_on = tracking_row["order_date"].strip()
            process_tracking_number(
                number, ordered_on, genewiz_root, seq_to_source)
Пример #7
0
    def handle(self, **options):
        """Create the empty wells missing from 96-well library plates.

        For every 96-well plate (except 'GHR-' plates) that has at least
        one ``LibraryStock``, a ``LibraryStock`` with no intended clone is
        saved for each well of ``get_well_set()`` that the plate lacks.  On
        Ahringer 96-well plates the new stock is linked to its 384-well
        parent, and a message is written to stderr if that parent has an
        intended clone.
        """
        require_db_write_acknowledgement()

        all_wells = get_well_set()

        # Get all wells, to determine which wells are missing.
        #
        # Skip 384-well plates, since the empty wells from these Ahringer
        # parent plates can be created in concert with their 96-well children.
        #
        # Also skip 'GHR-' style plates. Since we don't actually have these
        # plates in the lab, we only care about the small fraction of the
        # wells from these plates that were used to generate our Vidal
        # rearrays.
        #
        # select_related('plate') loads each stock's plate in the same
        # query; without it, reading ``library_stock.plate`` below costs
        # one extra query per stock.
        library_stocks = (LibraryStock.objects
                          .select_related('plate')
                          .filter(plate__number_of_wells=96)
                          .exclude(plate__id__startswith='GHR-'))

        # plate -> set of wells that already exist on that plate
        plate_wells = {}

        for library_stock in library_stocks:
            plate_wells.setdefault(library_stock.plate, set()).add(
                library_stock.well)

        for library_plate, existing_wells in plate_wells.items():
            missing_wells = all_wells - existing_wells

            for missing_well in missing_wells:
                library_stock = LibraryStock(
                    id=generate_library_stock_name(library_plate.id,
                                                   missing_well),
                    plate=library_plate,
                    well=missing_well,
                    parent_stock=None,
                    intended_clone=None,
                )

                if library_plate.is_ahringer_96_plate():
                    parent_stock = _get_ahringer_384_parent(library_stock)

                    # An empty child of a non-empty parent is unexpected;
                    # report it but still create the well.
                    if parent_stock.intended_clone:
                        self.stderr.write(
                            '384 well {} has a non-null intended clone, '
                            'but its 96-well derivative {} is empty\n'.format(
                                parent_stock, library_stock))

                    library_stock.parent_stock = parent_stock

                library_stock.save()
Пример #8
0
    def handle(self, **options):
        """Add empty ``LibraryStock`` rows for wells absent from 96-well plates.

        Plates are discovered through their existing stocks: every 96-well
        plate whose id does not start with 'GHR-' and that has at least one
        ``LibraryStock`` gets a clone-less stock saved for each well of
        ``get_well_set()`` it does not have yet.  For Ahringer 96-well
        plates the new stock's ``parent_stock`` is set to the 384-well
        parent, with a message on stderr if that parent has an intended
        clone.
        """
        require_db_write_acknowledgement()

        all_wells = get_well_set()

        # Get all wells, to determine which wells are missing.
        #
        # Skip 384-well plates, since the empty wells from these Ahringer
        # parent plates can be created in concert with their 96-well children.
        #
        # Also skip 'GHR-' style plates. Since we don't actually have these
        # plates in the lab, we only care about the small fraction of the
        # wells from these plates that were used to generate our Vidal
        # rearrays.
        library_stocks = (LibraryStock.objects
                          .filter(plate__number_of_wells=96)
                          .exclude(plate__id__startswith='GHR-')
                          # Fetch plates in the same query instead of one
                          # extra query per stock in the loop below.
                          .select_related('plate'))

        # Wells already present, grouped by plate.
        plate_wells = {}

        for library_stock in library_stocks:
            wells_on_plate = plate_wells.setdefault(library_stock.plate,
                                                    set())
            wells_on_plate.add(library_stock.well)

        for library_plate, wells_on_plate in plate_wells.items():
            for missing_well in all_wells - wells_on_plate:
                library_stock = LibraryStock(
                    id=generate_library_stock_name(library_plate.id,
                                                   missing_well),
                    plate=library_plate, well=missing_well,
                    parent_stock=None, intended_clone=None,
                )

                if library_plate.is_ahringer_96_plate():
                    parent_stock = _get_ahringer_384_parent(library_stock)

                    # Report (but do not block on) a parent well that is
                    # not empty itself.
                    if parent_stock.intended_clone:
                        self.stderr.write(
                            '384 well {} has a non-null intended clone, '
                            'but its 96-well derivative {} is empty\n'
                            .format(parent_stock, library_stock))

                    library_stock.parent_stock = parent_stock

                library_stock.save()
Пример #9
0
    def handle(self, **options):
        """Rebuild all clone targets from the external mapping database.

        Loads the pk translator, clones, genes and targets from
        ``MAPPING_DATABASE``, deletes every ``CloneTarget``, then calls
        ``_process_clone`` for each ``Clone`` except the one with id
        'L4440'.  The number of clones with no target and with more than
        one target is written to stdout.  The mapping database connection
        is closed before returning, even if processing fails.
        """
        require_db_write_acknowledgement()

        mapping_db = mysql.connector.connect(
            host=MAPPING_DATABASE['HOST'],
            user=MAPPING_DATABASE['USER'],
            passwd=MAPPING_DATABASE['PASSWORD'],
            db=MAPPING_DATABASE['NAME'])

        try:
            cursor = mapping_db.cursor()

            # Get dictionary to translate from this database's pk to the
            #   mapping database pk
            pk_translator = _get_pk_translator(cursor)

            # Get all info from the mapping database
            all_mapping_clones = _get_all_mapping_clones(cursor)
            all_mapping_genes = _get_all_mapping_genes(cursor)
            all_mapping_targets = _get_all_mapping_targets(cursor)

            # Delete all Clone-to-Gene mappings from this database.
            CloneTarget.objects.all().delete()

            # Counters to keep track of no-target and multiple-target cases
            num_clones_no_targets = 0
            num_clones_multiple_targets = 0

            # Iterate over the clones in this table, updating all mapping
            # info
            clones = Clone.objects.exclude(id='L4440')
            for clone in clones:
                num_targets = _process_clone(clone, pk_translator,
                                             all_mapping_clones,
                                             all_mapping_genes,
                                             all_mapping_targets)

                if num_targets == 0:
                    num_clones_no_targets += 1

                elif num_targets > 1:
                    num_clones_multiple_targets += 1

        finally:
            # Release the external connection whether or not the rebuild
            # succeeded.
            mapping_db.close()

        self.stdout.write(
            '{} clones with no targets.'.format(num_clones_no_targets))
        self.stdout.write('{} clones with multiple targets.'.format(
            num_clones_multiple_targets))
Пример #10
0
    def handle(self, **options):
        """Set the reference-URL flags of every species whose URLs resolve.

        For each ``Species`` the ABC Bird of the Week, Cornell and Wikipedia
        URLs are resolved in that order; each one that resolves sets its
        ``has_*`` flag to ``True``.  Flags are never cleared here, and every
        bird is saved once whether or not anything changed.
        """
        require_db_write_acknowledgement()

        for bird in Species.objects.all():
            # (resolver, flag attribute) pairs, in lookup order.
            lookups = (
                (bird.get_resolved_abc_bird_of_the_week_url,
                 'has_abc_bird_of_the_week_url'),
                (bird.get_resolved_cornell_all_about_birds_url,
                 'has_cornell_all_about_birds_url'),
                (bird.get_resolved_wikipedia_url,
                 'has_wikipedia_url'),
            )

            for resolve, flag_name in lookups:
                if resolve():
                    setattr(bird, flag_name, True)

            bird.save()
Пример #11
0
    def handle(self, **options):
        """Overwrite gene descriptions with those from a WormBase file.

        Every ``Gene`` in the database must be present in the parsed file.
        Its functional and gene-class descriptions are replaced and the
        gene is saved.  Disagreements between WormBase's names and the
        stored cosmid/locus are counted and reported on stderr without
        stopping the run.

        Raises:
            CommandError: if a gene is missing from the WormBase file.
        """
        wormbase_file = options['file']

        require_db_write_acknowledgement()

        descriptions = _parse_wormbase_file(wormbase_file)

        mismatch_count = 0
        for gene in Gene.objects.all():
            if gene.id not in descriptions:
                raise CommandError('{} not found in WormBase file'
                                   .format(gene))
            wormbase = descriptions[gene.id]

            # Check 1: WormBase's molecular_name should begin with the
            # cosmid_id stored in Firoz's database.
            molecular_name = wormbase['molecular_name']
            cosmid = gene.cosmid_id
            cosmid_agrees = molecular_name.startswith(cosmid)
            if not cosmid_agrees:
                mismatch_count += 1
                self.stderr.write('Molecular/cosmid mismatch for {}: '
                                  'WormBase says {}, Firoz says {}'
                                  .format(gene, molecular_name, cosmid))

            # Check 2: WormBase's public_name should equal the stored
            # locus; an empty locus is fine when the public name is just
            # the cosmid id.
            public_name = wormbase['public_name']
            locus = gene.locus
            locus_agrees = (public_name == locus
                            or (locus == '' and public_name == cosmid))
            if not locus_agrees:
                mismatch_count += 1
                self.stderr.write('Public/locus mismatch for {}: '
                                  'WormBase says {}, Firoz says {}'
                                  .format(gene, public_name, locus))

            gene.functional_description = wormbase['concise_description']
            gene.gene_class_description = wormbase['gene_class_description']
            gene.save()

        if mismatch_count > 0:
            self.stderr.write('Total number mismatches: {}'
                              .format(mismatch_count))
Пример #12
0
    def handle(self, **options):
        """Rebuild all clone targets from the external mapping database.

        Loads the pk translator, clones, genes and targets from
        ``MAPPING_DATABASE``, deletes every ``CloneTarget``, then calls
        ``_process_clone`` for each ``Clone`` except the one with id
        'L4440'.  The number of clones with no target and with more than
        one target is written to stdout.  The mapping database connection
        is closed before returning, even if processing fails.
        """
        require_db_write_acknowledgement()

        mapping_db = MySQLdb.connect(host=MAPPING_DATABASE['HOST'],
                                     user=MAPPING_DATABASE['USER'],
                                     passwd=MAPPING_DATABASE['PASSWORD'],
                                     db=MAPPING_DATABASE['NAME'])

        # Counters to keep track of no-target and multiple-target cases
        num_clones_no_targets = 0
        num_clones_multiple_targets = 0

        try:
            cursor = mapping_db.cursor()

            # Get dictionary to translate from this database's pk to the
            #   mapping database pk
            pk_translator = _get_pk_translator(cursor)

            # Get all info from the mapping database
            all_mapping_clones = _get_all_mapping_clones(cursor)
            all_mapping_genes = _get_all_mapping_genes(cursor)
            all_mapping_targets = _get_all_mapping_targets(cursor)

            # Delete all Clone-to-Gene mappings from this database.
            CloneTarget.objects.all().delete()

            # Iterate over the clones in this table, updating all mapping
            # info
            clones = Clone.objects.exclude(id='L4440')
            for clone in clones:
                num_targets = _process_clone(
                    clone, pk_translator, all_mapping_clones,
                    all_mapping_genes, all_mapping_targets)

                if num_targets == 0:
                    num_clones_no_targets += 1

                elif num_targets > 1:
                    num_clones_multiple_targets += 1

        finally:
            # Always release the external connection, including on failure.
            mapping_db.close()

        self.stdout.write('{} clones with no targets.'
                          .format(num_clones_no_targets))
        self.stdout.write('{} clones with multiple targets.'
                          .format(num_clones_multiple_targets))
Пример #13
0
    def handle(self, **options):
        """Run migration steps ``start`` through ``end`` (inclusive).

        Steps before ``LAST_STEP`` are called with a cursor on
        ``LEGACY_DATABASE``; ``LAST_STEP`` itself is called with a cursor on
        ``LEGACY_DATABASE_2``.  A database is only connected to when one of
        its steps was requested, and every connection opened here is closed
        before returning, even if a step raises.

        Raises:
            CommandError: if ``start`` is greater than ``end`` or either
                bound lies outside the available steps.
        """
        start = options['start']
        end = options['end']

        if start > end:
            raise CommandError('Start cannot be greater than end')

        if (start < 0) or (end > len(STEPS) - 1):
            raise CommandError(
                'Start and end must be in range 0-{}'.format(LAST_STEP))

        # The last step uses a different database, so take it out of the
        # range handled by the first connection.
        do_last_step = (end == LAST_STEP)
        if do_last_step:
            end = LAST_STEP - 1

        require_db_write_acknowledgement()

        open_connections = []
        try:
            # Do the steps that involve connecting to Huey-Ling's legacy_db.
            # When only the last step was requested the range is empty and
            # this database is not needed at all.
            if start <= end:
                legacy_db = mysql.connector.connect(
                    host=LEGACY_DATABASE['HOST'],
                    user=LEGACY_DATABASE['USER'],
                    passwd=LEGACY_DATABASE['PASSWORD'],
                    db=LEGACY_DATABASE['NAME'])
                open_connections.append(legacy_db)

                cursor = legacy_db.cursor()

                for step in range(start, end + 1):
                    STEPS[step](self, cursor)

            # This step requires connecting to Kris's legacy_db_2
            if do_last_step:
                legacy_db_2 = mysql.connector.connect(
                    host=LEGACY_DATABASE_2['HOST'],
                    user=LEGACY_DATABASE_2['USER'],
                    passwd=LEGACY_DATABASE_2['PASSWORD'],
                    db=LEGACY_DATABASE_2['NAME'])
                open_connections.append(legacy_db_2)

                cursor = legacy_db_2.cursor()
                STEPS[LAST_STEP](self, cursor)

        finally:
            for connection in open_connections:
                connection.close()
Пример #14
0
    def handle(self, **options):
        """Run migration steps ``start`` through ``end`` (inclusive).

        Steps before ``LAST_STEP`` are called with a cursor on
        ``LEGACY_DATABASE``; ``LAST_STEP`` itself is called with a cursor on
        ``LEGACY_DATABASE_2``.  A database is only connected to when one of
        its steps was requested, and every connection opened here is closed
        before returning, even if a step raises.

        Raises:
            CommandError: if ``start`` is greater than ``end`` or either
                bound lies outside the available steps.
        """
        start = options['start']
        end = options['end']

        if start > end:
            raise CommandError('Start cannot be greater than end')

        if (start < 0) or (end > len(STEPS) - 1):
            raise CommandError('Start and end must be in range 0-{}'
                               .format(LAST_STEP))

        # The last step talks to a second database; handle it separately
        # from the steps that share the first connection.
        if end == LAST_STEP:
            do_last_step = True
            end = LAST_STEP - 1

        else:
            do_last_step = False

        require_db_write_acknowledgement()

        connections = []
        try:
            # Do the steps that involve connecting to Huey-Ling's legacy_db.
            # Skip the connection when no such step is in range (i.e. only
            # the last step was requested).
            if start <= end:
                legacy_db = MySQLdb.connect(
                    host=LEGACY_DATABASE['HOST'],
                    user=LEGACY_DATABASE['USER'],
                    passwd=LEGACY_DATABASE['PASSWORD'],
                    db=LEGACY_DATABASE['NAME'])
                connections.append(legacy_db)

                cursor = legacy_db.cursor()

                for step in range(start, end + 1):
                    STEPS[step](self, cursor)

            # This step requires connecting to Kris's legacy_db_2
            if do_last_step:
                legacy_db_2 = MySQLdb.connect(
                    host=LEGACY_DATABASE_2['HOST'],
                    user=LEGACY_DATABASE_2['USER'],
                    passwd=LEGACY_DATABASE_2['PASSWORD'],
                    db=LEGACY_DATABASE_2['NAME'])
                connections.append(legacy_db_2)

                cursor = legacy_db_2.cursor()
                STEPS[LAST_STEP](self, cursor)

        finally:
            # Release whichever connections were actually opened.
            for connection in connections:
                connection.close()
Пример #15
0
    def handle(self, **options):
        """Import legacy sequencing results, linked to their library stocks.

        The sequencing-well to library-stock mapping is read from the
        ``SeqPlate`` table of ``LEGACY_DATABASE`` (plates with
        ``SeqPlateID <= 55``).  Each row of the ``tracking_numbers`` CSV is
        then passed to ``process_tracking_number`` along with that mapping.
        The legacy connection is closed once the mapping has been built.

        Raises:
            CommandError: if ``genewiz_output_root`` is not a directory, or
                if no ``LibraryStock`` is found for a legacy row.
        """
        require_db_write_acknowledgement()

        tracking_numbers = options['tracking_numbers']
        genewiz_root = options['genewiz_output_root']

        if not os.path.isdir(genewiz_root):
            raise CommandError('genewiz_root directory not found')

        legacy_db = mysql.connector.connect(host=LEGACY_DATABASE['HOST'],
                                            user=LEGACY_DATABASE['USER'],
                                            passwd=LEGACY_DATABASE['PASSWORD'],
                                            db=LEGACY_DATABASE['NAME'])

        ####################################################
        # FIRST STAGE: Create a dictionary of which
        #   sequences correspond to which library stocks
        #
        # This information is stored in the legacy database.
        ####################################################
        seq_to_source = {}

        try:
            cursor = legacy_db.cursor()

            cursor.execute(
                'SELECT SeqPlateID, Seq96Well, RNAiPlateID, 96well, oriClone '
                'FROM SeqPlate WHERE SeqPlateID <= 55')

            for row in cursor.fetchall():
                seq_plate_number, seq_well = row[0:2]
                seq_plate = 'JL' + str(seq_plate_number)

                try:
                    library_stock = get_library_stock(row[2], row[3])

                except ObjectDoesNotExist:
                    raise CommandError(
                        'LibraryStock not found for {} {}\n'.format(
                            row[0], row[1]))

                # Sanity check that clone matches.  A mismatch is only
                # reported, not fatal; clones whose id contains 'GHR' are
                # exempt from the comparison.
                legacy_clone = row[4]
                if legacy_clone:
                    clone = library_stock.intended_clone
                    if (not clone or
                            (legacy_clone != clone.id
                             and 'GHR' not in clone.id)):
                        self.stderr.write(
                            'WARNING: Legacy clone mismatch for {}: {} {}\n'
                            .format(library_stock, clone, legacy_clone))

                seq_to_source[seq_plate + '_' + seq_well] = library_stock

        finally:
            # The legacy database is only needed for this stage.
            legacy_db.close()

        ####################################
        # SECOND STAGE: Add raw genewiz data
        #   (sequences and quality scores)
        ####################################

        reader = csv.DictReader(tracking_numbers)

        for row in reader:
            tracking_number = row['tracking_number'].strip()
            order_date = row['order_date'].strip()
            process_tracking_number(tracking_number, order_date, genewiz_root,
                                    seq_to_source)
Пример #16
0
    def handle(self, **options):
        """Import legacy sequencing results, linked to their library stocks.

        The sequencing-well to library-stock mapping is read from the
        ``SeqPlate`` table of ``LEGACY_DATABASE`` (plates with
        ``SeqPlateID <= 55``).  Each row of the ``tracking_numbers`` CSV is
        then passed to ``process_tracking_number`` along with that mapping.
        The legacy connection is closed once the mapping has been built.

        Raises:
            CommandError: if ``genewiz_output_root`` is not a directory, or
                if no ``LibraryStock`` is found for a legacy row.
        """
        require_db_write_acknowledgement()

        tracking_numbers = options['tracking_numbers']
        genewiz_root = options['genewiz_output_root']

        if not os.path.isdir(genewiz_root):
            raise CommandError('genewiz_root directory not found')

        legacy_db = MySQLdb.connect(host=LEGACY_DATABASE['HOST'],
                                    user=LEGACY_DATABASE['USER'],
                                    passwd=LEGACY_DATABASE['PASSWORD'],
                                    db=LEGACY_DATABASE['NAME'])

        ####################################################
        # FIRST STAGE: Create a dictionary of which
        #   sequences correspond to which library stocks
        #
        # This information is stored in the legacy database.
        ####################################################
        seq_to_source = {}

        try:
            cursor = legacy_db.cursor()

            cursor.execute(
                'SELECT SeqPlateID, Seq96Well, RNAiPlateID, 96well, oriClone '
                'FROM SeqPlate WHERE SeqPlateID <= 55')

            for row in cursor.fetchall():
                seq_plate_number, seq_well = row[0:2]
                seq_plate = 'JL' + str(seq_plate_number)

                try:
                    library_stock = get_library_stock(row[2], row[3])

                except ObjectDoesNotExist:
                    raise CommandError('LibraryStock not found for {} {}\n'
                                       .format(row[0], row[1]))

                # Sanity check that clone matches.  Mismatches are only
                # warned about; clones whose id contains 'GHR' are not
                # compared.
                legacy_clone = row[4]
                if legacy_clone:
                    clone = library_stock.intended_clone
                    if (not clone or (legacy_clone != clone.id and
                                      'GHR' not in clone.id)):
                        self.stderr.write(
                            'WARNING: Legacy clone mismatch for {}: {} {}\n'
                            .format(library_stock, clone, legacy_clone))

                seq_to_source[seq_plate + '_' + seq_well] = library_stock

        finally:
            # Nothing after this stage reads from the legacy database.
            legacy_db.close()

        ####################################
        # SECOND STAGE: Add raw genewiz data
        #   (sequences and quality scores)
        ####################################

        reader = csv.DictReader(tracking_numbers)

        for row in reader:
            tracking_number = row['tracking_number'].strip()
            order_date = row['order_date'].strip()
            process_tracking_number(tracking_number, order_date,
                                    genewiz_root, seq_to_source)
Пример #17
0
    def handle(self, **options):
        """Compare DevStaR count files with stored ``DevstarScore`` rows.

        NOTE(review): the method currently returns right after the
        write-acknowledgement prompt, so everything below the ``return`` is
        unreachable.  This looks like a deliberate kill switch - confirm
        before removing either the ``return`` or the dead code.

        As written, the unreachable body would: select experiments (all of
        them when ``options['all']`` is truthy, otherwise those whose
        ``DevstarScore`` has null adult/larva/embryo areas plus those with
        no ``DevstarScore`` at all), parse each experiment's DevStaR count
        file, and raise ``CommandError`` if the parsed values do not match
        an existing ``DevstarScore``.
        """
        require_db_write_acknowledgement()
        # Everything after this unconditional return is dead code.
        return

        if options['all']:
            experiments = Experiment.objects.all()
        else:
            # Experiments whose score exists but has all three area fields
            # null.
            null_devstar_experiment_pks = (DevstarScore.objects.filter(
                area_adult__isnull=True,
                area_larva__isnull=True,
                area_embryo__isnull=True).values_list('experiment', flat=True))

            null_devstar_experiments = Experiment.objects.filter(
                pk__in=null_devstar_experiment_pks)

            # Experiments that have no DevstarScore row at all.
            all_devstar_experiment_pks = (
                DevstarScore.objects.all().values_list('experiment',
                                                       flat=True))

            no_devstar_experiments = Experiment.objects.exclude(
                pk__in=all_devstar_experiment_pks)

            # Merge both groups and process them in id order.
            experiments = sorted(chain(null_devstar_experiments,
                                       no_devstar_experiments),
                                 key=lambda instance: instance.id)

        for experiment in experiments:
            path = experiment.get_devstar_count_path()

            if not os.path.isfile(path):
                self.stderr.write('WARNING: Skipping experiment {} due to '
                                  'DevStaR file not found'.format(experiment))
                continue

            # One slot per entry of PATTERNS; presumably PATTERNS has six
            # entries - TODO confirm.
            counts = [None] * 6

            with open(path, 'r') as f:
                for line in f:
                    for i, pattern in enumerate(PATTERNS):
                        if line.startswith(pattern):
                            # The value is the second whitespace-separated
                            # token on the matching line.
                            try:
                                counts[i] = int(line.split()[1])
                            except Exception:
                                # NOTE(review): ``i`` is the index into
                                # PATTERNS, not a line number, despite the
                                # wording of the message.
                                raise CommandError('Error parsing line {} '
                                                   'in experiment {}'.format(
                                                       i, experiment))

            for i, count in enumerate(counts):
                if count is None:
                    self.stderr.write('WARNING: {} count is missing '
                                      'for experiment {}'.format(
                                          PATTERNS[i], experiment))

            new_score = DevstarScore(
                experiment=experiment,
                is_bacteria_present=counts[0],
                area_adult=counts[1],
                area_larva=counts[2],
                area_embryo=counts[3],
                count_adult=counts[4],
                count_larva=counts[5],
            )

            # If score already exists in database, check for match
            try:
                previous_score = DevstarScore.objects.get(
                    experiment=experiment)

                if not previous_score.matches_raw_fields(new_score):
                    raise CommandError(
                        'The DevStaR txt output does '
                        'not match the existing database '
                        'entry for Experiment {}'.format(experiment))

            except ObjectDoesNotExist:
                self.stderr.write('DevStaR object not found for {}. '
                                  'Attempting to save.'.format(experiment))
                # NOTE(review): the message says "Attempting to save", but
                # only full_clean() is called here; new_score.save() is
                # never invoked in this block.
                try:
                    new_score.full_clean()

                except ValidationError:
                    raise CommandError(
                        'Cleaning DevStaR object {} '
                        'raised ValidationError'.format(new_score))
Пример #18
0
    def handle(self, **options):
        """Compare each experiment's DevStaR count file with its score row.

        NOTE(review): the ``return`` immediately after the write
        acknowledgement short-circuits the command, so the rest of this
        method is currently dead code. It is presumably an intentional
        way of disabling the command and is therefore left in place;
        verify before removing it.

        Intended flow once enabled:

        * Pick experiments: every Experiment if ``options['all']`` is
          truthy; otherwise the ones whose DevstarScore has all three
          area fields null together with the ones that have no
          DevstarScore, sorted by id.
        * Skip (with a warning) experiments whose count file is missing.
        * Read the integer that follows each PATTERNS prefix in the file
          and build an unsaved DevstarScore from those values.
        * If a DevstarScore already exists, require that it matches the
          parsed one; otherwise run ``full_clean()`` on the new one.

        Raises:
            CommandError: on an unparsable matched line, on a mismatch
                with the existing database entry, or when validation of
                a new score fails.
        """
        require_db_write_acknowledgement()
        # NOTE(review): unconditional early exit; code below never runs.
        return

        if options['all']:
            experiments = Experiment.objects.all()
        else:
            # Scores that exist but carry no area measurements.
            null_devstar_experiment_pks = (
                DevstarScore.objects.filter(
                    area_adult__isnull=True,
                    area_larva__isnull=True,
                    area_embryo__isnull=True)
                .values_list('experiment', flat=True))

            null_devstar_experiments = Experiment.objects.filter(
                pk__in=null_devstar_experiment_pks)

            # Experiments without any score row.
            all_devstar_experiment_pks = (
                DevstarScore.objects.all()
                .values_list('experiment', flat=True))

            no_devstar_experiments = Experiment.objects.exclude(
                pk__in=all_devstar_experiment_pks)

            experiments = sorted(
                chain(null_devstar_experiments, no_devstar_experiments),
                key=lambda instance: instance.id)

        for experiment in experiments:
            path = experiment.get_devstar_count_path()

            if not os.path.isfile(path):
                self.stderr.write('WARNING: Skipping experiment {} due to '
                                  'DevStaR file not found'
                                  .format(experiment))
                continue

            # counts[i] holds the value parsed for PATTERNS[i]; the six
            # slots map onto the six DevstarScore fields set below.
            counts = [None] * 6

            with open(path, 'r') as f:
                # Number lines from 1 so the parse error names the line
                # in the file rather than the index of the pattern.
                for line_number, line in enumerate(f, 1):
                    for i, pattern in enumerate(PATTERNS):
                        if not line.startswith(pattern):
                            continue
                        try:
                            counts[i] = int(line.split()[1])
                        except (IndexError, ValueError):
                            # Missing second token / missing slot, or a
                            # token that is not an integer.
                            raise CommandError('Error parsing line {} '
                                               'in experiment {}'
                                               .format(line_number,
                                                       experiment))

            for i, count in enumerate(counts):
                if count is None:
                    self.stderr.write('WARNING: {} count is missing '
                                      'for experiment {}'
                                      .format(PATTERNS[i], experiment))

            new_score = DevstarScore(
                experiment=experiment,
                is_bacteria_present=counts[0], area_adult=counts[1],
                area_larva=counts[2], area_embryo=counts[3],
                count_adult=counts[4], count_larva=counts[5],
            )

            # If score already exists in database, check for match
            try:
                previous_score = DevstarScore.objects.get(
                    experiment=experiment)

                if not previous_score.matches_raw_fields(new_score):
                    raise CommandError('The DevStaR txt output does '
                                       'not match the existing database '
                                       'entry for Experiment {}'
                                       .format(experiment))

            except ObjectDoesNotExist:
                self.stderr.write('DevStaR object not found for {}. '
                                  'Attempting to save.'
                                  .format(experiment))
                # NOTE(review): despite the message, the new score is
                # only validated here and never saved. Confirm whether a
                # save() call is missing.
                try:
                    new_score.full_clean()

                except ValidationError:
                    raise CommandError('Cleaning DevStaR object {} '
                                       'raised ValidationError'
                                       .format(new_score))