Пример #1
0
    def validate_chipseq_experiments(self):
        """Check each ChIP-seq experiment in ``self.chipseq_experiment_ids``.

        For every experiment, the following problems are written to
        ``self.fout`` as tab-separated ``<experiment id>``, ``<message>`` lines:

        * the experiment's target has no upstream identifier (reported once
          per target; reported ids are kept in ``self.unregistered_targets``);
        * the experiment has no replicates;
        * the experiment has no wild-type control;
        * the experiment has no paired-input (control) replicates.

        Every replicate and every control replicate is additionally passed to
        ``validate_gm_for_crispr_biosample``.

        ``self.fout`` is closed before this method returns, including when one
        of the record lookups raises, so nothing more can be written to the
        report afterwards.
        """
        try:
            for rec_id in self.chipseq_experiment_ids:
                exp = models.ChipseqExperiment(rec_id)
                str_exp_id = str(exp.id)
                target = models.Target(exp.target_id)
                # The identifier may be unset (None) rather than an empty
                # string; calling .strip() on it directly would raise for
                # exactly the targets this check is meant to report.
                target_upstream = (target.upstream_identifier or "").strip()
                if not target_upstream:
                    # Report each unregistered target only once per run.
                    if target.id not in self.unregistered_targets:
                        self.unregistered_targets.append(target.id)
                        msg = "Target {} not registred.".format(target.id)
                        self.fout.write(str_exp_id + "\t" + msg + "\n")

                exp_rep_ids = exp.replicate_ids
                if not exp_rep_ids:
                    msg = "ChipseqExperiment missing replicates."
                    self.fout.write(str_exp_id + "\t" + msg + "\n")
                else:
                    for rep_id in exp_rep_ids:
                        self.validate_gm_for_crispr_biosample(
                            chip_exp_id=exp.id, biosample_id=rep_id)

                # Check WT input present
                if not exp.wild_type_control_id:
                    msg = "ChipseqExperiment missing WT control."
                    self.fout.write(str_exp_id + "\t" + msg + "\n")

                # Check Paired input present and has GM
                pi_ids = exp.control_replicate_ids
                if not pi_ids:
                    msg = "ChipseqExperiment missing paired input."
                    self.fout.write(str_exp_id + "\t" + msg + "\n")
                else:
                    for pi_id in pi_ids:
                        self.validate_gm_for_crispr_biosample(
                            chip_exp_id=exp.id, biosample_id=pi_id)
        finally:
            # Close the report even if a lookup above failed, so whatever was
            # written so far is flushed and the handle is not leaked.
            self.fout.close()
Пример #2
0
def main():
    """Create or patch ``models.Target`` records from a list of target records.

    The input records come from ``euc.Connection("prod").search(url=...)``
    when ``args.url`` is set, otherwise from the JSON file at ``args.infile``.
    Each record is expected to provide ``organism.scientific_name``,
    ``label``, ``@id`` and ``dbxref``.

    For every record whose organism equals ``SPECIES``:

    * if no Target with that name exists, a new one is posted;
    * if one exists with a different upstream identifier, it is patched;
    * if one exists with the same upstream identifier, the record is skipped.

    Progress is printed to stdout.

    Raises:
        Exception: if the "Admin" user, which new records are associated
            with, cannot be found.
    """
    parser = get_parser()
    args = parser.parse_args()
    url = args.url
    infile = args.infile
    if url:
        conn = euc.Connection("prod")
        results = conn.search(url=url)
    else:
        # Context manager so the input file is closed once it has been parsed.
        with open(infile) as fh:
            results = json.load(fh)
    admin = models.User.find_by({"first_name": "Admin"})
    if not admin:
        raise Exception("Could not find the Admin user in the database, which is needed for associating with new records.")

    # Maps a dbxref prefix to the payload field that stores its accession.
    xref_fields = {
        "ENSEMBL": "ensembl",
        "UniProtKB": "uniprotkb",
        "RefSeq": "refseq",
    }

    created = 0
    patched = 0
    total = 0
    for rec in results:
        patch = False
        # Counts every input record, including ones skipped below.
        total += 1
        organism = rec["organism"]["scientific_name"]
        if organism != SPECIES:
            continue
        payload = {}
        label = rec["label"]
        payload["name"] = label
        # Check if the target already exists in the database.
        pulsar_record = models.Target.find_by({"name": label})
        # The upstream identifier is the last path component of "@id".
        upstream = rec["@id"].strip("/").split("/")[-1]
        if pulsar_record and upstream != pulsar_record["upstream_identifier"]:
            patch = True
        elif pulsar_record:
            # Existing record already carries this upstream identifier.
            continue
        payload["upstream_identifier"] = upstream
        payload["user_id"] = admin["id"]
        for ref in rec["dbxref"]:
            # Split on the last colon: "<prefix>:<accession>".
            prefix, accession = ref.rsplit(":", 1)
            field = xref_fields.get(prefix)
            if field:
                payload[field] = accession
        if patch:
            print("Patching {}".format(payload))
            target = models.Target(pulsar_record["id"])
            target.patch(payload)
            patched += 1
            print("Patched: {}".format(patched))
        else:
            print("Creating {}".format(payload))
            models.Target.post(payload)
            created += 1
            print("Created: {}".format(created))
        print("Total processed: {}".format(total))
Пример #3
0
 def validate_gm_for_crispr_biosample(self, chip_exp_id, biosample_id):
     """Check the CRISPR modification (GM) attached to a biosample.

     Writes a tab-separated ``<chip_exp_id>``, ``<message>`` line to
     ``self.fout`` when the biosample has no CRISPR modification, or when the
     target of the modification's donor construct or of any of its CRISPR
     constructs has no upstream identifier. Each such target is reported
     once; its id is then remembered in ``self.unregistered_targets``.

     Args:
         chip_exp_id: Id of the ChIP-seq experiment used to label the lines.
         biosample_id: Id of the biosample to inspect.
     """
     exp_label = str(chip_exp_id)

     def report(message):
         # One report line: experiment id, tab, message.
         self.fout.write("{}\t{}\n".format(exp_label, message))

     def flag_if_unregistered(target):
         # Nothing to do for registered or already-reported targets.
         if target.upstream_identifier or target.id in self.unregistered_targets:
             return
         self.unregistered_targets.append(target.id)
         report("Target {} not registred.".format(target.id))

     sample = models.Biosample(biosample_id)
     modification_id = sample.crispr_modification_id
     if not modification_id:
         report("Exp. biosample {} missing GM.".format(sample.id))
         return

     # Verify that GM target is present.
     modification = models.CrisprModification(modification_id)
     donor = models.DonorConstruct(modification.donor_construct_id)
     flag_if_unregistered(models.Target(donor.target_id))

     # Load all CRISPR constructs first, then check each one's target.
     constructs = [
         models.CrisprConstruct(construct_id)
         for construct_id in modification.crispr_construct_ids
     ]
     for construct in constructs:
         flag_if_unregistered(models.Target(construct.target_id))