def __setCounts__(self, infile, nec):
    # Loads per-species neoplasia counts from infile into self.records.
    #
    # infile: path of a comma-delimited file; only its base name is printed.
    # nec:    when truthy the counts are stored with rec.setNecropsy, and
    #         rec.setSignificance is called once both rec.necropsy and
    #         rec.other are truthy; otherwise they go to rec.setOther.
    #
    # The first item yielded by unixpath.readFile is kept as the header and
    # is indexed by column name to find each field's position in a row.
    print("\tReading {}...".format(os.path.basename(infile)))
    rows = unixpath.readFile(infile, header=True, d=",")
    for idx, row in enumerate(rows):
        if idx == 0:
            header = row
            continue
        total = int(row[header["RecordsWithDenominators"]])
        if not (self.min < total):
            # Only rows with more than self.min records are considered
            continue
        tid = row[header["taxa_id"]]
        if tid not in self.records:
            species = row[header["Species"]]
            if species == "NA":
                # Unknown taxa without a species name are not stored
                continue
            self.records[tid] = Species(tid, species)
        rec = self.records[tid]
        neoplasia = int(row[header["NeoplasiaWithDenominators"]])
        prevalence = float(row[header["NeoplasiaPrevalence"]])
        if not nec:
            rec.setOther(total, neoplasia, prevalence)
            continue
        rec.setNecropsy(total, neoplasia, prevalence)
        if rec.necropsy and rec.other:
            # Both data sets are present: calculate significance and store
            rec.setSignificance()
def __readDeathBooks__(self):
    # Passes every scientific name in the deathbooks file to __addSpecies__.
    #
    # Reads self.infile with unixpath.readFile. The first item is kept as
    # the header; for every later line the "Scientific name" field is
    # looked up through that header and handed to self.__addSpecies__.
    print("\n\tReading species from deathbooks...")
    for position, line in enumerate(unixpath.readFile(self.infile)):
        if position == 0:
            columns = line
        else:
            self.__addSpecies__(line[columns["Scientific name"]])
def format(self):
    # Writes a preformatted copy of the London Zoo data to self.outfile.
    #
    # self.outheader is written first. While self.header is falsy, the line
    # read from self.infile is stored as self.header instead of being
    # written; every other line is passed through self.__formatLine__ and
    # written as one comma-joined row.
    with open(self.outfile, "w") as out:
        print("\n\tFormatting London Zoo data...")
        out.write(self.outheader)
        for line in readFile(self.infile):
            if self.header:
                fields = self.__formatLine__(line)
                out.write(",".join(fields) + "\n")
            else:
                self.header = line
def __setConfig__(self):
    # Builds self.commands from the tab-delimited file at self.config.
    #
    # A deep copy of the first item yielded by unixpath.readFile is kept as
    # the header. Each later row that is non-empty and whose first element
    # is not "#" becomes a Command built from its "Command" and "Directory"
    # fields plus self.user, self.password and self.outdir.
    for idx, fields in enumerate(unixpath.readFile(self.config, True, "\t")):
        if idx == 0:
            header = deepcopy(fields)
            continue
        if not fields or fields[0] == "#":
            # Skip empty rows and rows marked with a leading "#" element
            continue
        command = Command(
            fields[header["Command"]],
            fields[header["Directory"]],
            self.user,
            self.password,
            self.outdir,
        )
        self.commands.append(command)
def __setAccounts__(self, infile):
    # Fills self.accounts with account_id -> submitter_name from infile.
    #
    # infile: path of a comma-delimited accounts file.
    #
    # The first item yielded by unixpath.readFile is kept as the header;
    # every later row is stored, a repeated account_id overwriting the
    # earlier entry.
    print("\n\tReading accounts file...")
    rows = unixpath.readFile(infile, d=",", header=True)
    for idx, row in enumerate(rows):
        if idx:
            submitter = row[head["submitter_name"]]
            self.accounts[row[head["account_id"]]] = submitter
        else:
            head = row
def __setAccounts__(self):
    # Fills self.accounts with account_id -> submitter_name from
    # self.accountfile (comma-delimited), skipping rows whose
    # submitter_name is the literal "NA".
    #
    # The first item yielded by unixpath.readFile is kept as the header.
    print("\n\tReading accounts file...")
    rows = unixpath.readFile(self.accountfile, header=True, d=",")
    for idx, row in enumerate(rows):
        if idx == 0:
            head = row
            continue
        submitter = row[head["submitter_name"]]
        if submitter != "NA":
            self.accounts[row[head["account_id"]]] = submitter
def __setCounts__(self, infile):
    # Stores one Species object per row of infile in self.records.
    #
    # infile: path of a comma-delimited file (printed as given).
    #
    # The first item yielded by unixpath.readFile is kept as the header.
    # Each later row is keyed by its SPECIES field; the Species object is
    # built from that key followed by the NAME, CASES, ZIMS and DEATHBOOKS
    # fields, in that order.
    print("\n\tReading {}...".format(infile))
    rows = unixpath.readFile(infile, header=True, d=",")
    for idx, row in enumerate(rows):
        if idx == 0:
            header = row
            continue
        key = row[header[SPECIES]]
        details = [row[header[col]] for col in (NAME, CASES, ZIMS, DEATHBOOKS)]
        self.records[key] = Species(key, *details)
def __countRecords__(self):
    # Counts the records in self.infile for each account id.
    #
    # self.counts maps account_id -> [number of records, approved], where
    # "approved" is the "Approved" field of the first counted row for that
    # account. Rows with account_id "-1" are ignored, as are rows where
    # neither "Zoo" nor "Institute" equals "1".
    #
    # The first item yielded by unixpath.readFile is kept as the header.
    print("\tReading records file...")
    rows = unixpath.readFile(self.infile, header=True, d=",")
    for idx, row in enumerate(rows):
        if idx == 0:
            head = row
            continue
        account = row[head["account_id"]]
        if account == "-1":
            continue
        if row[head["Zoo"]] != "1" and row[head["Institute"]] != "1":
            continue
        if account not in self.counts:
            # account_id: #records, approved
            self.counts[account] = [0, row[head["Approved"]]]
        self.counts[account][0] += 1
def __setCases__(self, infile):
    # Groups the rows of infile that have a mass present by species.
    #
    # infile: path of a file delimited by self.d (printed as given).
    #
    # self.cases maps the stripped "Species" field to a list of rows; each
    # stored row holds the fields named in self.columns, in that order.
    # Only rows whose "MassPresent" field equals "1" are kept.
    #
    # The first item yielded by unixpath.readFile is kept as the header.
    print("\tReading {}...".format(infile))
    for idx, row in enumerate(unixpath.readFile(infile, d=self.d)):
        if idx == 0:
            h = row
            continue
        if row[h["MassPresent"]] != "1":
            continue
        taxon = row[h["Species"]].strip()
        if taxon not in self.cases:
            self.cases[taxon] = []
        self.cases[taxon].append([row[h[col]] for col in self.columns])
def __setTaxa__(self, infile):
    # Records the genus (and species, if given) for each common name.
    #
    # infile: path of a file delimited by self.d.
    #
    # self.taxa maps the "CommonName" field to [genus] when the
    # "ScientificName" field contains no space, or to
    # [first word, full scientific name] otherwise. Only the first row seen
    # for a common name is used.
    #
    # The first item yielded by unixpath.readFile is kept as the header.
    print("\n\tReading preformatted file...")
    for idx, row in enumerate(unixpath.readFile(infile, d=self.d)):
        if idx == 0:
            h = row
            continue
        common = row[h["CommonName"]]
        if common in self.taxa:
            continue
        scientific = row[h["ScientificName"]]
        if " " in scientific:
            self.taxa[common] = [scientific.split()[0], scientific]
        else:
            # Store genus only
            self.taxa[common] = [scientific]
def __setCounts__(self, infile):
    # Returns a list of the cutoffs reached by each row of infile.
    #
    # infile: path of a comma-delimited file; only its base name is printed.
    #
    # For every row whose self.col value n (as an int) is greater than
    # self.min, each value in self.bins that n is greater than or equal to
    # is appended to the result. The list is returned in row order, unsorted.
    #
    # The first item yielded by unixpath.readFile is kept as the header.
    cutoffs = []
    print("\tReading {}...".format(os.path.basename(infile)))
    rows = unixpath.readFile(infile, header=True, d=",")
    for idx, row in enumerate(rows):
        if idx == 0:
            header = row
            continue
        n = int(row[header[self.col]])
        if self.min < n:
            # Append value for each cutoff less than or equal to n
            cutoffs.extend(cutoff for cutoff in self.bins if n >= cutoff)
    return cutoffs
def __appendAccounts__(self, infile, outfile):
    # Copies infile to outfile, appending the account name to each record.
    #
    # infile:  path of a comma-delimited records file.
    # outfile: path of the comma-delimited file to write.
    #
    # The first item yielded by unixpath.readFile is kept as the header; its
    # column names are written in index order followed by "Account". For
    # later rows, the name stored in self.accounts for the row's account_id
    # is appended when there is one.
    #
    # NOTE(review): the original indentation was lost. This assumes every
    # row is written, including rows with no matching account (which then
    # have one column fewer than the header) - confirm.
    with open(outfile, "w") as out:
        rows = unixpath.readFile(infile, d=",", header=True)
        for idx, row in enumerate(rows):
            if idx == 0:
                head = row
                names = [""] * len(head)
                for column, position in head.items():
                    names[position] = column
                names.append("Account")
                out.write(",".join(names) + "\n")
                continue
            aid = row[head["account_id"]]
            if aid in self.accounts:
                row.append(self.accounts[aid])
            out.write(",".join(row) + "\n")
def __setTable__(self, infile, columns):
    # Stores the integer values of the given columns in self.records.
    #
    # infile:  path of a comma-delimited file; only its base name is printed.
    # columns: names of the columns to copy into each record.
    #
    # Nothing is returned; self.records is updated in place. Rows are keyed
    # by their self.id field, and a record is created with
    # self.__newRecord__() the first time a key is seen. Empty values and
    # values that cannot be converted with int() leave the record untouched.
    #
    # The first item yielded by unixpath.readFile is kept as the header.
    print("\tReading {}...".format(os.path.basename(infile)))
    rows = unixpath.readFile(infile, header=True, d=",")
    for idx, line in enumerate(rows):
        if idx == 0:
            head = line
            continue
        pid = line[head[self.id]]
        if pid not in self.records:
            self.records[pid] = self.__newRecord__()
        record = self.records[pid]
        for col in columns:
            val = line[head[col]]
            if not val:
                continue
            try:
                number = int(val)
            except (TypeError, ValueError):
                # Only a failed conversion is ignored. The previous bare
                # "except" also hid unrelated errors and KeyboardInterrupt.
                continue
            record[col] = number
def __insertSpecies__(self):
    # Copies self.infile to self.outfile with genus and species filled in.
    #
    # Both files are delimited by self.d. The first item yielded by
    # unixpath.readFile is kept as the header and its column names are
    # written in index order. For later rows whose "Name" field is a key of
    # self.taxa, "Genus" is set to the first stored value and, when a second
    # value exists, "Species" is set to it.
    #
    # NOTE(review): the original indentation was lost. This assumes every
    # row is written, whether or not its name is in self.taxa - confirm.
    print("\tReading input file...")
    with open(self.outfile, "w") as out:
        rows = unixpath.readFile(self.infile, d=self.d)
        for idx, row in enumerate(rows):
            if idx == 0:
                h = row
                # -1 marks a position that no column name maps to
                head = [-1] * len(h)
                for column, position in h.items():
                    head[position] = column
                out.write(self.d.join(head) + "\n")
                continue
            common = row[h["Name"]]
            if common in self.taxa:
                taxonomy = self.taxa[common]
                row[h["Genus"]] = taxonomy[0]
                if len(taxonomy) > 1:
                    row[h["Species"]] = taxonomy[1]
            out.write(self.d.join(row) + "\n")
def __identifySuspect__(self, infile):
    # Writes candidate matches between infile rows and self.cases.
    #
    # infile: path of a file delimited by self.d.
    #
    # The first item yielded by unixpath.readFile is kept as the header;
    # the output header is self.columns followed by the input column names
    # in index order. For every later row, the "Taxa" field is stripped and
    # cut to its first two words when it contains more than one space. If
    # that taxon is a key of self.cases, each stored case whose sixth value
    # (with "." replaced by "/") equals the row's stripped "Death_Date" is
    # written to self.outfile, followed by the input row. The number of
    # rows written is printed at the end.
    matches = 0
    print("\tIdentifying suspect cases...")
    with open(self.outfile, "w") as out:
        rows = unixpath.readFile(infile, d=self.d)
        for idx, row in enumerate(rows):
            if idx == 0:
                h = row
                head = deepcopy(self.columns)
                tail = [""] * len(h)
                for column, position in h.items():
                    tail[position] = column
                head.extend(tail)
                out.write(self.d.join(head) + "\n")
                continue
            taxon = row[h["Taxa"]].strip()
            if taxon.count(" ") > 1:
                # Remove subspecies name
                taxon = " ".join(taxon.split()[:2])
            if taxon not in self.cases:
                continue
            # name and sex are still read and normalised, but matching on
            # them is currently disabled; only the death date is compared.
            name = row[h["Common_Name"]].strip()
            sex = row[h["Sex"]].strip()
            date = row[h["Death_Date"]].strip()
            if sex not in ("male", "female"):
                sex = "NA"
            for case in self.cases[taxon]:
                if date != case[5].replace(".", "/"):
                    continue
                combined = deepcopy(case)
                combined.extend(row)
                out.write(self.d.join(combined) + "\n")
                matches += 1
    print("\tFound {} potential records.".format(matches))