def __readInfile__(self):
    """Load the diagnosis data in ``self.infile`` into this object.

    The first line of the file is the header: it is stripped, passed to
    ``unixpath.getDelim`` to obtain the delimiter, split on that delimiter
    and handed to ``self.__setHeader__``. Every following line is stripped,
    split on the same delimiter and handed to ``self.__parseLine__``.

    An empty file results in no calls at all.
    """
    with open(self.infile, "r") as handle:
        header = next(handle, None)
        if header is None:
            # Nothing to read: neither the header nor any row is processed.
            return
        header = header.strip()
        delim = unixpath.getDelim(header)
        self.__setHeader__(header.split(delim))
        # The file iterator has already consumed the header, so this loop
        # only sees the data rows.
        for record in handle:
            self.__parseLine__(record.strip().split(delim))
def __setData__(self, infile):
    """Read a delimited text file into a data frame stored in ``self.data``.

    The frame is stored under the key returned by
    ``unixpath.getFileName(infile)``. Lines are stripped before use. Until
    ``unixpath.getDelim`` has returned a truthy delimiter, each line is
    treated as the header (column names); every line after that becomes
    one row of the frame.

    If the file contains no lines, an empty data frame is stored instead
    of failing on an unassigned header.

    Args:
        infile: Path of the file to read.
    """
    d = None
    # Pre-initialised so an empty file yields an empty frame rather than an
    # UnboundLocalError when the DataFrame is built below.
    head = None
    rows = []
    name = unixpath.getFileName(infile)
    with open(infile, "r") as f:
        for line in f:
            line = line.strip()
            if d:
                rows.append(line.split(d))
            else:
                # No usable delimiter yet: (re)try on this line and treat
                # it as the header.
                d = unixpath.getDelim(line)
                head = line.split(d)
    self.data[name] = pandas.DataFrame(rows, columns=head)
def __setTaxa__(self, infile):
    """Store the column names and per-row taxonomy from ``infile``.

    Header line: stripped, split on the delimiter reported by
    ``unixpath.getDelim``, and every field except the first and the last
    is saved as ``self.names``.

    Each later line: stripped and split on the same delimiter; the
    second-to-last field becomes the key into ``self.taxa`` and the fields
    from index 1 up to (but excluding) the second-to-last become the value.

    NOTE(review): ``self.names`` drops one trailing column while the stored
    values drop two, so the two lists differ in length when a row has as
    many fields as the header -- confirm this is intended.
    """
    print("\n\tReading taxonomy...")
    with open(infile, "r") as handle:
        for index, raw in enumerate(handle):
            text = raw.strip()
            if index == 0:
                delim = unixpath.getDelim(text)
                self.names = text.split(delim)[1:-1]
                continue
            fields = text.split(delim)
            self.taxa[fields[-2]] = fields[1:-2]
def __setDiagnoses__(self):
    """Read ``self.diagfile`` into the ``self.diagnoses`` dictionary.

    The stripped header line supplies the delimiter (via
    ``unixpath.getDelim``) and is passed, split, to ``self.__setHeader__``.
    For every following line the first field is used as the key and the
    stripped last field as the value. The number of entries held in
    ``self.diagnoses`` is printed once the file has been read.
    """
    print("\n\tReading diagnosis file...")
    with open(self.diagfile, "r") as handle:
        header = next(handle, None)
        if header is not None:
            header = header.strip()
            delim = unixpath.getDelim(header)
            self.__setHeader__(header.split(delim))
            # Remaining lines are data rows: first field -> last field.
            for raw in handle:
                fields = raw.strip().split(delim)
                self.diagnoses[fields[0]] = fields[-1].strip()
    print(("\tExtracted {:,} diagnosis records.").format(len(self.diagnoses)))
def setSpecies(self, indir):
    """Populate ``self.species`` from every path matching ``indir + "*"``.

    For each matched file the delimiter is taken from its first line
    (``unixpath.getDelim``); that first line is then parsed like every
    other line. Each line is converted to a row with ``self.__getRow__``
    and rows with fewer than three fields are ignored. The last field of a
    row is the key into ``self.species``: an existing entry is updated
    through ``resolveSpecies``, otherwise a new ``Species`` is created from
    ``self.delim`` and the row.

    Args:
        indir: Prefix handed to ``glob`` (a ``*`` is appended).
    """
    print("\n\tReading species totals files...")
    for path in glob(indir + "*"):
        with open(path, "r") as handle:
            for index, line in enumerate(handle):
                if index == 0:
                    # Delimiter is detected once per file, from the raw
                    # (unstripped) first line.
                    delim = unixpath.getDelim(line)
                row = self.__getRow__(delim, line)
                if len(row) < 3:
                    continue
                key = row[-1]
                if key in self.species:
                    self.species[key].resolveSpecies(row)
                else:
                    self.species[key] = Species(self.delim, row)
def __readFile__(self, infile):
    """Read a delimited file and return its rows keyed by their first field.

    The stripped first line is the header; its delimiter is obtained from
    ``unixpath.getDelim``. Each later line is stripped and split on that
    delimiter, and stored as ``ret[first_field] = remaining_fields``.

    Args:
        infile: Path of the file to read.

    Returns:
        A ``(ret, h)`` tuple where ``ret`` is the dictionary described
        above and ``h`` is the list of header fields. For an empty file
        this is ``({}, [])``.
    """
    ret = {}
    # Default header so an empty file returns ({}, []) instead of raising
    # UnboundLocalError at the return statement.
    h = []
    print(("\tReading {}...").format(os.path.split(infile)[1]))
    with open(infile, "r") as f:
        header = next(f, None)
        if header is not None:
            header = header.strip()
            d = unixpath.getDelim(header)
            h = header.split(d)
            for line in f:
                s = line.strip().split(d)
                # Store with ID as key
                ret[s[0]] = s[1:]
    return ret, h
def __readInfile__(self, infile, diagnosis, malignant):
    """Sort the rows of ``infile`` into ``self.records``.

    The first line supplies the delimiter (``unixpath.getDelim``) and, when
    ``self.columns`` is still ``None``, is split and used to build a
    ``Columns`` object. Note that the header line is used as read, without
    stripping.

    Each later line is stripped and split; rows whose field count differs
    from ``self.columns.length`` are skipped. The first field is the record
    key: an existing record is updated with ``resolveRecord``, otherwise a
    new ``Record`` is created from ``malignant``, ``self.delim`` and
    ``diagnosis`` and filled with ``setRecord``.

    Args:
        infile: Path of the file to read.
        diagnosis: Passed through to the ``Record`` constructor.
        malignant: Passed through to the ``Record`` constructor.
    """
    with open(infile, "r") as handle:
        for index, line in enumerate(handle):
            if index == 0:
                delim = unixpath.getDelim(line)
                if self.columns is None:
                    self.columns = Columns(line.split(delim))
                continue
            fields = line.strip().split(delim)
            if len(fields) != self.columns.length:
                # Row does not match the expected layout; ignore it.
                continue
            key = fields[0]
            if key not in self.records:
                self.records[key] = Record(malignant, self.delim, diagnosis)
                self.records[key].setRecord(self.columns, fields)
            else:
                self.records[key].resolveRecord(self.columns, fields)
def mergeDiagnoses(self):
    """Merge diagnoses into the full data file and write it as CSV.

    Reads ``self.infile`` and writes ``self.outfile``. The stripped first
    line is the header: its delimiter comes from ``unixpath.getDelim`` and
    its fields are passed to ``self.__setHeader__``. For every later row
    that is long enough to hold the ``"Diagnosis"`` column, that field is
    replaced by the value returned from ``self.__getDiagnosis__`` and the
    accompanying counter is added to the running total. Each row is passed
    through ``self.__checkQuotes__`` and written comma-separated. The total
    is printed at the end.

    The header is written comma-separated as well, so the output uses one
    delimiter throughout even when the input is not comma-delimited (the
    header used to be copied verbatim with the input delimiter).

    NOTE(review): ``self.head`` is presumably populated by
    ``__setHeader__`` -- confirm. Rows too short to contain the diagnosis
    column are assumed to be written unchanged; verify against the
    original layout.
    """
    count = 0
    print("\tMerging full data file with diagnoses...")
    with open(self.outfile, "w") as out, open(self.infile, "r") as f:
        header = next(f, None)
        if header is not None:
            header = header.strip()
            d = unixpath.getDelim(header)
            columns = header.split(d)
            self.__setHeader__(columns)
            # Rows below are always comma-joined, so the header must be too.
            out.write(",".join(columns) + "\n")
            for line in f:
                row = line.strip().split(d)
                uid = row[self.head["UID"]].strip()
                idx = self.head["Diagnosis"]
                if len(row) > idx:
                    row[idx], c = self.__getDiagnosis__(uid, row[idx].strip())
                    count += c
                row = self.__checkQuotes__(row)
                out.write(",".join(row) + "\n")
    print(("\tMerged {:,} diagnosis records.").format(count))