def download_pmids(ifile, ofile, odir):
    """Downloads the raw Pubmed XML data.

    Arguments:
    ifile -- The input file in which each line contains a SINGLE PMID/PMCID to download.
    ofile -- The sentinel file to touch when finished.
    odir -- The output directory to download all XML files into.
    """
    needed_ids = set()
    with open(ifile) as handle:
        for line in handle:
            needed_ids.add(line.strip())

    # Skip any IDs whose XML is already present in the output directory.
    present_ids = set()
    for f in (x for x in os.listdir(odir) if x.endswith(".xml")):
        present_ids.add(f.split(".")[0])

    name_fun = partial(os.path.join, odir)
    needed_ids -= present_ids
    pmids = (x for x in needed_ids if not x.startswith("PMC"))
    pmcs = (x for x in needed_ids if x.startswith("PMC"))

    for xml, pmid in PubmedUtils.GetXMLfromList(pmids, db="pubmed"):
        with open(name_fun(pmid + ".xml"), "w") as handle:
            handle.write(xml)
    # PMC identifiers go through the pmc database.
    for xml, pmid in PubmedUtils.GetXMLfromList(pmcs, db="pmc"):
        with open(name_fun(pmid + ".xml"), "w") as handle:
            handle.write(xml)

    GeneralUtils.touch(ofile)
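# GeneralUtils.touch is not shown in this section; the pipeline only needs it
# to create or refresh an empty sentinel file, Unix-touch style. A minimal
# sketch of such a helper (an assumption, not the project's actual code):
import os

def touch(path):
    """Create `path` if missing and update its modification time."""
    with open(path, "a"):
        os.utime(path, None)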
def process_mut_file(ifile, ofiles):
    """Processes mut files and checks them for mentions of protein-names.

    Arguments:
    ifile -- The input Mutation file with text and mutation mentions.
    ofiles -- A 2-tuple (result-file, sentinel-file)
    """
    with open(ifile) as handle:
        rows = list(csv.DictReader(handle, delimiter="\t"))
    if rows:
        ofields = ("ParNum", "SentNum", "Mutation", "Swissprot",
                   "ProtText", "Text", "Mesh")
        with open(ofiles[0], "w") as ohandle:
            writer = csv.DictWriter(ohandle, ofields, delimiter="\t")
            writer.writerow(dict(zip(ofields, ofields)))
            sent_list = [x["Text"] for x in rows]
            swiss_it = WhatizitUtils.ask_whatizit(sent_list, pipeline="whatizitSwissprot")
            mesh_it = WhatizitUtils.ask_whatizit(sent_list, pipeline="whatizitMeshUp")
            try:
                # izip comes from itertools (this module is Python 2).
                for row, swiss_group, mesh_group in izip(rows, swiss_it, mesh_it):
                    if swiss_group:
                        meshterms = "|".join(x[1] for x in mesh_group)
                        for prot_text, reslist in swiss_group:
                            for res in reslist:
                                row["Swissprot"] = res
                                row["ProtText"] = prot_text
                                row["Mesh"] = meshterms
                                writer.writerow(row)
            except HTMLParseError:
                # The Whatizit response is occasionally malformed; keep what we have.
                pass
        GeneralUtils.touch(ofiles[1])
    else:
        for f in ofiles:
            GeneralUtils.touch(f)
def __tm2Sql(self, tm):
    conditions = []
    if "filter" in self.queryRequirements[tm]:
        conditions = self.__generateFilters(self.queryRequirements[tm]["filter"])
    result = {"source": "", "select": [], "conditions": conditions, "alias": []}
    subject = self.mapping["mappings"][tm]["s"]
    result["source"] = self.mapping["mappings"][tm]["sources"][0]["table"]
    predicateObjects = self.mapping["mappings"][tm]["po"]

    # Getting the subject columns.
    cols = utils.cleanColPattern(subject)
    for col in cols:
        uri = tm + "_" + col
        result["alias"].append(uri)
    result["select"].append({
        "type": "mandatory",
        "columns": cols,
        "variable": self.queryRequirements[tm]["subjectVar"]
    })

    for po in predicateObjects:
        if type(po) is not dict:
            tpoType = ("mandatory"
                       if po[0] in self.queryRequirements[tm]["mandatory"]["predicates"]
                       else "optional")
            predicatePosition = self.queryRequirements[tm][tpoType]["predicates"].index(po[0])
            cols = utils.cleanColPattern(po)
            for col in cols:
                uri = tm + "_" + col
                result["alias"].append(uri)
            if len(cols) > 0:
                result["select"].append({
                    "type": tpoType,
                    "columns": cols,
                    "variable": self.queryRequirements[tm][tpoType]["objects"][predicatePosition]
                })
    self.sql[tm] = result
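# utils.cleanColPattern is not shown here. From how it is used above, it
# presumably extracts the $(column) references from a YARRRML template such as
# "http://example.org/person/$(id)". A plausible sketch (an assumption about
# the real helper, not its actual code):
import re

def cleanColPattern(template):
    """Return the column names referenced as $(col) in a template."""
    return re.findall(r"\$\(([^)]+)\)", str(template))

# cleanColPattern("http://example.org/person/$(id)/$(name)") -> ["id", "name"]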
async def GetInvalidPackages(ctx):
    invalidList = OracleConn.GetInvalidPackages()
    newLine = '\n'
    queryTime = GeneralUtils.GetDateStrPretty(datetime.now())
    if invalidList:
        msg = f'**Invalid Packages as of {queryTime}**```{newLine}{newLine.join(invalidList)}```'
    else:
        msg = 'All packages are in good shape.'
    await ctx.send(msg)
def simplifyMappingAccordingToQuery(self, uris, splitedUris):
    self.splitedUris = splitedUris
    newMapping = {'prefixes': self.yarrrml['prefixes'], 'mappings': {}}
    if not utils.checkEmptyUris(uris):
        uris = self.__getTMsfromQueryUris(uris)
        for subject in uris.keys():
            for tm in uris[subject]['TMs']:
                if uris[subject]['fullTM']:
                    # The query touches every predicate of this TM: copy it whole.
                    if tm not in newMapping['mappings']:
                        newMapping['mappings'][tm] = {
                            'sources': self.yarrrml['mappings'][tm]['sources'],
                            's': self.yarrrml['mappings'][tm]['s'],
                            'po': []
                        }
                    newMapping['mappings'][tm]['po'] = self.yarrrml['mappings'][tm]['po']
                else:
                    # Copy only the predicate-object pairs the query actually uses.
                    for po in self.yarrrml['mappings'][tm]['po']:
                        if utils.isPoInUris(po, uris[subject]['uris']):
                            if tm not in newMapping['mappings']:
                                newMapping['mappings'][tm] = {
                                    'sources': self.yarrrml['mappings'][tm]['sources'],
                                    's': self.yarrrml['mappings'][tm]['s'],
                                    'po': []
                                }
                            if po not in newMapping['mappings'][tm]['po']:
                                newMapping['mappings'][tm]['po'].append(po)
    newMapping = self.__removeEmptyTM(newMapping)
    newMapping = self.__addReferencesOfTheJoins(newMapping)
    self.simplifiyedYarrrml = newMapping
    self.__removeDuplicatedUris(uris)
def __getTMsfromQueryUris(self, uris):
    for subject in uris:
        uris[subject]['TMs'] = []
        for tm in self.yarrrml['mappings']:
            tmUris = utils.getUrisFromTM(self.yarrrml['mappings'][tm])
            # A TM matches when it covers every URI the query asks of this subject.
            if len(set(tmUris) & set(uris[subject]['uris'])) == len(uris[subject]['uris']):
                uris[subject]['TMs'].append(tm)
                if tm != subject:
                    self.splitedUris[tm] = self.splitedUris[subject]
    return uris
def merge_results(ifiles, ofiles):
    """Merges the results of the protein-name recognition into one file.

    Arguments:
    ifiles -- A list of ALL results from process_mut_file to aggregate together.
    ofiles -- A 2-tuple (merged-file, sentinel-file)
    """
    with open(ofiles[0], "w") as ohandle:
        ofields = ("Article", "ParNum", "SentNum", "Mutation",
                   "Swissprot", "ProtText", "Mesh")
        writer = csv.DictWriter(ohandle, ofields, delimiter="\t",
                                extrasaction="ignore")
        writer.writerow(dict(zip(ofields, ofields)))
        for f in ifiles:
            with open(f) as handle:
                rows = list(csv.DictReader(handle, delimiter="\t"))
            # The article ID is encoded in the file name.
            art = f.split(os.sep)[-1].split(".")[0]
            for row in rows:
                row["Article"] = art
                writer.writerow(row)
    GeneralUtils.touch(ofiles[1])
def min_duration_indicator(ts, index, thresh):
    """Returns an indicator that keeps only groups lasting at least thresh."""
    indicator = np.zeros(len(ts))
    indicator[index] = 1
    grouped = gu.group_consecutives(index)
    for g in grouped:
        # Zero out any run of samples whose total duration falls below thresh.
        if ts[g[-1]] - ts[g[0]] < thresh:
            indicator[g] = 0
    return indicator
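# gu.group_consecutives is assumed, from its use above, to split a sorted
# array of indices into runs of consecutive integers. A minimal self-contained
# sketch (hypothetical; the real GeneralUtils helper may differ):
def group_consecutives(indices, step=1):
    """Split indices into consecutive runs, e.g. [1,2,3,7,8] -> [[1,2,3],[7,8]]."""
    groups = []
    run = []
    expected = None
    for i in indices:
        if expected is None or i == expected:
            run.append(i)
        else:
            groups.append(run)
            run = [i]
        expected = i + step
    if run:
        groups.append(run)
    return groups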
def __checkIfReferenceIsDefined(self, storedTm, mapping, o):
    newMapping = mapping.copy()
    joinReferences = utils.getJoinReferences(o)
    tmName = o['mapping']
    if tmName not in storedTm:
        storedTm[tmName] = self.yarrrml['mappings'][tmName]
        storedTm[tmName]['po'] = []
    if tmName in mapping['mappings']:
        storedTm[tmName] = mapping['mappings'][tmName]
    if ((tmName not in newMapping['mappings']
         or joinReferences['outerRef'] not in utils.getColPatterns(newMapping['mappings'][tmName]))
            and joinReferences['outerRef'] not in utils.getColPatterns(storedTm[tmName])):
        # The outer join reference is not defined yet: find the predicate-object
        # pair in the original mapping that defines it and add it.
        for po in self.yarrrml['mappings'][o['mapping']]['po']:
            if joinReferences['outerRef'] in utils.getColPatterns(po):
                storedTm[tmName]['po'].append(po)
    return storedTm
def returnSpikeLocations(spikes):
    """Split spikes into those fired while moving and while stopped.

    Relies on the module-level ts (tracking timestamps) and imoving
    (indices where the animal is moving).
    """
    global ts
    moving = []
    stopped = []
    for spike in spikes:
        # Map the spike time onto the nearest tracking timestamp.
        closest_t = gu.take_Closest(ts, spike)
        indx = np.where(ts == closest_t)[0][0]
        if indx in imoving:
            moving.append(indx)
        else:
            stopped.append(indx)
    return moving, stopped
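# gu.take_Closest is used throughout these scripts to snap an event time onto
# the nearest entry of a sorted timestamp array. A bisect-based sketch of such
# a helper (an assumption about GeneralUtils, not its actual code):
from bisect import bisect_left

def take_Closest(sorted_values, target):
    """Return the element of a sorted sequence closest to target."""
    pos = bisect_left(sorted_values, target)
    if pos == 0:
        return sorted_values[0]
    if pos == len(sorted_values):
        return sorted_values[-1]
    before, after = sorted_values[pos - 1], sorted_values[pos]
    return after if after - target < target - before else before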
def convert_results(ifiles, ofile):
    with open(ifiles[0]) as handle:
        uniprot_ids = [x["Swissprot"] for x in csv.DictReader(handle, delimiter="\t")]

    uniprot2entrez = UniprotUtils.uniprot_to_entrez(uniprot_ids)
    print "got entrez mapping", len(uniprot2entrez)
    uniprot2symbol = UniprotUtils.uniprot_to_symbol(uniprot_ids,
                                                    uniprot2entrez=uniprot2entrez,
                                                    with_taxid=True)
    print "got symbol mapping", len(uniprot2symbol)
    text_normalizer = GeneralUtils.get_known_mappings(ifiles[3])

    convfields = ("Article", "ParNum", "SentNum", "Mutation", "Swissprot",
                  "ProtText", "GeneID", "Symbol", "Taxid", "Mesh")
    with open(ofile, "w") as conv_handle:
        conv_writer = csv.DictWriter(conv_handle, convfields, delimiter="\t",
                                     extrasaction="ignore")
        conv_writer.writerow(dict(zip(convfields, convfields)))
        with open(ifiles[0]) as ihandle:
            for row in csv.DictReader(ihandle, delimiter="\t"):
                for geneid, genesym in zip(uniprot2entrez[row["Swissprot"]],
                                           uniprot2symbol[row["Swissprot"]]):
                    row["GeneID"] = geneid
                    row["Symbol"] = genesym[0]
                    row["Taxid"] = genesym[1]
                    txt = row["ProtText"].lower()
                    if row["ProtText"].endswith("s"):
                        txt = txt[:-1]
                    if txt in text_normalizer:
                        txt = text_normalizer[txt]
                        row["Symbol"] = txt
                        row["ProtText"] = txt
                    conv_writer.writerow(row)
def ___extractTriplePatternUris(self, result, el):
    if 'triples' in el:
        for tm in el['triples']:
            subject = tm['subject']['value']
            uri = tm['predicate']['value']
            if subject not in result:
                result[subject] = {'uris': [], 'fullTM': False}
            # TODO: when the subject itself is a URI, resolve it against the
            # mapping (findSubjectInMapping) and handle join subjects
            # (getSubjectInsideTPO / findSubjectOfJoin).
            if utils.isUri(uri):
                if uri == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type':
                    uri = tm['object']['value']
                    if 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' not in result[subject]['uris']:
                        result[subject]['uris'].append('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
                if uri not in result[subject]['uris']:
                    result[subject]['uris'].append(uri)
            else:
                # A variable predicate means the query can touch any predicate
                # of this subject, so the whole TM is needed.
                result[subject]['fullTM'] = True
    return result
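# For reference, el is assumed to be a parsed SPARQL algebra element in which
# each triple pattern carries subject/predicate/object terms. A hypothetical
# input and the dict the method would build from it:
el = {
    'triples': [
        {'subject': {'value': 's1'},
         'predicate': {'value': 'http://xmlns.com/foaf/0.1/name'},
         'object': {'value': '?name'}},
    ]
}
# ___extractTriplePatternUris(result={}, el=el) would then yield:
# {'s1': {'uris': ['http://xmlns.com/foaf/0.1/name'], 'fullTM': False}}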
xsmooth = np.abs(np.convolve(x, np.ones(100, dtype=int), 'valid')) / 100
print(ts[0] / 10000)
print(ts[-1] / 10000)
eegfile = sys.argv[1]
eegdata, eegtimestamps = eeg.readEEG(eegfile)
# Sampling rate of the EEG record.
fs = 512 / .257552
print(len(eegdata))
f, spec_t, Sxx = signal.spectrogram(eegdata, fs, nperseg=1024, noverlap=512)
# Clip the spectrogram to the span of the tracking timestamps.
closest_start = gu.take_Closest(spec_t, ts[0] / 10000)
spec_start_idx = np.where(spec_t == closest_start)[0][0]
closest_stop = gu.take_Closest(spec_t, ts[-1] / 10000)
spec_stop_idx = np.where(spec_t == closest_stop)[0][0]
# new_spec_t is spec_t resampled to fit the length of xsmooth.
new_spec_t = np.linspace(spec_t[spec_start_idx], spec_t[spec_stop_idx], len(xsmooth))
print(len(new_spec_t))
print(len(xsmooth))
#fband = 4
#theta = Sxx[4,:] + Sxx[5,:] / 2
theta = Sxx[:, spec_start_idx:spec_stop_idx]
ttfile = tfile.replace(tfile[tfile.find('_'):], '.ntt')
ts, waveforms = trode.readTetrode(ttfile)
spikes = trode.readTFile(tfile)
if len(spikes) > 500:
    spikes = spikes[:500]
smallwave = np.zeros([4, 32, len(spikes)])
## find the nearest waveform timestamp to each tfile timestamp
for indx, spike in enumerate(spikes):
    closest = gu.take_Closest(ts, spike * 100)
    ts_index = np.where(ts == closest)[0]
    smallwave[:, :, indx] = waveforms[:, :, ts_index[0]]
## compute the mean waveform for each channel
wave_mean = np.zeros([4, 32])
for i in range(4):
    wave_mean[i, :] = np.mean(smallwave[i, :, :], axis=1)
FWHM_pre, FWHM_post, spike_width = calculate_FWHM(wave_mean)
print("spike width FWHM: ", spike_width)
# Make the super-resolution spike on four channels;
# this allows the FWHM points to be plotted.
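# calculate_FWHM is called above but not defined in this excerpt. A plausible
# sketch, assuming it measures the full width at half maximum of the mean
# waveform on the channel with the largest peak, in sample units (the real
# implementation may interpolate for sub-sample precision):
import numpy as np

def calculate_FWHM(wave_mean):
    """Return (pre_idx, post_idx, width) of the half-max crossings around the peak."""
    chan = np.argmax(np.max(wave_mean, axis=1))  # channel with the biggest spike
    w = wave_mean[chan]
    peak = np.argmax(w)
    half = w[peak] / 2.0
    pre = peak
    while pre > 0 and w[pre] > half:
        pre -= 1
    post = peak
    while post < len(w) - 1 and w[post] > half:
        post += 1
    return pre, post, post - pre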
s1_fr = len(s1spikes) / ((vts[start] - vts[0]) / 10000)
m_fr = len(mspikes) / ((vts[stop] - vts[start]) / 10000)
s2_fr = len(s2spikes) / ((vts[-1] - vts[stop]) / 10000)
## segregate spikes into s1, maze, s2
posx_left = []
posy_left = []
posx_right = []
posy_right = []
# no need to run time-consuming place analysis on interneurons (>5 Hz)
for spike in mspikes:
    closest_t = gu.take_Closest(ts, spike)
    indx, = np.where(ts == closest_t)
    if indx in imoving:
        if direction[indx] == 0:
            posx_left.append(x[indx])
            posy_left.append(y[indx])
        else:
            posx_right.append(x[indx])
            posy_right.append(y[indx])
occhist_left, _ = np.histogram(x[imoving_left], bins=bins)
occhist_right, _ = np.histogram(x[imoving_right], bins=bins)
# Convert occupancy sample counts to seconds (~16.7 ms per tracking sample).
occhist_left = occhist_left * .016666
occhist_right = occhist_right * .016666
xsmooth = np.abs(np.convolve(x, np.ones(smoothfac, dtype=int),
                             'valid')) / smoothfac
# use later to detect direction
#direction = np.where(np.diff(xsmooth)>0, 1, 0)
cum = np.cumsum(np.abs(np.diff(xsmooth)))
cum_t = np.linspace(ts[0], ts[-1], num=len(cum))
inotmoving = np.where(np.diff(cum) < .1)[0]
imoving = np.where(np.diff(cum) >= .1)[0]
rests = gu.group_consecutives(inotmoving)
starts = []
stops = []
seq = []
for rest in rests:
    # Take a maximum of 100 samples at the rest point.
    # Note: this sometimes warns about the mean of an empty slice.
    endpt = rest[0] + 100 if rest[-1] - rest[0] > 100 else rest[-1]
    if np.mean(xsmooth[rest[0]:endpt]) > 60:
        #print("Goal 1")
        seq.append(1)
    elif np.mean(xsmooth[rest[0]:endpt]) < 8:
        #print("Goal 2")
def FileIter(func_name):
    """A general iterator for all of the ruffus functions in the pipeline."""
    if func_name == 'convert_pmids_to_pmcs':
        sdir = partial(os.path.join, 'Data', 'SearchResults')
        pmc_file = os.path.join('Data', 'PMC-ids.csv')
        files = [x for x in os.listdir(sdir('')) if x.endswith('.res')]
        for f in files:
            yield (sdir(f), pmc_file), sdir(f + '.conv')
    elif func_name == 'search_pubmed':
        sdir = partial(os.path.join, 'Data', 'SearchResults')
        queryfile = os.path.join('Data', 'QueryList.txt')
        with open(queryfile) as handle:
            for row in csv.DictReader(handle):
                fname = '%s--%s.res' % (GeneralUtils.slugify(row['org']),
                                        GeneralUtils.slugify(row['search']))
                ofile = sdir(fname)
                yield queryfile, ofile, row['search']
    elif func_name == 'download_pmids':
        sdir = partial(os.path.join, 'Data', 'SearchResults')
        odir = os.path.join('Data', 'RawXML')
        files = [x for x in os.listdir(sdir('')) if x.endswith('.conv')]
        for f in files:
            yield sdir(f), sdir(f + '.dl'), odir
    elif func_name == 'extract_text':
        sdir = partial(os.path.join, 'Data', 'RawXML')
        odir = partial(os.path.join, 'Data', 'SentenceFiles')
        files = sorted(x for x in os.listdir(sdir('')) if x.endswith('.xml'))
        for f in files:
            name = f.split('.')[0]
            typ = 'pmc' if f.startswith('PMC') else 'pubmed'
            yield sdir(f), odir(name + '.sent'), typ
    elif func_name == 'get_mutations':
        sdir = partial(os.path.join, 'Data', 'SentenceFiles')
        odir = partial(os.path.join, 'Data', 'MutFiles')
        finder = None  # mutfinder_gen('regex.txt')
        files = sorted(x for x in os.listdir(sdir('')) if x.endswith('.sent'))
        for f in files:
            name = f.split('.')[0]
            yield sdir(f), odir(name + '.mut')
    elif func_name == 'process_mut_file':
        sdir = partial(os.path.join, 'Data', 'MutFiles')
        odir = partial(os.path.join, 'Data', 'ProteinFiles')
        files = sorted(x for x in os.listdir(sdir('')) if x.endswith('.mut'))
        for f in files:
            name = f.split('.')[0]
            yield sdir(f), (odir(name + '.prot'), odir(name + '.sen'))
    elif func_name == 'mapping_files':
        path = 'Data/Mapping/'
        items = (('ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz',
                  'idmapping.dat.sort'),
                 ('ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz', 'gene_info'),
                 ('ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/PMC-ids.csv.gz', 'PMC-ids.csv'),
                 ('ftp://nlmpubs.nlm.nih.gov/online/mesh/.asciimesh/d2011.bin', 'd2011.bin'))
        for url, ofile in items:
            yield None, os.path.join(path, ofile), url, path
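# FileIter is presumably consumed through ruffus's @files decorator, which
# accepts a callable yielding (input, output, extra...) parameter tuples.
# A sketch of how one task might be wired up (hypothetical wiring, shown for
# context; the project's actual decorator usage is not in this excerpt):
from functools import partial
from ruffus import files

@files(partial(FileIter, 'download_pmids'))
def download_pmids_task(ifile, ofile, odir):
    download_pmids(ifile, ofile, odir)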
import VideoUtils as vu
import numpy as np
import GeneralUtils as gu
from matplotlib import pyplot as plt

makeplot = 0
#pvdfile = './RawData/dwP.pvd'
pvdfile = './RawData/sleep_dwPout.pvd'
ts, x, y = vu.readPVDfile(pvdfile)
x /= 8.2
xsmooth = np.abs(np.convolve(x, np.ones(100, dtype=int), 'valid')) / 100
cum = np.cumsum(np.abs(np.diff(xsmooth)))
inotmoving = np.where(np.diff(cum) < .025)[0]
grouped = gu.group_consecutives(inotmoving)
starts = []
stops = []
for group in grouped:
    # Keep only rest periods longer than 1000 samples.
    if group[-1] - group[0] > 1000:
        starts.append(ts[group[0]])
        stops.append(ts[group[-1]])
        # minix = x[group[0]:group[-1]]
        if makeplot == 1:
            plt.plot(x[group[0]:group[-1]], y[group[0]:group[-1]], 'b.')
            plt.ylim([0, 480])
            plt.xlim([0, 640])
            plt.show()
duration_index = np.where(amp_env > rip_duration_thresh)

# Deblipping eliminates short off conditions while the signal is high.
deblipped_detection = removeBlips(ts, detection_index, 50)
deblipped_duration = removeBlips(ts, duration_index, 50)

# Grouping keeps only indicators of at least a minimum duration (35/85 ms).
detection_indicator = min_duration_indicator(ts, np.where(deblipped_detection == 1)[0], 35)
duration_indicator = min_duration_indicator(ts, np.where(deblipped_duration == 1)[0], 85)

# rip_labels identifies each separate ripple; the height of the square
# represents the ripple number -- like an indicator function with labels.
rip_labels = np.zeros(len(ts), dtype=np.uint16)
# group duration indicators
grouped_rips = gu.group_consecutives(np.where(duration_indicator == 1)[0])
for i, g in enumerate(grouped_rips):
    rip_labels[g] = i
durations = duration_indicator * rip_labels
detections = detection_indicator * rip_labels
# A ripple counts only if it crosses both the duration and detection thresholds.
unique_durations = np.unique(durations)
unique_detections = np.unique(detections)
rips = np.intersect1d(unique_durations, unique_detections)
print(len(rips))
'''
#remove detections that are not in rips
for rip in rips:
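# removeBlips is called above but not defined in this excerpt. From its use,
# it presumably turns an index array into a 0/1 indicator over ts and fills
# dropouts shorter than `gap` samples so a ripple is not split by a brief dip.
# A sketch under those assumptions (hypothetical; not the script's helper):
import numpy as np

def removeBlips(ts, index, gap):
    """Indicator over ts that is 1 at index, with short 0-gaps bridged."""
    indicator = np.zeros(len(ts))
    indicator[index] = 1
    on = np.where(indicator == 1)[0]
    # Bridge any pair of consecutive "on" samples separated by < gap samples.
    for a, b in zip(on[:-1], on[1:]):
        if 0 < b - a < gap:
            indicator[a:b] = 1
    return indicator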
def Log(message, user='******'):
    dateStr = GeneralUtils.GetDateStrPretty(datetime.now())
    with open(LOGFILENAME, 'a') as file:
        file.write(' - '.join((dateStr, user, message)))
        file.write('\n')
def download_files(ifile, ofile, url, path):
    """Downloads the mapping files needed for various steps."""
    GeneralUtils.download_file(path, url, sort=ofile.endswith(".sort"))
    GeneralUtils.touch(ofile)
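# GeneralUtils.download_file is not shown in this section. Given the FTP URLs
# and the ".sort" convention above, it plausibly fetches a (possibly gzipped)
# file into `path` and optionally writes a line-sorted copy. A rough sketch
# under those assumptions (hypothetical; a real helper would likely stream or
# shell out to sort rather than sorting multi-GB files in memory):
import gzip
import os
import shutil
import urllib.request

def download_file(path, url, sort=False):
    name = url.rsplit('/', 1)[-1]
    dest = os.path.join(path, name)
    with urllib.request.urlopen(url) as resp, open(dest, 'wb') as out:
        shutil.copyfileobj(resp, out)
    if dest.endswith('.gz'):
        plain = dest[:-3]
        with gzip.open(dest, 'rb') as zin, open(plain, 'wb') as out:
            shutil.copyfileobj(zin, out)
        dest = plain
    if sort:
        with open(dest) as handle:
            lines = sorted(handle)
        with open(dest + '.sort', 'w') as out:
            out.writelines(lines)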