Example #1
0
def download_pmids(ifile, ofile, odir):
    """Downloads the raw Pubmed XML data.

    Arguments:
    ifile -- The input file in which each line contains a SINGLE PMID/PMCID to download.
    ofile -- The sentinel file to touch when finished.
    odir -- The output directory to download all XML files into.
    """

    needed_ids = set()
    with open(ifile) as handle:
        for line in handle:
            needed_ids.add(line.strip())

    present_ids = set()
    for f in (x for x in os.listdir(odir) if x.endswith(".xml")):
        present_ids.add(f.split(".")[0])

    name_fun = partial(os.path.join, odir)
    needed_ids -= present_ids
    pmids = (x for x in needed_ids if not x.startswith("PMC"))
    pmcs = (x for x in needed_ids if x.startswith("PMC"))
    for xml, pmid in PubmedUtils.GetXMLfromList(pmids, db="pubmed"):
        with open(name_fun(pmid + ".xml"), "w") as handle:
            handle.write(xml)
    for xml, pmid in PubmedUtils.GetXMLfromList(pmcs, db="pmc"):
        with open(name_fun(pmid + ".xml"), "w") as handle:
            handle.write(xml)

    GeneralUtils.touch(ofile)
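# A hedged usage sketch (not part of the original): the paths mirror the
# 'download_pmids' branch of FileIter later in this listing, and the file name
# 'query.res.conv' is made up for illustration.
# download_pmids(os.path.join('Data', 'SearchResults', 'query.res.conv'),
#                os.path.join('Data', 'SearchResults', 'query.res.conv.dl'),
#                os.path.join('Data', 'RawXML'))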
Example #2
0
def process_mut_file(ifile, ofiles):
    """Processes mut files and checks them for mentions of protein-names.

    Arguments:
    ifile -- The input Mutation file with text and mutation mentions.
    ofiles -- A 2-tuple (result-file, sentinel-file)
    """

    with open(ifile) as handle:
        rows = list(csv.DictReader(handle, delimiter="\t"))
    if rows:
        ofields = ("ParNum", "SentNum", "Mutation", "Swissprot", "ProtText", "Text", "Mesh")
        writer = csv.DictWriter(open(ofiles[0], "w"), ofields, delimiter="\t")
        writer.writerow(dict(zip(ofields, ofields)))
        sent_list = [x["Text"] for x in rows]

        swiss_it = WhatizitUtils.ask_whatizit(sent_list, pipeline="whatizitSwissprot")
        mesh_it = WhatizitUtils.ask_whatizit(sent_list, pipeline="whatizitMeshUp")
        try:
            for row, swiss_group, mesh_group in izip(rows, swiss_it, mesh_it):
                if swiss_group:
                    meshterms = "|".join(x[1] for x in mesh_group)
                    for prot_text, reslist in swiss_group:
                        for res in reslist:
                            row["Swissprot"] = res
                            row["ProtText"] = prot_text
                            row["Mesh"] = meshterms
                            writer.writerow(row)
        except HTMLParseError:
            pass
        GeneralUtils.touch(ofiles[1])
    else:
        for f in ofiles:
            GeneralUtils.touch(f)
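# A hedged sketch (not in the original) of the tab-separated input this function
# expects: only the 'Text' column is read directly; the other column names are
# inferred from the output fields above and are assumptions.
# ParNum<TAB>SentNum<TAB>Mutation<TAB>Text
# 1<TAB>0<TAB>A123T<TAB>The A123T substitution reduced binding affinity.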
Example #3
0
def download_pmids(ifile, ofile, odir):
    """Downloads the raw Pubmed XML data.

    Arguments:
    ifile -- The input file in which each line contains a SINGLE PMID/PMCID to download.
    ofile -- The sentinel file to touch when finished.
    odir -- The output directory to download all XML files into.
    """
   
    needed_ids = set()
    with open(ifile) as handle:
        for line in handle:
            needed_ids.add(line.strip())

    present_ids = set()
    for f in (x for x in os.listdir(odir) if x.endswith('.xml')):
        present_ids.add(f.split('.')[0])

    name_fun = partial(os.path.join, odir)
    needed_ids -= present_ids
    pmids = (x for x in needed_ids if not x.startswith('PMC'))
    pmcs = (x for x in needed_ids if x.startswith('PMC'))
    for xml, pmid in PubmedUtils.GetXMLfromList(pmids, db = 'pubmed'):
        with open(name_fun(pmid+'.xml'), 'w') as handle:
            handle.write(xml)
    for xml, pmid in PubmedUtils.GetXMLfromList(pmcs, db = 'pmc'):
        with open(name_fun(pmid+'.xml'), 'w') as handle:
            handle.write(xml)
            
    GeneralUtils.touch(ofile)
Example #4
0
def convert_results(ifiles, ofile):
    
    with open(ifiles[0]) as handle:
        uniprot_ids = [x['Swissprot'] for x in csv.DictReader(handle, delimiter = '\t')]
    uniprot2entrez = UniprotUtils.uniprot_to_entrez(uniprot_ids)    
    print 'got entrez mapping', len(uniprot2entrez)    
    uniprot2symbol = UniprotUtils.uniprot_to_symbol(uniprot_ids, 
                                                    uniprot2entrez = uniprot2entrez, 
                                                    with_taxid = True)
    print 'got symbol mapping', len(uniprot2symbol)
    text_normalizer = GeneralUtils.get_known_mappings(ifiles[3])
    convfields = ('Article', 'ParNum', 'SentNum', 'Mutation', 'Swissprot', 'ProtText', 'GeneID', 'Symbol', 'Taxid', 'Mesh')
    with open(ofile, 'w') as conv_handle:
        conv_writer = csv.DictWriter(conv_handle, convfields, delimiter = '\t', extrasaction = 'ignore')
        conv_writer.writerow(dict(zip(convfields, convfields)))
        with open(ifiles[0]) as ihandle:
            for row in csv.DictReader(ihandle, delimiter = '\t'):
                for geneid, genesym in zip(uniprot2entrez[row['Swissprot']], uniprot2symbol[row['Swissprot']]):
                    row['GeneID'] = geneid
                    row['Symbol'] = genesym[0]
                    row['Taxid'] = genesym[1]
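                    # normalize the matched protein text: lowercase, drop a trailing
                    # plural 's', then map through the known-mappings table (which,
                    # when it hits, also overrides the Symbol column)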
                    txt = row['ProtText'].lower()
                    if row['ProtText'].endswith('s'):
                        txt = txt[:-1]
                    if txt in text_normalizer:
                        txt = text_normalizer[txt]
                        row['Symbol'] = txt
                    row['ProtText'] = txt
                    conv_writer.writerow(row)
Example #5
0
 def __tm2Sql(self, tm):
     conditions = []
     if "filter" in self.queryRequirements[tm].keys():
         conditions = self.__generateFilters(
             self.queryRequirements[tm]["filter"])
     result = {
         "source": "",
         "select": [],
         "conditions": conditions,
         "alias": []
     }
     subject = self.mapping["mappings"][tm]["s"]
     result["source"] = self.mapping["mappings"][tm]["sources"][0]["table"]
     predicateObjects = self.mapping["mappings"][tm]["po"]
     cols = utils.cleanColPattern(subject)  #Getting Subject
     for col in cols:
         uri = tm + "_" + col
         result["alias"].append(uri)
     result["select"].append({
         "type":
         "mandatory",
         "columns":
         cols,
         "variable":
         self.queryRequirements[tm]["subjectVar"]
     })
     for po in predicateObjects:
         if (type(po) is not dict):
             tpoType = "mandatory" if po[0] in self.queryRequirements[tm][
                 "mandatory"]["predicates"] else "optional"
             predicatePosition = self.queryRequirements[tm][tpoType][
                 "predicates"].index(po[0])
             cols = utils.cleanColPattern(po)
             for col in cols:
                 uri = tm + "_" + col
                 result["alias"].append(uri)
             if (len(cols) > 0):
                 result["select"].append({
                     "type":
                     tpoType,
                     "columns":
                     cols,
                     "variable":
                     self.queryRequirements[tm][tpoType]["objects"]
                     [predicatePosition]
                 })
     self.sql[tm] = result
Example #6
0
async def GetInvalidPackages(ctx):
    invalidList = OracleConn.GetInvalidPackages()
    newLine = '\n'
    queryTime = GeneralUtils.GetDateStrPretty(datetime.now())
    if invalidList:
        msg = f'**Invalid Packages as of {queryTime}**```{newLine}{newLine.join(invalidList)}```'
    else:
        msg = 'Bütün paketlerin durumu iyi maşallah.'  # Turkish: "All packages are in good shape, mashallah."
    await ctx.send(msg)
Example #7
0
 def simplifyMappingAccordingToQuery(self, uris, splitedUris):
     self.splitedUris = splitedUris
     newMapping = {'prefixes': self.yarrrml['prefixes'], 'mappings': {}}
     #print('MAPPING:\n' + str(mapping).replace('\'', '"'))
     #sys.exit()
     if (not utils.checkEmptyUris(uris)):
         uris = self.__getTMsfromQueryUris(uris)
         for subject in uris.keys():
             for tm in uris[subject]['TMs']:
                 #print('SUBJECT:' + str(subject))
                 if (uris[subject]['fullTM']):
                     #print('***********************1*******************')
                     if (tm not in newMapping['mappings'].keys()):
                         newMapping['mappings'][tm] = {
                             'sources':
                             self.yarrrml['mappings'][tm]['sources'],
                             's': self.yarrrml['mappings'][tm]['s'],
                             'po': []
                         }
                     newMapping['mappings'][tm]['po'] = self.yarrrml[
                         'mappings'][tm]['po']
                     #print(str(newMapping).replace('\'', '"'))
                 else:
                     for po in self.yarrrml['mappings'][tm]['po']:
                         if (utils.isPoInUris(po, uris[subject]['uris'])):
                             #print('*****************2*******************+')
                             if (tm not in newMapping['mappings'].keys()):
                                 newMapping['mappings'][tm] = {
                                     'sources':
                                     self.yarrrml['mappings'][tm]
                                     ['sources'],
                                     's':
                                     self.yarrrml['mappings'][tm]['s'],
                                     'po': []
                                 }
                             if (po not in newMapping['mappings'][tm]['po']
                                 ):
                                 newMapping['mappings'][tm]['po'].append(po)
         #print('MAPPING:\n' + str(newMapping).replace('\'', '"'))
     newMapping = self.__removeEmptyTM(newMapping)
     newMapping = self.__addReferencesOfTheJoins(newMapping)
     self.simplifiyedYarrrml = newMapping
     self.__removeDuplicatedUris(uris)
Example #8
0
 def __getTMsfromQueryUris(self, uris):
     for subject in uris:
         uris[subject]['TMs'] = []
         for tm in self.yarrrml['mappings']:
             tmUris = utils.getUrisFromTM(self.yarrrml['mappings'][tm])
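             # keep this TM only if it provides every URI requested for this subject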
             if len(list(set(tmUris) & set(uris[subject]['uris']))) == len(
                     uris[subject]['uris']):
                 uris[subject]['TMs'].append(tm)
                 if (tm != subject):
                     self.splitedUris[tm] = self.splitedUris[subject]
     return uris
Example #9
0
def merge_results(ifiles, ofiles):
    """Merges the results of the protein-name recognition into one file.

    Arguments:
    ifiles -- A list of ALL results from process_mut_file to aggregate together.
    ofiles -- A 2-tuple (merged-file, sentinel-file)
    """

    with open(ofiles[0], "w") as ohandle:
        ofields = ("Article", "ParNum", "SentNum", "Mutation", "Swissprot", "ProtText", "Mesh")
        writer = csv.DictWriter(ohandle, ofields, delimiter="\t", extrasaction="ignore")
        writer.writerow(dict(zip(ofields, ofields)))
        for f in ifiles:
            with open(f) as handle:
                rows = list(csv.DictReader(handle, delimiter="\t"))
            art = f.split(os.sep)[-1].split(".")[0]
            for row in rows:
                row["Article"] = art
                writer.writerow(row)

    GeneralUtils.touch(ofiles[1])
Example #10
0
def min_duration_indicator(ts, index, thresh):
  """Returns groups of an indicator function that are  > thresh."""
  indicator = np.zeros(len(ts))
  indicator[index] = 1
 
  grouped = gu.group_consecutives(index)
  for g in grouped:
#    print(ts[g[-1]]-ts[g[0]])
    if (ts[g[-1]]-ts[g[0]] < thresh):
      indicator[g] = 0
 
  return indicator
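# A self-contained sketch (not from the original) of the same grouping idea;
# _group_consecutives below is an assumption standing in for gu.group_consecutives,
# which is taken to return runs of consecutive indices.
import numpy as np

def _group_consecutives(idx):
  """Split a sorted index array into runs of consecutive values."""
  return np.split(idx, np.where(np.diff(idx) > 1)[0] + 1) if len(idx) else []

ts_demo = np.arange(0, 10, 0.5)           # fake timestamps (seconds)
index_demo = np.array([2, 3, 4, 10, 11])  # indices where the signal is "on"
demo_ind = np.zeros(len(ts_demo))
demo_ind[index_demo] = 1
for g in _group_consecutives(index_demo):
  if ts_demo[g[-1]] - ts_demo[g[0]] < 0.75:  # drop groups shorter than the threshold
    demo_ind[g] = 0
print(demo_ind)  # only the 3-sample group (1.0 s long) survives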
Example #11
0
def merge_results(ifiles, ofiles):
    """Merges the results of the protein-name recognition into one file.

    Arguments:
    ifiles -- A list of ALL results from process_mut_file to aggregate together.
    ofiles -- A 2-tuple (merged-file, sentinel-file)
    """

    with open(ofiles[0], 'w') as ohandle:
        ofields = ('Article', 'ParNum', 'SentNum', 'Mutation', 'Swissprot', 'ProtText', 'Mesh')
        writer = csv.DictWriter(ohandle, ofields, delimiter = '\t',
                                    extrasaction = 'ignore')
        writer.writerow(dict(zip(ofields, ofields)))
        for f in ifiles:
            with open(f) as handle:
                rows = list(csv.DictReader(handle, delimiter = '\t'))
            art = f.split(os.sep)[-1].split('.')[0]
            for row in rows:
                row['Article'] = art
                writer.writerow(row)
    
    GeneralUtils.touch(ofiles[1])    
Example #12
0
 def __checkIfReferenceIsDefined(self, storedTm, mapping, o):
     newMapping = mapping.copy()
     #print('\n\nO:\n\n' + str(o))
     print(o)
     joinReferences = utils.getJoinReferences(o)
     tmName = o['mapping']
     #print('\n\n\nJOIN REFERENCES:\n\n\n' + str(joinReferences))
     if (tmName not in storedTm.keys()):
         storedTm[tmName] = self.yarrrml['mappings'][tmName]
         storedTm[tmName]['po'] = []
         if (tmName in mapping['mappings'].keys()):
             storedTm[tmName] = mapping['mappings'][tmName]
     if ((tmName not in newMapping['mappings'].keys()
          or joinReferences['outerRef'] not in utils.getColPatterns(
              newMapping['mappings'][tmName]))
             and joinReferences['outerRef'] not in utils.getColPatterns(
                 storedTm[tmName])):
         #print('BUSCAMOS:' + str(joinReferences['outerRef']))
         for i, po in enumerate(
                 self.yarrrml['mappings'][o['mapping']]['po']):
             if (joinReferences['outerRef'] in utils.getColPatterns(po)):
                 #print('Hay que añadir a: \n' + str(po))
                 storedTm[tmName]['po'].append(po)
     return storedTm
Example #13
0
def process_mut_file(ifile, ofiles):
    """Processes mut files and checks them for mentions of protein-names.

    Arguments:
    ifile -- The input Mutation file with text and mutation mentions.
    ofiles -- A 2-tuple (result-file, sentinel-file)
    """
    
    with open(ifile) as handle:
        rows = list(csv.DictReader(handle, delimiter = '\t'))
    if rows:
        ofields = ('ParNum', 'SentNum', 'Mutation', 'Swissprot', 'ProtText', 'Text', 'Mesh')
        writer = csv.DictWriter(open(ofiles[0], 'w'), ofields, delimiter = '\t')
        writer.writerow(dict(zip(ofields, ofields)))
        sent_list = [x['Text'] for x in rows]

        swiss_it = WhatizitUtils.ask_whatizit(sent_list, 
                            pipeline = 'whatizitSwissprot')
        mesh_it = WhatizitUtils.ask_whatizit(sent_list, 
                            pipeline = 'whatizitMeshUp')
        try:
            for row, swiss_group, mesh_group in izip(rows, swiss_it, mesh_it):
                if swiss_group:
                    meshterms = '|'.join(x[1] for x in mesh_group)
                    for prot_text, reslist in swiss_group:
                        for res in reslist:
                            row['Swissprot'] = res
                            row['ProtText'] = prot_text
                            row['Mesh'] = meshterms
                            writer.writerow(row)
        except HTMLParseError:
            pass
        GeneralUtils.touch(ofiles[1])
    else:
        for f in ofiles:
            GeneralUtils.touch(f)
Example #14
0
def returnSpikeLocations(spikes, moving):
    global ts
    moving = []
    stopped = []

    for spike in spikes:
        closest_t = gu.take_Closest(ts, spike)
        indx = np.where(ts == closest_t)[0][0]
        #print("i {}".format(x[i]))
        if indx in imoving:
            moving.append(indx)
        else:
            stopped.append(indx)

    return moving, stopped
Example #15
0
def convert_results(ifiles, ofile):

    with open(ifiles[0]) as handle:
        uniprot_ids = [x["Swissprot"] for x in csv.DictReader(handle, delimiter="\t")]
    uniprot2entrez = UniprotUtils.uniprot_to_entrez(uniprot_ids)
    print "got entrez mapping", len(uniprot2entrez)
    uniprot2symbol = UniprotUtils.uniprot_to_symbol(uniprot_ids, uniprot2entrez=uniprot2entrez, with_taxid=True)
    print "got symbol mapping", len(uniprot2symbol)
    text_normalizer = GeneralUtils.get_known_mappings(ifiles[3])
    convfields = (
        "Article",
        "ParNum",
        "SentNum",
        "Mutation",
        "Swissprot",
        "ProtText",
        "GeneID",
        "Symbol",
        "Taxid",
        "Mesh",
    )
    with open(ofile, "w") as conv_handle:
        conv_writer = csv.DictWriter(conv_handle, convfields, delimiter="\t", extrasaction="ignore")
        conv_writer.writerow(dict(zip(convfields, convfields)))
        with open(ifiles[0]) as ihandle:
            for row in csv.DictReader(ihandle, delimiter="\t"):
                for geneid, genesym in zip(uniprot2entrez[row["Swissprot"]], uniprot2symbol[row["Swissprot"]]):
                    row["GeneID"] = geneid
                    row["Symbol"] = genesym[0]
                    row["Taxid"] = genesym[1]
                    txt = row["ProtText"].lower()
                    if row["ProtText"].endswith("s"):
                        txt = txt[:-1]
                    if txt in text_normalizer:
                        txt = text_normalizer[txt]
                        row["Symbol"] = txt
                    row["ProtText"] = txt
                    conv_writer.writerow(row)
Example #16
0
 def ___extractTriplePatternUris(self, result, el):
     if('triples' in el.keys()):
         for tm in el['triples']:
             subject = tm['subject']['value']
             uri = tm['predicate']['value']
             if(subject not in result.keys()):
                 result[subject] = {'uris':[], 'fullTM':False}
 #                TMOfJoin = getSubjectInsideTPO(s,query)
 #                if(TMOfJoin != '')):
 #                    findSubjectOfJoin(TMOfJoin, mapping)
 #            if(isUri(subject)):
 #               subjectUri = findSubjectInMapping(subject, mapping) TODO
 #                result[subject]['uris'].append(subject)
             if(utils.isUri(uri)):
                 if(uri == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'):
                     uri = tm['object']['value']
                     if not  'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' in result[subject]['uris']:
                         result[subject]['uris'].append('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
                 if(not uri in result[subject]['uris']):
                     result[subject]['uris'].append(uri)
             else:
                 result[subject]['fullTM'] = True
     return result
Example #17
0
xsmooth = np.abs(np.convolve(x, np.ones(100, dtype=np.int), 'valid')) / 100
print(ts[0] / 10000)
print(ts[-1] / 10000)

eegfile = sys.argv[1]
eegdata, eegtimestamps = eeg.readEEG(eegfile)
#print(eegtimestamps[0])
#print(eegtimestamps[-1])

fs = 512 / .257552
print(len(eegdata))
f, spec_t, Sxx = signal.spectrogram(eegdata, fs, nperseg=1024, noverlap=512)
#print(t[0])
#print(t[-1])

closest_start = gu.take_Closest(spec_t, ts[0] / 10000)
spec_start_idx = np.where(spec_t == closest_start)[0][0]

closest_stop = gu.take_Closest(spec_t, ts[-1] / 10000)
spec_stop_idx = np.where(spec_t == closest_stop)[0][0]

#new_spec_t is spec_t made to fit the length of xsmooth
new_spec_t = np.linspace(spec_t[spec_start_idx], spec_t[spec_stop_idx],
                         len(xsmooth))

print(len(new_spec_t))
print(len(xsmooth))

#fband=4
#theta = Sxx[4,:]+Sxx[5,:]/2
theta = Sxx[:, spec_start_idx:spec_stop_idx]
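# A short sketch (not in the original) of what signal.spectrogram returns for the
# parameters used above; the input signal here is synthetic noise:
import numpy as np
from scipy import signal

fs_demo = 512 / .257552                        # ~1988 Hz, as computed above
fake_eeg = np.random.randn(int(fs_demo * 10))  # 10 s of fake EEG
f_demo, t_demo, Sxx_demo = signal.spectrogram(fake_eeg, fs_demo, nperseg=1024, noverlap=512)
print(f_demo.shape, t_demo.shape, Sxx_demo.shape)  # Sxx has shape (len(f), len(t))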
Example #18
0
    ttfile = tfile.replace(tfile[tfile.find('_'):], '.ntt')
    ts, waveforms = trode.readTetrode(ttfile)

    spikes = trode.readTFile(tfile)

    if len(spikes) > 500:
        spikes = spikes[:500]

    smallwave = np.zeros([4, 32, len(spikes)])

    ##find the nearest waveform timestamp to tfile timestamp

    for indx, spike in enumerate(spikes):

        closest = gu.take_Closest(ts, spike * 100)
        ts_index = np.where(ts == closest)[0]

        smallwave[:, :, indx] = waveforms[:, :, ts_index[0]]

    ##compute the mean waveform for each channel
    wave_mean = np.zeros([4, 32])

    for i in range(4):
        wave_mean[i, :] = np.mean(smallwave[i, :, :], axis=1)

    FWHM_pre, FWHM_post, spike_width = calculate_FWHM(wave_mean)
    print("spike width FWHM: ", spike_width)

    #make the super resolution spike on four channels
    #this allows the FWHM points to be plotted
Example #19
0
s1_fr = len(s1spikes) / ((vts[start] - vts[0]) / 10000)
m_fr = len(mspikes) / ((vts[stop] - vts[start]) / 10000)
s2_fr = len(s2spikes) / ((vts[-1] - vts[stop]) / 10000)

##segregate spikes in to s1, maze, s2

posx_left = []
posy_left = []

posx_right = []
posy_right = []

#no need to run time-consuming place analysis on interneurons (>5HZ)

for spike in mspikes:
    closest_t = gu.take_Closest(ts, spike)
    indx, = np.where(ts == closest_t)
    #print("i {}".format(x[i]))
    if indx in imoving:
        if direction[indx] == 0:
            posx_left.append(x[indx])
            posy_left.append(y[indx])
        else:
            posx_right.append(x[indx])
            posy_right.append(y[indx])

occhist_left, _ = np.histogram(x[imoving_left], bins=bins)
occhist_right, _ = np.histogram(x[imoving_right], bins=bins)
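#convert occupancy counts to seconds (presumably 60 Hz position samples; .016666 ≈ 1/60)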
occhist_left = occhist_left * .016666
occhist_right = occhist_right * .016666
Example #20
0
                             'valid')) / smoothfac

#use later to detect direction
#direction = np.where(np.diff(xsmooth)>0, 1, 0)

cum = np.cumsum(np.abs(np.diff(xsmooth)))
cum_t = np.linspace(ts[0], ts[-1], num=len(cum))

inotmoving = np.where(np.diff(cum) < .1)[0]
imoving = np.where(np.diff(cum) >= .1)[0]

#plt.plot(cum_t,cum)
#plt.plot(cum_t[inotmoving], cum[inotmoving], 'r.')
#plt.show()

rests = gu.group_consecutives(inotmoving)

starts = []
stops = []
seq = []

for rest in rests:
    #take a maximum of 100 samples at the rest point
    #note this kicks out a warning sometimes; mean of empty slices
    endpt = rest[0] + 100 if rest[-1] - rest[0] > 100 else rest[-1]

    if np.mean(xsmooth[rest[0]:endpt]) > 60:
        #print("Goal 1")
        seq.append(1)
    elif np.mean(xsmooth[rest[0]:endpt]) < 8:
        #print("Goal 2")
Example #21
0
def FileIter(func_name):
    """A general iterator for all of the ruffus functions in the pipeline."""

    if func_name == 'convert_pmids_to_pmcs':
        sdir = partial(os.path.join, 'Data', 'SearchResults')
        pmc_file = os.path.join('Data', 'PMC-ids.csv')
        files = [x for x in os.listdir(sdir('')) if x.endswith('.res')]
        for f in files:
            yield (sdir(f), pmc_file), sdir(f + '.conv')

    elif func_name == 'search_pubmed':
        sdir = partial(os.path.join, 'Data', 'SearchResults')
        queryfile = os.path.join('Data', 'QueryList.txt')
        with open(queryfile) as handle:
            for row in csv.DictReader(handle):
                fname = '%s--%s.res' % (GeneralUtils.slugify(
                    row['org']), GeneralUtils.slugify(row['search']))
                ofile = sdir(fname)
                yield queryfile, ofile, row['search']

    elif func_name == 'download_pmids':

        sdir = partial(os.path.join, 'Data', 'SearchResults')
        odir = os.path.join('Data', 'RawXML')
        files = [x for x in os.listdir(sdir('')) if x.endswith('.conv')]

        for f in files:
            yield sdir(f), sdir(f + '.dl'), odir

    elif func_name == 'extract_text':

        sdir = partial(os.path.join, 'Data', 'RawXML')
        odir = partial(os.path.join, 'Data', 'SentenceFiles')

        files = sorted([x for x in os.listdir(sdir('')) if x.endswith('.xml')])
        for f in files:
            name = f.split('.')[0]
            if f.startswith('PMC'):
                typ = 'pmc'
            else:
                typ = 'pubmed'

            yield sdir(f), odir(name + '.sent'), typ

    elif func_name == 'get_mutations':

        sdir = partial(os.path.join, 'Data', 'SentenceFiles')
        odir = partial(os.path.join, 'Data', 'MutFiles')
        finder = None  #mutfinder_gen('regex.txt')

        files = sorted(
            [x for x in os.listdir(sdir('')) if x.endswith('.sent')])

        for f in files:
            name = f.split('.')[0]
            yield sdir(f), odir(name + '.mut')

    elif func_name == 'process_mut_file':

        sdir = partial(os.path.join, 'Data', 'MutFiles')
        odir = partial(os.path.join, 'Data', 'ProteinFiles')

        files = sorted([x for x in os.listdir(sdir('')) if x.endswith('.mut')])

        for f in files:
            name = f.split('.')[0]
            yield sdir(f), (odir(name + '.prot'), odir(name + '.sen'))
    elif func_name == 'mapping_files':
        path = 'Data/Mapping/'
        items = ((
            'ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz',
            'idmapping.dat.sort'),
                 ('ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz',
                  'gene_info'),
                 ('ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/PMC-ids.csv.gz',
                  'PMC-ids.csv'),
                 ('ftp://nlmpubs.nlm.nih.gov/online/mesh/.asciimesh/d2011.bin',
                  'd2011.bin'))
        for url, ofile in items:
            yield None, os.path.join(path, ofile), url, path
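# A minimal sketch (not part of the original) of driving one branch of the
# generator by hand; the tuple unpacking matches the 'download_pmids' yields above:
# for ifile, ofile, odir in FileIter('download_pmids'):
#     download_pmids(ifile, ofile, odir)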
Example #22
0
import VideoUtils as vu
import numpy as np
import GeneralUtils as gu
from matplotlib import pyplot as plt

makeplot = 0
#pvdfile = './RawData/dwP.pvd'
pvdfile = './RawData/sleep_dwPout.pvd'
ts, x, y = vu.readPVDfile(pvdfile)
x /= 8.2
xsmooth = np.abs(np.convolve(x, np.ones(100, dtype=np.int), 'valid')) / 100

cum = np.cumsum(np.abs(np.diff(xsmooth)))
inotmoving = np.where(np.diff(cum) < .025)[0]

grouped = gu.group_consecutives(inotmoving)

starts = []
stops = []

for group in grouped:
    if group[-1] - group[0] > 1000:
        starts.append(ts[group[0]])
        stops.append(ts[group[-1]])
        # minix = x[group[0]:group[-1]]
        if makeplot == 1:
            plt.plot(x[group[0]:group[-1]], y[group[0]:group[-1]], 'b.')
            plt.ylim([0, 480])
            plt.xlim([0, 640])
            plt.show()
Example #23
0
def FileIter(func_name):
    """A general iterator for all of the ruffus functions in the pipeline."""
    
    if func_name == 'convert_pmids_to_pmcs':
        sdir = partial(os.path.join,'Data', 'SearchResults')
        pmc_file = os.path.join('Data', 'PMC-ids.csv')
        files = [x for x in os.listdir(sdir('')) if x.endswith('.res')]
        for f in files:
            yield (sdir(f), pmc_file), sdir(f+'.conv')

    elif func_name == 'search_pubmed':
        sdir = partial(os.path.join,'Data', 'SearchResults')
        queryfile = os.path.join('Data', 'QueryList.txt')
        with open(queryfile) as handle:
            for row in csv.DictReader(handle):
                fname = '%s--%s.res' % (GeneralUtils.slugify(row['org']), 
                                        GeneralUtils.slugify(row['search']))
                ofile = sdir(fname)
                yield queryfile, ofile, row['search']

    elif func_name == 'download_pmids':
        
        sdir = partial(os.path.join,'Data', 'SearchResults')
        odir = os.path.join('Data', 'RawXML')
        files = [x for x in os.listdir(sdir('')) if x.endswith('.conv')]
        
        for f in files:
            yield sdir(f), sdir(f+'.dl'), odir

    elif func_name == 'extract_text':
        
        sdir = partial(os.path.join, 'Data', 'RawXML')
        odir = partial(os.path.join, 'Data', 'SentenceFiles')

        files = sorted([x for x in os.listdir(sdir('')) if x.endswith('.xml')])
        for f in files:
            name = f.split('.')[0]
            if f.startswith('PMC'):
                typ = 'pmc'
            else:
                typ = 'pubmed'

            yield sdir(f), odir(name+'.sent'), typ

    elif func_name == 'get_mutations':
        
        sdir = partial(os.path.join, 'Data', 'SentenceFiles')
        odir = partial(os.path.join, 'Data', 'MutFiles')
        finder = None#mutfinder_gen('regex.txt')

        files = sorted([x for x in os.listdir(sdir('')) if x.endswith('.sent')])

        for f in files:
            name = f.split('.')[0]
            yield sdir(f), odir(name + '.mut')
        
    elif func_name == 'process_mut_file':
        
        sdir = partial(os.path.join, 'Data', 'MutFiles')
        odir = partial(os.path.join, 'Data', 'ProteinFiles')

        files = sorted([x for x in os.listdir(sdir('')) if x.endswith('.mut')])

        for f in files:
            name = f.split('.')[0]
            yield sdir(f), (odir(name + '.prot'), odir(name + '.sen'))
    elif func_name == 'mapping_files':
        path = 'Data/Mapping/'
        items = (('ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz', 'idmapping.dat.sort'),
                    ('ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz', 'gene_info'),
                    ('ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/PMC-ids.csv.gz', 'PMC-ids.csv'),
                    ('ftp://nlmpubs.nlm.nih.gov/online/mesh/.asciimesh/d2011.bin', 'd2011.bin'))
        for url, ofile in items:
            yield None, os.path.join(path, ofile), url, path
Example #24
0
duration_index = np.where(amp_env > rip_duration_thresh)

#deblipping eliminates short off conditions when the signal is high
deblipped_detection = removeBlips(ts, detection_index, 50)
deblipped_duration  = removeBlips(ts, duration_index, 50)

#grouping keeps only indicator segments lasting at least a minimum duration (35 ms for detection, 85 ms for duration)
detection_indicator = min_duration_indicator(ts,np.where(deblipped_detection==1)[0], 35)
duration_indicator = min_duration_indicator(ts, np.where(deblipped_duration==1)[0], 85)

#rip_labels identifies each separate ripple; the height of the square represents the ripple number
#like an indicator function with labels
rip_labels = np.zeros(len(ts), dtype=np.uint16)

#group duration_indicators 
grouped_rips = gu.group_consecutives(np.where(duration_indicator==1)[0])

for i,g in enumerate(grouped_rips):
  rip_labels[g] = i
   
durations = duration_indicator * rip_labels
detections = detection_indicator * rip_labels

unique_durations = np.unique(durations)
unique_detections = np.unique(detections)

rips = np.intersect1d(unique_durations, unique_detections)
print(len(rips))
'''
#remove detections that are not in rips
for rip in rips:
Example #25
0
def download_files(ifile, ofile, url, path):
    """Downloads the mapping files needed for various steps"""
    
    GeneralUtils.download_file(path, url, sort = ofile.endswith('.sort'))
    GeneralUtils.touch(ofile)
Example #26
0
def Log(message, user='******'):
    dateStr = GeneralUtils.GetDateStrPretty(datetime.now())

    with open(LOGFILENAME, 'a') as file:
        file.write(' - '.join((dateStr, user, message)))
        file.write('\n')
Example #27
0
def download_files(ifile, ofile, url, path):
    """Downloads the mapping files needed for various steps"""

    GeneralUtils.download_file(path, url, sort=ofile.endswith(".sort"))
    GeneralUtils.touch(ofile)