Пример #1
0
def main():
    """Run the quantification step and write the probe/annotation reports.

    Options come from ``getargv()``.  The top-intensity matrix is written
    first, then the probe information file and the annotation file; both file
    names are prefixed with ``<label>_`` when a ``'label'`` option is present.
    """
    options = getargv()

    # quantify output
    probe_map = map.map(options['fi'])
    top_intensities = TopInten(probe_map, options['method'], options['n'],
                               options['rank'])
    run_tag = options['method'] + '_' + str(options['n']) + '_' + options['rank']
    writematrix(top_intensities, options, run_tag)

    # probe information
    if 'label' in options:
        probe_path = (options['fopath'] + options['label']
                      + '_probe_information.txt')
    else:
        probe_path = options['fopath'] + 'probe_information.txt'
    annotation_source = probe(probe_map, probe_path)

    # annotation output, using the same optional label prefix
    if 'label' in options:
        annotation_path = options['fopath'] + options['label'] + '_annotation.txt'
        label = options['label']
    else:
        annotation_path = options['fopath'] + 'annotation.txt'
        label = ''
    annotate(annotation_source, annotation_path, label)

    print('Done!')
Пример #2
0
def main():
    """Dispatch the CIRCexplorer command line to the requested sub-command.

    With no argument or an unknown sub-command the help text is handed to
    ``sys.exit``; ``--version``/``-v`` hands over the version string instead.
    """
    # parse command
    command_log = 'CIRCexplorer parameters: ' + ' '.join(sys.argv)
    if len(sys.argv) == 1:
        sys.exit(help_doc)

    requested = sys.argv[1]
    if requested in ('--version', '-v'):
        sys.exit(__version__)
    if requested not in ('align', 'parse', 'annotate', 'assemble', 'denovo'):
        sys.exit(help_doc)

    # Each sub-command lives in a module of the same name whose docstring is
    # the docopt usage text and whose entry point is again named the same.
    module = __import__(requested)
    entry_point = getattr(module, requested)
    entry_point(docopt(module.__doc__, version=__version__),
                command=command_log,
                name=requested)
def test_annotate():
    """Run ``annotate`` on a scratch copy of the sample image.

    The sample image is copied and ``annotate`` is called on the copy; the
    test passes when the copy still exists afterwards.  The copy is removed
    on every exit path, so a failing run does not leave a stale file behind.
    """
    path = 'images/for_compression/marmite_500x500.jpg'
    path_annotated = 'images/for_compression/marmite_500x500_annotated.jpg'
    copyfile(path, path_annotated)
    try:
        annotate(path_annotated, terms=10, compression=12.2, fontsize=30)
        assert os.path.exists(path_annotated)
    finally:
        # Guard the removal so a missing file cannot mask the original
        # failure with a second exception raised during clean-up.
        if os.path.exists(path_annotated):
            os.remove(path_annotated)
Пример #4
0
def main():
    """Entry point: run the CIRCexplorer sub-command named in ``sys.argv[1]``.

    The help text is passed to ``sys.exit`` when no argument is given or the
    sub-command is unknown; ``--version``/``-v`` passes the version string.
    Sub-command modules are imported only when they are actually requested.
    """
    # parse command
    command_log = 'CIRCexplorer parameters: ' + ' '.join(sys.argv)
    if len(sys.argv) == 1:
        sys.exit(help_doc)

    subcommand = sys.argv[1]
    if subcommand in ('--version', '-v'):
        sys.exit(__version__)

    if subcommand == 'align':
        import align
        options = docopt(align.__doc__, version=__version__)
        align.align(options, command=command_log, name='align')
    elif subcommand == 'parse':
        import parse
        options = docopt(parse.__doc__, version=__version__)
        parse.parse(options, command=command_log, name='parse')
    elif subcommand == 'annotate':
        import annotate
        options = docopt(annotate.__doc__, version=__version__)
        annotate.annotate(options, command=command_log, name='annotate')
    elif subcommand == 'assemble':
        import assemble
        options = docopt(assemble.__doc__, version=__version__)
        assemble.assemble(options, command=command_log, name='assemble')
    elif subcommand == 'denovo':
        import denovo
        options = docopt(denovo.__doc__, version=__version__)
        denovo.denovo(options, command=command_log, name='denovo')
    else:
        sys.exit(help_doc)
Пример #5
0
def start():
    """Command-line entry point: parse the options, then compile one module.

    Installs a fresh global configuration (``setgx(newgx())``), prints the
    banner, rejects unsupported Python versions and a conflicting
    ``c:/mingw`` directory on Windows, applies the command-line options to
    the global configuration and finally runs analysis, annotation, code
    generation and error reporting for the single module named on the
    command line (the ``.py`` suffix may be omitted).

    Failed checks and missing files terminate through ``sys.exit(1)``.
    ``usage()`` is called for ``-h``/``--help``, for options getopt rejects
    and for a wrong number of positional arguments.
    """
    setgx(newgx())

    # Single-argument print calls behave the same as the print statement on
    # Python 2 and keep the module importable by newer tooling.
    print('*** SHED SKIN Python-to-C++ Compiler 0.7.1 ***')
    print('Copyright 2005-2010 Mark Dufour; License GNU GPL version 3 (See LICENSE)')
    print('')

    # --- some checks
    major, minor = sys.version_info[:2]
    if (major, minor) not in [(2, 4), (2, 5), (2, 6), (2, 7)]:
        print('*ERROR* Shed Skin is not compatible with this version of Python')
        sys.exit(1)
    if sys.platform == 'win32' and os.path.isdir('c:/mingw'):
        print('*ERROR* please rename or remove c:/mingw, as it conflicts with Shed Skin')
        # Non-zero status, consistent with the other error exits.
        sys.exit(1)

    # --- command-line options
    # 'i:' is part of the short options so that '-i N' (handled below) is
    # accepted by getopt instead of being rejected as unknown.
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'vbchef:wad:m:rolspi:',
            ['help', 'extmod', 'nobounds', 'nowrap', 'flags=', 'dir=',
             'makefile=', 'random', 'noassert', 'long', 'msvc', 'ann',
             'strhash', 'iterations=', 'pypy'])
    except getopt.GetoptError:
        # NOTE(review): presumably usage() terminates the process; if it
        # returned, 'opts' would be unbound below - confirm.
        usage()

    # Options that simply set one attribute of the global configuration.
    switches = (
        (('-b', '--nobounds'), 'bounds_checking', False),
        (('-e', '--extmod'), 'extension_module', True),
        (('-a', '--ann'), 'annotation', True),
        (('-l', '--long'), 'longlong', True),
        (('-w', '--nowrap'), 'wrap_around_check', False),
        (('-r', '--random'), 'fast_random', True),
        (('-o', '--noassert'), 'assertions', False),
        (('-p', '--pypy'), 'pypy', True),
        (('-s', '--strhash'), 'fast_hash', True),
        (('-v', '--msvc'), 'msvc', True),
    )

    for o, a in opts:
        if o in ('-h', '--help'):
            usage()
        for names, attribute, value in switches:
            if o in names:
                setattr(getgx(), attribute, value)
        if o in ('-d', '--dir'):
            getgx().output_dir = a
        if o in ('-m', '--makefile'):
            getgx().makefile_name = a
        if o in ('-i', '--iterations'):
            getgx().max_iterations = int(a)
        if o in ('-f', '--flags'):
            if not os.path.isfile(a):
                print("*ERROR* no such file: '%s'" % a)
                sys.exit(1)
            getgx().flags = a

    # --- argument
    if len(args) != 1:
        usage()
    name = args[0]
    if not name.endswith('.py'):
        name += '.py'
    if not os.path.isfile(name):
        print("*ERROR* no such file: '%s'" % name)
        sys.exit(1)
    getgx().main_mod = name[:-3]  # module name without the '.py' suffix

    # --- analyze & annotate
    infer.analyze(name)
    annotate.annotate()
    cpp.generate_code()
    shared.print_errors()
Пример #6
0
def plotMap(im, vmin=0,vmax=1, cmap='bwr', nocbar=False, title=None,
            freqmask=slice(0,2**63-1)):
    """Show ``im`` as a square map with region annotations and side axes.

    A new 16x12 figure is created, ``im`` is drawn with ``imshow`` on the
    main axes, the module-level ``regions``/``lobediv`` are overlaid through
    ``annotate`` and, unless ``nocbar`` is set, a colorbar is added on the
    right.  Three narrow axes sharing the y axis are appended on the left;
    the code that used to fill them is currently commented out.

    Returns ``(axmap, axins, axdel, cbar)``; ``cbar`` is ``None`` when
    ``nocbar`` is true.  The third side axes is created but not returned.
    """
    #see http://matplotlib.org/1.4.2/mpl_toolkits/axes_grid/users/overview.html
    pylab.figure(figsize=(16,12))

    axmap = pylab.subplot(111)
    divider = make_axes_locatable(axmap)
    # The colorbar axes (if any) is appended before the left-hand axes.
    cax = None if nocbar else divider.append_axes("right", size="5%", pad=0.5)
    axins, axdel, axunr = [
        divider.append_axes("left", size=0.5, pad=gap, sharey=axmap)
        for gap in (0.5, 0.15, 0.15)
    ]

    image = axmap.imshow(im, origin='lower',
                         extent=(+0.5, L+0.5, +0.5, L+0.5),
                         interpolation='nearest', cmap=cmap,
                         vmin=vmin, vmax=vmax, aspect='equal')
    annotate(regions, lobediv, L, axmap)
    axmap.set_aspect(1)

    cbar = None if nocbar else pylab.colorbar(image, cax=cax)
    if title:
        axmap.set_title(title)

    def wmean(s):
        # Only referenced by the disabled side plots below.
        weighted = (s*weights[:,newaxis])[freqmask]
        return sum(weighted, axis=0)/sum(freqweights[freqmask])

    #axins.barh(arange(L)+1, wmean(inserts), height=1, color='gray', ec='none')
    #axins.set_xlim(0,6)
    #axins.set_xticks([0,3,6])
    #axins.invert_xaxis()
    #axins.text(6*0.9, 1, 'Mean Number of Insertions', rotation='vertical',
    #           verticalalignment='bottom')
    #pylab.setp(axins.get_yticklabels(), visible=False)


    #axdel.barh(arange(L)+1, wmean(deletes), height=1, color='gray', ec='none')
    #axdel.set_xlim(0,1)
    #axdel.set_xticks([0,1])
    #axdel.invert_xaxis()
    #axdel.text(0.9, 1, 'Frequency of Deletions', rotation='vertical',
    #           verticalalignment='bottom')
    #pylab.setp(axdel.get_yticklabels(), visible=False)

    #axunr.barh(arange(L)+1, wmean(unresolveds), height=1,
    #           color='gray', ec='none')
    #axunr.set_xlim(0,1)
    #axunr.set_xticks([0,1])
    #axunr.invert_xaxis()
    #axunr.text(0.9, 1, 'Frequency of Unresolved', rotation='vertical',
    #           verticalalignment='bottom')
    #pylab.setp(axunr.get_yticklabels(), visible=False)

    #axmap.set_yticks(axmap.get_xticks())
    for set_limits in (axmap.set_xlim, axmap.set_ylim):
        set_limits(0.5, L+0.5)
    return axmap, axins, axdel, cbar
Пример #7
0
def start(gx, main_module_name):
    """Analyze, annotate and generate code for the main module.

    Pending errors are reported afterwards and the total wall-clock time of
    the run is logged at INFO level.
    """
    started = time.time()

    # --- analyze & annotate
    analyze(gx, main_module_name)
    annotate(gx)
    generate_code(gx)
    print_errors()

    elapsed = time.time() - started
    logging.info('[elapsed time: %.2f seconds]', elapsed)
Пример #8
0
def start(gx, main_module_name):
    """Run the compile pipeline for ``main_module_name`` and log its duration.

    The stages run in order: analysis, annotation, code generation, error
    reporting.
    """
    t_begin = time.time()
    # --- analyze & annotate
    analyze(gx, main_module_name)
    annotate(gx)
    generate_code(gx)
    print_errors()
    seconds = time.time() - t_begin
    logging.info('[elapsed time: %.2f seconds]', seconds)
Пример #9
0
def start(gx, main_module_name):
    """Run analysis, annotation and code generation, then report errors.

    The elapsed wall-clock time is printed unless ``gx.silent`` is set.
    """
    t_start = time.time()

    # --- analyze & annotate
    analyze(gx, main_module_name)
    annotate(gx)
    generate_code(gx)
    print_errors()

    if gx.silent:
        return
    # A parenthesised single argument prints the same text under the
    # Python 2 print statement.
    print('[elapsed time: %.2f seconds]' % (time.time() - t_start))
Пример #10
0
def main():
    """Command-line entry point: parse the options, then compile one module.

    Installs a fresh global configuration (``setgx(newgx())``), prints the
    banner, rejects unsupported Python versions and a conflicting
    ``c:/mingw`` directory on Windows, applies the command-line options to
    the global configuration and finally runs analysis, annotation and code
    generation for the single module named on the command line (the ``.py``
    suffix may be omitted).

    Failed checks and missing files terminate through ``sys.exit(1)``.
    ``usage()`` is called for ``-h``/``--help``, for options getopt rejects
    and for a wrong number of positional arguments.
    """
    setgx(newgx())

    # Single-argument print calls behave the same as the print statement on
    # Python 2 and keep the module importable by newer tooling.
    print('*** SHED SKIN Python-to-C++ Compiler 0.5 ***')
    print('Copyright 2005-2010 Mark Dufour; License GNU GPL version 3 (See LICENSE)')
    print('')

    # --- some checks
    major, minor = sys.version_info[:2]
    if (major, minor) not in [(2, 4), (2, 5), (2, 6)]:
        print('*ERROR* Shed Skin is not compatible with this version of Python')
        sys.exit(1)
    if sys.platform == 'win32' and os.path.isdir('c:/mingw'):
        print('*ERROR* please rename or remove c:/mingw, as it conflicts with Shed Skin')
        # Non-zero status, consistent with the other error exits.
        sys.exit(1)

    # --- command-line options
    # 'help' and 'ann' are listed so that the long forms handled in the loop
    # below are accepted by getopt instead of being rejected as unknown.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'vbchef:wad:m:rl', [
            'help', 'extmod', 'nobounds', 'nowrap', 'flags=', 'dir=',
            'makefile=', 'random', 'long', 'msvc', 'ann'
        ])
    except getopt.GetoptError:
        # NOTE(review): presumably usage() terminates the process; if it
        # returned, 'opts' would be unbound below - confirm.
        usage()

    # Options that simply set one attribute of the global configuration.
    switches = (
        (('-b', '--nobounds'), 'bounds_checking', False),
        (('-e', '--extmod'), 'extension_module', True),
        (('-a', '--ann'), 'annotation', True),
        (('-l', '--long'), 'longlong', True),
        (('-w', '--nowrap'), 'wrap_around_check', False),
        (('-r', '--random'), 'fast_random', True),
        (('-v', '--msvc'), 'msvc', True),
    )

    for o, a in opts:
        if o in ('-h', '--help'):
            usage()
        for names, attribute, value in switches:
            if o in names:
                setattr(getgx(), attribute, value)
        if o in ('-d', '--dir'):
            getgx().output_dir = a
        if o in ('-m', '--makefile'):
            getgx().makefile_name = a
        if o in ('-f', '--flags'):
            if not os.path.isfile(a):
                print("*ERROR* no such file: '%s'" % a)
                sys.exit(1)
            getgx().flags = a

    # --- argument
    if len(args) != 1:
        usage()
    name = args[0]
    if not name.endswith('.py'):
        name += '.py'
    if not os.path.isfile(name):
        print("*ERROR* no such file: '%s'" % name)
        sys.exit(1)
    getgx().main_mod = name[:-3]  # module name without the '.py' suffix

    # --- analyze & annotate
    infer.analyze(name)
    annotate.annotate()
    cpp.generate_code()
Пример #11
0
 def test_annotate(self):
     """Compare ``annotate.annotate`` output with the expected strings.

     Both the produced and the expected value are passed through
     ``createSetFromString`` before they are compared.
     """
     text_a, text_b = "漢 漢10漢", "漢 漢 10 漢"
     cases = (
         ("SEM:TRA(0:) SEM:TRA(1:) SEM:TRA(1:) SEM:TRA(1:)",
          "0-0[sure] 1-1[sure]"),
         ("SEM:TRA(0:) GIS:DEM(1:) GIS:DEM(1:) GIS:DEM(1:)",
          "0-0[sure] 1-1[possible]"),
     )
     for tags, expected in cases:
         produced = annotate.annotate(tags, text_a, text_b)
         self.assertEqual(self.createSetFromString(produced),
                          self.createSetFromString(expected))
Пример #12
0
def generate_features(thread, postNum):
    """Build the HTML view of one post of a 'convinceme' discussion.

    Loads the discussion ``thread`` and picks post ``postNum``.  The post
    text is taken from the post structure when ``loadPostStruct`` succeeds,
    otherwise from ``post.text`` with carriage returns removed.  Parent
    details fall back to placeholders when the parent cannot be looked up.
    The entries of the module-level ``tuples`` for this post (minus the last
    one) are passed to ``annotate.annotate`` together with the text.

    Returns the result of ``produceHTML`` or ``""`` when annotation or
    rendering fails; the failure is logged with its traceback.
    """
    import logging

    dataset = Dataset('convinceme', annotation_list=['topic'])
    # The value is not used below; the lookup is kept because it raises for
    # a thread that has no topic annotation.
    topic = dataset.discussion_annotations[int(thread)]['topic']
    discussion = dataset.load_discussion(thread)
    post = discussion.posts[int(postNum)]

    # 'except Exception' keeps the best-effort fallbacks without swallowing
    # KeyboardInterrupt / SystemExit the way a bare 'except:' does.
    try:
        post.loadPostStruct({"site": "/convinceme", "thread": int(thread)})
        text = post.pst.text
    except Exception:
        text = post.text.replace('\r', '')

    try:
        parent = discussion.posts[int(post.parent_id)]
        parentText = parent.text
        parentAuthor = parent.author
        parentSide = parent.side
        parentId = "<a href=\"?thread=%s&post=%s\">[see post]</a>" % (thread, post.parent_id)
    except Exception:
        parentText = "No parent"
        parentAuthor = parentSide = parentId = ""

    author = post.author
    side = post.side

    try:
        tups = tuples[thread][postNum][:-1] #lose the last one because of an odd spurious bound
    except KeyError:
        tups = []
    # Note: the entries themselves are modified in place.
    for tup in tups:
        tup[-1] = tup[-1].replace(' ', '_')

    try:
        annotatedText = annotate.annotate(text, tups)
        return produceHTML(author, side, tups, parentId, parentText, parentAuthor, parentSide, annotatedText)
    except Exception:
        # Log instead of dropping into an interactive debugger, which would
        # block the process until someone attends to the console.
        logging.exception("generate_features failed for thread %s, post %s",
                          thread, postNum)
        return ""
Пример #13
0
    def extractTranscriptAndConcepts(self, video_path, ocr_on):
        """Load the transcript (and optionally OCR text) and annotate concepts.

        The transcript is read from
        ``<video_path>transcript/transcript<id>.txt`` and, when ``ocr_on`` is
        true, the OCR text from ``<video_path>slides/frameb<id>.txt``.  Both
        are reduced to their lower-cased alphabetic tokens joined by single
        spaces and stored in ``self.transcript`` / ``self.ocr`` (``self.ocr``
        is left untouched when ``ocr_on`` is false).

        ``annotate.annotate`` is called with the transcript and the OCR text,
        or with the transcript twice when OCR is disabled; each returned term
        becomes a ``Concept_Node`` in ``self.conceptNodes``.
        ``self.stopwords`` is reset to an empty list.
        """

        def read_clean_text(path):
            # The context manager closes the file even when reading fails.
            with open(path) as handle:
                raw = handle.read()
            tokens = tokenize.word_tokenize(raw, language='english')
            return ' '.join(tok.lower() for tok in tokens if tok.isalpha())

        self.transcript = read_clean_text(
            video_path + "transcript/transcript" + str(self.id) + ".txt")

        if ocr_on:
            self.ocr = read_clean_text(
                video_path + "slides/frameb" + str(self.id) + ".txt")
            companion_text = self.ocr
        else:
            companion_text = self.transcript

        annotatedTerms, depth = annotate.annotate(self.transcript,
                                                  companion_text)

        # depth is indexed explicitly so that a depth list shorter than the
        # term list still raises instead of being silently truncated.
        conceptNodeList = [
            Concept_Node(j, term, depth[j])
            for j, term in enumerate(annotatedTerms)
        ]
        self.stopwords = []

        self.conceptNodes = conceptNodeList
Пример #14
0
 def test_annotate(self):
     """Check ``annotate.annotate`` against two expected result strings.

     Produced and expected values are both normalised with
     ``createSetFromString`` before the comparison.
     """
     first = annotate.annotate(
         "SEM:TRA(0:) SEM:TRA(1:) SEM:TRA(1:) SEM:TRA(1:)",
         "漢 漢10漢", "漢 漢 10 漢")
     self.assertEqual(self.createSetFromString(first),
                      self.createSetFromString("0-0[sure] 1-1[sure]"))

     second = annotate.annotate(
         "SEM:TRA(0:) GIS:DEM(1:) GIS:DEM(1:) GIS:DEM(1:)",
         "漢 漢10漢", "漢 漢 10 漢")
     self.assertEqual(self.createSetFromString(second),
                      self.createSetFromString("0-0[sure] 1-1[possible]"))
Пример #15
0
import pandas as pd
import annotate
import genome_browser

# load the tab-separated input and rename its columns to 'pos' / 'rsid'
snap_data_notin_grasp = pd.read_csv('snap_data_notin_grasp.txt', sep='\t')
snap_data_notin_grasp = snap_data_notin_grasp.rename(
    columns={'pos_hg38': 'pos', 'rsID': 'rsid'})

# annotate against hg38, then remove protein-coding SNPs ('pcexon' rows)
annotate_ob = annotate.annotate(snap_data_notin_grasp, 'hg38')
snap_data_w_annot = annotate_ob.fast_annotate_snp_list()
snap_data_w_annot = (
    snap_data_w_annot
    .loc[snap_data_w_annot['annotation'] != 'pcexon']
    .sort_values(['query_snp_rsid', 'rsid'])
    .reset_index(drop=True)
)

# write the result as a tab-separated file without the index column
snap_data_w_annot.to_csv('snap_data_w_annot.txt', sep='\t', index=False)
print('[INFO] snap_data_w_annot.txt saved!')
Пример #16
0
def annotate(cmdline):
    """Load the survey of ``cmdline['project']`` and annotate it.

    Returns whatever the imported ``annotate.annotate`` returns.
    """
    # Imported under an alias because this wrapper has the same name.
    from annotate import annotate as annotate_survey

    return annotate_survey(model.survey.Survey.load(cmdline['project']))
Пример #17
0
def annotate_command(options, command_log):
    """Run ``annotate`` with '--no-fix' and '--low-confidence' switched off.

    ``options`` is modified in place before it is handed over together with
    the command log.
    """
    from annotate import annotate as run_annotate

    for switch in ('--no-fix', '--low-confidence'):
        options[switch] = False
    run_annotate(options, command=command_log, name='annotate')
Пример #18
0
def plotContacts(name):
    """Plot two contact-map figures for ``name`` and return the aligned distances.

    Reads the module-level ``indmaps``, ``fullalnseqs``, ``distpath``,
    ``outpath``, ``cutoff``, ``regions`` and ``lobediv``.

    Figure 1 covers the full PDB sequence: contacts (distance < ``cutoff``)
    in gray, inserts and missing residues shaded, green lines with counts at
    deletion positions, the sequence letters along the right edge and a
    dashed box around the aligned region.

    Figure 2 covers the aligned columns only: contacts in gray, unresolved
    and deleted columns shaded, red lines with counts at insertion positions.

    Side effects: appends one ``repr`` line each to the files
    ``unresolvedCounts``, ``deleteCounts`` and ``insertCounts`` in
    ``outpath``.

    Returns the (aL, aL) aligned distance matrix; entries are NaN where
    either column has no resolved residue, and 0 on the diagonal for
    resolved columns.
    """
    # map from pdb seq index to structural index (ie, from seqres index, to
    # resolved residue #)
    pdbseq2structMap = indmaps[name]
    fullalnseq = fullalnseqs[name]  #seq after alignment

    # positions in fullalnseq that are uppercase or '-'
    aln2fullalnMap = where([c.isupper() or c == '-' for c in fullalnseq])[0]
    # positions in fullalnseq that are neither '.' nor '-'
    pdbseq2fullalnMap = where([c not in ".-" for c in fullalnseq])[0]
    # inverse of pdbseq2fullalnMap; -1 where the position is '.' or '-'
    fullaln2pdbseqMap = -ones(len(fullalnseq), dtype=int)
    fullaln2pdbseqMap[pdbseq2fullalnMap] = arange(len(pdbseq2fullalnMap))
    # alignment index -> structural index, -1 where either step has no entry
    aln2structMap = [
        pdbseq2structMap[fullaln2pdbseqMap[i]]
        if fullaln2pdbseqMap[i] != -1 else -1 for i in aln2fullalnMap
    ]

    pdbseq = [x for x in fullalnseq if x not in ".-"]
    alnseq = [x for x in fullalnseq if x not in "." and not x.islower()]

    distances = load(os.path.join(distpath, name + '.npy'))
    # 'distances' fills the strict upper triangle below, so its length is
    # L*(L-1)/2; solve for L and round to the nearest integer
    L = int(((1 + sqrt(1 + 8 * len(distances))) / 2) + 0.5)

    stroke = PathEffects.withStroke(linewidth=3, foreground="w")

    #form matrix (symmetric, zero diagonal)
    distancesM = zeros((L, L), dtype=float)
    distancesM[triu_indices(L, k=1)] = distances
    distancesM = distancesM + distancesM.T

    ########################################
    # first, plot the raw contact map for the entire sequence
    # that is, the contact map size will be equal to the length of the seqres
    # header (minus junk) #shows inserts as red, missing residues as blue, and
    # deletes as dotted lines (or colored lines?)

    pylab.figure(figsize=(12, 12))
    ax = pylab.axes()
    # note: 'trans' is not used below; 'transY' is the transform passed on
    trans = transforms.blended_transform_factory(ax.transAxes, ax.transData)

    sL = len(pdbseq2structMap)
    pairs = ((i, j) for i in range(sL - 1) for j in range(i + 1, sL))
    # all-NaN matrix; pairs with a missing residue stay NaN
    distmat = zeros((sL, sL)) * nan
    for n, (i, j) in enumerate(pairs):
        if pdbseq2structMap[i] == -1 or pdbseq2structMap[j] == -1:
            continue
        distmat[i, j] = distancesM[pdbseq2structMap[i], pdbseq2structMap[j]]
        distmat[j, i] = distancesM[pdbseq2structMap[j], pdbseq2structMap[i]]
    contacts = distmat < cutoff
    # mark the diagonal entry of every residue that has a structural index
    contacts[diag([i != -1 for i in pdbseq2structMap])] = True

    # RGB image: white background, gray (0.4) where there is a contact
    cim = ones(distmat.shape + (3, ))
    cim[contacts] = ones(3) * 0.4
    pylab.imshow(cim,
                 origin='lower',
                 extent=(+0.5, sL + 0.5, +0.5, sL + 0.5),
                 interpolation='nearest')

    #annotate inserts relative to alignment (red)
    inserts = where([c.islower() for c in pdbseq])[0]
    fillGroups(inserts, sL, (0.7, 0.4, 0.4, 0.3))

    #annotate missing residues (blue)
    missing = where([i == -1 for i in pdbseq2structMap])[0]
    fillGroups(missing, sL, (0.4, 0.4, 0.7, 0.3))

    #map from alignment index to *closest* pdb seq index
    def remap(i, s):
        return searchsorted(pdbseq2fullalnMap, aln2fullalnMap[i], side=s)

    #annotate regions
    xregions = [(n, remap(s, 'left'), remap(e - 1, 'left') + 1)
                for n, s, e in regions]
    annotate(xregions, remap(lobediv, 'left'), sL, pylab.gca(), zorder=1)

    #plot lines at deletion points (green)
    deletepos = searchsorted(pdbseq2fullalnMap,
                             where([x == '-' for x in fullalnseq])[0])
    #for d in set(deletepos):
    #    pylab.axvline(d+0.5, color='g', zorder=9)
    #    pylab.axhline(d+0.5, color='g', zorder=9)
    transY = transforms.blended_transform_factory(ax.transAxes, ax.transData)
    altern = False
    # one line pair per distinct position; the label is the number of
    # consecutive '-' columns that map to it
    for d, g in groupby(deletepos):
        pylab.axvline(d + 0.5, color='g', zorder=9)
        pylab.axhline(d + 0.5, color='g', zorder=9)
        pylab.text(0.005 + altern * 0.01,
                   d + 0.5,
                   str(len(list(g))),
                   verticalalignment='center',
                   horizontalalignment='right',
                   transform=transY,
                   path_effects=[stroke],
                   color='g',
                   zorder=100)
        altern = not altern

    #plot sequence (letters cycle through 6 x offsets)
    altern = 0
    for n, c in enumerate(pdbseq):
        pylab.text(1.02 + altern * 0.014,
                   n + 1,
                   c,
                   verticalalignment='center',
                   horizontalalignment='center',
                   transform=transY,
                   zorder=100)
        altern = (altern + 1) % 6

    #plot dotted line around alignment region
    start = remap(0, 'left') - 0.5
    end = remap(-1, 'right') + 1.5
    pylab.fill([start, start, end, end], [start, end, end, start],
               fill=False,
               ls='dashed',
               zorder=10)

    pylab.title(name + ", full PDB sequence ({}A)".format(cutoff))
    pylab.subplots_adjust(bottom=0.05, right=0.90, top=0.95, left=0.05)

    ########################################
    #next, plot just the aligned part

    pylab.figure(figsize=(12, 12))
    ax = pylab.axes()
    # note: 'trans' is not used below; 'transY' is the transform passed on
    trans = transforms.blended_transform_factory(ax.transAxes, ax.transData)

    aL = len(aln2fullalnMap)

    #first, get the aligned distance map
    #need map from aln index to structure index
    adistmat = zeros((aL, aL)) * nan
    for i, j in [(i, j) for i in range(aL - 1) for j in range(i + 1, aL)]:
        xi = aln2structMap[i]
        xj = aln2structMap[j]
        if xi == -1 or xj == -1:
            continue
        adistmat[i, j] = distancesM[xi, xj]
        adistmat[j, i] = distancesM[xi, xj]

    #set diagonal for aligned residues
    for i in range(aL):
        if aln2structMap[i] != -1:
            adistmat[i, i] = 0
    acontacts = adistmat < cutoff

    # RGB image: white background, gray (0.4) where there is a contact
    acim = ones(adistmat.shape + (3, ))
    acim[acontacts] = ones(3) * 0.4
    pylab.imshow(acim,
                 origin='lower',
                 extent=(+0.5, aL + 0.5, +0.5, aL + 0.5),
                 interpolation='nearest')

    #plot unresolved and deleted regions
    unresolved = where([aln2structMap[n] == -1 for n in range(len(alnseq))])[0]
    fillGroups(unresolved, aL, (0.4, 0.4, 0.7, 0.3))
    deleted = where([c == '-' for c in alnseq])[0]
    fillGroups(deleted, aL, (0.4, 0.7, 0.4, 0.3))
    # append mode: one record per call is added to each counts file
    with open(os.path.join(outpath, 'unresolvedCounts'), 'at') as f:
        print(repr((name, list(unresolved))), file=f)
    with open(os.path.join(outpath, 'deleteCounts'), 'at') as f:
        print(repr((name, list(deleted))), file=f)

    #plot insertions (lines, red)
    insertpos = searchsorted(aln2fullalnMap,
                             where([x.islower() for x in fullalnseq])[0])
    #for d in set(insertpos):
    #    pylab.axvline(d+0.5, color='r', zorder=9)
    #    pylab.axhline(d+0.5, color='r', zorder=9)
    transY = transforms.blended_transform_factory(ax.transAxes, ax.transData)
    altern = False
    # (position, run length) for each run of equal insert positions
    insertlist = [(d, len(list(g))) for d, g in groupby(insertpos)]
    for d, n in insertlist[1:-1]:  #first and last are just sequence extension
        pylab.axvline(d + 0.5, color='r', zorder=9)
        pylab.axhline(d + 0.5, color='r', zorder=9)
        pylab.text(0.005 + altern * 0.01,
                   d + 0.5,
                   str(n),
                   verticalalignment='center',
                   horizontalalignment='right',
                   transform=transY,
                   path_effects=[stroke],
                   color='r',
                   zorder=100)
        altern = not altern

    # the full list (including first and last entry) is what gets recorded
    with open(os.path.join(outpath, 'insertCounts'), 'at') as f:
        print(repr((name, insertlist)), file=f)

    #annotate regions
    annotate(regions, lobediv, aL, pylab.gca())
    pylab.title(name + ", aligned sequence only ({}A)".format(cutoff))
    pylab.subplots_adjust(bottom=0.05, right=0.90, top=0.95, left=0.05)
    return adistmat
Пример #19
0
import annotate
import pandas as pd

# annotate SNPs in GRASP database
# (PMID and NHLBIkey are read as strings; the hg19 columns become 'chr'/'pos')
grasp_sub = pd.read_csv(
    "grasp_sub.txt", sep="\t", converters={"PMID": str, "NHLBIkey": str})
grasp_sub = grasp_sub.rename(
    columns={'chr(hg19)': 'chr', 'pos(hg19)': 'pos'})

annotate_ob = annotate.annotate(grasp_sub, 'hg19')
grasp_sub_w_annot = annotate_ob.fast_annotate_snp_list()

# order by ID, renumber the rows and write a tab-separated file
grasp_sub_w_annot = grasp_sub_w_annot.sort_values(["ID"])
grasp_sub_w_annot = grasp_sub_w_annot.reset_index(drop=True)
grasp_sub_w_annot.to_csv("grasp_sub_w_annot.txt", sep="\t", index=False)
print("[INFO] save to grasp_sub_w_annot.txt")
Пример #20
0
def annotate(args):
    """Forward ``args`` to ``annotate.annotate``; nothing is returned."""
    # Bound to a different local name because this wrapper is itself called
    # 'annotate'.
    import annotate as annotate_module

    annotate_module.annotate(args)
Пример #21
0
def annotate(cmdline):
    """Annotate the survey stored for the project named on the command line.

    The project is read from ``cmdline['project']``; the return value of the
    imported ``annotate.annotate`` is passed through.
    """
    from annotate import annotate as _run_annotate

    project = cmdline['project']
    survey = model.survey.Survey.load(project)
    result = _run_annotate(survey)
    return result
Пример #22
0
def SCanalysis(Ur, I, m, S, text1, text2, text3, text4, text5, files,
               dataFunct, LimUr, LimUc, pf1, pf2):
    """
    Calculate supercapacitor capacitance and equivalent serial resistance
    for every discharge cycle and save the resulting plots.

    SCanalysis(Ur, I, m, S, 'text1', 'text2', 'text3', 'text4', 'text5',
               files, dataFunct, (Ur1, Ur2), (Uc1, Uc2), pf1, pf2)

    Parameters:
       Ur [V] - Nominal voltage (to which the SC is charged).
       I [mA] - Discharge current.
           a) Scalar (e.g. 21.5): analysis for a single current.
           b) Tuple or list (e.g. (10, 17.8, 31.6, 56.2, 100)): multi cycle,
              multi current analysis.
       m [mg] - Mass of active material. Should be zero if not used.
       S [cm2] - Area of one electrode. Should be zero if not used.
       text1..text5 - Five comment lines passed on to the plot annotation.
       files - File name filter (glob pattern). Matching files are sorted by
               file name and each file should contain the data of one
               discharge cycle, so the sort order must correspond to the cycle
               order, e.g. '17_04_25_Data_001.txt' ... '17_04_25_Data_100.txt'
               selected with '*Data*.txt'.
               If several discharge currents are given, `files` should be a
               tuple or list with one filter per current, e.g.
               ('I_15mA*.txt', 'I_22mA*.txt'), optionally followed by one
               additional filter. The additional group is treated as a
               repeated measurement for the first current and is drawn as a
               red cross in the current-influence plots.
       dataFunct - Function for reading data from a file; it is passed through
               to C_R.C_R. It should accept a file name and a discharge
               current and return an N-by-3 array: time in seconds, voltage in
               volts, current in amperes.
       LimUr - (Ur1, Ur2): resistance calculation start and end voltage, as a
               fraction of the nominal voltage Ur, e.g. (0.9, 0.7). A straight
               line is fitted to the discharge curve between Ur1 and Ur2 and
               the equivalent serial resistance is calculated from the voltage
               drop at the discharge start time.
               Special case (1, t): Ur1 is the voltage at the first sample
               after the discharge current was applied and Ur2 the voltage t
               seconds later; a 3rd order polynomial is used instead.
       LimUc - (Uc1, Uc2): capacitance calculation start and end voltage, as a
               fraction of the nominal voltage Ur, e.g. (0.9, 0.7).
               Special cases:
                   (1, Uc2) - Uc1 is the voltage at the first sample after the
                              discharge current was applied.
                   (1, 0)   - Uc1 is the voltage drop at the discharge start
                              time, Uc2 the voltage of the last recorded
                              sample.
       pf1 - Plot frequency: a discharge curve plot is generated every pf1
             cycles.
       pf2 - Plot frequency: a discharge curve is added to the cumulative
             discharge plot every pf2 cycles.

    Computed per cycle and per file group (used for the plots only; this
    function does not write a results file and returns None):
       C    - Capacitance
       Cs_m - Specific capacitance per mass
       Cs_a - Specific capacitance per area
       R    - Equivalent serial resistance
       Rd   - Discharge resistance
       nl   - Discharge curve nonlinearity
       Pd   - Specific power density, per mass
       E    - Specific energy, per mass

    Raises:
       ValueError - if a file group does not contain the same number of files
                    as the first one.
    """

    ############# ADD VALIDATION HERE

    # Lists are accepted as well as tuples for multi-valued inputs.
    multiCurrent = isinstance(I, (list, tuple))
    multiFiles = isinstance(files, (list, tuple))
    noCurrents = len(I) if multiCurrent else 1
    noFileGroups = len(files) if multiFiles else 1

    # Determine number of cycles from the first file group
    f = glob.glob(files[0]) if multiFiles else glob.glob(files)
    noCycles = len(f)
    if noCycles == 0:
        print(
            'No data files selected, check current folder and input number 10, files.'
        )
        return

    # Preallocate result arrays: one row per cycle, one column per file group
    C = np.zeros((noCycles, noFileGroups))
    Cs_m = np.zeros((noCycles, noFileGroups))
    Cs_a = np.zeros((noCycles, noFileGroups))
    R = np.zeros((noCycles, noFileGroups))
    Rd = np.zeros((noCycles, noFileGroups))
    nl = np.zeros((noCycles, noFileGroups))
    Pd = np.zeros((noCycles, noFileGroups))
    E = np.zeros((noCycles, noFileGroups))

    # Annotation placement shared by every plot
    baseX = 0.7
    baseY = 0.4
    fontSize = 11

    # Per-current figures are closed once saved when several currents are used.
    closePerCurrent = noCurrents > 1

    # Currents
    for fileIdx in range(noFileGroups):
        # If noFileGroups == noCurrents + 1, the extra group is a repeated
        # measurement for the first current (repeatability).
        additionalCurrent = fileIdx >= noCurrents
        if additionalCurrent:
            I0 = I[0] if multiCurrent else I
            fileSuffix = str(I0) + "_mA_2nd"
        else:
            I0 = I[fileIdx] if multiCurrent else I
            fileSuffix = str(I0) + "_mA"
        # Replace '.' with '_'
        fileSuffix = fileSuffix.replace('.', '_')

        # File list
        f = glob.glob(files[fileIdx]) if multiFiles else glob.glob(files)
        if len(f) != noCycles:
            raise ValueError(
                "All currents should have the same number of cycles (" +
                str(I0) + " mA: " + str(len(f)) + ", should be " +
                str(noCycles) + ")")
        f.sort()  # sorted list of file names

        for n in range(noCycles):
            (c1, r1, rd1, nl1, pd1,
             e1) = C_R.C_R(f[n], dataFunct, I0 / 1000, n + 1, noCycles,
                           fileIdx, noCurrents, LimUr, LimUc, pf1, pf2)
            C[n, fileIdx] = c1
            R[n, fileIdx] = r1
            Rd[n, fileIdx] = rd1
            nl[n, fileIdx] = nl1
            Pd[n, fileIdx] = pd1
            E[n, fileIdx] = e1

        if m > 0:
            Cs_m[:, fileIdx] = C[:, fileIdx] / (m / 1000)  # mass in mg
            Pd[:, fileIdx] = Pd[:, fileIdx] / (m / 1000)
            E[:, fileIdx] = E[:, fileIdx] / (m / 1000)
        if S > 0:
            Cs_a[:, fileIdx] = C[:, fileIdx] / S

        annotArgs = (baseX, baseY, fontSize, I0, Ur, m, S, text1, text2,
                     text3, text4, text5)

        # Capacitance, ESR, self-discharge resistance and non-linearity
        for data, title, ylabel, prefix in (
            (C, "Capacitance", "C[F]", "_C_"),
            (R, "ESR", "R[\u03A9]", "_ESR_"),
            (Rd, "Self-discharge resistance", "Rd[\u03A9]", "_Rd_"),
            (nl, "Discharge curve non-linearity", "MSE", "_Nl_"),
        ):
            _sc_save_plot(None, data[:, fileIdx], title, "Cycle", ylabel,
                          prefix + fileSuffix, annotArgs,
                          close=closePerCurrent)

        # Cumulative discharge plot by currents (figure is created by C_R.C_R)
        h = plt.figure(num=C_R.C_R.cumulativeDischargePlot.number)
        plt.legend(loc='upper right')
        plt.savefig(fname="_discharge_" + fileSuffix, dpi=300)
        del C_R.C_R.cumulativeDischargePlot
        if closePerCurrent:
            plt.close(h)

        if m > 0:
            # Per-mass plots: specific capacitance, power and energy density
            for data, title, ylabel, prefix in (
                (Cs_m, "Specific capacitance", "Cs_m[F/g]", "_Cs_m_"),
                (Pd, "Maximum power density", "Pdm[W/g]", "_Pdm_"),
                (E, "Energy density", "Ed[J/g]", "_Ed_"),
            ):
                _sc_save_plot(None, data[:, fileIdx], title, "Cycle", ylabel,
                              prefix + fileSuffix, annotArgs,
                              close=closePerCurrent)

        if S > 0:
            # Specific capacitance by area. Saved under its own "_Cs_a_" name
            # so that it no longer overwrites the per-mass "_Cs_m_" plot.
            _sc_save_plot(None, Cs_a[:, fileIdx], "Specific capacitance",
                          "Cycle", "Cs_a[F/cm\u00B2]", "_Cs_a_" + fileSuffix,
                          annotArgs, close=closePerCurrent)

    # CURRENT INFLUENCE ANALYSIS
    if noCurrents > 1:
        lastCycle = noCycles - 1
        # I0 and additionalCurrent keep their values from the last file group.
        annotArgs = (baseX, baseY, fontSize, I0, Ur, m, S, text1, text2,
                     text3, text4, text5)

        # (data, title, y label, file name, close after saving)
        # NOTE(review): only the Pdm, Ed and Cs_a summary figures are closed
        # before plt.show(); this mirrors the previous behaviour - confirm
        # whether they should be displayed like the other summary figures.
        summaryPlots = [
            (C, "Capacitance", "C[F]", "_C", False),
            (R, "ESR", "R[\u03A9]", "_ESR", False),
            (Rd, "Self-discharge resistance", "Rd[\u03A9]", "_Rd", False),
            (nl, "Discharge curve non-linearity", "MSE", "_Nl", False),
        ]
        if m > 0:
            summaryPlots += [
                (Cs_m, "Specific capacitance", "Cs_m[F/g]", "_Cs_m", False),
                (Pd, "Maximum power density", "Pdm[W/g]", "_Pdm", True),
                (E, "Energy density", "Ed[J/g]", "_Ed", True),
            ]
        if S > 0:
            # "_Cs_a" instead of "_Cs_m": do not overwrite the per-mass plot.
            summaryPlots.append((Cs_a, "Specific capacitance",
                                 "Cs_a[F/cm\u00B2]", "_Cs_a", True))

        for data, title, ylabel, fname, close in summaryPlots:
            # Repeated measurement for the first current, drawn as a red cross
            marker = ((I[0], data[lastCycle, noFileGroups - 1])
                      if additionalCurrent else None)
            _sc_save_plot(I, data[lastCycle, 0:noCurrents], title, "I[mA]",
                          ylabel, fname, annotArgs, marker=marker,
                          close=close)

        # Cumulative discharge plot (figure is created by C_R.C_R)
        plt.figure(num=C_R.C_R.dischargeIplot.number)
        plt.legend(loc='upper right')
        plt.savefig(fname="_discharge_I_", dpi=300)

    plt.show()  # display all figures


def _sc_save_plot(x, y, title, xlabel, ylabel, fname, annotArgs, marker=None,
                  close=False):
    """Draw one blue line plot, annotate it, save it as `fname`, optionally close it.

    x may be None, in which case y is plotted against its index. marker, if
    given, is an (x, y) pair drawn as a red cross. annotArgs is the positional
    argument tuple forwarded to annotate.annotate.
    """
    h = plt.figure(figsize=[10, 6], dpi=96)
    ax = h.add_subplot(1, 1, 1)
    if x is None:
        ax.plot(y, 'b', linewidth=2)
    else:
        ax.plot(x, y, 'b', linewidth=2)
    if marker is not None:
        ax.plot(marker[0], marker[1], 'rx')

    # Start the y axis at zero and leave 5 % headroom above the data
    _, top = plt.ylim()
    plt.ylim(0, 1.05 * top)

    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.grid(axis="both", color=(0.25, 0.25, 0.25), linestyle=':')

    annotate.annotate(*annotArgs)

    plt.savefig(fname=fname, dpi=300)
    if close:
        plt.close(h)
Пример #23
0
def start():
    """Command-line entry point: parse options, check the environment,
    then analyze the given module, annotate it and generate code.

    Expects exactly one positional argument, the module to compile; a
    missing '.py' extension is appended. Written for Python 2.
    """
    setgx(newgx())

    # --- command-line options
    short_opts = 'vbchef:wad:m:rolspxngL:'
    long_opts = [
        'help', 'extmod', 'nobounds', 'nowrap', 'flags=', 'debug=',
        'makefile=', 'random', 'noassert', 'long', 'msvc', 'ann', 'strhash',
        'pypy', 'traceback', 'silent', 'nogcwarns', 'lib'
    ]
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError:
        # presumably usage() terminates the program - verify against its definition
        usage()

    # Switches that simply set one global attribute to a fixed value:
    # (option spellings, attribute name, value)
    switches = (
        (('-b', '--nobounds'), 'bounds_checking', False),
        (('-e', '--extmod'), 'extension_module', True),
        (('-a', '--ann'), 'annotation', True),
        (('-l', '--long'), 'longlong', True),
        (('-g', '--nogcwarns'), 'gcwarns', False),
        (('-w', '--nowrap'), 'wrap_around_check', False),
        (('-r', '--random'), 'fast_random', True),
        (('-o', '--noassert'), 'assertions', False),
        (('-p', '--pypy'), 'pypy', True),
        (('-n', '--silent'), 'silent', True),
        (('-s', '--strhash'), 'fast_hash', True),
        (('-v', '--msvc'), 'msvc', True),
        (('-x', '--traceback'), 'backtrace', True),
    )

    for o, a in opts:
        if o in ('-h', '--help'):
            usage()

        for spellings, attr, value in switches:
            if o in spellings:
                setattr(getgx(), attr, value)

        # Options that carry a value
        if o in ('-d', '--debug'):
            getgx().debug_level = int(a)
        if o in ('-m', '--makefile'):
            getgx().makefile_name = a
        if o in ('-L', '--lib'):
            # later -L options end up in front of earlier ones
            getgx().libdirs = [a] + getgx().libdirs
        if o in ('-f', '--flags'):
            if not os.path.isfile(a):
                print "*ERROR* no such file: '%s'" % a
                sys.exit(1)
            getgx().flags = a

    if not getgx().silent:
        print '*** SHED SKIN Python-to-C++ Compiler 0.9.2 ***'
        print 'Copyright 2005-2011 Mark Dufour; License GNU GPL version 3 (See LICENSE)'
        print

    # --- some checks
    supported = [(2, 4), (2, 5), (2, 6), (2, 7)]
    if sys.version_info[:2] not in supported:
        print '*ERROR* Shed Skin is not compatible with this version of Python'
        sys.exit(1)
    if sys.platform == 'win32' and os.path.isdir('c:/mingw'):
        print '*ERROR* please rename or remove c:/mingw, as it conflicts with Shed Skin'
        sys.exit()

    # --- argument
    if len(args) != 1:
        usage()
    name = args[0]
    if not name.endswith('.py'):
        name = name + '.py'
    if not os.path.isfile(name):
        print "*ERROR* no such file: '%s'" % name
        sys.exit(1)
    getgx().main_mod = name[:-3]  # module name without the '.py' suffix

    # --- analyze & annotate
    t0 = time.time()
    infer.analyze(name)
    annotate.annotate()
    cpp.generate_code()
    shared.print_errors()
    if not getgx().silent:
        elapsed = time.time() - t0
        print '[elapsed time: %.2f seconds]' % elapsed
Пример #24
0
def annotate_command(options, command_log):
    """Run the annotate step with fixed defaults on top of `options`.

    `options` is modified in place: '--no-fix' and '--low-confidence' are
    switched off and '<circ_dir>' is pointed at the value of '--output'.
    `command_log` is passed on as the `command` keyword argument.
    """
    from annotate import annotate as run_annotate

    for flag in ('--no-fix', '--low-confidence'):
        options[flag] = False
    options['<circ_dir>'] = options['--output']
    run_annotate(options, command=command_log, name='annotate')
Пример #25
0
def computeMatrix(dir_path, num_chunks):
    """Compute transcript similarities and annotations for a set of chunks.

    For every chunk index i in range(num_chunks) the file
    dir_path + "transcript/transcript<i>.txt" is read, tokenized and reduced
    to its lower-cased alphabetic tokens. That text is annotated with
    annotate.annotate and compared against the text of every chunk with a
    word2vec-based DocSim model.

    Parameters:
        dir_path - Directory prefix of the chunk data. It is concatenated
            directly with "transcript/...", so it must end with a path
            separator.
        num_chunks - Number of transcript files to process.

    Returns:
        (simMatrixT, docsA, avg_depths) where
        simMatrixT - one row per chunk holding the float similarity scores
            returned for that chunk against all chunks,
        docsA - the annotated terms per chunk,
        avg_depths - the depth value returned by annotate.annotate per chunk.
        When a chunk yields no annotated terms, the terms and depth of the
        previous chunk are reused (['empty'] and [1000] before the first
        annotated chunk).
    """
    # Using the pre-trained word2vec model trained using Google news corpus of 3 billion running words.
    # The model can be downloaded here: https://bit.ly/w2vgdrive (~1.4GB)
    # Feel free to use to your own model.
    googlenews_model_path = 'document_similarity/data/GoogleNews-vectors-negative300.bin'
    stopwords_path = "document_similarity/data/stopwords_en.txt"
    model = KeyedVectors.load_word2vec_format(googlenews_model_path,
                                              binary=True)
    with open(stopwords_path, 'r') as fh:
        stopwords = fh.read().split(",")
    ds = DocSim.DocSim(model, stopwords=stopwords)

    # --- features from transcription
    docsA = []
    avg_depths = []
    docsT = []
    previousAnnotation = ['empty']
    previousDepth = [1000]
    for i in range(num_chunks):
        # Close each transcript as soon as it has been read.
        with open(dir_path + "transcript/transcript" + str(i) +
                  ".txt") as f2:
            a = f2.read()

        words = tokenize.word_tokenize(a, language='english')
        words = [word.lower() for word in words if word.isalpha()]
        preAnnotateText = ' '.join(words)
        if not preAnnotateText:
            # Placeholder text for chunks without any alphabetic token
            preAnnotateText = 'chemestry dog wolf bug'
        docsT.append(preAnnotateText)

        annotatedTerms, depth = annotate.annotate(preAnnotateText)
        if annotatedTerms:
            previousAnnotation = annotatedTerms
            previousDepth = depth
        else:
            # Nothing annotated: fall back to the previous chunk's result
            annotatedTerms = previousAnnotation
            depth = previousDepth
        print(annotatedTerms)
        docsA.append(annotatedTerms)
        avg_depths.append(depth)

    # Similarity of every chunk against all chunks (itself included)
    simMatrixT = []
    for source_doc in docsT:
        sim_scores = ds.calculate_similarity(source_doc, list(docsT))
        simMatrixT.append([float(sim['score']) for sim in sim_scores])

    return simMatrixT, docsA, avg_depths