Example #1
def main( ):

    # Program data
    pvars = { 
             'outputDir' : None,
             'clobber' :  False,
             'executeScripts' : False,
             'tokenFile' : None,
             'inputScripts' : [],
             'tokSuffix' : '.tok',
             'breakOnError' : False
             }
    tokenizedFileNames = []
    
    # Read input
    #try:
    readCommandLine( sys.argv[1:], pvars )
    tokens = readTokenFile( pvars['tokenFile'] )
    
    # Apply tokens to scripts
    for scriptName in pvars[ 'inputScripts' ]:    
        tokenizedFileName, tokenizedFile = makeTokenizedFile( scriptName, pvars['clobber'], \
                                                                  pvars['outputDir'], pvars['tokSuffix'] )
        tokenizedFileNames.append( tokenizedFileName )
        tokenize( tokenizedFile, tokens, scriptName )      
        tokenizedFile.close( )

    # Run tokenized files
    if( pvars['executeScripts'] ):
        for tokenizedFileName in tokenizedFileNames:
            ret = os.system( tokenizedFileName )
            if ret and pvars[ 'breakOnError' ]:
                print("Script " + tokenizedFileName +
                      " failed with exit code " + str(ret) + ". Aborting.",
                      file=sys.stderr)
                sys.exit( 1 )
Example #2
 def errors(self, input_str, pos):
     try:
         Parse(tokenize(input_str)).go()
         self.fail('ParseError not raised: {0}'.format(input_str))
     except ParseError as e:
         if e.position != pos:
             self.fail('ParseError at wrong position: expected {0}, got {1}'.format(pos, e.position))
Example #3
 def tokenize_fn(c):
     try:
         tokens = list(tokenize(BytesIO(c.encode('utf-8')).readline))
     except Exception as e:
         # print(e)
         return None
     return tokens
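
A quick usage sketch for the wrapper above. This assumes the `tokenize` being wrapped is Python 3's stdlib `tokenize` (as the `BytesIO(...).readline` call suggests) and that `tokenize_fn` is defined at module level:

from io import BytesIO
from tokenize import tokenize

# Hypothetical usage of tokenize_fn defined above.
toks = tokenize_fn("x = 1 + 2\n")
print([t.string for t in toks])   # ['utf-8', 'x', '=', '1', '+', '2', '\n', '']
print(tokenize_fn("x = (1 +\n"))  # None: the open parenthesis at EOF makes tokenize raise TokenError
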
Example #4
    def decistmt(s):
        """Substitute Decimals for floats in a string of statements.

        >>> from decimal import Decimal
        >>> s = 'print(+21.3e-5*-.1234/81.7)'
        >>> decistmt(s)
        "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"

        The format of the exponent is inherited from the platform C library.
        Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        we're only showing 12 digits, and the 13th isn't close to 5, the
        rest of the output should be platform-independent.

        >>> exec(s) #doctest: +ELLIPSIS
        -3.217160342717258e-0...7

        Output from calculations with Decimal should be identical across all
        platforms.

        >>> exec(decistmt(s))
        -3.217160342717258261933904529E-7
        """
        result = []
        g = tokenize(BytesIO(
            s.encode('utf-8')).readline)  # tokenize the string
        for toknum, tokval, _, _, _ in g:
            if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
                result.extend([(NAME, 'Decimal'), (OP, '('),
                               (STRING, repr(tokval)), (OP, ')')])
            else:
                result.append((toknum, tokval))
        return untokenize(result).decode('utf-8')
Example #5
    def decistmt(s):
        """Substitute Decimals for floats in a string of statements.

        >>> from decimal import Decimal
        >>> s = 'print(+21.3e-5*-.1234/81.7)'
        >>> decistmt(s)
        "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"

        The format of the exponent is inherited from the platform C library.
        Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        we're only showing 12 digits, and the 13th isn't close to 5, the
        rest of the output should be platform-independent.

        >>> exec(s) #doctest: +ELLIPSIS
        -3.217160342717258e-0...7

        Output from calculations with Decimal should be identical across all
        platforms.

        >>> exec(decistmt(s))
        -3.217160342717258261933904529E-7
        """
        result = []
        g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
        for toknum, tokval, _, _, _ in g:
            if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
                result.extend([
                    (NAME, 'Decimal'),
                    (OP, '('),
                    (STRING, repr(tokval)),
                    (OP, ')')
                ])
            else:
                result.append((toknum, tokval))
        return untokenize(result).decode('utf-8')
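
A short usage sketch for decistmt above, assuming Python 3's stdlib tokenize/untokenize and the decimal module as in the doctests. The extra spaces in the rewritten source come from untokenize falling back to its compatibility mode when it is given 2-tuples instead of full 5-tuples.

from decimal import Decimal  # needed when the rewritten statement is exec'd

stmt = 'print(0.1 + 0.2)'
print(decistmt(stmt))  # roughly: print (Decimal ('0.1')+Decimal ('0.2'))
exec(decistmt(stmt))   # 0.3 -- exact, unlike binary floats, where 0.1 + 0.2 != 0.3
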
Example #6
def batch_tokenize_process(source_list):
    tmp_sentences = []
    tmp_index = [ele[0] for ele in source_list]
    for index, ele in enumerate(source_list):
        sentence = ele[1]
        #print sentence
        tmp_sentences.append(tokenize(sentence).strip() + "\n")
    return tmp_index, tmp_sentences
Example #7
def parse(program):
	## Log("HERE:D.1", program = program)
	global token, next
	token = None
	next = None
	next = tokenize(program).next
	token = next()
	return expression()
Example #8
def parse(v):
    r = tokenize(v)
    p = ParserCtx(r)
    x = do_prog(p)
    # except:
    if x == None:
        print(" at line " + str( p.token.pos ) + " unknown error")
    return x
Example #9
 def source_to_code(self, data, path, *, _optimize=-1):
     print(path)
     source = importlib._bootstrap.decode_source(data)
     tokens = tokenize(io.BytesIO(source.encode('utf-8')).readline)
     tokens = retokenize(tokens)
     source = untokenize(tokens).decode('utf-8')
     return _call_with_frames_removed(compile, source, path, 'exec',
                                      dont_inherit=True,
                                      optimize=_optimize)
Example #10
def parse(program):
	global token, next 
	next = tokenize(program).next
	token = next()
	return expression()



	
Example #11
 def visit(self, featureset):
     try:
         _result = []
         for text in featureset.get_column_values(self._column):
             if isinstance(text, list):
                 _preprocessed = []
                 for word in text:
                     _preprocessed.append(tokenize(word))
                 _result.append(_preprocessed)
             else:
                 _preprocessed = tokenize(text)
                 _result.append(_preprocessed)
         _new_result = np.asarray(list(_result))[:, np.newaxis]
         _new_result = _new_result.reshape(
             featureset.get_column_values(self._column).shape)
         featureset.set_featureset_column(self._column, _new_result)
     except Exception as error:
         util.print_error("Unable to tokenize column")
         util.print_error(error)
Example #12
 def parse(self, equa):
     self.token_generator = tokenize(equa, self.TOKENS_SPEC)
     self.current_token = None
     self.next_token = None
     self._next()
     self._tab(self._prob())
     if self.next_token:
         raise Exception(
             'Wrong token sequence busted. Processing stopped at : ' +
             self.next_token.value)
Example #13
	def feature_extract(self, tweets):
		preproc_tweets = [tokenize(t) for t in tweets]
		model = Word2Vec(preproc_tweets)
		word2vec = dict(zip(model.wv.index2word, model.wv.syn0))
		dim = len(next(iter(word2vec.values())))
		# Average the word vectors of each tokenized tweet; fall back to a
		# zero vector when none of the words are in the vocabulary.
		mean_embeds = np.array([
			np.mean([word2vec[w] for w in words if w in word2vec]
			        or [np.zeros(dim)], axis=0)
			for words in preproc_tweets
		])
		return mean_embeds
Example #14
def parse(content):
    r = tokenize(content)
    p = ParserCtx(r, content)
    p.next()
    try:
        while p.token.type != 'eof':
            parse_block(p)
        x = p.tree
        # except:
        if x == None:
            p.error()
        return x
    except Exception as e:
        # print(e, v)
        compile_error("parse", content, p.token, str(e))
        raise
Example #15
def parse_file(fname):
    customize_symbols(
        ['function', 'var', 'end', 'append'], '-=[];,./!%*()+{}:<>@^$&', [
            '-', '+', '*', '**', '/', '%', '<<', '>>', '-=', '+=', '*=', '/=',
            '=', '==', '!=', '<', '>', '<=', '>=', '[', ']', '{', '}', '(',
            ')', '.', ':', ',', ';', '&', '|', '!', '@', '^', '$'
        ])
    disable_tk_indent()
    list = tokenize(load(fname))
    list = list_to_chain(list)
    if len(list) == 0: return
    item = list[0]
    #while item != None:
    #print(item.val)
    #item = item.next
    parse_macro(item)
Example #16
def read_input(f1_name,f2_name):
	
	#read line by line from file 1 and file 2
	with open(f1_name) as f1, open(f2_name) as f2: 
		for line1, line2 in zip(f1,f2):
			#get the classes the line belongs to
			class1 = line1.strip()
			priors[class1]+=1
			#tokenize each line
			token_list = tokenize(line2)
			#store the tokens in a dictionary
			#update freq of token
			for token in token_list:
				if token not in tfreq_dict:
					tfreq_dict[token] = {'1':0,'-1':0,'0':0}
				tfreq_dict[token][class1]+=1
Example #17
def cleanAndNormalizeText(data):
    tokens = tokenize(data)
    tokens = [
        token if emoticon_re.search(token) else token.lower()
        for token in tokens
    ]
    filterText = [w for w in tokens if w not in stop]
    filterText = [w for w in filterText if not len(w) <= 1]
    # stem
    ps = PorterStemmer()
    for i in range(len(filterText)):
        if len(filterText[i]) > 1:
            try:
                filterText[i] = ps.stem(filterText[i])
            except Exception:
                # Keep the original token if stemming fails.
                pass

    return filterText
Example #18
def main():
    file = get_source(argv[INPUT_INDEX])

    code = file.read()
    check_parens(code)

    tokenized = tokenize(code)

    func_map = make_function_map(iter(tokenized), tokenized)
    if False:
        for f in func_map:
            for t in func_map[f].def_block:
                print(t.symbol)

    # check if the parse succeeds;
    # is_parsed fails with sys.exit() and message if parse error
    if is_parsed(func_map):
        #compile(func_map)
        pass

    file.close()
Example #19
def transform(fname):
    s = load(fname)
    tokenList = tokenize(s)
    indents = 0
    idx = 0
    lastisnl = False
    needspace = False
    while hasnext(tokenList, idx):
        i = tokenList[idx]
        next = getnext(tokenList, idx)
        idx += 1
        if i.type in _ws_after:
            printf("%s ", i.val)
        elif i.type in _ws_both:
            printf(" %s ", i.val)
        elif i.type == 'nl':
            printf('\n')
            if next == None:
                pass
            elif next.type == 'indent':
                printf(' ' * (indents + 4))
            elif next.type == 'dedent':
                #printf(' '*(indents - 4))
                pass
            else:
                printf(' ' * indents)
        elif i.type == 'indent':
            indents += 4
        elif i.type == 'dedent':
            indents -= 4
            if next == None:
                pass
            elif next.type != 'dedent':
                printf(' ' * indents)
        elif i.type == 'string':
            printf(get_printable_str(i.val))
        elif i.type == 'notin':
            printf(' not in ')
        else:
            printf(i.val)
Example #20
def transform(fname):
    s = load(fname)
    tokenList = tokenize(s)
    indents = 0
    idx = 0
    lastisnl = False
    needspace = False
    while hasnext(tokenList, idx):
        i = tokenList[idx]
        next = getnext(tokenList, idx)
        idx += 1
        if i.type in _ws_after:
            printf("%s ", i.val)
        elif i.type in _ws_both:
            printf(" %s ", i.val)
        elif i.type == 'nl':
            printf('\n')
            if next == None:
                pass
            elif next.type == 'indent':
                printf(' '* (indents + 4))
            elif next.type == 'dedent':
                #printf(' '*(indents - 4))
                pass
            else:
                printf(' '* indents)
        elif i.type == 'indent':
            indents += 4
        elif i.type == 'dedent':
            indents -= 4
            if next == None:
                pass
            elif next.type != 'dedent':
                printf(' ' * indents)
        elif i.type == 'string':
            printf(get_printable_str(i.val))
        elif i.type == 'notin':
            printf(' not in ')
        else:
            printf(i.val)
Example #21
def calc_sim(query, threshold=0):
    '''
    calculate similarity scores between documents and the query
    '''
    query = clean_token(query)
    file_list = get_file_names()
    documents = {}

    for i in range(len(file_list)):
        documents[file_list[i]] = tokenize(convert(file_list[i]))

    query_vec = vectorize(query)
    results = {}

    for name, doc in documents.items():
        doc_vec = vectorize(doc)
        sim_score = cos_sim(query_vec, doc_vec)
        if sim_score > threshold:
            results[name] = sim_score

    # Sort once, after scoring every document, so the result is defined
    # even when nothing clears the threshold.
    sort_result = sorted(results.items(),
                         key=operator.itemgetter(1),
                         reverse=True)
    return sort_result
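
The helpers vectorize and cos_sim are not shown in the snippet above. A minimal sketch of what they could look like (an assumption, not the original implementation), using sparse term-frequency dictionaries:

import math
from collections import Counter

def vectorize(tokens):
    """Map a token list to a sparse term-frequency vector."""
    return Counter(tokens)

def cos_sim(vec_a, vec_b):
    """Cosine similarity between two sparse term-frequency vectors."""
    dot = sum(vec_a[t] * vec_b[t] for t in set(vec_a) & set(vec_b))
    norm_a = math.sqrt(sum(v * v for v in vec_a.values()))
    norm_b = math.sqrt(sum(v * v for v in vec_b.values()))
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return dot / (norm_a * norm_b)

print(cos_sim(vectorize(['a', 'b', 'b']), vectorize(['b', 'c'])))  # ~0.63
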
Example #22
	def test1():
		test("1")
		test("+1")
		test("-1")
		test("1+2")
		test("1+2+3")
		test("1+2*3")
		test("(1+2)*3")
		test("()")
		test("(1)")
		test("(1,)")
		test("(1, 2)")
		test("[1, 2, 3]")
		test("{}")
		test("{1: 'one', 2: 'two'}")
		test("1.0*2+3")
		test("'hello'+'world'")
		test("2**3**4")
		test("1 and 2")
		test("foo.bar")
		test("1 + hello")
		test("1 if 2 else 3")
		test("'hello'[0]")
		test("hello()")
		test("hello(1,2,3)")
		test("lambda: 1")
		test("lambda a, b, c: a+b+c")
		test("True")
		test("True or False")
		test("1 in 2")
		test("1 not in 2")
		test("1 is 2")
		test("1 is not 2")
		test("1 is (not 2)")

		print()
		print(list(tokenize("1 not in 2")))
Example #23
        arg = arg.replace(bracks, '')
        arg = arg.strip()
        arg = re.sub(' +', ' ', arg)
        t = ' '.join(arg.split(' ')[:-1] + [bracks])
        n = arg.split(' ')[-1]
        types.append(t)
        names.append(n)
    return types, names


if __name__ == '__main__':
    # parser
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', default='',
                        help='The file to strip comments from.')
    parser.add_argument('--l', default='python',
                        choices=['python', 'java'], help='language of input code')
    args = parser.parse_args()
    assert args.input_file == '' or os.path.isfile(args.input_file)

    # read from standard input, or from input file
    if args.input_file == '':
        source = sys.stdin.read()
    else:
        with io.open(args.input_file, encoding='utf-8') as f:
            source = f.read()

    tokenize = globals()[f"tokenize_{args.l}"]
    # tokenize
    print(tokenize(source), end='')
Example #24
def parse(string):
    tokenize(string)
Example #25
def file_elements(filename, filtering='normal'):
    '''Take a Python file, return a tuple of contents.
    Argument 'filtering' determines how much filtering is applied to symbols
    that may be uninteresting.  Possible values are 'minimal' or 'normal'.
    '''
    header = ''
    comments = []
    tmp_file = None
    full_path = os.path.join(os.getcwd(), filename)

    def cleanup():
        stream.close()
        if tmp_file:
            log.debug('closing {}'.format(tmp_file))
            tmp_file.close()

    # Set up the dictionary.  We may end up returning only part of this
    # filled out, if we encounter errors along the way.

    elements = {}
    elements['header'] = ''
    elements['comments'] = []
    elements['docstrings'] = []
    elements['imports'] = []
    elements['classes'] = []
    elements['functions'] = []
    elements['variables'] = []
    elements['strings'] = []
    elements['calls'] = []
    elements['parse_result'] = 'success'

    # Open the file for reading.  FileIO is needed for the Python 'ast' module.

    log = Logger('file_parser').get_log()
    log.info('parsing Python file {}'.format(full_path))
    stream = io.FileIO(filename)

    # Pass #0: account for Python 2 vs 3 syntax.
    # I haven't found another way to detect whether a script uses Python 2 or
    # 3 syntax other than to try to parse it and test for failure.  We need
    # to use ast later below, and if an input file needs Python 2, we have to
    # convert it first.  So we test first and convert at the beginning.

    if assumes_python2(stream):
        try:
            # This creates a temporary file that must be deleted later.
            log.debug('attempting to convert from Python 2')
            tmp_file = convert_python2_file(filename)
            if tmp_file:
                log.debug('conversion of {} successful'.format(full_path))
                log.debug('closing file {}'.format(full_path))
                stream.close()
                log.debug('opening file {}'.format(tmp_file.name))
                stream = io.FileIO(tmp_file.name)
            else:
                # We thought it was Python 2 but couldn't convert it.
                # Something is wrong. Bail.
                log.warn(
                    'conversion failed -- giving up on {}'.format(full_path))
                # At this point, we still have an empty elements dictionary.
                elements['parse_result'] = 'error'
                return elements
        except Exception as err:
            log.error(
                'error trying to detect if {} uses Python 2'.format(full_path))
            log.error(err)
            elements['parse_result'] = 'error'
            cleanup()
            return elements

    # Pass #1: use tokenize to find and store headers and comments.

    log.debug('tokenizing {}'.format(full_path))
    try:
        tokens = tokenize(stream.readline)
    except Exception as err:
        log.error('error trying to tokenize {}'.format(full_path))
        log.error(err)
        elements['parse_result'] = 'error'
        cleanup()
        return elements

    # Look for a header at the top, if any.  There are two common forms in
    # Python: a string, and a comment block.  The heuristic used here is that
    # if the first thing after any ignorable comments is a string, it's
    # assumed to be the doc string; else, any initial comments (after certain
    # special case comments, such as Unix hash-bang lines) are taken to be
    # the header; else, no header.

    for kind, thing, _, _, line in tokens:
        if kind == ENCODING:
            continue
        if ignorable_comment(thing):
            continue
        if kind != COMMENT and kind != NL:
            break
        header += strip_comment_char(thing)

    # When the above ends, 'thing' & 'kind' will be the next values to examine.
    # If it's a string, it's assumed to be the file doc string.
    # Once we do this, we'll have read the header comment or the doc string and
    # the file position will be immediately after that point.  When we do our
    # 2nd pass, we don't want to read that stuff again.  Back up over the last
    # non-string/comment thing we read, and remember where we are.

    if kind == STRING:
        restart_point = stream.tell()
        header = header + ' ' + thing.replace('"', '')
        (kind, thing, _, _, line) = next(tokens)
    else:
        restart_point = stream.tell() - len(line)

    # Iterate through the rest of the file, looking for comments.
    # This gathers consecutive comment lines together, on the premise that
    # they may contain sentences split across multiple comment lines.

    chunk = ''
    while thing != ENDMARKER:
        try:
            if kind == NL:
                pass
            elif kind == COMMENT and not ignorable_comment(thing):
                chunk = chunk + strip_comment_char(thing) + '\n'
            elif chunk:
                comments.append(chunk.strip())
                chunk = ''
            (kind, thing, _, _, _) = next(tokens)
        except StopIteration:
            break
        except Exception:
            # Unicode decoding problems can cause exceptions.
            log.error('tokenization failed for {}'.format(full_path))
            break

    # This concludes what we gather without parsing the file into an AST.
    # Store the header and comments, if any.

    elements['header'] = clean_plain_text(header)
    elements['comments'] = clean_plain_text_list(comments)

    # Pass #2: pull out remaining elements separately using the AST.  This is
    # inefficient, because we're iterating over the file a 2nd time, but our
    # efforts right now are about getting things to work any way possible.

    # AST parsing failures are possible here, particularly if the file was
    # converted from Python 2.  Some programs do stuff you can't automatically
    # convert with 2to3.  If that happens, bail and return what we can.

    stream.seek(restart_point)
    try:
        log.debug('parsing into AST')
        tree = ast.parse(stream.read())
    except Exception as err:
        log.error('AST parsing of {} failed; returning what we have so far'
                  .format(full_path))
        cleanup()
        elements['parse_result'] = 'error'
        return elements

    # We were able to parse the file into an AST.

    try:
        collector = ElementCollector(filtering)
        collector.visit(tree)
    except Exception as err:
        log.error('internal AST code walking error for {}'.format(full_path))
        cleanup()
        elements['parse_result'] = 'error'
        return elements

    # We store the names of variables we find temporarily as paths separated
    # by '|' so that we can find unique variable name assignments within each
    # function or class context.  E.g., variable x in function foo is "foo|x".
    # Remove the paths now, leaving just the variable names.
    # Also filter the variables to remove things we don't bother with.

    unique_var_paths = list(set(collector.variables))
    collector.variables = [x[x.rfind('|') + 1:] for x in unique_var_paths]
    filtered_calls = filter_variables(collector.calls, collector.variables)

    # We are done.  Do final cleanup and count up frequencies of some things.

    # Note that docstrings don't get frequencies associated with them.
    elements['docstrings'] = clean_plain_text_list(collector.docstrings)
    # The rest are turned into ('string', frequency) tuples.
    elements['imports'] = countify(collector.imports)
    elements['classes'] = countify(collector.classes)
    elements['functions'] = countify(collector.functions)
    elements['variables'] = countify(collector.variables)
    elements['strings'] = countify(clean_plain_text_list(collector.strings))
    elements['calls'] = countify(filtered_calls)

    cleanup()
    return elements
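
The "Pass #1" logic above boils down to collecting leading COMMENT/NL tokens until the first real token appears. A minimal, standalone sketch of that idea with the stdlib tokenize (without the special-case heuristics used in file_elements):

import io
from tokenize import tokenize, COMMENT, NL, ENCODING

def leading_comments(source):
    """Return the comment block at the top of a piece of Python source."""
    tokens = tokenize(io.BytesIO(source.encode('utf-8')).readline)
    header = []
    for tok in tokens:
        if tok.type == ENCODING:
            continue                      # the first token is always the encoding
        if tok.type == COMMENT:
            header.append(tok.string.lstrip('#').strip())
        elif tok.type != NL:
            break                         # first non-comment token ends the header
    return '\n'.join(header)

print(leading_comments("#!/usr/bin/env python\n# A header comment.\nx = 1\n"))
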
Example #26
    
    "multiline
    "multi
    
    " Func x does stuff
    " to x
    func xx()
    foo()
    end
    
    a += 3 >= 4
    
    loop i in 1:10
        foo()
    
    
    '''
    '''
    "a
    "b'''

    EXAMPLE = EXAMPLE1

    print('py')
    for token in tokenize_py(EXAMPLE):
        print(repr(token))

    print('zoof')
    for token in tokenize(EXAMPLE, __file__, 286):
        print(token)
Example #27
import sys
from tokenize import *
from dictionary_words_2 import *
from stochastic_sampling import *

# [brian] Usually `import *` is bad form in python.
# One of the best things that distinguishes it from
# ruby is that it's always easy to tell where some
# behavior comes from. If you `import *` you'll
# later have a hard time figuring out which module
# a given function lives in.

if __name__ == '__main__':
    with open(sys.argv[1]) as source_file:
        source = source_file.read()
    tokens = tokenize(source)
    a_dictionary = list_to_dictionary(tokens)
    stochastic_list = new_list(a_dictionary)
    root_node = construct_tree(stochastic_list)
    first_word = random_word(root_node)
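
The [brian] note above argues for explicit imports over `import *`. A minimal sketch of the same point using only the stdlib (the project modules dictionary_words_2 and stochastic_sampling are not available here, so Python's own tokenize stands in):

import io
# Explicit imports: the origin of every name is visible at the call site.
from tokenize import tokenize, NUMBER

def count_numbers(source):
    """Count NUMBER tokens in a piece of Python source."""
    readline = io.BytesIO(source.encode('utf-8')).readline
    return sum(1 for tok in tokenize(readline) if tok.type == NUMBER)

print(count_numbers("x = 1 + 2.5\n"))  # 2
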
Example #28
    printf("%-10s%-10s:debug source code\n", "-debug", "[file]")
    printf("%-10s%-10s:disassemble builtin-func\n", "-dis-bf", "[file]")


argc = len(ARGV)
if argc == 1:
    if ARGV[0] == '-help':
        print_usage()
    else:
        print_usage()
elif argc > 2:
    opt = ARGV[1]
    name = ARGV[2]
    if opt == '-tk':
        from tokenize import *
        r = tokenize(load(name))
        for i in r:
            printf("%s := %s\n", i.type, i.val)
    elif opt == '-src':
        printSource(name)
    elif opt == '-p':
        _execute_file(name)
        input("press any key to quit")
    elif opt == '-dis':
        from dis import dissimple
        argv = ARGV.clone()
        del argv[0]
        dissimple(argv)
    elif opt == '-dump':
        compilefile(name, name + '.bin')
    elif opt == '-ast':
Example #29
def parse(program):
    global curr, next_token
    next_token = tokenize(program).next
    curr = next_token()
    return expression()
Example #30
from tokenize import *
from getLexicon import *
from symScoreClassify import *
from splitData import *
from naiveBayesClassify import *

import os

import numpy as np

posindir = os.path.abspath('') + '\\POS'
negindir = os.path.abspath('') + '\\NEG'

posDocs = tokenize(posindir)
negDocs = tokenize(negindir)

nfold = 10
posLexicon, negLexicon, posLexiconWeights, negLexiconWeights = getLexicon()
len(posLexicon), len(negLexicon), len(posLexiconWeights), len(
    negLexiconWeights)
resultsBow = np.zeros((10, 8))
resultsSig2nonW = np.zeros((10, 198))
resultsSig2W = np.zeros((10, 198))

for iteration in range(0, nfold):
    print(iteration)
    trainPosDocs, trainNegDocs, testPosDocs, testNegDocs = splitData(
        posDocs, negDocs, nfold, iteration)
    resultsIteration = symScoreClassify(testPosDocs, posLexicon, negLexicon,
                                        posLexiconWeights, negLexiconWeights,
                                        True)
    print(resultsIteration[4:12])
    resultsBow[iteration, :] += np.array(resultsIteration[4:12])
Example #31
def tk_test(string, types, vals):
    r = tokenize(string)
    assert tk_types(r) == types
    assert tk_vals(r) == vals
Example #32
    printf("%-10s%-10s:print abstract syntax tree\n", "-printast", "[file]")
    printf("%-10s%-10s:debug source code\n", "-debug", "[file]")
    printf("%-10s%-10s:disassemble builtin-func\n", "-dis-bf", "[file]")
    
argc = len(ARGV)
if argc == 1:
    if ARGV[0] == '-help':
        print_usage()
    else:
        print_usage()
elif argc > 2:
    opt = ARGV[1]
    name = ARGV[2]
    if opt == '-tk':
        from tokenize import *
        r = tokenize(load(name))
        for i in r:
            printf("%s := %s\n", i.type, i.val)
    elif opt == '-src':
        printSource(name)
    elif opt == '-p':
        _execute_file(name)
        input("press any key to quit")
    elif opt == '-dis':
        from dis import dissimple
        argv = ARGV.clone()
        del argv[0]
        dissimple(argv)
    elif opt == '-dump':
        compilefile(name, name + '.bin')
    elif opt == '-ast':
Example #33
def tk_test(string, types, vals):
    r = tokenize(string)
    assert tk_types(r) == types
    assert tk_vals(r) == vals
Example #34
        pattern = r"\s*(?:(<=|>=|\W)|([a-zA-Z]\w*)|(\d+(?:\.\d*)?))"
        for operator, name, literal in re.findall(pattern, program):
            if operator:
                yield "(operator)", operator
            elif name:
                yield "(name)", name
            elif literal:
                yield "(literal)", literal
            else:
                raise SyntaxError
        yield "(end)", "(end)"

    import time

    print len(program), "bytes"
    print len(list(tokenize(program))), "tokens"

    def bench(name, func):
        t0 = time.clock()
        for i in xrange(1000):
            func(program)
        print name, time.clock() - t0

    import parser, compiler

    program_list = list(tokenize_python(program))

    bench("topdown", parse)
    bench("topdown pretokenized", lambda program: parse(program_list))

    tokenize_python = custom_tokenize_python
Example #35
 def same(self, input_str1, input_str2):
     self.assertEqual(str(Parse(tokenize(input_str1)).go()),
                      str(Parse(tokenize(input_str2)).go()))
Example #36
 def matches(self, input_str, desired_function):
     self.assertEqual(str(Parse(tokenize(input_str)).go()),
                      str(desired_function))
Example #37
        pattern = r"\s*(?:(<=|>=|\W)|([a-zA-Z]\w*)|(\d+(?:\.\d*)?))"
        for operator, name, literal in re.findall(pattern, program):
            if operator:
                yield "(operator)", operator
            elif name:
                yield "(name)", name
            elif literal:
                yield "(literal)", literal
            else:
                raise SyntaxError
        yield "(end)", "(end)"

    import time

    print len(program), "bytes"
    print len(list(tokenize(program))), "tokens"

    def bench(name, func):
        t0 = time.clock()
        for i in xrange(1000):
            func(program)
        print name, time.clock() - t0

    import parser, compiler

    program_list = list(tokenize_python(program))

    bench("topdown", parse)
    bench("topdown pretokenized", lambda program: parse(program_list))

    tokenize_python = custom_tokenize_python
Example #38
 def parse(program):
     global token, next
     next = tokenize(program).next
     token = next()
     return expression()
Example #39
def parse(string):
    tokenize(string)