Example #1
def to_csv():
    # read type names
    with open(TYPES + 'types.txt') as f:
        types = lines(f)

    # read type effectiveness, put it into table format
    # [["",    type1, type2],
    #  [type1, 1,     2    ],
    #  [type2, 4,     2    ]]
    with open(TYPES + 'typestable.txt') as f:
        headers = [""] + types
        # effectiveness is a 2d array
        effectiveness = [line.split(" ") for line in lines(f)]

        # prepend the attacking type's name to its row of multipliers
        table = [headers] + [
            [types[attacker]] + row
            for attacker, row in enumerate(effectiveness)
        ]

        # print(table)

    # write the type effectiveness to a csv
    with open(TYPES + 'types.csv', 'w', newline='') as f:
        typesWriter = csv.writer(f)
        typesWriter.writerows(table)
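
All of the examples on this page revolve around a small `lines()` helper (imported from a `util` module as `util.lines`, `ut.lines`, or `lines` directly), whose definition is not shown. As a rough, hypothetical sketch, a version consistent with most of the calls here, which pass a filename, a `Path`, an open file object, or the string output of a shell command, could look like the code below. Note that the plotting example further down uses `ut.lines()` for something unrelated (a list of ROOT line styles), so the sketch does not apply there.

# Hypothetical helper, inferred from how lines() is called in these examples;
# each project's real util.lines may differ.
from pathlib import Path

def lines(source):
    """Return the stripped, non-empty lines of a path, an open file, or a string."""
    if isinstance(source, (str, Path)):
        try:
            text = Path(source).read_text()   # treat it as a file path first
        except (OSError, ValueError):
            text = str(source)                # fall back to raw text (e.g. command output)
    else:
        text = source.read()                  # assume an open file-like object
    return [line.strip() for line in text.splitlines() if line.strip()]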
Example #2
def processRunning(host, pidfile, name=""):
	cmdline = util.lines(host.execute("[ -f %(pidfile)s ] && (cat %(pidfile)s | xargs -r ps --no-headers --format cmd --pid); true" % {"pidfile": util.escape(pidfile)}))
	if not len(cmdline):
		return False
	if name:
		return name in cmdline[0]
	return True
Example #3
def playlist_contains(song_id, playlist_name):
    if not playlist_exists(playlist_name):
        raise KeyError(f'playlist {playlist_name} doesn\'t exist')

    playlist_dir = file_management.get_playlists_path()
    playlist_dest = os.path.join(playlist_dir, playlist_name)

    with open(playlist_dest) as f:
        return song_id in util.lines(f)
Example #4
def load_splits_raw(self):
    self.split_files = [
        (
            self.conf.data_dir / self.name / split_name / self.lang
            ).with_suffix(".conllu")
        for split_name in self.split_names]
    self.splits_raw = [
        conllu.parse("\n".join(lines(f)))
        for f in self.split_files]
Example #5
def do_it(filename):
    for line in util.lines(filename):
        split = line.split()
        n = Node.get_node(split[0] + split[1])
        for inner in range(4, len(split), 4):
            if split[inner] != "no":
                n.add(split[inner + 1] + split[inner + 2], int(split[inner]))

    return len(Node.nodes['shinygold'].distinct_parents())
Example #6
def do_it(filename):
    program = [[l.split()[0], int(l.split()[1])] for l in util.lines(filename)]
    result = NOT_FOUND
    pos = 0
    while result == NOT_FOUND:
        curr, pos = replace_nop_jmp(program, pos)
        result = parse_and_run(curr)

    return result
Example #7
def do_it(filename):
    for line in util.lines(filename):
        split = line.split()
        n = Node.get_node(''.join(split[0:2]))
        for inner in range(4, len(split), 4):
            if split[inner] != "no":
                n.add(''.join(split[inner + 1:inner + 3]), int(split[inner]))

    return Node.get_node('shinygold').inner_bags() - 1
Example #8
def read_sift(sift_fname):
    """ Feature format: [[x, y, scale, orientation], ...] """
    lines = ut.lines(sift_fname)
    if len(lines):
        fd = np.array([[float(x) for x in line.split()] for line in lines])
        f = fd[:, :4]
        d = np.uint8(fd[:, 4:])
        return f, d
    else:
        return np.zeros((4, 0)), np.uint8(np.zeros((128, 0)))
Example #9
def testLines(self):
    from util import lines
    with open('test_input.txt') as f:
        for line, item in enumerate(lines(f)):
            print('%d\t %s' % (line, item))
    print("test")
    self.assertTrue(1 == 1, "Failed")
Example #10
    def parse_file(self, file):
        sents = split_iter(lines(file), lambda line: line == "")
        sents = islice(filter(bool, sents), self.conf.max_ninst)

        def parse_sent(sent):
            parts = map_assert(
                str.split, lambda parts: len(parts) in {3, 7}, sent)
            forms, tags = zip(*map(lambda ps: (ps[0], ps[-1]), parts))
            assert len(forms) == len(tags) == len(sent)
            return [
                {"form": form, "ner": tag} for form, tag in zip(forms, tags)]

        return list(map_skip_assert_error(parse_sent, sents, verbose=True))
Example #11
def downloadCaptureUri(host, name, onlyLatest=False):
	filename = "%s.pcap" % name
	path = host.getHostServer().randomFilename()
	if onlyLatest:
		print path
		latest = util.lines(host.execute("ls -t1 %s | head -n1" % _remoteDir(name)))[0]
		if latest:
			fileutil.copy(host, "%s/%s" % (_remoteDir(name), latest), path)
	else:
		host.execute("tcpslice -w %s %s/*" % (path, _remoteDir(name)))
	if not fileutil.existsFile(host, path) or not fileutil.fileSize(host, path):
		raise fault.new("No packets captured yet")
	return host.getHostServer().downloadGrant(path, filename=filename)
Example #12
def show_playlist(playlist_name):
    filename = f'{playlist_name}.playlist'
    playlist_dir = file_management.get_playlists_path()
    playlist_dest = os.path.join(playlist_dir, filename)

    if not os.path.exists(playlist_dest):
        print(f'playlist does not exist: {playlist_dest}')
        raise KeyError

    res = [playlist_name]
    with open(playlist_dest) as f:
        for song_id in util.lines(f):
            res.append(f'[{song_id}] {songs.get_song_info(song_id)["title"]}')

    return res
Example #13
def run():
    print '''<div type="book" osisID="%s" canonical="true">
<title type="main">%s</title>''' % (bookID, title)

    for line in lines(sys.stdin):
        line = re.sub(
            chapterPattern,
            r'<chapter osisID="%s." chapterTitle="\1">\n<title type="chapter">\1</title>'
            % bookID, line)
        line = re.sub(versePattern,
                      r'<verse osisID="%s.\1.\2">\3</verse>' % bookID, line)
        line = re.sub(r'\*\*', r'</chapter>', line)
        print line.strip()
    print '</div>'
Example #14
def __init__(self, path, words, dim=300, normalize=True, **kwargs):
    seen = []
    vs = {}
    for line in lines(path):
        split = line.split()
        w = split[0]
        if w in words:
            seen.append(w)
            vs[w] = np.array(list(map(float, split[1:])), dtype='float32')
    self.iw = seen
    self.wi = {w: i for i, w in enumerate(self.iw)}
    self.m = np.vstack([vs[w] for w in self.iw])
    if normalize:
        self.normalize()
Example #15
def add_song_to_playlist(song_id: str, playlist_name):
    filename = f'{playlist_name}.playlist'
    playlist_dir = file_management.get_playlists_path()
    playlist_dest = os.path.join(playlist_dir, filename)

    if not os.path.exists(playlist_dest):
        raise KeyError(f'playlist does not exist: {playlist_dest}')

    if song_id not in file_management.get_song_ids():
        raise KeyError('song id does not exist')

    with open(playlist_dest) as f:
        current_songs = util.lines(f)

    if song_id in current_songs:
        print(f'warning! song {song_id} already in current songs! type "y" to continue')
        if input('>') != 'y':
            return

    with open(playlist_dest, 'a') as playlist:
        playlist.write(song_id + '\n')
Example #16
def parse(filename):
    return ContinuousWood(util.lines(filename))
Example #17
def interfaceExists(host, iface):
	return util.lines(host.execute("[ -d /sys/class/net/%s ]; echo $?" % iface))[0] == "0"
Example #18
def interfaceBridge(host, iface):
	return util.lines(host.execute("[ -d /sys/class/net/%s/brport/bridge ] && basename $(readlink /sys/class/net/%s/brport/bridge)" % (iface, iface)))[0]
Example #19
def bridgeExists(host, bridge):
	return util.lines(host.execute("[ -d /sys/class/net/%s/brif ]; echo $?" % bridge))[0] == "0"
Example #20
def get_song_ids():

    dest = get_ids_path()

    with open(dest) as f:
        return util.lines(f)
Example #21
Usage: 'measure dict-file'
Pearls of Computer Science, Week 2
"""

# standard module to access command-line parameter list sys.argv
import sys
# standard module containing process_time function
import time

from ordsearch import linear
from ordsearch import binary

# read words from dictionary file
import util

words = util.lines(sys.argv[1])

# ask for the first word
value = input("Search for first word? ")
# continue as long as a word was typed
while value != "":
    # measure time for linear searching
    lstart = time.process_time()
    lresult = linear(words, value)
    lend = time.process_time()
    # time values are fractions of seconds;
    # multiply by a million and round to get microseconds
    ltime = round((lend - lstart) * 1000000)

    # measure time for binary searching
    bstart = time.process_time()
Example #22
    parR = np.empty(numBins, np.dtype('float64'))
    parRerr = np.empty(numBins, np.dtype('float64'))
    parL = np.empty(numBins, np.dtype('float64'))
    parLerr = np.empty(numBins, np.dtype('float64'))

    for ibin in range(0, numBins):
        hists.append(
            r.TH1D("tmp_" + str(ibin), "tmp_" + str(ibin), 400, 0., 2.))
        energies.append(int(ibin * energyBin))
    energies.append(maxEnergy)

    if numBins > len(ut.colors()):
        for i in range(0, numBins - len(ut.colors())):
            ut.colors().append(ut.colors()[i])
            ut.alpha().append(0.3)
            ut.lines().append(ut.lines()[i])
            ut.width().append(ut.width()[i])
            ut.fill().append(3001)

    leg = r.TLegend(0.25, 0.55, 0.3, 0.85)
    leg.SetTextFont(132)
    leg.SetTextSize(0.05)
    leg.SetFillColor(0)
    leg.SetFillStyle(0)

    ifile = "/eos/user/c/cneubuse/miniCalo2/pred/stage" + str(st) + "/out.root"

    miX = 0.8
    maX = 1.18
    #  if args.stage>1:
    #    miX=0.8
Example #23
def parse_file(self, file):
    sents = split_iter(lines(file), lambda line: line == "")
    sents = islice(filter(bool, sents), self.conf.max_ninst)
    sents = map("\n".join, sents)
    return [conllu.parse(sent)[0] for sent in sents]
Example #24
            continue
        print proc_num, "Running", name
        subredditgen.main(name)
        word_dict = util.load_pickle(DICTS.format(name))
        word_dict.filter_extremes(no_above=0.1, no_below=100)
        to_keep = sorted(word_dict.dfs, key=lambda w : word_dict.dfs[w], reverse=True)[:5000]
        word_dict.filter_tokens(good_ids=to_keep)
        sub_vecs = create_representation("SVD", constants.SUBREDDIT_EMBEDDINGS.format(name))
        pos_seeds, neg_seeds = seeds.twitter_seeds()
        sub_vecs = sub_vecs.get_subembed(set(word_dict.token2id.keys()).union(pos_seeds).union(neg_seeds))
        pols = polarity_induction_methods.bootstrap(sub_vecs, pos_seeds, neg_seeds, return_all=True,
                nn=25, beta=0.9, num_boots=50, n_procs=10)
        util.write_pickle(pols, POLARITIES + name + ".pkl")

if __name__ == "__main__":
    queue = Queue()
    id = int(sys.argv[1])
    valid_ids = set(range(250, 256))
    for i, line in enumerate(util.lines(NAMES)):
        if i in valid_ids:
            name = line.split()[0]
            queue.put(name)
    print queue.qsize()
    procs = [Process(target=worker, args=[i, queue]) for i in range(1)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()


Example #25
    group.add_argument('-l', '--list', action="store_true")
    group.add_argument('-c', '--create', type=str, metavar='<PLAYLIST>')
    group.add_argument('-d', '--delete', type=str, metavar='<PLAYLIST>')
    group.add_argument('-a', '--add-song', type=str, metavar='<PLAYLIST>')
    group.add_argument('-r', '--remove-song', type=str, metavar='<PLAYLIST>')

    parser.add_argument('-s', '--song', type=str, metavar='<SONG NAME>')
    args = parser.parse_args()
    # print(args)
    if args.list:
        playlist_dir = file_management.get_playlists_path()
        for filename in os.listdir(playlist_dir):
            abspath = os.path.join(playlist_dir, filename)
            print(os.path.splitext(filename)[0])
            with open(abspath) as f:
                for song_id in util.lines(f):
                    print(f'[{song_id}] {songs.get_song_info(song_id)["title"]}')

    elif args.create is not None:
        create_playlist(args.create)

    elif args.delete is not None:
        delete_playlist(args.delete)

    elif args.add_song is not None:
        if args.song is None:
            sys.exit('provide a song (-s)')
        add_song_to_playlist(args.song, args.add_song)

    elif args.remove_song is not None:
        if args.song is None:
Example #26
__author__ = 'Egbert'

from ordsearch import binary
from ordsearch import linear
from util import lines
import searchmeasure


print(binary(lines("Unabr.dict"), "eagle"))
print(binary(lines("Unabr.dict"), "zygose"))

searchmeasure.search("Unabr.dict", "eagle")
Example #27
    songs_dir = file_management.get_songs_path()
    for filepath in glob.glob(os.path.join(songs_dir, f'{song_id}.*')):
        abspath = os.path.join(songs_dir, filepath)
        print(f'deleting {abspath}')
        os.remove(abspath)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Song configuration')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-l', '--list', action="store_true")
    group.add_argument('-a', '--add-url', type=str, metavar='<URL>')
    group.add_argument('-d', '--delete', type=str, metavar='<ID>')
    args = parser.parse_args()
    # print(args)
    if args.list:
        ids_path = file_management.get_ids_path()
        with open(ids_path) as f:
            all_ids = util.lines(f)
        if not all_ids:
            print('no songs!')
        for song_id in all_ids:
            json_data = get_song_info(song_id)
            print(f'[{song_id}] {json_data["title"]}')

    elif args.add_url is not None:
        link = args.add_url
        add_song(link)

    elif args.delete is not None:
        song_id = args.delete
        remove_song(song_id)
Example #28
def interfaceBridge(host, iface):
	try:
		return util.lines(host.execute("[ -d /sys/class/net/%s/brport/bridge ] && basename $(readlink /sys/class/net/%s/brport/bridge)" % (util.identifier(iface), util.identifier(iface))))[0]
	except exceptions.CommandError:
		return False
Example #29
class Transformer():

    nspecial_symbols_segment1 = 2  # [CLS] sent1... [SEP]
    nspecial_symbols_segment2 = 1  # sent2... [SEP]
    add_tokens_key = 'additional_special_tokens'
    supported_langs = set(
        lines(Path(__file__).parent / "data" / "bert_langs.wiki"))

    def __init__(self,
                 model_name,
                 device=None,
                 max_len=None,
                 auto_model_cls=AutoModel,
                 only_tokenizer=False,
                 custom_n_hidden=None,
                 custom_n_layers=None):
        super().__init__()
        self.randinit = model_name.endswith('-randinit')
        if self.randinit:
            model_name = model_name[:-len('-randinit')]
        self.model_name = model_name
        self.device = device or _device
        do_lower_case = "uncased" in model_name
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name, do_lower_case=do_lower_case)
        for name in 'mask cls sep bos eos'.split():
            token = getattr(self.tokenizer, name + '_token')
            setattr(self, name.upper(), token)
        # self.begin_mention_idx = self.tokenizer.convert_tokens_to_ids(
        #     self.BEGIN_MENTION)

        if self.model_name.startswith('roberta'):
            self.BEGIN_MENTION = 'madeupword0000'
            self.END_MENTION = 'madeupword0001'
            self.add_special_symbols = self.add_special_symbols_roberta
        else:
            self.BEGIN_MENTION = '[unused0]'
            self.END_MENTION = '[unused1]'
            self.add_special_symbols = self.add_special_symbols_bert
        self.BEGIN_MENTION_IDX = self.tokenizer.convert_tokens_to_ids(
            self.BEGIN_MENTION)
        self.begin_mention_idx = self.BEGIN_MENTION_IDX
        self.END_MENTION_IDX = self.tokenizer.convert_tokens_to_ids(
            self.END_MENTION)
        additional_special_tokens = [self.BEGIN_MENTION, self.END_MENTION]
        self.tokenizer.add_special_tokens(
            {self.add_tokens_key: additional_special_tokens})
        self.max_len = max_len or self.tokenizer.max_len
        self.pad_idx = self.tokenizer.pad_token_id
        self.mask_idx = self.tokenizer.mask_token_id
        self.vocab_size = len(self.tokenizer)

        if not only_tokenizer:
            if self.randinit:
                model_config = AutoConfig.from_pretrained(self.model_name)
                print('creating model with random init', self.model_name)
                if custom_n_hidden:
                    ratio = model_config.intermediate_size // model_config.hidden_size
                    model_config.hidden_size = custom_n_hidden
                    model_config.intermediate_size = ratio * custom_n_hidden
                if custom_n_layers:
                    model_config.num_hidden_layers = custom_n_layers
                self.model = auto_model_cls.from_config(model_config)
                print('custom model_config:', model_config)
            else:
                print('loading model', self.model_name)
                self.model = auto_model_cls.from_pretrained(model_name)
            word_emb = self.model.get_input_embeddings().weight
            self.dim = word_emb.size(1)
            device_count = torch.cuda.device_count()
            self.model.to(device=self.device)

    def update_special_tokens(self, additional_special_tokens):
        current = self.tokenizer.special_tokens_map[self.add_tokens_key]
        self.tokenizer.add_special_tokens(
            {self.add_tokens_key: current + additional_special_tokens})

    def __call__(self, *args, **kwargs):
        return self.model(*args, **kwargs)

    def tokenize(self, text, masked_idxs=None):
        if isinstance(text, str):
            tokenized_text = self.tokenizer.tokenize(text)
            if masked_idxs is not None:
                for idx in masked_idxs:
                    tokenized_text[idx] = self.MASK
            tokenized = self.add_special_symbols(tokenized_text)
            return tokenized
        return list(map(self.tokenize, text))

    def add_special_symbols_bert(self, tokenized_text):
        return [self.CLS] + tokenized_text + [self.SEP]

    def add_special_symbols_roberta(self, tokenized_text):
        return [self.BOS] + tokenized_text + [self.EOS]

    def tokenize_sentence_pair(self, sent1, sent2):
        tokenized_sent1 = self.tokenizer.tokenize(sent1)
        tokenized_sent2 = self.tokenizer.tokenize(sent2)
        return self.add_special_symbols_sent_pair(tokenized_sent1,
                                                  tokenized_sent2)

    def add_special_symbols_sent_pair(self, tokenized_sent1, tokenized_sent2):
        return ([self.CLS] + tokenized_sent1 + [self.SEP] + tokenized_sent2 +
                [self.SEP])

    def tokenize_to_ids(self,
                        text,
                        masked_idxs=None,
                        pad=True,
                        max_len=None,
                        clip_long_seq=False):
        tokens = self.tokenize(text, masked_idxs)
        return self.convert_tokens_to_ids(tokens,
                                          pad=pad,
                                          max_len=max_len,
                                          clip_long_seq=clip_long_seq)

    def tokenize_sentence_pair_to_ids(self, sent1, sent2):
        tokenized_sent1 = self.tokenizer.tokenize(sent1)
        segment1_len = len(tokenized_sent1) + self.nspecial_symbols_segment1
        tokenized_sent2 = self.tokenizer.tokenize(sent2)
        segment2_len = len(tokenized_sent2) + self.nspecial_symbols_segment2
        tokenized_sents = self.add_special_symbols_sent_pair(
            tokenized_sent1, tokenized_sent2)
        padded_ids, padding_mask = self.convert_tokens_to_ids(tokenized_sents)
        segment_ids = self.segment_ids(segment1_len, segment2_len)
        return padded_ids, padding_mask, segment_ids

    def mask_mention_and_tokenize_context(self, collapse_mask, *, left_ctx,
                                          mention, right_ctx, **kwargs):
        left_ctx_tokenized = self.tokenize(left_ctx)[:-1]  # remove [SEP]
        if collapse_mask:
            masked_mention = [self.MASK]
        else:
            mention_tokenized = self.tokenize(mention)
            masked_mention = [self.MASK] * len(mention_tokenized)
        right_ctx_tokenized = self.tokenize(right_ctx)[1:]  # remove [CLS]
        tokens = left_ctx_tokenized + masked_mention + right_ctx_tokenized
        return tokens

    def mask_mention_and_tokenize_context_to_ids(self,
                                                 left_ctx,
                                                 mention,
                                                 right_ctx,
                                                 collapse_mask=True,
                                                 pad=True):
        tokens = self.mask_mention_and_tokenize_context(
            collapse_mask=collapse_mask,
            left_ctx=left_ctx,
            mention=mention,
            right_ctx=right_ctx)
        return tokens, self.convert_tokens_to_ids(tokens, pad=pad)

    def mask_mentions_and_tokenize_contexts_to_ids(self,
                                                   mentions_and_contexts,
                                                   collapse_mask=True):
        tokens = [
            self.mask_mention_and_tokenize_context(collapse_mask=collapse_mask,
                                                   **ment_ctx)
            for ment_ctx in mentions_and_contexts
        ]
        return tokens, self.convert_tokens_to_ids(tokens)

    def convert_tokens_to_ids(self,
                              tokens,
                              pad=True,
                              max_len=None,
                              clip_long_seq=False):
        max_len = max_len or self.max_len
        if not tokens:
            dummy = torch.tensor([]).to(device=self.device)
            if pad:
                return dummy.to(dtype=torch.long), dummy.to(dtype=torch.uint8)
            return dummy
        elif isinstance(tokens[0], list):
            token_idss = map(self.tokenizer.convert_tokens_to_ids, tokens)
            padded_ids = torch.zeros(
                (len(tokens), max_len), dtype=torch.long) + self.pad_idx
            for row_idx, token_ids in enumerate(token_idss):
                token_ids = torch.tensor(token_ids)
                if clip_long_seq:
                    token_ids = token_ids[:max_len]
                padded_ids[row_idx, :len(token_ids)] = token_ids
            padded_ids = padded_ids.to(device=self.device)
            mask = padded_ids != self.pad_idx
            return padded_ids, mask
        token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
        ids = torch.tensor([token_ids]).to(device=self.device)
        if clip_long_seq:
            ids = ids[:, :max_len]
        else:
            assert ids.size(
                1
            ) <= max_len, f'{ids.size(1)} > {max_len}\n{len(tokens)} {tokens}'
        if pad:
            padded_ids = torch.zeros(1, max_len).to(ids) + self.pad_idx
            padded_ids[0, :ids.size(1)] = ids
            mask = torch.zeros(1, max_len).to(ids)
            mask[0, :ids.size(1)] = 1
            return padded_ids, mask
        else:
            return ids

    def subword_tokenize(self,
                         tokens,
                         mask_start_idx=None,
                         mask_end_idx=None,
                         add_mask_start_end_markers=False,
                         collapse_mask=True,
                         apply_mask=True,
                         add_special_symbols=True):
        """Segment each token into subwords while keeping track of
        token boundaries.

        Parameters
        ----------
        tokens: A sequence of strings, representing input tokens.

        Returns
        -------
        A tuple consisting of:
            - A list of subwords, flanked by the required special symbols.
            - An array of indices into the list of subwords, indicating
                that the corresponding subword is the start of a new
                token. For example, [1, 3, 4, 7] means that the subwords
                1, 3, 4, 7 are token starts, while all other subwords
                (0, 2, 5, 6, 8...) are in or at the end of tokens.
                This list allows selecting Bert hidden states that
                represent tokens, which is necessary in sequence
                labeling.
        """
        if mask_start_idx is not None:
            try:
                mask_starts = list(iter(mask_start_idx))
            except TypeError:
                mask_starts = [mask_start_idx]
            if mask_end_idx is None:
                assert len(mask_starts) == 1
                mask_ends = [mask_starts[0] + 1]
            else:
                try:
                    mask_ends = list(iter(mask_end_idx))
                except TypeError:
                    mask_ends = [mask_end_idx]

            mask_start_ends = list(reversed(list(zip(mask_starts, mask_ends))))
            if apply_mask:
                for mask_start, mask_end in mask_start_ends:
                    if collapse_mask:
                        mask_len = 1
                    else:
                        mention = ' '.join(tokens[mask_start:mask_end])
                        mention_subw = self.tokenize(mention)[1:-1]
                        mask_len = len(mention_subw)
                    tokens = (tokens[:mask_start] + [self.MASK] * mask_len +
                              tokens[mask_end:])
            if add_mask_start_end_markers:
                for mask_start, mask_end in mask_start_ends:
                    if apply_mask:
                        if collapse_mask:
                            mask_len = 1
                        else:
                            mention = ' '.join(tokens[mask_start:mask_end])
                            mention_subw = self.tokenize(mention)[1:-1]
                            mask_len = len(mention_subw)
                        mention = [self.MASK] * mask_len
                    else:
                        mention = tokens[mask_start:mask_end]
                    tokens = (tokens[:mask_start] + [self.BEGIN_MENTION] +
                              mention + [self.END_MENTION] + tokens[mask_end:])
                # account for inserted mention markers
                new_mask_starts = [
                    i for i, t in enumerate(tokens) if t == self.BEGIN_MENTION
                ]
                new_mask_ends = [
                    i + 1 for i, t in enumerate(tokens)
                    if t == self.END_MENTION
                ]
                mask_start_ends = list(
                    reversed(list(zip(new_mask_starts, new_mask_ends))))
        subwords = list(map(self.tokenizer.tokenize, tokens))
        subword_lengths = list(map(len, subwords))
        subwords = list(flatten(subwords))
        if add_special_symbols:
            subwords = self.add_special_symbols(subwords)
            offset = 1
            # + 1: assumes one special symbol is prepended to the input sequence
        else:
            offset = 0
        token_start_idxs = offset + np.cumsum([0] + subword_lengths[:-1])
        if mask_start_idx is not None:
            return subwords, token_start_idxs, mask_start_ends
        return subwords, token_start_idxs, None

    def subword_tokenize_to_ids(self,
                                tokens,
                                mask_start_idx=None,
                                mask_end_idx=None,
                                add_mask_start_end_markers=False,
                                collapse_mask=True,
                                apply_mask=True,
                                return_mask_mask=False,
                                return_mask_start_end=False,
                                max_len=None,
                                add_special_symbols=True):
        """Segment each token into subwords while keeping track of
        token boundaries and convert subwords into IDs.

        Parameters
        ----------
        tokens: A sequence of strings, representing input tokens.

        Returns
        -------
        A tuple consisting of:
            - A list of subword IDs, including IDs of the required
                special symbols.
            - A mask indicating padding tokens.
            - An array of indices into the list of subwords. See
                doc of subword_tokenize.
        """
        max_len = max_len or self.max_len
        subwords, token_start_idxs, mask_start_ends = self.subword_tokenize(
            tokens,
            mask_start_idx=mask_start_idx,
            mask_end_idx=mask_end_idx,
            add_mask_start_end_markers=add_mask_start_end_markers,
            collapse_mask=collapse_mask,
            apply_mask=apply_mask,
            add_special_symbols=add_special_symbols)
        subword_ids, padding_mask = self.convert_tokens_to_ids(subwords,
                                                               max_len=max_len)
        token_starts = torch.zeros(1, max_len).to(subword_ids)
        token_starts[0, token_start_idxs] = 1
        if return_mask_mask:
            mask_mask = torch.zeros(1, max_len).to(subword_ids)
            for mask_start, mask_end in mask_start_ends:
                token_mask_idxs = list(range(mask_start, mask_end))
                subw_mask_idxs = token_start_idxs[token_mask_idxs]
                mask_mask[0, subw_mask_idxs] = 1
            if return_mask_start_end:
                mask_start_end = torch.zeros(1, max_len).to(subword_ids)
                # this only works if there are fewer than seq_len // 2 masks
                for i, (mask_start, mask_end) in enumerate(mask_start_ends):
                    token_mask_idxs = list(range(mask_start, mask_end))
                    subw_mask_idxs = token_start_idxs[token_mask_idxs]
                    mask_start_end[0, 2 * i] = int(subw_mask_idxs[0])
                    mask_start_end[0, 2 * i + 1] = int(subw_mask_idxs[-1])
                return (subword_ids, padding_mask, token_starts, mask_mask,
                        mask_start_end)
            else:
                return subword_ids, padding_mask, token_starts, mask_mask
        return subword_ids, padding_mask, token_starts

    def segment_ids(self, segment1_len, segment2_len, pad=True, max_len=None):
        max_len = max_len or self.max_len
        npad = max_len - segment1_len - segment2_len
        ids = [0] * segment1_len + [1] * segment2_len + [0] * npad
        assert len(ids) == max_len
        return torch.tensor([ids]).to(device=self.device)
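
A rough usage sketch of the wrapper above; the model name, device, max_len, and input sentence are illustrative assumptions (not taken from the original project), and it relies on the module's own torch/transformers imports:

# Hypothetical usage; model name and inputs are illustrative assumptions.
transformer = Transformer("bert-base-uncased", device="cpu", max_len=32,
                          only_tokenizer=True)

tokens = "The quick brown fox jumps over the lazy dog".split()
subword_ids, padding_mask, token_starts = transformer.subword_tokenize_to_ids(tokens)

# subword_ids and padding_mask are (1, max_len) tensors; token_starts flags the
# subword positions that begin each original token (useful for sequence labeling).
print(subword_ids.shape, padding_mask.shape, token_starts.nonzero().flatten())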
Example #30
def _intdef_ids():
    intdef_ids = Bunch([(x, i) for i, x in enumerate(
        lines(INTDEFS_CSV)) if x.strip()])
    return intdef_ids
Example #31
def _register_id_map():
    return Bunch([(x, i) for i, x in enumerate(lines(REGISTERS_CSV))])
Example #32
import argparse
import os
import util
import subprocess
import file_management

plp = file_management.get_playlists_path()
sgp = file_management.get_songs_path()

parser = argparse.ArgumentParser()
parser.add_argument('-p', '--playlist', type=str, metavar='<TITLE>')
args = parser.parse_args()

playlist = args.playlist

aspath = os.path.join(plp, f'{playlist}.playlist')

with open(aspath) as f:
    for line in util.lines(f):
        song_dest = os.path.join(sgp, f'{line}.m4a')
        # subprocess.run(['ffplay', '-nodisp', '-nostats', '-hide_banner', song_dest])
        subprocess.run(['afplay', song_dest])
Example #33
        sub_vecs = create_representation(
            "SVD", constants.SUBREDDIT_EMBEDDINGS.format(name))
        pos_seeds, neg_seeds = seeds.twitter_seeds()
        sub_vecs = sub_vecs.get_subembed(
            set(word_dict.token2id.keys()).union(pos_seeds).union(neg_seeds))
        pols = polarity_induction_methods.bootstrap(sub_vecs,
                                                    pos_seeds,
                                                    neg_seeds,
                                                    return_all=True,
                                                    nn=25,
                                                    beta=0.9,
                                                    num_boots=50,
                                                    n_procs=10)
        util.write_pickle(pols, POLARITIES + name + ".pkl")


if __name__ == "__main__":
    queue = Queue()
    id = int(sys.argv[1])
    valid_ids = set(range(250, 256))
    for i, line in enumerate(util.lines(NAMES)):
        if i in valid_ids:
            name = line.split()[0]
            queue.put(name)
    print queue.qsize()
    procs = [Process(target=worker, args=[i, queue]) for i in range(1)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()