Example #1
    def reload_init(self, first_time, users):
        try:
            try:
                self.Config = utils.Config("debug.json")
            except FileNotFoundError:
                self.Config = utils.Config("config.json")
        except FileNotFoundError:
            self.Config = utils.Config("config.json.template")

        self.nickname = self.Config().main.nick
        self.password = self.Config().osu.irc if self.Config(
        ).osu.irc else None

        self.UPDATE_MSG = self.Config().main.update_msg
        self.FIRST_TIME_MSG = self.Config().main.first_time_msg

        self.user_pref = utils.Utils.create_sqlite_dict(
            "./userpref.db", "userpref")
        self.recommend = utils.Utils.create_sqlite_dict(
            "./recommend.db", "recommend")
        self.start_time = first_time

        self.users = users

        self.Commands = utils.Commands(self, self.Config)
        self.command_funcs = [
            func for func in dir(utils.Commands)
            if callable(getattr(utils.Commands, func))
            and not func.startswith("_")
        ]
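The snippet above assumes a utils.Config that raises FileNotFoundError for a missing file (hence the nested fallbacks) and, when called, exposes the parsed settings as attributes (self.Config().main.nick). A minimal JSON-backed sketch consistent with that usage, not the project's actual class, could be:

import json
from types import SimpleNamespace

class Config:
    """Illustrative stand-in: Config("debug.json")().main.nick style access."""

    def __init__(self, path):
        with open(path) as f:  # a missing file raises FileNotFoundError
            self._data = json.load(f)

    def __call__(self):
        def wrap(obj):
            # Recursively turn dicts into attribute-accessible namespaces.
            if isinstance(obj, dict):
                return SimpleNamespace(**{k: wrap(v) for k, v in obj.items()})
            return obj
        return wrap(self._data)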
Example #2
def main(gpu, path_model, path_corpus, path_config, path_target, path_program,
         path_json, path_img, win_size, path_word2vec):
    MAX_LENGTH = 70

    config = utils.Config(path_config)
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    batch_size = config.getint("batch_size")

    print "[info] CONFIG: %s" % path_config
    print "[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec
    print "[info] LOADED MODEL: %s" % path_model
    print "[info] WORD DIM: %d" % word_dim
    print "[info] STATE DIM: %d" % state_dim
    print "[info] BATCH SIZE: %d" % batch_size

    sents_train, sents_val, vocab, ivocab = \
            utils.load_corpus(path_corpus=path_corpus, max_length=MAX_LENGTH)

    cuda.get_device(gpu).use()

    model = utils.load_cxt_model(path_model, path_config, vocab)
    model.to_gpu(gpu)

    sents = parse(vocab, path_target)
    probs = inspect(model, sents)

    words, probs = aggregate(sents, probs, vocab, ivocab, win_size)

    tokens = [ivocab[w] for w in words]

    prob_dist, grid_text = collate(tokens, probs, path_program)

    generate_json(prob_dist, 0.05, path_program, path_json)
    draw_heatmap(np.array(prob_dist), grid_text, path_img)
Example #3
def get_rstdt_wsj_filenames():
    config = utils.Config()

    filenames = []
    for split in ["train", "test"]:
        filenames_ = os.listdir(
            os.path.join(config.getpath("data"), "rstdt", "wsj", split))
        filenames_ = [n for n in filenames_ if n.endswith(".edus")]
        filenames.extend(filenames_)

    assert len(filenames) == 385

    filenames.remove("file1.edus")
    filenames.remove("file2.edus")
    filenames.remove("file3.edus")
    filenames.remove("file4.edus")
    filenames.remove("file5.edus")

    filenames = [n[:-len(".out.edus")] for n in filenames]

    filenames.append("wsj_0764")  # "file1"
    filenames.append("wsj_0430")  # "file2"
    filenames.append("wsj_0766")  # "file3"
    filenames.append("wsj_0778")  # "file4"
    filenames.append("wsj_2172")  # "file5"

    return filenames
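Example #2 reads numeric settings with getint(), and most of the corpus-preprocessing examples below resolve directories with getpath(). A minimal configparser-based sketch compatible with those calls, assumed rather than taken from the original utils module, could be:

import os
from configparser import ConfigParser

class Config:
    """Illustrative INI-backed config with getint()/getpath() accessors."""

    def __init__(self, path="./config.ini", section="general"):
        parser = ConfigParser()
        parser.read(path)
        self._section = parser[section]

    def getint(self, key):
        return int(self._section[key])

    def getpath(self, key):
        # Expand ~ and environment variables so the result is directly usable.
        return os.path.expanduser(os.path.expandvars(self._section[key]))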
Example #4
def main():
    config = utils.Config()

    path_out = os.path.join(config.getpath("data"), "aarc_abst")
    utils.mkdir(path_out)

    filenames = os.listdir(config.getpath("aarc"))
    filenames = [n for n in filenames if n.endswith(".txt.utf8")]
    filenames.sort()

    nlp = spacy.load("en_core_web_sm",
                     disable=["tagger", "parser", "ner", "textcat"])

    cnt = 0
    for filename in pyprind.prog_bar(filenames):
        text = extract_abstract(os.path.join(config.getpath("aarc"), filename))
        if text == "":
            # print("No Abstract!: %s" % filename)
            continue
        with open(
                os.path.join(path_out,
                             filename.replace(".txt.utf8", ".doc.tokens")),
                "w") as f:
            doc = nlp(text)
            tokens = [token.text for token in doc]
            assert len(tokens) > 0
            tokens = " ".join(tokens)
            f.write("%s\n" % tokens)
        cnt += 1

    print("Processed %d/%d files" % (cnt, len(filenames)))
Example #5
def main(args):
    config = utils.Config()

    utils.mkdir(os.path.join(config.getpath("data"), "rstdt-vocab"))

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "rstdt", "renamed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()

    with open(
            os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                         "concat.edus.heads.deprel"), "w") as f:
        for filename in filenames:
            deprels = utils.read_lines(os.path.join(config.getpath("data"),
                                                    "rstdt", "renamed",
                                                    filename + ".heads"),
                                       process=lambda line: line.split()[-1])
            for deprel in deprels:
                f.write("%s\n" % deprel)

    if args.with_root:
        special_words = ["<root>"]
    else:
        special_words = []
    textpreprocessor.create_vocabulary.run(
        os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                     "concat.edus.heads.deprel"),
        os.path.join(config.getpath("data"), "rstdt-vocab",
                     "deprels.vocab.txt"),
        prune_at=10000000,
        min_count=-1,
        special_words=special_words,
        with_unk=True)
Example #6
def main(args):
    config = utils.Config()

    utils.mkdir(os.path.join(config.getpath("data"), "rstdt-vocab"))

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "rstdt", "renamed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()

    filepaths = [
        os.path.join(config.getpath("data"), "rstdt", "renamed",
                     filename + ".postags") for filename in filenames
    ]

    # Concat
    textpreprocessor.concat.run(
        filepaths,
        os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                     "concat.edus.postags"))

    # Build vocabulary
    if args.with_root:
        special_words = ["<root>"]
    else:
        special_words = []
    textpreprocessor.create_vocabulary.run(
        os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                     "concat.edus.postags"),
        os.path.join(config.getpath("data"), "rstdt-vocab",
                     "postags.vocab.txt"),
        prune_at=50000,
        min_count=-1,
        special_words=special_words,
        with_unk=False)
Example #7
def main():
    config = utils.Config()

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt", "segmented"))
    filenames = [n for n in filenames if n.endswith(".txt")]
    filenames.sort()

    utils.mkdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                     "preprocessed"))

    for filename in pyprind.prog_bar(filenames):
        path_seg = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "segmented", filename)
        path_raw = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "raw", filename)
        path_dst = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "preprocessed",
                                filename.replace(".txt", ".edus"))
        # Input
        edus = utils.read_lines(path_seg, process=lambda line: line)
        edus = remove_empty_lines(filename, edus)
        raw_lines = utils.read_lines(path_raw, process=lambda line: line)
        raw_lines = remove_empty_lines(filename, raw_lines)
        assert count_chars(edus) == count_chars(raw_lines)
        # Processing
        edus = convert_edus(edus, raw_lines)
        assert count_chars(edus) == count_chars(raw_lines)
        # Output
        utils.write_lines(path_dst, edus)
Example #8
def main():
    config = utils.Config()

    filename_map = read_filename_map(
        os.path.join(config.getpath("data"), "rstdt", "renamed",
                     "filename_map.txt"))

    with open(
            os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                         "filelist.ptbtokenizer2.txt"), "w") as f:
        for filename_wsj in filename_map.keys():
            filename_ren = filename_map[filename_wsj]
            split = None
            if filename_ren.startswith("train"):
                split = "TRAINING"
            elif filename_ren.startswith("test"):
                split = "TEST"
            else:
                raise ValueError("filename_ren=%s" % filename_ren)
            path_in = os.path.join(config.getpath("rstdt"), split,
                                   filename_wsj.replace(".edus", ""))
            path_out = os.path.join(
                config.getpath("data"), "rstdt", "tmp.preprocessing",
                filename_ren.replace(".edus", ".raw.tokenized"))
            f.write("%s\t%s\n" % (path_in, path_out))
Example #9
def train_modal():
    tf.reset_default_graph()
    config_model = utils.Config()
    model_train = model.Model(config_model, 0.5)
    init = tf.initialize_all_variables()
    loss_history = []
    save_file = path + '/weight_model/' + model_train.config.model_name + '/train_model.ckpt'
    saver = tf.train.Saver()

    with tf.Session() as sess:
        if (os.path.exists(save_file + '.index')):
            saver.restore(sess, save_file)
        else:
            sess.run(init)
        for epoch in range(config_model.max_epochs):
            arrment = np.arange(config_model.total_train_img)
            np.random.shuffle(arrment)
            print('Epoch : ' + str(epoch + 1))
            total_loss = model_train.run_epoch(sess,
                                               model_train.config.batch_size,
                                               arrment)
            loss_history.extend(total_loss)

            print('Mean loss: %.1f' % np.mean(total_loss))
            if epoch % 1 == 0:
                save_f = path + '/weight_model/' + model_train.config.model_name + 'epoch_' + str(
                    epoch) + '/train_model.ckpt'
                saver.save(sess, save_f)
                print('Model Saved.')
Example #10
def main():
    config = utils.Config()

    for split in ["train", "test"]:

        dataset = dataloader.read_rstdt(split=split,
                                        relation_level="coarse-grained",
                                        with_root=False)

        with open(
                os.path.join(config.getpath("data"), "rstdt", "wsj", split,
                             "gold.labeled.nary.ctrees"), "w") as f:
            for data in dataset:
                f.write("%s\n" % " ".join(data.nary_sexp))

        with open(
                os.path.join(config.getpath("data"), "rstdt", "wsj", split,
                             "gold.labeled.bin.ctrees"), "w") as f:
            for data in dataset:
                f.write("%s\n" % " ".join(data.bin_sexp))

        with open(
                os.path.join(config.getpath("data"), "rstdt", "wsj", split,
                             "gold.arcs"), "w") as f:
            for data in dataset:
                arcs = ["%s-%s-%s" % (h, d, r) for (h, d, r) in data.arcs]
                f.write("%s\n" % " ".join(arcs))
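The "h-d-r" strings written to gold.arcs are hyphen-joined (head, dependent, relation) triples; Example #24 below reads the same format back with treetk.hyphens2arcs. A tiny round-trip illustration with invented values:

arcs = [(0, 1, "<root>"), (1, 2, "elaboration")]
line = " ".join("%s-%s-%s" % (h, d, r) for (h, d, r) in arcs)
# line == "0-1-<root> 1-2-elaboration"; splitting it again yields the tokens
# that treetk.hyphens2arcs() receives in Example #24.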
Example #11
def main():
    config = utils.Config()

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "rstdt", "renamed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()

    for file_i, filename in enumerate(filenames):
        path_tok = os.path.join(config.getpath("data"), "rstdt",
                                "tmp.preprocessing", filename + ".tokenized")
        path_tok2 = os.path.join(config.getpath("data"), "rstdt",
                                 "tmp.preprocessing",
                                 filename.replace(".edus", ".raw.tokenized"))
        path_out = os.path.join(
            config.getpath("data"), "rstdt", "tmp.preprocessing",
            filename.replace(".edus", ".paragraph.boundaries"))

        try:
            bnds = get_paragraph_boundaries(path_tok, path_tok2)
        except Exception:
            print("Error occurred when comparing %s and %s" %
                  (path_tok, path_tok2))
            sys.exit(-1)
        write_boundaries(bnds, path_out)
Example #12
def main():
    config = utils.Config()

    # On board LED
    led = machine.Pin(2, machine.Pin.OUT, machine.Pin.PULL_UP)

    sensor = dht.DHT22(machine.Pin(config.get("dht_gpio", 4)))

    client_id = "esp8266_" + ubinascii.hexlify(machine.unique_id()).format()
    client = MQTTClient(client_id, config.get("mqtt_broker"),
                        config.get("mqtt_port"), config.get("mqtt_user"),
                        config.get("mqtt_passwd"))
    try:
        client.connect()
    except OSError as e:
        # Just report and continue, since publish will try to reconnect
        print("Error when connecting to the MQTT broker")
    else:
        print("Connected to {}".format(config.get("mqtt_broker")))

    # Iterate and publish
    while True:
        sensor.measure()
        led.low()
        client.publish("{}/temperature".format(config.get("mqtt_topic")),
                                      str(sensor.temperature()))
        client.publish("{}/humidity".format(config.get("mqtt_topic")),
                                      str(sensor.humidity()))
        led.high()
        time.sleep(5)
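In this MicroPython example, utils.Config behaves like a mapping with defaults (config.get("dht_gpio", 4)). A minimal sketch of such a class, assumed for illustration only, could simply wrap a JSON file on the device's filesystem:

import json

class Config:
    """Illustrative mapping-style config: get(key, default)."""

    def __init__(self, path="config.json"):
        with open(path) as f:
            self._data = json.load(f)

    def get(self, key, default=None):
        return self._data.get(key, default)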
Example #13
def main():
    config = utils.Config()

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                     "preprocessed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()

    n_skipped = 0
    for file_i, filename in enumerate(filenames):
        path_tok = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "tmp.preprocessing", filename + ".tokenized")
        path_tok2 = os.path.join(
            config.getpath("data"), "ptbwsj_wo_rstdt", "tmp.preprocessing",
            filename.replace(".edus", ".txt") + ".tokenized")
        path_out = os.path.join(
            config.getpath("data"), "ptbwsj_wo_rstdt", "tmp.preprocessing",
            filename.replace(".edus", ".paragraph.boundaries"))

        bnds = None
        try:
            bnds = get_paragraph_boundaries(path_tok, path_tok2)
        except Exception:
            print("Skipped %s" % path_tok)
            n_skipped += 1
        if bnds is not None:
            write_boundaries(bnds, path_out)
    print("Skipped %d files." % n_skipped)
Example #14
File: bot.py Project: ReinaSakuraba/Poppi
    def __init__(self, *, pool: asyncpg.pool.Pool, **kwargs):
        super().__init__(command_prefix=_get_prefix,
                         description=config.description,
                         pm_help=None,
                         game=discord.Game(name=config.game),
                         **kwargs)

        self.all_commands = utils.CaseInsensitiveDict(self.all_commands)

        self.pool = pool
        self.prefixes = utils.Config('prefixes.json', loop=self.loop)
        self.session = aiohttp.ClientSession(loop=self.loop)
        self.lavalink = lavalink.Client(bot=self,
                                        loop=self.loop,
                                        host=config.lava_host,
                                        rest_port=config.lava_rest,
                                        ws_port=config.lava_ws,
                                        password=config.lava_pass,
                                        player=music.Player)
        self.process = psutil.Process()

        for extension in self.startup_extensions:
            try:
                self.load_extension(extension)
            except Exception as e:
                print(f'Failed to load extension {extension}')
                traceback.print_exc()

        self.loop.create_task(self.init())
Example #15
    def __init__(self):
        self.config = utils.Config('./config.yml')
        self.device = None
        self.summary = {}
        self.dumpPath = None
        self.sysConfig()
        self.setSummary()
        self.pipeRaw = self.loadDataset()
        self.pipeLen = self.pipeRaw['train'].__len__()
        self.pipe = None
        self.pipeIter()
        self.gen = model.Generator(self.config)
        self.dis = model.Discriminator(self.config)

        if self.config.GPU == -1 and self.config.CUDA:
            print('Using MultiGPU')
            self.gen = nn.parallel.DataParallel(self.gen).to(self.device)

        else:
            self.gen = self.gen.to(self.device)

        self.optGen = torch.optim.Adam(self.gen.parameters(),
                                       lr=eval(self.config.LR),
                                       betas=self.config.BETA)
        self.optDis = torch.optim.Adam(self.dis.parameters(),
                                       lr=eval(self.config.LR),
                                       betas=self.config.BETA)
Example #16
def main():
    config = utils.Config()

    path_corpus_train = config.getpath("prep_corpus") + ".train"
    path_corpus_val = config.getpath("prep_corpus") + ".val"
    corpus_train = load_corpus(path_corpus_train,
                               vocab=path_corpus_train + ".vocab",
                               max_length=1000000000)
    corpus_val = load_corpus(path_corpus_val,
                             vocab=corpus_train.vocab,
                             max_length=1000000000)

    count = np.zeros((101, ))
    count = count_sentence_length(corpus_train, count=count)
    count = count_sentence_length(corpus_val, count=count)

    diff = len(corpus_train) + len(corpus_val) - count.sum()
    utils.logger.debug(
        "[info] Excluded %d sentences of length longer than %d" %
        (diff, len(count) - 1))

    path_out = config.getpath("prep_corpus") + ".histogram.npy"
    np.save(path_out, count)

    plot_histogram(count)
Example #17
	def close(self):
		""" Closes the file and returns a URI with the final address of the file.
		If save_metadata is set, the file metadata is saved in the DHT.
		You can always access the metadata with File.metadata """
		if self.closed: return
		logger.info('Closing %s'%self.uri.get_readable())
		if self.mode == 'w':
			self.flush(True)
			self.metadata.set('Main:parts', len(self.parts))
			self.metadata.set('Main:length', self.filelength)
			self.metadata.set('Main:hash', self.hasher.hexdigest())
			self.metadata.set('Main:p', '')
			if self.save_metadata:
				# variables used to chain metadata blocks
				puri = self.uri
				pmeta = self.metadata
				not_saved = True
				# crypter used to encrypt the metadata. There is always
				# a crypter to protect against casual attackers, but
				# if there is no Kf the crypter is nearly useless
				if SECURED:
					if self.keys[4]:
						mdencrypter = AES.new(self.keys[4], AES.MODE_CBC, self.uri.get_hd())
					else:
						mdencrypter = AES.new(self.uri.get_hd(), AES.MODE_CBC, self.uri.get_hd())
				else:
					mdencrypter = DummyEncrypter()
				for i in range(0, len(self.parts)):
					pmeta.set('Part:%d'%i, self.parts[i])
					# chain the metadata blocks, each block only with
					# DESC_PER_METAPART references to parts of the file
					if i<len(self.parts)-1 and i%self.DESC_PER_METAPART == self.DESC_PER_METAPART-1:
						nuri=URI(self.uri.uid,utils.random_nick(),'',self.keys)
						nuri.hd=utils.random_string(16,False)
						pmeta.set('Main:n',nuri.get_static())
						m=pmeta.save()
						pmeta.set('Main:p',utils.random_string(self.BLOCK_SIZE-len(m)))
						m=mdencrypter.encrypt(pmeta.save())
						dfs.dht.put(puri.get_hd(),m,puri.nick)
						pmeta=utils.Config()
						pmeta.set('Main:p','')
						puri=nuri
						not_saved=False
					else:
						not_saved=True
				if not_saved:
					m=pmeta.save()
					pmeta.set('Main:p',utils.random_string(self.BLOCK_SIZE-len(m)))
					m=mdencrypter.encrypt(pmeta.save())
					dfs.dht.put(puri.get_hd(),m,puri.nick)

			# Create the final metadata
			for i in range(0,len(self.parts)):
				self.metadata.set('Part:%d'%i,self.parts[i])
		else:
			# In read, free the buffer
			self.buffer = None
		self.closed = True
		return self.uri
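The chaining loop in close() stores at most DESC_PER_METAPART part references per metadata block and links each block to the next through a fresh 'Main:n' URI. A standalone sketch of just that splitting step, with an invented block size, for illustration only:

def chunk_parts(parts, desc_per_metapart=4):
    """Split part descriptors into chained metadata blocks of bounded size."""
    return [parts[i:i + desc_per_metapart]
            for i in range(0, len(parts), desc_per_metapart)]

# Ten parts with four descriptors per block yield three chained blocks.
assert chunk_parts(list(range(10))) == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]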
Example #18
def main():
    config = utils.Config()

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                     "preprocessed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()

    n_skipped = 0
    for file_i, filename in enumerate(filenames):
        path_s = os.path.join(
            config.getpath("data"), "ptbwsj_wo_rstdt", "tmp.preprocessing",
            filename.replace(".edus", ".sentence.boundaries"))
        path_p = os.path.join(
            config.getpath("data"), "ptbwsj_wo_rstdt", "tmp.preprocessing",
            filename.replace(".edus", ".paragraph.boundaries"))

        if not os.path.exists(path_s):
            print("Skipped %s because %s doesn't exist." % (filename, path_s))
            n_skipped += 1
            continue
        if not os.path.exists(path_p):
            print("Skipped %s because %s doesn't exist." % (filename, path_p))
            n_skipped += 1
            continue

        sbnds = utils.read_lines(
            path_s, process=lambda l: tuple([int(x) for x in l.split()]))
        pbnds = utils.read_lines(
            path_p, process=lambda l: tuple([int(x) for x in l.split()]))

        sbnds_proj, n_edus = project_pbnds_to_sbnds(sbnds=sbnds, pbnds=pbnds)
        if sbnds != sbnds_proj:
            print("Projected paragraph boundaries into the sentence boundaries (+%d): %s" % \
                    (len(sbnds_proj) - len(sbnds), path_s))

        test_boundaries(sbnds_proj, n_edus)

        pbnds = replace_subtrees_with_ids(sbnds=sbnds_proj, pbnds=pbnds)

        write_boundaries(
            sbnds,
            os.path.join(
                config.getpath("data"), "ptbwsj_wo_rstdt", "preprocessed",
                filename.replace(".edus", ".sentence.noproj.boundaries")))
        write_boundaries(
            sbnds_proj,
            os.path.join(
                config.getpath("data"), "ptbwsj_wo_rstdt", "preprocessed",
                filename.replace(".edus", ".sentence.proj.boundaries")))
        write_boundaries(
            pbnds,
            os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                         "preprocessed",
                         filename.replace(".edus", ".paragraph.boundaries")))

    print("Skipped %d files." % n_skipped)
Example #19
    def __init__(self):
        self.config = utils.Config()

        super().__init__(
            command_prefix=get_prefix,
            description="A bot that does simple yet marvellous things!",
            case_insensitive=self.config.case_insensitive)

        self.remove_command("help")
Example #20
def main():
    config = utils.Config()

    utils.mkdir(os.path.join(config.getpath("data"), "rstdt-vocab"))

    filenames = []
    for filename in os.listdir(os.path.join(config.getpath("data"), "rstdt", "wsj", "train")):
        filenames.append(os.path.join(config.getpath("data"), "rstdt", "wsj", "train", filename))
    for filename in os.listdir(os.path.join(config.getpath("data"), "rstdt", "wsj", "test")):
        filenames.append(os.path.join(config.getpath("data"), "rstdt", "wsj", "test", filename))
    filenames = [n for n in filenames if n.endswith(".labeled.bin.ctree")]
    filenames.sort()

    relation_mapper = treetk.rstdt.RelationMapper()

    frelations = []
    crelations = []
    nuclearities = []

    for filename in pyprind.prog_bar(filenames):
        sexp = utils.read_lines(filename, process=lambda line: line)
        sexp = treetk.preprocess(sexp)
        tree = treetk.rstdt.postprocess(treetk.sexp2tree(sexp, with_nonterminal_labels=True, with_terminal_labels=False))

        nodes = treetk.traverse(tree, order="pre-order", include_terminal=False, acc=None)

        part_frelations = []
        part_crelations = []
        part_nuclearities = []
        for node in nodes:
            relations_ = node.relation_label.split("/")
            part_frelations.extend(relations_)
            part_crelations.extend([relation_mapper.f2c(r) for r in relations_])
            part_nuclearities.append(node.nuclearity_label)

        part_frelations.append("<root>")
        part_crelations.append("<root>")

        frelations.append(part_frelations)
        crelations.append(part_crelations)
        nuclearities.append(part_nuclearities)

    fcounter = utils.get_word_counter(lines=frelations)
    ccounter = utils.get_word_counter(lines=crelations)
    ncounter = utils.get_word_counter(lines=nuclearities)

    frelations = fcounter.most_common() # list of (str, int)
    crelations = ccounter.most_common() # list of (str, int)
    nuclearities = ncounter.most_common() # list of (str, int)

    utils.write_vocab(os.path.join(config.getpath("data"), "rstdt-vocab", "relations.fine.vocab.txt"),
                      frelations)
    utils.write_vocab(os.path.join(config.getpath("data"), "rstdt-vocab", "relations.coarse.vocab.txt"),
                      crelations)
    utils.write_vocab(os.path.join(config.getpath("data"), "rstdt-vocab", "nuclearities.vocab.txt"),
                      nuclearities)
Example #21
def main():
    config = utils.Config()

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                     "preprocessed"))
    filenames = [n for n in filenames if n.endswith(".paragraph.boundaries")]
    filenames = [
        n.replace(".paragraph.boundaries", ".edus") for n in filenames
    ]
    filenames.sort()

    for filename in filenames:
        # Path
        path_edus = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                 "tmp.preprocessing", filename + ".tokenized")
        path_conll = os.path.join(
            config.getpath("data"), "ptbwsj_wo_rstdt", "tmp.preprocessing",
            filename.replace(".edus", ".sentences.conll"))
        path_out = os.path.join(config.getpath("data"), "ptbwsj_wo_rstdt",
                                "preprocessed", filename + ".postags")

        # Read
        edus = utils.read_lines(
            path_edus,
            process=lambda line: line.split())  # list of list of str
        tokens_e = utils.flatten_lists(edus)  # list of str

        sentences = utils.read_conll(
            path_conll,
            keys=["ID", "FORM", "LEMMA", "POSTAG", "_1", "HEAD",
                  "DEPREL"])  # list of list of {str: str}
        conll_lines = utils.flatten_lists(sentences)  # list of {str: str}
        tokens_s = [conll_line["FORM"]
                    for conll_line in conll_lines]  # list of str
        postags_s = [conll_line["POSTAG"]
                     for conll_line in conll_lines]  # list of str

        # Check that the EDU tokens match the tokens from the CoNLL file
        for token_e, token_s, postag_s in zip(tokens_e, tokens_s, postags_s):
            if token_e != token_s:
                raise ValueError("Error! %s != %s" % (token_e, token_s))

        # Create the POSTAG-version of EDUs
        postag_i = 0
        edus_postag = []
        for edu in edus:
            edu_postag = [postags_s[postag_i + i] for i in range(len(edu))]
            edus_postag.append(edu_postag)
            postag_i += len(edu)

        # Write
        with open(path_out, "w") as f:
            for edu_postag in edus_postag:
                f.write("%s\n" % " ".join(edu_postag))
Example #22
    def __init__(self, ps, **kw):
        kw.setdefault('name', qu.to_snake_case(type(self).__name__))
        kw.setdefault('dtype', tf.float32)
        super().__init__(**kw)
        if isinstance(ps, qu.Config):
            self.cfg = ps
        else:
            self.cfg = qu.Config(**dict(self.cfg_items(ps)))
        cfg = self.cfg
        if cfg.initer_stddev:
            self.initer = ki.TruncatedNormal(stddev=cfg.initer_stddev)
Example #23
def main():
    config = utils.Config()

    dataset = dataloader.read_scidtb(split="train",
                                     sub_dir="",
                                     relation_level="coarse-grained")
    with open(
            os.path.join(config.getpath("data"), "scidtb", "preprocessed",
                         "train", "gold.arcs"), "w") as f:
        for data in dataset:
            arcs = ["%s-%s-%s" % (h, d, r) for (h, d, r) in data.arcs]
            f.write("%s\n" % " ".join(arcs))

    dataset = dataloader.read_scidtb(split="dev",
                                     sub_dir="gold",
                                     relation_level="coarse-grained")
    with open(
            os.path.join(config.getpath("data"), "scidtb", "preprocessed",
                         "dev", "gold", "gold.arcs"), "w") as f:
        for data in dataset:
            arcs = ["%s-%s-%s" % (h, d, r) for (h, d, r) in data.arcs]
            f.write("%s\n" % " ".join(arcs))

    dataset = dataloader.read_scidtb(split="dev",
                                     sub_dir="second_annotate",
                                     relation_level="coarse-grained")
    with open(
            os.path.join(config.getpath("data"), "scidtb", "preprocessed",
                         "dev", "second_annotate", "gold.arcs"), "w") as f:
        for data in dataset:
            arcs = ["%s-%s-%s" % (h, d, r) for (h, d, r) in data.arcs]
            f.write("%s\n" % " ".join(arcs))

    dataset = dataloader.read_scidtb(split="test",
                                     sub_dir="gold",
                                     relation_level="coarse-grained")
    with open(
            os.path.join(config.getpath("data"), "scidtb", "preprocessed",
                         "test", "gold", "gold.arcs"), "w") as f:
        for data in dataset:
            arcs = ["%s-%s-%s" % (h, d, r) for (h, d, r) in data.arcs]
            f.write("%s\n" % " ".join(arcs))

    dataset = dataloader.read_scidtb(split="test",
                                     sub_dir="second_annotate",
                                     relation_level="coarse-grained")
    with open(
            os.path.join(config.getpath("data"), "scidtb", "preprocessed",
                         "test", "second_annotate", "gold.arcs"), "w") as f:
        for data in dataset:
            arcs = ["%s-%s-%s" % (h, d, r) for (h, d, r) in data.arcs]
            f.write("%s\n" % " ".join(arcs))
Example #24
def main():
    config = utils.Config()

    utils.mkdir(os.path.join(config.getpath("data"), "scidtb-vocab"))

    relation_mapper = treetk.rstdt.RelationMapper(corpus_name="scidtb")

    filenames = []

    for filename in os.listdir(os.path.join(config.getpath("data"), "scidtb", "preprocessed", "train")):
        filenames.append(os.path.join(config.getpath("data"), "scidtb", "preprocessed", "train", filename))

    for filename in os.listdir(os.path.join(config.getpath("data"), "scidtb", "preprocessed", "dev", "gold")):
        filenames.append(os.path.join(config.getpath("data"), "scidtb", "preprocessed", "dev", "gold", filename))
    for filename in os.listdir(os.path.join(config.getpath("data"), "scidtb", "preprocessed", "dev", "second_annotate")):
        filenames.append(os.path.join(config.getpath("data"), "scidtb", "preprocessed", "dev", "second_annotate", filename))

    for filename in os.listdir(os.path.join(config.getpath("data"), "scidtb", "preprocessed", "test", "gold")):
        filenames.append(os.path.join(config.getpath("data"), "scidtb", "preprocessed", "test", "gold", filename))
    for filename in os.listdir(os.path.join(config.getpath("data"), "scidtb", "preprocessed", "test", "second_annotate")):
        filenames.append(os.path.join(config.getpath("data"), "scidtb", "preprocessed", "test", "second_annotate", filename))

    filenames = [n for n in filenames if n.endswith(".edus.tokens")]
    filenames.sort()

    tmp_f_path = os.path.join(config.getpath("data"), "scidtb-vocab", "tmp_f.txt")
    tmp_c_path = os.path.join(config.getpath("data"), "scidtb-vocab", "tmp_c.txt")
    with open(tmp_f_path, "w") as ff, open(tmp_c_path, "w") as fc:
        for filename in filenames:
            lines = utils.read_lines(filename.replace(".edus.tokens", ".arcs"), process=lambda line: line.split())
            assert len(lines) == 1
            line = lines[0]
            arcs = treetk.hyphens2arcs(line)
            fine_relations = [l for h,d,l in arcs]
            coarse_relations = [relation_mapper.f2c(l) for l in fine_relations]
            fine_relations = " ".join(fine_relations)
            coarse_relations = " ".join(coarse_relations)
            ff.write("%s\n" % fine_relations)
            fc.write("%s\n" % coarse_relations)

    utils.build_vocabulary(paths_file=[tmp_f_path],
                           path_vocab=os.path.join(config.getpath("data"), "scidtb-vocab", "relations.fine.vocab.txt"),
                           prune_at=50000,
                           min_count=-1,
                           special_words=["<root>"],
                           with_unk=False)
    utils.build_vocabulary(paths_file=[tmp_c_path],
                           path_vocab=os.path.join(config.getpath("data"), "scidtb-vocab", "relations.coarse.vocab.txt"),
                           prune_at=50000,
                           min_count=-1,
                           special_words=["<root>"],
                           with_unk=False)
Example #25
    def __init__(self, first_time, users, channel=None):
        self.channel = channel

        # region reload_init
        try:
            try:
                self.Config = utils.Config("debug.json")
            except FileNotFoundError:
                self.Config = utils.Config("config.json")
        except FileNotFoundError:
            self.Config = utils.Config("config.json.template")

        self.nickname = self.Config().main.nick
        self.password = self.Config().osu.irc if self.Config(
        ).osu.irc else None

        self.UPDATE_MSG = self.Config().main.update_msg
        self.FIRST_TIME_MSG = self.Config().main.first_time_msg

        self.user_pref = utils.Utils.create_sqlite_dict(
            "./userpref.db", "userpref")
        self.recommend = utils.Utils.create_sqlite_dict(
            "./recommend.db", "recommend")
        self.start_time = first_time

        self.users = users

        self.Commands = utils.Commands(self, self.Config)
        self.command_funcs = [
            func for func in dir(utils.Commands)
            if callable(getattr(utils.Commands, func))
            and not func.startswith("_")
        ]
        # endregion

        logger.debug("Trying nickname " + self.nickname)
        logger.debug("On server " + self.Config().main.server)
        logger.debug("Using password " + (
            self.password if self.password is not None else "\"None\""))
Example #26
def generate_new_caption(img_feat):
    tf.reset_default_graph()
    config = utils.Config()
    model_gen = model.Model(config, 1)
    save_file = path + '/weight_model/' + model_gen.config.model_name + '/train_model.ckpt'
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, save_file)

        captions = model_gen.generate_caption(sess, img_feat)[0:4]
        for caption in captions:
            print(utils.listToSentence(caption['sen']), 'score: ', caption['score'])
    return captions
Example #27
    async def reload(self, ctx):
        """Reloads the config file."""

        del self.bot.config

        self.bot.config = utils.Config()

        await utils.embed(
            ctx,
            discord.Embed(
                timestamp=datetime.utcnow(),
                title="Config Reloaded",
                description="All config data has been successfully reloaded."))
Example #28
def main():
    config = utils.Config()
    process1(os.path.join(config.getpath("rstdt"), "TRAINING"),
             os.path.join(config.getpath("data"), "rstdt", "wsj", "train"))
    process1(os.path.join(config.getpath("rstdt"), "TEST"),
             os.path.join(config.getpath("data"), "rstdt", "wsj", "test"))

    process2(os.path.join(config.getpath("data"), "rstdt", "wsj", "train"),
             os.path.join(config.getpath("data"), "rstdt", "renamed"),
             split="train")
    process2(os.path.join(config.getpath("data"), "rstdt", "wsj", "test"),
             os.path.join(config.getpath("data"), "rstdt", "renamed"),
             split="test")
Example #29
File: __init__.py Project: Juanvvc/scfs
def init_default_conf():
    """ Sets up a default configuration for DFS: uses $HOME/.dfs as
	the config dir, reads the default configuration from
	$HOME/.dfs/dfsrc, and sets up the logging system to use the file
	dfs.log in the configuration directory """

    global default_config_dir, default_config, default_log_file, default_config_file, dht

    # Default config directory: $HOME/.dfs
    default_config_dir = os.path.expanduser('~%s.dfs' % os.path.sep)
    # Create the default config directory if it does not exist
    if not os.path.exists(default_config_dir): os.mkdir(default_config_dir)
    default_config_file = default_config_dir + os.path.sep + 'dfsrc'
    default_log_file = default_config_dir + os.path.sep + 'dfs.log'
    # Load the default config file
    if not os.path.exists(default_config_file):
        open(default_config_file, 'w').close()
    default_config = utils.Config()
    default_config.load(open(default_config_file, 'r'))

    # Configures the logging system
    utils.configure_logging(
        level=logging.INFO,
        format='%(asctime)s %(name)s %(levelname)s %(message)s',
        datefmt='%H:%M:%S',
        filename=default_log_file,
        filemode='w')

    logging.info('Default configuration: %s' % default_config_file)

    # sets default configuration, if not set
    changed = False
    if not default_config.get('DHT:datadir'):
        default_config.set('DHT:datadir',
                           default_config_dir + os.path.sep + 'dhtdata')
        changed = True
    if not default_config.get('Main:UID'):
        default_config.set('Main:uid', utils.random_string(16))
        changed = True
    if not default_config.get('Main:nick'):
        default_config.set('Main:nick', utils.random_nick())
        changed = True
    if not default_config.get('Keys:kf'):
        logging.warning('There is no file key')
    if not default_config.get('Keys:kd'):
        logging.warning('There is no description key')
    if changed:
        default_config.save(open(default_config_file, 'w'))

    # Default DHT: a local DHT
    dht = DHT.LocalDHT(default_config)
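Example #29 drives a fourth interface: get()/set() with 'Section:key' strings plus load()/save() on open file objects. A minimal key-value sketch compatible with that usage, an assumption rather than the real scfs class, could be:

class Config:
    """Illustrative 'Section:key' = value store with load()/save()."""

    def __init__(self):
        self._data = {}

    def get(self, key):
        return self._data.get(key)

    def set(self, key, value):
        self._data[key] = value

    def load(self, fileobj):
        for line in fileobj:
            line = line.strip()
            if line and not line.startswith("#") and "=" in line:
                key, _, value = line.partition("=")
                self._data[key.strip()] = value.strip()

    def save(self, fileobj):
        for key, value in sorted(self._data.items()):
            fileobj.write("%s=%s\n" % (key, value))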
Example #30
def main():
    config = utils.Config()

    filenames = os.listdir(
        os.path.join(config.getpath("data"), "rstdt", "renamed"))
    filenames = [n for n in filenames if n.endswith(".edus")]
    filenames.sort()

    for filename in filenames:
        textpreprocessor.replace_digits.run(
            os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                         filename + ".tokenized.lowercased"),
            os.path.join(config.getpath("data"), "rstdt", "tmp.preprocessing",
                         filename + ".tokenized.lowercased.replace_digits"))