Пример #1
0
def write_to_file(output_file, prices):
    """Append price records to a comma-separated text file.

    A header line (the sorted keys of the first record) is written only when
    ``output_file`` does not exist yet or is still empty. Every record is
    then appended as one line containing its values ordered by key.

    Values are converted with ``str()`` and joined with commas as-is; no
    quoting or escaping is applied.

    Args:
        output_file: Path of the file to append to.
        prices: Sequence of dicts with string keys. An empty sequence is a
            no-op and leaves the file system untouched.
    """
    if not prices:
        # Nothing to append. Returning early also avoids indexing prices[0]
        # for the header, which would fail after creating an empty file.
        return

    # An existing but empty file still needs its header line.
    needs_header = (not os.path.isfile(output_file)
                    or os.path.getsize(output_file) == 0)
    with open(output_file, "a") as f:
        if needs_header:
            f.write(",".join(sorted(prices[0])) + "\n")
        for p in prices:
            # Order the values by key so they line up with the header.
            f.write(",".join(str(p[key]) for key in sorted(p)) + "\n")
Пример #2
0
def test_pipe():
    """Demonstrate pipe-style chaining on a generator and on file contents.

    Prints the sum of the even Fibonacci numbers below 4,000,000, then the
    words found in ``argparse.py`` (opened relative to the current working
    directory) and a per-word occurrence count sorted from most to least
    frequent.

    NOTE(review): ``where``, ``take_while``, ``add``, ``Pipe``, ``groupby``,
    ``select``, ``count`` and ``sort`` are presumably the helpers of the
    ``pipe`` package and ``findall`` is presumably ``re.findall`` -- confirm
    against the file's imports.
    """
    def fib():
        """Yield the Fibonacci numbers 0, 1, 1, 2, ... without end."""
        a, b = 0, 1
        while True:
            yield a
            a, b = b, a + b

    # Sum the even Fibonacci numbers that are smaller than 4,000,000.
    amount = fib() | where(lambda x: x % 2 == 0) | take_while(
        lambda x: x < 4000000) | add()
    print(amount)

    # Read a file, count how many times each word occurs in it, then sort
    # the words by that count from highest to lowest.
    with open('argparse.py') as f:
        fs = f.read()
        # Raw strings keep the regex free of invalid-escape warnings.
        print(findall(r'\w+', fs))
        print(fs
              | Pipe(lambda x: findall(r'\w+', x))
              | groupby(lambda x: x)
              | select(lambda x: (x[0], (x[1] | count)))
              | sort(key=lambda x: x[1], reverse=True))
Пример #3
0
# Register every tag from the tag list that the vocabulary contains as a
# member of its own tag group.
if os.path.exists(tag_list_path):
    with open(tag_list_path) as tag_list_file:
        logging.info("Processing " + tag_list_path)
        for key in tag_list_file:
            key = key.strip().decode("utf-8")
            if key in vocabs:
                ktag_to_utags[key].add(key)

# Without any known tag there is nothing to compare the words against.
# NOTE(review): BaseException bypasses ``except Exception`` handlers; kept
# as-is to preserve the raised type -- consider a narrower exception.
if len(ktag_to_utags) == 0:
    raise BaseException("Cannot find tag.csv or labelled_word.csv")

# For each unlabelled word, write one tab-separated line: the word followed
# by up to ten "tag,similarity" pairs, highest similarity first.
with open(unlabelled_word_path) as unlabelled_word_file:
    with open(result_path, "w") as result_file:
        # Stream the input line by line instead of loading it all up front.
        for line in unlabelled_word_file:
            word = line.strip().decode("utf-8")
            try:
                sim_tags_info = [
                    (tag, model.n_similarity([word], ktag_to_utags[tag]))
                    for tag in ktag_to_utags
                ] | sort(key=lambda tag_info: tag_info[1],
                         reverse=True) | as_list
                sim_tags_info = [
                    t[0] + "," + str(t[1]) for t in sim_tags_info[0:10]
                ]
                output_line = "\t".join([word] + sim_tags_info)
                result_file.write(output_line.encode("utf-8") + "\n")
            except Exception as ex:
                # Best effort: log the failure and continue with the next
                # word instead of aborting the whole run.
                logging.exception(ex)
                logging.warning("Cannot tag the word " + word)
Пример #4
0
 def upload_time(self):
     """Return the greatest ``upload_time`` found among ``self._versions``.

     The upload times are sorted in descending order and the leading
     element of that ordering is returned.
     """
     stamps = self._versions | pipe.map(lambda version: version.upload_time)
     newest_first = stamps | pipe.sort(reverse=True)
     return newest_first | pipe.first
Пример #5
0
            line = line.strip().decode("utf-8")
            if len(line.split("\t")) == 2:
                utag = line.split("\t")[0]
                ktag = line.split("\t")[1]
                if utag in vocabs:
                    ktag_to_utags[ktag].add(utag)
# Each tag in the tag list that also appears in the vocabulary becomes a
# member of its own tag group.
if os.path.exists(tag_list_path):
    with open(tag_list_path) as tag_list_file:
        logging.info("Processing " + tag_list_path)
        for key in tag_list_file:
            key = key.strip().decode("utf-8")
            if key in vocabs:
                ktag_to_utags[key].add(key)

# No known tags means no similarity can be computed for any word.
# NOTE(review): BaseException bypasses ``except Exception`` handlers; kept
# as-is to preserve the raised type -- consider a narrower exception.
if len(ktag_to_utags) == 0:
    raise BaseException("Cannot find tag.csv or labelled_word.csv")

# Emit one tab-separated line per unlabelled word: the word itself, then up
# to ten "tag,similarity" pairs ordered by descending similarity.
with open(unlabelled_word_path) as unlabelled_word_file:
    with open(result_path, "w") as result_file:
        # Iterate the file lazily rather than building a list of all words.
        for line in unlabelled_word_file:
            word = line.strip().decode("utf-8")
            try:
                sim_tags_info = [(tag, model.n_similarity([word], ktag_to_utags[tag])) for tag in
                                 ktag_to_utags] | sort(
                    key=lambda tag_info: tag_info[1], reverse=True) | as_list
                sim_tags_info = [t[0] + "," + str(t[1]) for t in sim_tags_info[0:10]]
                output_line = "\t".join([word] + sim_tags_info)
                result_file.write(output_line.encode("utf-8") + "\n")
            except Exception as ex:
                # Best effort: record the failure and move on to the next
                # word rather than stopping the run.
                logging.exception(ex)
                logging.warning("Cannot tag the word " + word)