def _parse_seed_lines(lines):
    """Parse ``seed,score`` lines into a ``{seed: int(score)}`` dict.

    Lines that do not split into exactly two comma-separated fields are
    skipped (this covers both lines with extra commas and lines with no
    comma at all, such as blank lines).
    """
    seeds = dict()
    for line in lines:
        fields = line.split(',')
        if len(fields) != 2:
            continue
        seed, score = fields
        seeds[seed] = int(score)
    return seeds


def load_sentiment_seeds():
    """
    Load the sentiment seed words (sentiment lexicons collected online).

    Each of the positive / negative / neutral seed files is read into a dict
    whose keys are the seed words and whose values are their integer scores.
    Words that occur in more than one of the three dicts are treated as
    conflicting and removed from every dict they appear in.

    The sizes of the three dicts are printed before and after conflict
    removal.

    :return: tuple ``(pos_seeds, neg_seeds, neu_seeds)``
    """
    pos_lines = read_lines(pos_seeds_path)
    neg_lines = read_lines(neg_seeds_path)
    neu_lines = read_lines(neu_seeds_path)

    pos_seeds = _parse_seed_lines(pos_lines)
    neg_seeds = _parse_seed_lines(neg_lines)
    neu_seeds = _parse_seed_lines(neu_lines)

    print(len(pos_seeds), len(neg_seeds), len(neu_seeds))

    # Remove conflicting sentiment words.  The conflicts are collected first
    # so that no dict is modified while it is being iterated.
    conflicts = [word for word in pos_seeds
                 if word in neg_seeds or word in neu_seeds]
    for word in conflicts:
        del pos_seeds[word]
        neg_seeds.pop(word, None)
        neu_seeds.pop(word, None)

    # Whatever is still shared between negative and neutral goes as well.
    conflicts = [word for word in neg_seeds if word in neu_seeds]
    for word in conflicts:
        del neg_seeds[word]
        del neu_seeds[word]

    print(len(pos_seeds), len(neg_seeds), len(neu_seeds))
    return pos_seeds, neg_seeds, neu_seeds
예제 #2
0
def brute(docs_path, qrys_path, out_path):
    """Answer every query by testing it against every document.

    For each query the ids of all documents for which ``matches(qry, doc)``
    is true are collected and the five largest ids are kept.  One formatted
    line per query (via ``to_str``) is written to ``out_path``.
    """
    documents = preprocess(read_lines(docs_path))
    queries = preprocess(read_lines(qrys_path))

    def top_matches(query):
        # Ids of all documents that match this query, reduced to the 5 largest.
        matching_ids = (doc_id for doc_id, doc in documents
                        if matches(query, doc))
        return nlargest(5, matching_ids)

    results = ((qry_id, top_matches(qry)) for qry_id, qry in queries)
    write_lines(out_path, imap(to_str, results))
예제 #3
0
파일: docs.py 프로젝트: c-w/ug4_TweetSearch
def docs(docs_path, qrys_path, out_path):
    """Answer every query through an inverted index over the documents.

    For each query the inverted lists of its terms are merged with
    ``simple_kway_merge``; the merged result is reversed and its first five
    entries are kept.  One formatted line per query (via ``to_str``) is
    written to ``out_path``.
    """
    documents = preprocess(read_lines(docs_path))
    queries = preprocess(read_lines(qrys_path))

    postings = inverted_index(documents)

    def retrieve(query_terms):
        merged = simple_kway_merge(postings[term] for term in query_terms)
        reversed_merge = merged[::-1]
        return reversed_merge[:5]

    results = ((qry_id, retrieve(qry)) for qry_id, qry in queries)
    write_lines(out_path, imap(to_str, results))
def load_sentiment_seeds():
    """
    Load the sentiment seeds.

    Reads the positive, negative and neutral seed files, builds one combined
    seed collection from all lines (second ``get_seeds`` argument 1.0) and one
    collection per polarity (0.32 for positive, 1.0 for negative, 0.13 for
    neutral), then passes the three per-polarity collections through
    ``norm_seeds``.

    :return: tuple ``(sentiment_seeds, pos_seeds, neg_seeds, neu_seeds)``
    """
    positive_lines = read_lines(pos_seeds_path)
    negative_lines = read_lines(neg_seeds_path)
    neutral_lines = read_lines(neu_seeds_path)

    all_lines = positive_lines + negative_lines + neutral_lines
    sentiment_seeds = get_seeds(all_lines, 1.0)

    normalized = norm_seeds(
        get_seeds(positive_lines, 0.32),
        get_seeds(negative_lines, 1.0),
        get_seeds(neutral_lines, 0.13),
    )
    pos_seeds, neg_seeds, neu_seeds = normalized
    return sentiment_seeds, pos_seeds, neg_seeds, neu_seeds
예제 #5
0
def main():
    """Solve both parts of the puzzle stored in ``input21`` and print them.

    Every input line is split into the ingredient words before `` (`` and the
    comma-separated names inside the parentheses (the first 10 characters
    after ``(`` are skipped).
    """
    data = read_lines("../resources/aoc2020/input21")
    food = []
    for line in data:
        ingredients = line[:line.index(' (')].split(' ')
        listed = line[line.index('(') + 10:-1].split(', ')
        food.append((ingredients, listed))

    # part 1
    # For each allergen keep the ingredients common to every food listing it.
    ing_map = {}
    for ingredients, listed in food:
        for allergen in listed:
            if allergen in ing_map:
                ing_map[allergen] = set(ingredients).intersection(
                    ing_map[allergen])
            else:
                ing_map[allergen] = set(ingredients)

    suspects = reduce(set.union, ing_map.values())
    result1 = sum(1
                  for ingredients, _ in food
                  for ing in ingredients
                  if ing not in suspects)
    print("result1 =", result1)  # 2170

    # part 2
    # Pairwise elimination: for every ordered pair of distinct allergens the
    # larger (or equally large, first) candidate set loses the members of the
    # other one.  Only values of existing keys are replaced during iteration.
    for first in ing_map:
        for second in ing_map:
            if first == second:
                continue
            first_set = ing_map[first]
            second_set = ing_map[second]
            if len(first_set) >= len(second_set):
                ing_map[first] = first_set.difference(second_set)
            else:
                ing_map[second] = second_set.difference(first_set)

    result2 = ','.join(ing_map[name].pop() for name in sorted(ing_map))
    print("result2 =",
          result2)  # nfnfk,nbgklf,clvr,fttbhdr,qjxxpr,hdsm,sjhds,xchzh
예제 #6
0
def total_fuel(input_file: str, fuel: Callable[[int], int] = fuel_calc) -> int:
    """
    Sum the fuel needed for every module mass listed in ``input_file``.

    Each line of the file is converted to an int and passed through the
    ``fuel`` calculation; the individual results are added up.
    """
    total = 0
    for raw_mass in util.read_lines(input_file):
        total += fuel(int(raw_mass))
    return total
예제 #7
0
    def import_biologic(self, m_file):
        """Function to import CA mpt files from Biologic potentiostats.

        The first 57 lines of the file are read as header.  Nothing further
        happens unless the header entry ``self.lines[3]`` contains "Chrono".
        Otherwise the header values matched by ``self.extract_par`` are stored
        on the instance as strings, the data columns 7 and 10 are loaded, and
        "time [s]" / "Icell [mA]" columns are appended to ``self.chronodata``.

        Args:
            m_file (str): Path to file which will be imported.
        """

        self.lines = util.read_lines(m_file, range(57))
        if "Chrono" not in self.lines[3]:
            return

        # Each entry: [attribute name, header key, regex capturing the value].
        self.extract_par = [
            [r"ecell", r"Ei", r"Ei\s\(V\)\s*(\S*)"],
            [r"skiprows", r"Nb header lines", r"lines\s:\s*(\d*)\s*"],
        ]

        ext = util.extract_value(self.extract_par, self.lines.values())
        for x in ext:
            setattr(self, x[0], str(x[1]))

        # The handle stays available as ``self.file_trimmed`` but is closed
        # once the data has been loaded, so it no longer leaks.
        self.file_trimmed = codecs.open(m_file, encoding="cp1252")
        with self.file_trimmed:
            self.data = np.loadtxt(
                self.file_trimmed,
                usecols=(7, 10),
                skiprows=int(self.skiprows),
                delimiter="\t",
            )
        self.time = pandas.DataFrame(
            {"time [s]": self.data[:, 0].tolist()})
        self.icell = pandas.DataFrame(
            {"Icell [mA]": self.data[:, 1].tolist()})
        self.chronodata = pandas.concat([self.chronodata, self.time],
                                        axis=1)
        self.chronodata = pandas.concat([self.chronodata, self.icell],
                                        axis=1)
예제 #8
0
    def import_biologic(self, m_file):
        """Function to import PEIS mpt files from Biologic potentiostats.

        The first 83 lines of the file are read as header.  Nothing further
        happens unless the header entry ``self.lines[3]`` contains
        "Potentio Electrochemical Impedance Spectroscopy".  Otherwise the
        header values matched by ``self.extract_par`` are stored on the
        instance as strings, data columns 0-2 are loaded, and "re R" /
        "img R" columns (data columns 1 and 2) are appended to
        ``self.peisdata``.

        Args:
            m_file (str): Path to file which will be imported.
        """

        self.lines = util.read_lines(m_file, range(83))
        if ("Potentio Electrochemical Impedance Spectroscopy"
                not in self.lines[3]):
            return

        # Each entry: [attribute name, header key, regex capturing the value].
        self.extract_par = [
            [r"ecell", r"E (V)", r"E\s\(V\)\s*(\S*)"],
            [r"fi", r"fi                  ", r"^fi\s*(\S*)"],
            [r"fi_unit", r"unit fi", r"\sfi\s*(\S*)"],
            [r"ff", r"ff                  ", r"^ff\s*(\S*)"],
            [r"ff_unit", r"unit ff", r"\sff\s*(\S*)"],
            [r"amplitude", r"Va", r"\(mV\)\s*(\S*)"],
            [r"skiprows", r"Nb header lines", r"lines\s:\s*(\d*)\s*"],
        ]

        ext = util.extract_value(self.extract_par, self.lines.values())
        for x in ext:
            setattr(self, x[0], str(x[1]))

        # The handle stays available as ``self.file_trimmed`` but is closed
        # once the data has been loaded, so it no longer leaks.
        self.file_trimmed = codecs.open(m_file, encoding="cp1252")
        with self.file_trimmed:
            self.data = np.loadtxt(
                self.file_trimmed,
                usecols=(0, 1, 2),
                skiprows=int(self.skiprows),
                delimiter="\t",
            )
        self.rer = pandas.DataFrame({"re R": self.data[:, 1].tolist()})
        self.imgr = pandas.DataFrame({"img R": self.data[:, 2].tolist()})
        self.peisdata = pandas.concat([self.peisdata, self.rer], axis=1)
        self.peisdata = pandas.concat([self.peisdata, self.imgr], axis=1)
예제 #9
0
    def import_ec4(self, m_file):
        """Imports Nordic Electrochemistry EC4 file format.

        The first 96 lines are treated as header: the "Start", "V1", "V2" and
        "Rate" entries are parsed with ``ureg`` (decimal commas replaced by
        dots) and stored as ``self.vs``, ``self.v1``, ``self.v2`` and
        ``self.rate``.  The remaining rows are loaded as data and columns 1
        and 2 are appended to ``self.cvdata`` as "Cycle 1: Ecell" and
        "Cycle 1: Icell".

        Args:
            m_file (str): Path to file which will be imported.
        """

        header_rows = 96
        self.lines = util.read_lines(m_file, range(header_rows))
        # Each entry: [attribute name, header key, regex capturing the value].
        self.extract_par = [
            [r"vs", r"Start", r"Start\s(\S*.\S)"],
            [r"v1", r"V1", r"V1\s(\S*.\S)"],
            [r"v2", r"V2", r"V2\s(\S*.\S)"],
            [r"rate", r"Rate", r"Rate\s(\S*.\S)"],
        ]
        for entry in util.extract_value(self.extract_par,
                                        self.lines.values()):
            quantity = ureg(entry[1].replace(",", "."))
            setattr(self, entry[0], quantity)

        self.data = np.loadtxt(m_file, usecols=(0, 1, 2),
                               skiprows=header_rows)

        ecell_values = self.data[:, 1].tolist()
        self.ecell = pandas.DataFrame({"Cycle 1: Ecell": ecell_values})
        self.cvdata = pandas.concat([self.cvdata, self.ecell], axis=1)

        icell_values = self.data[:, 2].tolist()
        self.icell = pandas.DataFrame({"Cycle 1: Icell": icell_values})
        self.cvdata = pandas.concat([self.cvdata, self.icell], axis=1)
예제 #10
0
def terms(docs_path, qrys_path, out_path):
    """Answer every query by intersecting the inverted lists of its terms.

    One term is popped from the query to seed the candidate set, which is
    then intersected with the inverted list of every remaining term.  The
    five largest surviving ids are kept, and one formatted line per query
    (via ``to_str``) is written to ``out_path``.
    """
    documents = preprocess(read_lines(docs_path))
    queries = preprocess(read_lines(qrys_path))

    inv_index = inverted_index(documents)

    def top_hits(query_terms):
        # NOTE: pop() removes the seed term from the query object itself.
        hits = set(inv_index[query_terms.pop()])
        for term in query_terms:
            hits.intersection_update(inv_index[term])
        return nlargest(5, hits)

    results = ((qry_id, top_hits(qry)) for qry_id, qry in queries)
    write_lines(out_path, imap(to_str, results))
예제 #11
0
def main():
    """Solve both parts of the puzzle stored in ``input13`` and print them.

    The first input line is a timestamp, the second a comma-separated list of
    bus ids in which ``x`` marks an unused slot.
    """
    data = read_lines("../resources/aoc2020/input13")
    timestamp = int(data[0])
    busses = data[1].split(",")

    # part 1
    # The wait for a bus with period r is the distance from `timestamp` to the
    # next multiple of r (0 if the bus leaves exactly at `timestamp`).  This
    # is computed directly instead of scanning minute by minute; min() keeps
    # the first bus in list order on ties.
    relevant = [int(b) for b in busses if b != 'x']
    bus = min(relevant, key=lambda r: (-timestamp) % r)
    t1 = (-timestamp) % bus

    result1 = bus * t1
    print("result1 =", result1)  # 246

    # part 2
    indexed = [(offset, int(bus_id))
               for offset, bus_id in enumerate(busses)
               if bus_id != 'x']

    # Sieve: once t2 satisfies all buses handled so far, advancing by the
    # product of their ids keeps them satisfied while searching for the next.
    # NOTE(review): multiplying the step by each id assumes the ids are
    # pairwise coprime -- confirm for other inputs.
    t2, step = 0, 1
    for offset, number in indexed:
        while (t2 + offset) % number != 0:
            t2 = t2 + step
        step = step * number

    result2 = t2
    print("result2 =", result2)  # 939490236001473
def test():
    """
    Emit one message per log level and check what reaches the log file.

    In console, you are expected to see::

        info
        warning
        error
        critical

    In logfile, you are expected to see::

        debug
        info
        warning
        error
        critical
    """
    log_dir = os.path.dirname(__file__)
    path = os.path.join(log_dir, "SingleFile.log")
    logger = SingleFileLogger(rand_name=True, path=path, reset=True)

    # One message per level, lowest to highest; the message text is the
    # level name itself.
    for level in ("debug", "info", "warning", "error", "critical"):
        getattr(logger, level)(level)

    logger.remove_all_handler()

    # All five messages must have been written to the log file.
    logged = read_lines(path)
    assert len(logged) == 5
예제 #13
0
def load_train_data(word_voc,
                    tag_voc,
                    label_voc,
                    max_sent_len=100,
                    word_embed_dim=50,
                    sentence_len=False):
    """
    Load the training data and encode it as int32 arrays.

    Each line of ``Train.csv`` is split on ``|`` into id, target, label and
    the sentence (all remaining fields joined by spaces).

    :param word_voc: mapping word -> id
    :param tag_voc: mapping tag -> id
    :param label_voc: mapping label -> id
    :param max_sent_len: maximum sentence length (row width of the arrays)
    :param word_embed_dim: not used by this function
    :param sentence_len: when true, the per-sample sentence lengths are
        appended to the returned list
    :return: list ``[target_indices, sentences, tags, positions, targets,
        labels, ids, target_strings]`` (plus sentence lengths if requested)
    """
    lines = read_lines('./com_data/data_h/Train.csv')
    sample_count = len(lines)

    def _zeros(*shape):
        return np.zeros(shape, dtype='int32')

    sample_ids = []
    target_strs = []
    target_indices = _zeros(sample_count)
    sentence_ids = _zeros(sample_count, max_sent_len)
    sentence_lens = _zeros(sample_count)
    tag_ids = _zeros(sample_count, max_sent_len)
    position_ids = _zeros(sample_count, max_sent_len)
    target_ids = _zeros(sample_count, max_sent_len)
    label_ids = _zeros(sample_count)

    for row, line in enumerate(lines):
        fields = line.split('|')
        num, target, label = fields[:3]
        label_id = label_voc[label]
        words_all, tags_all = get_words_tags(' '.join(fields[3:]))
        # Truncate the sentence (presumably around the target -- see
        # cut_sentence).
        words, tags = cut_sentence(target, words_all, tags_all, max_sent_len)
        sentence_lens[row] = len(words)

        # Index of the target inside the (cut) sentence; 0 when it is absent.
        if target in words:
            target_index = words.index(target)
        else:
            target_index = 0
        word_arr, tag_arr, position_arr = get_sentence_ids(
            words, tags, word_voc, tag_voc, target_index, max_sent_len)

        sample_ids.append(num)
        target_strs.append(target)
        target_indices[row] = target_index
        sentence_ids[row, :] = word_arr
        tag_ids[row, :] = tag_arr
        position_ids[row, :] = position_arr

        # Left-pad with zeros, then repeat the target id once per word.
        if target in word_voc:
            padding = [0] * (max_sent_len - len(words))
            target_row = padding + [word_voc[target]] * len(words)
        else:
            target_row = [0] * max_sent_len
        target_ids[row, :] = target_row
        label_ids[row] = label_id

    train_data = [
        target_indices, sentence_ids, tag_ids, position_ids,
        target_ids, label_ids, sample_ids, target_strs
    ]
    if sentence_len:
        train_data.append(sentence_lens)
    return train_data
예제 #14
0
def load_external_data(word_voc, tag_voc, label_voc, max_sent_len=100, word_embed_dim=50):
    """
    Load the external data set and encode it as int32 arrays.

    Each line of ``external_train.csv`` is split on ``|`` into id, target and
    the sentence (all remaining fields joined by spaces).  ``label_voc`` and
    ``word_embed_dim`` are not used by this function.

    :return: list ``[ids, target_strings, sentences, tags, positions,
        targets]``
    """
    lines = read_lines('./external_train_data/external_train.csv')
    sample_count = len(lines)

    def _matrix():
        return np.zeros((sample_count, max_sent_len), dtype='int32')

    sample_ids = []
    target_strs = []
    sentence_ids = _matrix()
    tag_ids = _matrix()
    position_ids = _matrix()
    target_ids = _matrix()

    for row, line in enumerate(lines):
        fields = line.split('|')
        num, target = fields[:2]
        words_all, tags_all = get_words_tags(' '.join(fields[2:]))
        # Truncate the sentence (presumably around the target -- see
        # cut_sentence).
        words, tags = cut_sentence(target, words_all, tags_all, max_sent_len)

        # 1-based position of the target in the sentence; 0 when absent.
        if target in words:
            target_index = words.index(target) + 1
        else:
            target_index = 0
        word_arr, tag_arr, position_arr = get_sentence_ids(
            words, tags, word_voc, tag_voc, target_index, max_sent_len)

        sample_ids.append(num)
        target_strs.append(target)
        sentence_ids[row, :] = word_arr
        tag_ids[row, :] = tag_arr
        position_ids[row, :] = position_arr

        # Left-pad with zeros, then repeat the target id once per word.
        if target in word_voc:
            padding = [0] * (max_sent_len - len(words))
            target_row = padding + [word_voc[target]] * len(words)
        else:
            target_row = [0] * max_sent_len
        target_ids[row, :] = target_row

    return [sample_ids, target_strs, sentence_ids, tag_ids, position_ids, target_ids]
예제 #15
0
def main():
    """Solve both parts of the puzzle stored in ``input19`` and print them.

    The messages are the input lines that follow the rules and one separator
    line.
    """
    data = read_lines("../resources/aoc2020/input19")
    rules = parse_rules(data)
    messages = data[len(rules) + 1:]

    def count_full_matches(regex):
        return sum(1 for message in messages
                   if re.fullmatch(regex, message) is not None)

    # part 1
    result1 = count_full_matches(resolve(rules, '0'))
    print("result1 =", result1)  # 213

    # part 2
    # re-parse and update rules
    rules = parse_rules(data)
    rules.update({'8': '42 | 42 8', '11': '42 31 | 42 11 31'})

    # Raise the third argument of resolve() step by step until the number of
    # matching messages stops changing.
    result2 = 0
    for depth in range(2, 10):
        previous = result2
        result2 = count_full_matches(resolve(rules, '0', depth))
        if result2 == previous:
            break

    print("result2 =", result2)  # 325
예제 #16
0
def main():
    """Solve both parts of the puzzle stored in ``input17`` and print them.

    Cells marked ``#`` in the input grid are the initially active cubes; they
    are placed at z = 0 (and w = 0 for part 2) and advanced six steps with
    ``proceed_3d`` / ``proceed_4d``.  Each result is the number of cubes left.
    """
    data = read_lines("../resources/aoc2020/input17")

    # The starting layer is the same for both parts, so parse it once.
    active = [(x, y)
              for y, row in enumerate(data)
              for x, cell in enumerate(row)
              if cell == '#']

    # part 1
    cubes = {(x, y, 0) for x, y in active}
    for _ in range(6):
        cubes = proceed_3d(cubes)

    result1 = len(cubes)
    print("result1 =", result1)  # 273

    # part 2
    hcubes = {(x, y, 0, 0) for x, y in active}
    for _ in range(6):
        hcubes = proceed_4d(hcubes)

    result2 = len(hcubes)
    # This line previously printed the label "result1 =" for the part 2 value.
    print("result2 =", result2)  # 1504
예제 #17
0
def main():
    """Solve both parts of the puzzle stored in ``input14`` and print them.

    Lines starting with ``mask`` set the current mask (text from column 7
    on); every other line is a write of the form ``...[address] = value``.
    """
    instructions = read_lines("../resources/aoc2020/input14")

    def parse_write(inst):
        # Address is the text between '[' and ']'; the operand starts two
        # characters after '='.
        address = int(inst[inst.index('[') + 1:inst.index(']')])
        operand = int(inst[inst.index('=') + 2:])
        return address, operand

    # part 1: the mask transforms the value being written
    memory, mask = {}, None
    for inst in instructions:
        if inst.startswith("mask"):
            mask = inst[7:]
            continue
        address, operand = parse_write(inst)
        memory[address] = mask_value(operand, mask)

    result1 = sum(memory.values())
    print("result1 =", result1)  # 9967721333886

    # part 2: the mask expands the address into several target addresses
    memory, mask = {}, None
    for inst in instructions:
        if inst.startswith("mask"):
            mask = inst[7:]
            continue
        address, operand = parse_write(inst)
        for target in mask_address(address, mask):
            memory[target] = operand

    result2 = sum(memory.values())
    print("result2 =", result2)  # 4355897790573
예제 #18
0
def main():
    """Solve both parts of the puzzle stored in ``input08`` and print them.

    Part 2 patches one instruction at a time (``jmp`` -> ``nop`` or, for any
    other non-``acc`` line, ``nop`` -> ``jmp``) and keeps the second element
    of the first ``execute_program`` result whose first element is truthy.
    """
    program = read_lines("../resources/aoc2020/input08")

    # part 1
    result1 = execute_program(program)[1]
    print("result1 =", result1)  # 1614

    # part 2
    result2 = -1
    for position, instruction in enumerate(program):
        if instruction.startswith("acc"):
            continue
        if instruction.startswith('jmp'):
            old, new = 'jmp', 'nop'
        else:
            old, new = 'nop', 'jmp'
        patched = program.copy()
        patched[position] = instruction.replace(old, new)
        outcome = execute_program(patched)
        if outcome[0]:
            result2 = outcome[1]
            break

    print("result2 =", result2)  # 1260
예제 #19
0
def main():
    """Read ``input01`` and print the results of ``part1`` and ``part2``."""
    numbers = read_lines("../resources/aoc2020/input01")

    # part 1
    result1 = part1(numbers)
    print("result1 =", result1)  # 877971

    # part 2
    result2 = part2(numbers)
    print("result2 =", result2)  # 203481432
예제 #20
0
def read_float(path):
    """Yield every line of ``path`` that parses as a float.

    Lines starting with ``#`` are skipped, as are lines that ``float()``
    rejects.
    """
    for text in read_lines(path):
        if not text.startswith('#'):
            try:
                yield float(text)
            except ValueError:
                pass
예제 #21
0
def init_voc():
    """
    Build the vocabularies from ``Train1.csv`` and ``Test1.csv``.

    return:
        word_voc: word -> id; '<' is 2, '>' is 3, every other word gets its
            rank by descending frequency plus 4
        tag_voc: tag -> id, ids start at 1 in sorted tag order
        label_voc: label -> id over the sorted labels 'neg', 'neu', 'pos'
        label_voc_rev: id -> label (inverse of label_voc)
    """
    lines = read_lines('./com_data/data_h/Train1.csv')
    lines += read_lines('./com_data/data_h/Test1.csv')

    # Count word frequencies and collect every tag that occurs.
    word_counts = defaultdict(int)
    seen_tags = []
    for line in lines:
        sentence = ' '.join(line.split('|')[3:])
        words, tags = get_words_tags(sentence)
        seen_tags.extend(tags)
        for word in words:
            word_counts[word] += 1

    # Most frequent words first.
    ranked = sorted(word_counts.items(), key=lambda d: d[1], reverse=True)
    word_voc = {'<': 2, '>': 3}
    for rank, entry in enumerate(ranked):
        word = entry[0]
        if word not in word_voc:
            word_voc[word] = rank + 4

    unique_tags = sorted(set(seen_tags))
    tag_voc = {tag: idx for idx, tag in enumerate(unique_tags, 1)}
    have = len(unique_tags)

    labels = sorted(['pos', 'neu', 'neg'])
    label_voc = {label: idx for idx, label in enumerate(labels)}
    label_voc_rev = {idx: label for label, idx in label_voc.items()}

    print('have:', have)
    return word_voc, tag_voc, label_voc, label_voc_rev
예제 #22
0
def part1():
    """Print the highest seat id found in ``../input5.txt``.

    Reading stops at the first empty line; 0 is printed when no seat id
    exceeds it.
    """
    best = 0
    for boarding_pass in read_lines("../input5.txt"):
        if not boarding_pass:
            break

        seat = get_seat_id(boarding_pass)
        if seat > best:
            best = seat

    print(best)
예제 #23
0
def parse_grid(dims):
    """Read ``inputs/day17.txt`` into a ``defaultdict(bool)`` of active cells.

    Every ``#`` at (col, row) becomes the key ``(col, row, 0, ...)`` padded
    with zeros to ``dims`` coordinates and mapped to True.
    """
    grid = defaultdict(bool)
    padding = (0, ) * (dims - 2)
    for row, line in enumerate(util.read_lines("inputs/day17.txt")):
        for col, char in enumerate(line):
            if char != "#":
                continue
            grid[(col, row) + padding] = True
    return grid
예제 #24
0
def load_train_data(word_voc, tag_voc, label_voc, max_sent_len=100, word_embed_dim=50,
		    sentence_len=False):
    """
    Load the training data and encode it as int32 arrays.

    Each line of ``Train1.csv`` is split on ``|`` into id, target, label and
    the sentence (all remaining fields joined by spaces).

    :param word_voc: mapping word -> id
    :param tag_voc: mapping tag -> id
    :param label_voc: mapping label -> id
    :param max_sent_len: maximum sentence length (row width of the arrays)
    :param word_embed_dim: not used by this function
    :param sentence_len: when true, the per-sample sentence lengths are
        appended to the returned list
    :return: list ``[train_sentence, train_tag, train_position, train_target,
        train_label, train_num, train_targets_str]`` (plus
        ``train_sentence_len`` if requested)
    """
    # TODO build the training data
    lines = read_lines('./com_data/data_h/Train1.csv')
    train_count = len(lines)
    train_num = []
    train_targets_str = []  # targets
    train_sentence = np.zeros((train_count, max_sent_len), dtype='int32')  # sent
    train_sentence_len = np.zeros((train_count,), dtype='int32')  # sentence lengths
    train_tag = np.zeros((train_count, max_sent_len), dtype='int32')  # tags
    train_position = np.zeros((train_count, max_sent_len), dtype='int32')  # position of the target in the sentence
    train_target = np.zeros((train_count, max_sent_len), dtype='int32')
    train_label = np.zeros((train_count,), dtype='int32')  # label
    for i in range(train_count):
        line = lines[i]
        items = line.split('|')
        num, target, label = items[:3] 
        label_id = label_voc[label]  # label id
        sentence_all = ' '.join(items[3:])
        words_all, tags_all = get_words_tags(sentence_all)
        # Only cut sentences that would not leave room for the two markers
        # inserted below; otherwise `words`/`tags` alias the full lists.
        if len(words_all)>max_sent_len - 2:
            words, tags = cut_sentence(target, words_all, tags_all, max_sent_len)  # cut the sentence
        else:
            words, tags = words_all, tags_all
        get_persentence_target_count(num, target, words, words_all, en='train')
        # Length and target index are recorded BEFORE the markers are inserted.
        train_sentence_len[i] = len(words)
        target_index = words.index(target)+1 if target in words else 0  # 1-based position of the target; 0 when absent
        if target in words: # mark the position of the target, e.g. <BMW>
            # NOTE(review): each call re-evaluates words.index(target) after
            # the previous insert. Traced on [a, T, b] this yields words
            # [a, '<', T, b, '>'] ('>' lands after the word following the
            # target) and tags [ta, tT, 'wcar', tb, 'wcar'] (the target's own
            # tag ends up under '<'). Confirm whether that is intended.
            # In the uncut branch these inserts also mutate words_all/tags_all.
            words.insert(words.index(target), '<')
            words.insert(words.index(target) + 2, '>')
            tags.insert(words.index(target),'wcar') # marker tag 'wcar'
            tags.insert(words.index(target)+2,'wcar')
        word_arr, tag_arr, position_arr = \
            get_sentence_ids(words,tags,word_voc,tag_voc,target_index,max_sent_len)
        # store the encoded sample
        train_num.append(num)
        train_targets_str.append(target)
        train_sentence[i, :] = word_arr[:]
        train_tag[i, :] = tag_arr[:]
        train_position[i, :] = position_arr[:]
        # Left-pad with zeros, then repeat the target id once per word
        # (len(words) here includes any inserted markers).
        if target in word_voc:
            train_target[i, :] = [0]*(max_sent_len-len(words)) + [word_voc[target]]*len(words)
        else:
            train_target[i, :] = [0] * max_sent_len
        train_label[i] = label_id
    train_data = [train_sentence, train_tag, train_position, train_target, train_label, train_num, train_targets_str]
    if sentence_len:
        train_data.append(train_sentence_len)
    return train_data
예제 #25
0
def main():
    """Evaluate every expression in ``input18`` with both evaluators.

    Part 1 sums the ``evaluate_ltr`` results, part 2 the ``evaluate_adv``
    results; both sums are printed.
    """
    data = read_lines("../resources/aoc2020/input18")

    # part 1
    result1 = sum(evaluate_ltr(expression) for expression in data)
    print("result1 =", result1)  # 36382392389406

    # part 2
    result2 = sum(evaluate_adv(expression) for expression in data)
    print("result2 =", result2)  # 381107029777968
예제 #26
0
def read_int(path):
    """Yield every line of ``path`` that parses as an int.

    Lines starting with ``#`` are skipped, except that such a line containing
    the text ``float`` raises ``Float``.  Lines that ``int()`` rejects are
    skipped as well.
    """
    for text in read_lines(path):
        is_comment = text.startswith('#')
        if is_comment and 'float' in text:
            raise Float
        if is_comment:
            continue
        try:
            yield int(text)
        except ValueError:
            pass
예제 #27
0
def load_test_data(word_voc, tag_voc, label_voc, max_sent_len=100, word_embed_dim=50,
                   sentence_len=False):
    """
    Load the test data and encode it as int32 arrays.

    Each line of ``Test1.csv`` is split on ``|`` into id, target and the
    sentence (all remaining fields joined by spaces).  While processing,
    ``id,target,pol`` lines are written to ``guize1.csv``.

    :param word_voc: mapping word -> id
    :param tag_voc: mapping tag -> id
    :param label_voc: not used by this function
    :param max_sent_len: maximum sentence length (row width of the arrays)
    :param word_embed_dim: not used by this function
    :param sentence_len: when true, the per-sample sentence lengths are
        appended to the returned list
    :return: list ``[test_nums, test_targets_str, test_sentence, test_tag,
        test_position, test_target]`` (plus ``test_sentence_len`` if
        requested)
    """
    # The output file is managed by a ``with`` block so the handle is closed
    # even when processing a line raises.
    with open('./com_data/data_h/guize1.csv', 'w', encoding='utf-8') as w_fp:
        lines = read_lines('./com_data/data_h/Test1.csv')
        test_count = len(lines)
        test_nums = []  # test sample ids
        test_targets_str = []  # targets
        test_sentence = np.zeros((test_count, max_sent_len), dtype='int32')  # sent
        test_sentence_len = np.zeros((test_count,), dtype='int32')  # sentence lengths
        test_tag = np.zeros((test_count, max_sent_len), dtype='int32')  # tags
        test_position = np.zeros((test_count, max_sent_len), dtype='int32')  # target
        test_target = np.zeros((test_count, max_sent_len), dtype='int32')
        for i, line in enumerate(lines):
            items = line.split('|')
            num, target = items[:2]
            sentence_all = ' '.join(items[2:])
            words_all, tags_all = get_words_tags(sentence_all)
            # NOTE(review): `pol` is never assigned inside this function; it
            # must come from module scope or this raises NameError -- confirm
            # where it is supposed to be computed.
            if pol != '':
                t_target = re.sub('_', ' ', target)
                string1 = num + ',' + t_target + ',' + pol + '\n'
                w_fp.write(string1)
            # Only cut sentences that would not leave room for the two markers
            # inserted below; otherwise `words`/`tags` alias the full lists.
            if len(words_all) > max_sent_len - 2:
                words, tags = cut_sentence(target, words_all, tags_all, max_sent_len)
            else:
                words, tags = words_all, tags_all
            get_persentence_target_count(num, target, words, words_all, en='test')
            # Length and target index are recorded BEFORE the markers are
            # inserted.
            test_sentence_len[i] = len(words)
            # 1-based position of the target in the sentence; 0 when absent.
            target_index = words.index(target) + 1 if target in words else 0
            if target in words:  # mark the position of the target, e.g. <BMW>
                # NOTE(review): each call re-evaluates words.index(target)
                # after the previous insert. Traced on [a, T, b] this yields
                # words [a, '<', T, b, '>'] and tags [ta, tT, 'wcar', tb,
                # 'wcar']. Kept as-is; confirm whether that is intended.
                words.insert(words.index(target), '<')
                words.insert(words.index(target) + 2, '>')
                tags.insert(words.index(target), 'wcar')  # marker tag 'wcar'
                tags.insert(words.index(target) + 2, 'wcar')
            word_arr, tag_arr, position_arr = \
                get_sentence_ids(words, tags, word_voc, tag_voc, target_index, max_sent_len)
            # store the encoded sample
            test_nums.append(num)
            test_targets_str.append(target)
            test_sentence[i, :] = word_arr[:]
            test_tag[i, :] = tag_arr[:]
            test_position[i, :] = position_arr[:]
            # Left-pad with zeros, then repeat the target id once per word.
            if target in word_voc:
                test_target[i, :] = [0]*(max_sent_len-len(words)) + [word_voc[target]]*len(words)
            else:
                test_target[i, :] = [0] * max_sent_len
    test_data = [test_nums, test_targets_str, test_sentence, test_tag, test_position, test_target]
    if sentence_len:
        test_data.append(test_sentence_len)
    return test_data
예제 #28
0
def init_sentence_id():
    """
    Collect the sentence ids that have to be submitted.

    Reads ``Test.csv``, skips its first line and returns the set of first
    tab-separated fields of all remaining lines.
    """
    lines = read_lines('./com_data/data_ori/Test.csv')
    return {line.split('\t')[0] for line in lines[1:]}
예제 #29
0
def main():
    """Solve both parts of the puzzle stored in ``input06`` and print them.

    Lines are joined with single spaces, so an empty line shows up as a
    double space, which is used to split the text into groups.
    """
    data = read_lines("../resources/aoc2020/input06")
    groups = " ".join(data).split("  ")

    # part 1: distinct characters per group (spaces removed)
    result1 = 0
    for group in groups:
        result1 += len(set(group.replace(" ", "")))
    print("result1 =", result1)  # 6585

    # part 2: characters shared by every space-separated entry of a group
    result2 = 0
    for group in groups:
        answer_sets = [set(person) for person in group.split(" ")]
        result2 += len(reduce(set.intersection, answer_sets))
    print("result2 =", result2)  # 3276
예제 #30
0
def main():
    """Solve both parts of the puzzle stored in ``input12`` and print them.

    Every input line is an action letter (N, S, E, W, L, R, F) followed by a
    number.  In this coordinate system N decreases y, S increases y,
    E decreases x and W increases x; both results are |x| + |y|.
    """
    pattern = "(N|S|E|W|L|R|F)([0-9]+)$"
    data = read_lines("../resources/aoc2020/input12")
    instructions = [re.match(pattern, line) for line in data]

    # part 1
    x, y, i = 0, 0, 1  # i indexes `directions`; the ship starts facing 'E'
    directions = ['N', 'E', 'S', 'W']
    for instruction in instructions:
        action = instruction[1]
        value = int(instruction[2])
        if action == 'L':
            i = (i - value // 90) % 4
        elif action == 'R':
            i = (i + value // 90) % 4
        else:
            # 'F' moves along the current heading; N/S/E/W move directly.
            heading = directions[i] if action == 'F' else action
            if heading == 'N':
                y = y - value
            elif heading == 'S':
                y = y + value
            elif heading == 'E':
                x = x - value
            elif heading == 'W':
                x = x + value

    result1 = abs(x) + abs(y)
    print("result1 =", result1)  # 562

    # part 2
    x, y = 0, 0
    wx, wy = -10, -1  # waypoint, relative to the ship
    for instruction in instructions:
        action = instruction[1]
        value = int(instruction[2])
        if action == 'N':
            wy = wy - value
        elif action == 'S':
            wy = wy + value
        elif action == 'E':
            wx = wx - value
        elif action == 'W':
            wx = wx + value
        elif action in ('L', 'R'):
            # Rotate in exact integer quarter-turns instead of float trig
            # followed by int() truncation.  One 'L' quarter-turn maps
            # (wx, wy) to (-wy, wx); an 'R' turn of k quarters equals an
            # 'L' turn of (4 - k) quarters.
            quarter_turns = (value // 90) % 4
            if action == 'R':
                quarter_turns = (4 - quarter_turns) % 4
            for _ in range(quarter_turns):
                wx, wy = -wy, wx
        elif action == 'F':
            x = x + (value * wx)
            y = y + (value * wy)

    result2 = abs(x) + abs(y)
    print("result2 =", result2)  # 101860
예제 #31
0
def main():
    """Solve both parts of the puzzle stored in ``input05`` and print them.

    Part 1 is the largest seat id.  Part 2 walks the sorted ids until two
    neighbours differ by more than one and prints the id just below the
    upper neighbour.
    """
    data = read_lines("../resources/aoc2020/input05")
    seats = [seat_id(entry) for entry in data]
    seats.sort()

    # part 1
    print("result1 =", seats[-1])  # 894

    # part 2
    position = 1
    while seats[position] - seats[position - 1] == 1:
        position += 1
    print("result2 =", seats[position] - 1)  # 579
def load_data(sentiment_seeds):
    """
    Load the test sets and collect candidate sentiment words.

    The fourth ``|``-separated field of every line is treated as a sentence
    of space-separated ``word/tag`` tokens.  A word becomes a candidate when
    it is longer than one character, its tag is one of v, vi, d, a, vn, pos
    or neg, and it is not already in ``sentiment_seeds``.

    :param sentiment_seeds: container of known seed words (membership test)
    :return: list of candidate words, unique, in order of first appearance
    """
    lines = read_lines(test_path)
    lines += read_lines(test_path_second)
    wanted_tags = {'v', 'vi', 'd', 'a', 'vn', 'pos', 'neg'}
    candidate_sentiment_words = list()
    # Companion set for O(1) duplicate checks; the list keeps the order.
    seen = set()
    for line in lines:
        sentence = line.split('|')[3]
        for word_tag in sentence.split(' '):
            if '/' not in word_tag:
                continue
            parts = word_tag.split('/')
            word, tag = parts[0], parts[1]
            if len(word) <= 1 or tag not in wanted_tags:
                continue
            if word in sentiment_seeds or word in seen:
                continue
            seen.add(word)
            candidate_sentiment_words.append(word)
    return candidate_sentiment_words
예제 #33
0
def main():
    """Solve both parts of the puzzle stored in ``input04`` and print them.

    Lines are joined with single spaces, so an empty line shows up as a
    double space, which is used to split the text into passports.
    """
    data = read_lines("../resources/aoc2020/input04")
    passports = " ".join(data).split("  ")

    # part 1
    valid = [passport for passport in passports if is_valid(passport)]
    result1 = len(valid)
    print("result1 =", result1)  # 233

    # part 2
    strictly_valid = [passport for passport in passports
                      if is_valid_strict(passport)]
    result2 = len(strictly_valid)
    print("result2 =", result2)  # 111
예제 #34
0
def get_vocab(file):
    """Read tab-separated triples from ``file``.

    Every line must split into exactly three fields (checked with an
    assertion).  Returns ``(entries, vocab)`` where ``entries`` is the list
    of field lists and ``vocab`` is the set of all second fields.
    """
    entries = []
    for line in read_lines(file):
        fields = line.split("\t")
        assert len(fields) == 3
        entries.append(fields)
    # The same word can occur on several lines (e.g. with different pos
    # tags), so the vocabulary is a set.
    vocab = {fields[1] for fields in entries}
    return entries, vocab