示例#1
0
        def import_from_file(file1):
            """Populate the tree from the text returned by UnicodeUtils.read(file1).

            The text is split into lines with ``line_split`` and each line into
            node names with ``item_split`` (both are free variables defined
            outside this function, as is ``self``).  Every line describes one
            chain of nested nodes hanging below ``TMCTree.root_node``.
            """
            # NOTE: import_from_file does not support depth yet.
            whole_text = UnicodeUtils.read(file1).strip()
            for raw_line in whole_text.split(line_split):
                stripped = raw_line.strip()
                # Make sure the root entry exists before descending from it.
                if TMCTree.root_node not in self:
                    self[TMCTree.root_node] = dict()
                subtree = self[TMCTree.root_node]
                parent = TMCTree.root_node
                for name in stripped.split(item_split):
                    node = Node({"name": name})
                    self.name_to_nodes[node.name].add(node)
                    if node not in subtree:
                        subtree[node] = dict()

                    # Record which parent this name was seen under.
                    self.child_name_to_parent_relation_dict[node.name].add(parent)
                    # Step one level down: the node just handled is the next parent.
                    subtree, parent = subtree[node], node
示例#2
0
        def import_from_file(file1):
            """Read ``file1`` with UnicodeUtils.read and insert its node chains.

            Each ``line_split``-separated line is split on ``item_split``; the
            resulting names become Node objects stored as nested dicts under
            ``TMCTree.root_node``.  ``self``, ``line_split`` and ``item_split``
            are not parameters; they are resolved from the surrounding scope.
            """
            # NOTE: import_from_file does not support depth yet.
            all_lines = UnicodeUtils.read(file1).strip().split(line_split)
            for text in all_lines:
                text = text.strip()
                if TMCTree.root_node not in self:
                    self[TMCTree.root_node] = dict()
                # Every line starts again at the root of the tree.
                level, parent_node = self[TMCTree.root_node], TMCTree.root_node
                for item in text.split(item_split):
                    child = Node({"name": item})
                    self.name_to_nodes[child.name].add(child)
                    if child not in level:
                        level[child] = dict()

                    self.child_name_to_parent_relation_dict[child.name].add(
                        parent_node)
                    # Descend into the child's dict for the next item on the line.
                    level = level[child]
                    parent_node = child
示例#3
0
    def __init__(self, source):
        """Parse ``source`` line by line and collect the outcome in ``self.result``.

        ``source`` is either text that has already been read (recognised by
        containing a newline) or something ``UnicodeUtils.read`` can load.
        Each line is stripped, its single quotes are turned into double quotes
        and ``u"`` string prefixes are dropped; the line is then passed to
        ``self.parse`` together with the current ``(kp, features)`` pair, and
        the pair it returns replaces the current one.  Whenever both values are
        truthy they are stored as ``self.result[kp] = features``.

        Raises:
            Exception: ``"parse error ..."`` when ``self.parse`` fails on a
                line; the 1-based line number and the line are printed first.
        """
        if "\n" not in source:
            # A newline means the content has already been read in; without
            # one, the value is handed to UnicodeUtils.read to be loaded.
            source = UnicodeUtils.read(source)

        self.result = dict()
        current_kp = current_features = None

        for num, line in enumerate(source.split("\n")):
            line = line.strip().replace("'", "\"")
            # Drop only a genuine ``u"`` prefix.  Replacing every ``u"`` would
            # also eat the final letter of values ending in "u" ("menu" -> "men").
            line = re.sub(r'(?<![\w"])u"', "\"", line)

            try:
                current_kp, current_features = self.parse(line, current_kp, current_features)
            except Exception:
                # ``except Exception`` lets KeyboardInterrupt/SystemExit pass
                # through.  A single pre-formatted argument prints the same
                # text under both the print statement and the print function.
                print("[num] %s [line] %s" % (num + 1, line))
                raise Exception("parse error ...")

            if current_kp and current_features:
                self.result[current_kp] = current_features
    def load_data_from_input(self, input1):
        """Load a key -> number mapping and return it as a ``defaultdict``.

        ``input1`` is either a dict, which is used directly, or a path.  A
        path that does not exist yields an empty ``defaultdict(float)``.  An
        existing file is read with ``UnicodeUtils.read`` and parsed as JSON;
        if that fails it is parsed as ``key,value`` lines.

        Missing keys in the returned mapping default to the average of the
        loaded values, or to ``0.0`` when nothing was loaded.
        """
        def wrap(data):
            # An empty mapping has no average; fall back to the same 0.0
            # default used for a missing file instead of dividing by zero.
            if not data:
                return defaultdict(float)
            avg = sum(data.values()) / float(len(data))
            return defaultdict(lambda: avg, data)

        if isinstance(input1, dict):
            return wrap(input1)

        if not os.path.exists(input1):
            return defaultdict(float)

        content = UnicodeUtils.read(input1).strip()
        try:
            data = json.loads(content)
        except ValueError:
            # Not valid JSON (json's decode errors are ValueErrors): treat the
            # content as "key,value" lines.
            data = dict()
            for line in content.split("\n"):
                if not line.strip():
                    # Blank lines (including an entirely empty file) carry no
                    # entry and would otherwise fail on result[1].
                    continue
                result = line.split(',')
                data[result[0]] = float(result[1].strip())

        return wrap(data)
示例#5
0
    def load_data_from_input(self, input1):
        """Load a key -> number mapping and return it as a ``defaultdict``.

        ``input1`` is either a dict, which is used directly, or a path.  A
        path that does not exist yields an empty ``defaultdict(float)``.  An
        existing file is read with ``UnicodeUtils.read`` and parsed as JSON;
        if that fails it is parsed as ``key,value`` lines.

        Missing keys in the returned mapping default to the average of the
        loaded values, or to ``0.0`` when nothing was loaded.
        """
        def wrap(data):
            # An empty mapping has no average; fall back to the same 0.0
            # default used for a missing file instead of dividing by zero.
            if not data:
                return defaultdict(float)
            avg = sum(data.values()) / float(len(data))
            return defaultdict(lambda: avg, data)

        if isinstance(input1, dict):
            return wrap(input1)

        if not os.path.exists(input1):
            return defaultdict(float)

        content = UnicodeUtils.read(input1).strip()
        try:
            data = json.loads(content)
        except ValueError:
            # Not valid JSON (json's decode errors are ValueErrors): treat the
            # content as "key,value" lines.
            data = dict()
            for line in content.split("\n"):
                if not line.strip():
                    # Blank lines (including an entirely empty file) carry no
                    # entry and would otherwise fail on result[1].
                    continue
                result = line.split(',')
                data[result[0]] = float(result[1].strip())

        return wrap(data)
 def stop_words_set(self):
     """Return the set of stripped lines from every stop-words file.

     Files are taken from ``self.classify.stop_words_files`` and read with
     ``UnicodeUtils.read``; a blank line contributes an empty string.
     """
     collected = set()
     for path in self.classify.stop_words_files:
         for raw in UnicodeUtils.read(path).split("\n"):
             collected.add(raw.strip())
     return collected
示例#7
0
 def stop_words_set(self):
     """Collect the stripped lines of all configured stop-words files in a set.

     Each path in ``self.classify.stop_words_files`` is read with
     ``UnicodeUtils.read`` and split on newlines; every piece is stripped
     before being added, so blank lines end up as an empty string.
     """
     words = set()
     for path in self.classify.stop_words_files:
         pieces = UnicodeUtils.read(path).split("\n")
         words.update(piece.strip() for piece in pieces)
     return words