Python load_features_from_file примеры, utils.load_features_from_file Python примеры использования

Пример #1

0

Показать файл

    def get_chars_per_word(self):
        """Attach the mean token length (in characters) to every instance.

        If the feature file for this extractor already exists, the values
        are loaded with ``utils.load_features_from_file`` and nothing is
        recomputed. Otherwise ``<type>_CharsPerWord`` is computed for each
        instance in ``self.iC.instances`` and the result is passed to
        ``utils.save_features_to_file``. Instances without tokens get 0.0.
        """
        featureNames = [self.type + "_CharsPerWord"]
        functionName = "get_chars_per_word"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            tokens = instance.tokens
            nwords = len(tokens)
            ratio = 0.0

            if nwords > 0:
                totalChars = sum(len(token) for token in tokens)
                # float() forces true division: under Python 2, int / int
                # truncates and the fractional part of the mean is lost.
                ratio = totalChars / float(nwords)

            instance.addFeature(self.type, self.type + "_CharsPerWord", ratio)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #2

0

Показать файл

Файл: sentenceBasedFeatures.py Проект: joanSolCom/author_profiling_tools

    def get_wordsPerSentence_stdandrange(self):
        """Attach sentence-length statistics (in words) to every instance.

        Three features are produced per instance: the standard deviation
        (``<type>_STD``), the max-min range (``<type>_Range``) and the mean
        (``<type>_wordsPerSentence``) of the number of ``word_tokenize``
        tokens per sentence. Existing feature files are loaded instead of
        recomputing; freshly computed values are saved afterwards.
        Instances without sentences get 0.0 for all three features.
        """
        featureNames = [
            self.type + "_STD", self.type + "_Range",
            self.type + "_wordsPerSentence"
        ]
        functionName = "get_wordsPerSentence_stdandrange"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            lengths = [
                len(word_tokenize(sentence))
                for sentence in instance.sentences
            ]

            if lengths:
                std = np.std(lengths)
                mean = np.mean(lengths)
                rng = np.amax(lengths) - np.amin(lengths)
            else:
                # np.amax / np.amin raise ValueError on an empty sequence
                # and np.std / np.mean return nan, so fall back to zeros
                # like the other extractors do for empty instances.
                std = 0.0
                mean = 0.0
                rng = 0.0

            instance.addFeature(self.type, self.type + "_STD", std)
            instance.addFeature(self.type, self.type + "_Range", rng)
            instance.addFeature(self.type, self.type + "_wordsPerSentence",
                                mean)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #3

0

Показать файл

    def get_interjections(self):
        """Attach the interjections-per-token ratio to every instance.

        For each entry of ``self.interjections`` the lower-cased entry is
        counted as a substring of the instance text; the summed count is
        divided by the number of tokens (0.0 when there are no tokens).
        An existing feature file is loaded instead of recomputing, and
        freshly computed values are saved at the end.
        """
        functionName = "get_interjections"
        featureNames = [self.type + "_Interjections"]
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            text = instance.text
            tokenCount = len(instance.tokens)

            # str.count never returns a negative number, so summing every
            # count directly equals adding only the positive ones.
            hits = sum(
                text.count(interjection.lower())
                for interjection in self.interjections)

            if tokenCount > 0:
                ratio = hits / float(tokenCount)
            else:
                ratio = 0.0

            instance.addFeature(self.type, self.type + "_Interjections", ratio)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #4

0

Показать файл

	def compute_syntactic_features(self):
		"""Build syntactic trees per instance and run all tree-based extractors.

		An existing feature file is loaded instead of recomputing. Otherwise
		each instance's CoNLL text is split on blank lines, one
		``SyntacticTreeOperations`` object is built per chunk (chunks that
		raise ``ValueError`` are printed and skipped), and the relation, POS,
		shape, sub/coordination and verb extractors are run on the resulting
		trees. ``self.adjust_features()`` is called once at the end.
		"""
		functionName = "compute_syntactic_features"
		cachePath = self.iC.featurePath+self.modelName+"_"+functionName

		if os.path.isfile(cachePath):
			utils.load_features_from_file(cachePath, self.iC, self.type)
			print("loaded "+functionName)
			return

		nPosts = len(self.iC.instances)
		print("Building Syntactic Trees")
		for nProcessed, instance in enumerate(self.iC.instances, 1):
			# The chunk after the last blank-line separator is discarded.
			sentenceChunks = instance.conll.split("\n\n")[:-1]
			iTrees = []
			for chunk in sentenceChunks:
				try:
					iTrees.append(SyntacticTreeOperations(chunk))
				except ValueError as e:
					# Report the unparsable sentence and move on to the next.
					print(e)

			self.get_relation_usage(iTrees, instance)
			self.get_relationgroup_usage(iTrees, instance)
			self.get_pos_usage(iTrees, instance)
			self.get_posgroup_usage(iTrees, instance)

			self.get_shape_features(iTrees, instance)
			self.get_subcoord_features(iTrees, instance)
			self.get_verb_features(iTrees, instance)
			print("processed "+str(nProcessed) + " of " + str(nPosts))

		self.adjust_features()

Пример #5

0

Показать файл

    def get_symbols(self, symbols, featureName):
        """Attach the share of characters that belong to ``symbols``.

        Args:
            symbols: container tested with ``in`` for every character of
                the instance text.
            featureName: suffix used for both the feature name
                (``<type>_<featureName>``) and the feature file name.

        An existing feature file is loaded instead of recomputing; freshly
        computed values are saved afterwards. Empty texts get 0.0.
        """
        featureNames = [self.type + "_" + featureName]
        functionName = "get_symbols_" + featureName
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            text = instance.text
            nChars = len(text)
            ratio = 0.0

            if nChars > 0:
                matches = sum(1 for char in text if char in symbols)
                # float() forces true division; Python 2 int / int would
                # truncate every ratio below 1 down to 0.
                ratio = matches / float(nChars)

            instance.addFeature(self.type, self.type + "_" + featureName,
                                ratio)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #6

0

Показать файл

Файл: discourseFeatures.py Проект: joanSolCom/author_profiling_tools

    def compute_discourse_features(self):
        """Run the discourse-tree extractors on every instance.

        An existing feature file is loaded instead of recomputing.
        Otherwise a ``DiscourseTreeOperations`` object is built from each
        instance's ``discourse`` attribute and passed, together with the
        sentence count, to the shape and relation-usage extractors. After
        ``self.adjust_features()`` the features named in ``self.allDeps``
        are saved.
        """
        functionName = "compute_discourse_features"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        nPosts = len(self.iC.instances)
        for nDone, instance in enumerate(self.iC.instances, 1):
            iTree = DiscourseTreeOperations(instance.discourse)
            nsents = len(instance.sentences)

            self.get_shape_features(iTree, nsents, instance)
            self.get_discourse_relation_usage(iTree, nsents, instance)
            print("processed " + str(nDone) + " of " + str(nPosts))

        self.adjust_features()
        utils.save_features_to_file(cachePath, self.allDeps, self.iC,
                                    self.type)

Пример #7

0

Показать файл

    def get_proper_nouns(self):
        """Attach the ratio of capitalised non-initial words to every instance.

        Each sentence is tokenised with ``word_tokenize``; every token
        except the first of its sentence whose first character is upper
        case is counted. The count is divided by the number of instance
        tokens (0.0 when there are none). An existing feature file is
        loaded instead of recomputing.
        """
        featureNames = [self.type + "_ProperNouns"]
        functionName = "get_proper_nouns"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            nwords = len(instance.tokens)
            proper = 0
            ratio = 0.0
            for sentence in instance.sentences:
                # Skip the first token: sentence-initial capitals carry no
                # information about proper nouns.
                for word in word_tokenize(sentence)[1:]:
                    if word[0].isupper():
                        proper = proper + 1
            if nwords > 0:
                # float() forces true division; Python 2 int / int truncates.
                ratio = proper / float(nwords)

            instance.addFeature(self.type, self.type + "_ProperNouns", ratio)

        # Persist the values like the sibling extractors do; without this
        # the feature-file check above could never succeed.
        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #8

0

Показать файл

    def get_acronyms(self):
        """Attach the ratio of acronym-like tokens to every instance.

        A token counts when it starts with an upper-case letter followed by
        three characters that are each a digit, an upper-case letter or a
        dot, and it does not end in ``:`` or ``,``. The count is divided by
        the number of tokens (0.0 when there are none). An existing feature
        file is loaded instead of recomputing; new values are saved.
        """
        featureNames = [self.type + "_Acronyms"]
        functionName = "get_acronyms"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        # Raw string keeps the backslash literal; compiled once, outside
        # the per-token loop.
        acronymPattern = re.compile(r'(^[A-Z]([0-9]|[A-Z]|\.){3})')

        for instance in self.iC.instances:
            words = instance.tokens
            nwords = len(words)
            nacr = 0
            ratio = 0.0
            for word in words:
                # A match guarantees at least four characters, so word[-1]
                # is always valid here.
                if acronymPattern.match(word) and word[-1] not in (":", ","):
                    nacr = nacr + 1

            if nwords > 0:
                # float() forces true division; Python 2 int / int truncates.
                ratio = nacr / float(nwords)

            instance.addFeature(self.type, self.type + "_Acronyms", ratio)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #9

0

Показать файл

    def get_stopwords(self):
        """Attach the ratio of English stopwords to every instance.

        Tokens are stripped and lower-cased before being looked up in
        NLTK's English stopword list. The count is divided by the number
        of tokens (0.0 when there are none). An existing feature file is
        loaded instead of recomputing; new values are saved.
        """
        featureNames = [self.type + "_Stopwords"]
        functionName = "get_stopwords"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        # A set gives constant-time membership tests for every token.
        stopwords = set(nltk.corpus.stopwords.words('english'))
        for instance in self.iC.instances:
            words = instance.tokens
            nwords = len(words)
            ratio = 0.0

            if nwords > 0:
                nstopwords = sum(
                    1 for word in words if word.strip().lower() in stopwords)
                # float() forces true division; Python 2 int / int truncates.
                ratio = nstopwords / float(nwords)

            instance.addFeature(self.type, self.type + "_Stopwords", ratio)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #10

0

Показать файл

    def get_discourse_markers(self):
        """Attach the discourse-markers-per-token ratio to every instance.

        Every entry of ``self.discourseMarkersList`` is counted as a
        substring of the instance text; the summed count is divided by the
        number of tokens (0.0 when there are none). An existing feature
        file is loaded instead of recomputing; new values are saved.
        """
        featureNames = [self.type + "_DiscourseMarkers"]
        functionName = "get_discourse_markers"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            content = instance.text
            nwords = len(instance.tokens)
            nMarkers = sum(
                content.count(marker) for marker in self.discourseMarkersList)

            ratio = 0.0
            if nwords > 0:
                # float() forces true division; Python 2 int / int truncates.
                ratio = nMarkers / float(nwords)

            instance.addFeature(self.type, self.type + "_DiscourseMarkers",
                                ratio)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #11

0

Показать файл

    def get_numbers(self):
        """Attach the share of digit characters in the text to every instance.

        The number of ``[0-9]`` matches is divided by the text length
        (0.0 for empty text). An existing feature file is loaded instead
        of recomputing; new values are saved.
        """
        featureNames = [self.type + "_Numbers"]
        functionName = "get_numbers"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            text = instance.text
            nchars = len(text)
            ratio = 0.0

            if nchars > 0:
                ndigits = len(re.findall("[0-9]", text))
                # float() forces true division; Python 2 int / int truncates.
                ratio = ndigits / float(nchars)

            instance.addFeature(self.type, self.type + "_Numbers", ratio)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #12

0

Показать файл

    def get_dict_count(self):
        """Attach four dictionary-hit ratios to every instance.

        Lower-cased tokens are looked up in ``self.abbreviationList``,
        ``self.badWordsList``, ``self.posList`` and ``self.negList``; each
        hit count is divided by the number of tokens (0.0 when there are
        none). A token may count towards several dictionaries. An existing
        feature file is loaded instead of recomputing; new values are saved.
        """
        featureNames = [
            self.type + "_Abbrev", self.type + "_Curse",
            self.type + "_Positive", self.type + "_Negative"
        ]
        functionName = "get_dict_count"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            lWords = instance.tokens
            nwords = len(lWords)
            nAbbrev = 0
            nCurse = 0
            nPos = 0
            nNeg = 0

            for word in lWords:
                word = word.lower()
                if word in self.abbreviationList:
                    nAbbrev += 1
                if word in self.badWordsList:
                    nCurse += 1
                if word in self.negList:
                    nNeg += 1
                if word in self.posList:
                    nPos += 1

            ratioAbbrev = 0.0
            ratioCurse = 0.0
            ratioPos = 0.0
            ratioNeg = 0.0
            if nwords > 0:
                # float() forces true division; Python 2 int / int truncates.
                total = float(nwords)
                ratioAbbrev = nAbbrev / total
                ratioCurse = nCurse / total
                ratioPos = nPos / total
                ratioNeg = nNeg / total

            instance.addFeature(self.type, self.type + "_Abbrev", ratioAbbrev)
            instance.addFeature(self.type, self.type + "_Curse", ratioCurse)
            instance.addFeature(self.type, self.type + "_Positive", ratioPos)
            instance.addFeature(self.type, self.type + "_Negative", ratioNeg)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #13

0

Показать файл

Файл: area_features.py Проект: wufeim/microstructure-characterization-II-bak

def plot_area_features(feature_file, mode, output_filename, scale=4):
    """Draw a 3-D scatter plot of the three area features and write it out.

    Args:
        feature_file: passed to ``utils.load_features_from_file`` together
            with the feature names ``area_0``, ``area_1`` and ``area_2``.
        mode: ``'10_class'`` colours points by ``met_id``; ``'binary'``
            colours them by a two-way grouping of ``met_id``.
        output_filename: must end in ``.html`` or ``.png``.
        scale: scale factor handed to ``fig.to_image`` for PNG output.

    Raises:
        ValueError: on an unknown ``mode`` or output file extension.
    """
    df = utils.load_features_from_file(feature_file,
                                       ['area_0', 'area_1', 'area_2'])

    if mode == '10_class':
        color_column = 'met_id'
        title = 'Area Features (10 classes)'
    elif mode == 'binary':
        column1 = ['DUM1178', 'DUM1154', 'DUM1297', 'DUM1144', 'DUM1150', 'DUM1160']
        # Listed for reference only: every id outside column1 is labelled
        # 'column_2', whether or not it appears here.
        column2 = ['DUM1180', 'DUM1303', 'DUM1142', 'DUM1148', 'DUM1162']
        met_ids = df['met_id'].to_numpy().astype(str)
        df['binary_label'] = [
            'column_1' if met_id in column1 else 'column_2'
            for met_id in met_ids
        ]
        color_column = 'binary_label'
        title = 'Area Features (binary)'
    else:
        raise ValueError('Unknown plotting mode. Use \'10_class\' or \'binary\' instead.')

    fig = px.scatter_3d(df, x='area_0', y='area_1', z='area_2',
                        color=color_column)
    fig.update_layout(scene_aspectmode='cube')
    fig.update_layout(title_text=title)

    if output_filename.endswith('.html'):
        pio.write_html(fig, output_filename)
        return
    if not output_filename.endswith('.png'):
        raise ValueError('Unknown output format given by output_filename: {:s}'.format(output_filename))

    # Render to PNG bytes, decode them with OpenCV and write the image.
    png_bytes = fig.to_image(format='png', scale=scale)
    pixels = cv2.imdecode(np.frombuffer(png_bytes, np.uint8),
                          cv2.IMREAD_COLOR)
    cv2.imwrite(output_filename, pixels)

Пример #14

0

Показать файл

    def get_firstperson_pronouns(self):
        """Attach first-person singular and plural pronoun ratios.

        Lower-cased tokens are matched against two hard-coded Spanish
        pronoun lists; each count is divided by the number of tokens
        (0.0 when there are none). An existing feature file is loaded
        instead of recomputing; new values are saved.
        """
        featureNames = [
            self.type + "_FirstSingular", self.type + "_FirstPlural"
        ]
        functionName = "get_firstperson_pronouns"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        # English alternatives, currently disabled:
        #   singular: "i", "me", "my", "mine"; plural: "we", "our", "ours"
        first_singular = ["yo", "mi", "mío"]
        first_plural = ["nos", "nosotros", "nuestro"]

        for instance in self.iC.instances:
            lWords = instance.tokens
            nwords = len(lWords)
            nFirstS = 0
            nFirstP = 0
            for word in lWords:
                word = word.lower()
                if word in first_singular:
                    nFirstS += 1
                elif word in first_plural:
                    nFirstP += 1

            ratioFirstS = 0.0
            ratioFirstP = 0.0
            if nwords > 0:
                # float() forces true division; Python 2 int / int truncates.
                ratioFirstS = nFirstS / float(nwords)
                ratioFirstP = nFirstP / float(nwords)

            instance.addFeature(self.type, self.type + "_FirstSingular",
                                ratioFirstS)
            instance.addFeature(self.type, self.type + "_FirstPlural",
                                ratioFirstP)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #15

0

Показать файл

Файл: taggers.py Проект: chrisleewashere/nlpnet

def load_network(md):
    """
    Load the network described by ``md`` and attach its feature tables.

    The network class is chosen from ``md.task``; the network and every
    feature table are read from the files that ``config.FILES`` maps the
    corresponding ``md`` attributes to. Returns the loaded network.
    """
    logger = logging.getLogger("Logger")
    # Tasks starting with 'srl', other than 'srl_predicates', use the
    # convolutional network.
    is_srl = md.task.startswith('srl') and md.task != 'srl_predicates'

    logger.info('Loading network')
    if is_srl:
        net_class = ConvolutionalNetwork
    elif md.task == 'lm':
        net_class = LanguageModel
    else:
        net_class = Network
    nn = net_class.load_from_file(config.FILES[md.network])

    logger.info('Loading features')
    tables = [utils.load_features_from_file(config.FILES[md.type_features])]

    # (enabling flag, attribute holding the config.FILES key); the key
    # attribute is only read when its flag is set.
    optional_tables = (
        ('use_caps', 'caps_features'),
        ('use_suffix', 'suffix_features'),
        ('use_pos', 'pos_features'),
        ('use_chunk', 'chunk_features'),
    )
    for flag_name, key_name in optional_tables:
        if getattr(md, flag_name):
            file_key = getattr(md, key_name)
            tables.append(
                utils.load_features_from_file(config.FILES[file_key]))

    nn.feature_tables = tables

    logger.info('Done')
    return nn

Пример #16

0

Показать файл

    def get_in_parenthesis_stats(self):
        """Attach mean characters and mean words per parenthesised span.

        Spans are found with a non-greedy ``( ... )`` regex on the
        instance text. Both totals are divided by the number of spans
        (0.0 when there are none). An existing feature file is loaded
        instead of recomputing; new values are saved.
        """
        featureNames = [
            self.type + "_charsinparenthesis",
            self.type + "_wordsinparenthesis"
        ]
        functionName = "get_in_parenthesis_stats"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            matches = re.findall(r"\((.*?)\)", instance.text)
            npar = len(matches)
            totalchars = 0
            totalwords = 0

            for match in matches:
                totalchars += len(match)
                # Accumulate across spans; plain assignment kept only the
                # word count of the last span.
                totalwords += len(word_tokenize(match))

            charsInParenthesis = 0.0
            wordsInParenthesis = 0.0
            if npar > 0:
                # float() forces true division; Python 2 int / int truncates.
                charsInParenthesis = totalchars / float(npar)
                wordsInParenthesis = totalwords / float(npar)

            instance.addFeature(self.type, self.type + "_charsinparenthesis",
                                charsInParenthesis)
            instance.addFeature(self.type, self.type + "_wordsinparenthesis",
                                wordsInParenthesis)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #17

0

Показать файл

    def get_uppers(self):
        """Attach the share of ASCII upper-case letters in the text.

        The number of ``[A-Z]`` matches is divided by the text length;
        empty texts get 0.0. An existing feature file is loaded instead of
        recomputing; new values are saved.
        """
        featureNames = [self.type + "_UpperCases"]
        functionName = "get_uppers"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            text = instance.text
            nchars = len(text)
            ratio = 0.0
            # Guard against empty text, which previously raised
            # ZeroDivisionError.
            if nchars > 0:
                upperCases = len(re.findall("[A-Z]", text, re.DOTALL))
                # float() forces true division; Python 2 int / int truncates.
                ratio = upperCases / float(nchars)
            instance.addFeature(self.type, self.type + "_UpperCases", ratio)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #18

0

Показать файл

Файл: taggers.py Проект: chrisleewashere/nlpnet

def load_network(md):
    """
    Load the network described by ``md`` together with its feature tables.

    The network class depends on ``md.task``; all files are looked up in
    ``config.FILES`` using the keys stored on ``md``. Returns the network
    with ``feature_tables`` set.
    """
    def read_table(file_key):
        # Load one feature table from the file registered under file_key.
        return utils.load_features_from_file(config.FILES[file_key])

    logger = logging.getLogger("Logger")
    is_srl = md.task.startswith('srl') and md.task != 'srl_predicates'

    logger.info('Loading network')
    if is_srl:
        net_class = ConvolutionalNetwork
    else:
        net_class = LanguageModel if md.task == 'lm' else Network
    nn = net_class.load_from_file(config.FILES[md.network])

    logger.info('Loading features')
    # The word-type table always comes first; the others are optional.
    tables = [read_table(md.type_features)]

    if md.use_caps:
        tables.append(read_table(md.caps_features))
    if md.use_suffix:
        tables.append(read_table(md.suffix_features))
    if md.use_pos:
        tables.append(read_table(md.pos_features))
    if md.use_chunk:
        tables.append(read_table(md.chunk_features))

    nn.feature_tables = tables

    logger.info('Done')
    return nn

Пример #19

0

Показать файл

    def get_twothree_words(self):
        """Attach the ratios of two- and three-character tokens.

        Each count is divided by the number of tokens (0.0 when there are
        none). An existing feature file is loaded instead of recomputing;
        new values are saved.
        """
        featureNames = [self.type + "_twoWords", self.type + "_threeWords"]
        functionName = "get_twothree_words"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            textTokenized = instance.tokens
            nwords = len(textTokenized)
            nTwo = 0
            nThree = 0

            for word in textTokenized:
                if len(word) == 2:
                    nTwo += 1
                elif len(word) == 3:
                    nThree += 1

            twoWords = 0.0
            threeWords = 0.0
            if nwords > 0:
                # float() forces true division; Python 2 int / int truncates.
                twoWords = nTwo / float(nwords)
                threeWords = nThree / float(nwords)

            instance.addFeature(self.type, self.type + "_twoWords", twoWords)
            instance.addFeature(self.type, self.type + "_threeWords",
                                threeWords)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #20

0

Показать файл

    def get_vocabulary_richness(self):
        """Attach the distinct-tokens / all-tokens ratio to every instance.

        Instances without tokens get 0.0. An existing feature file is
        loaded instead of recomputing; new values are saved.
        """
        featureNames = [self.type + "_VocabularyRichness"]
        functionName = "get_vocabulary_richness"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            print("loaded " + functionName)
            return

        for instance in self.iC.instances:
            lAllWords = instance.tokens
            nAll = len(lAllWords)
            ratio = 0.0
            if nAll > 0:
                # float() forces true division: with Python 2 int / int the
                # ratio could only ever be 0 or 1.
                ratio = len(set(lAllWords)) / float(nAll)
            instance.addFeature(self.type, self.type + "_VocabularyRichness",
                                ratio)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Пример #21

0

Показать файл

def load_network(md):
    """
    Load the network described by ``md`` and attach its feature tables.

    The network and every feature table are read from the paths that
    ``md.paths`` maps the corresponding ``md`` attributes to. Returns the
    network with ``feature_tables`` set.
    """
    logger = logging.getLogger("Logger")
    # Tasks starting with 'srl', other than 'srl_predicates', use the
    # convolutional network.
    is_srl = md.task.startswith('srl') and md.task != 'srl_predicates'

    logger.info('Loading network')
    net_class = ConvolutionalNetwork if is_srl else Network
    nn = net_class.load_from_file(md.paths[md.network])

    logger.info('Loading features...')
    tables = [utils.load_features_from_file(md.paths[md.type_features])]

    if md.use_caps:
        tables.append(
            utils.load_features_from_file(md.paths[md.caps_features]))
    if md.use_prefix:
        # The prefix file holds one table for each size; add them all.
        tables.extend(
            utils.load_features_from_file(md.paths[md.prefix_features]))
    if md.use_suffix:
        tables.append(
            utils.load_features_from_file(md.paths[md.suffix_features]))
    if md.use_pos:
        tables.append(
            utils.load_features_from_file(md.paths[md.pos_features]))
    if md.use_chunk:
        tables.append(
            utils.load_features_from_file(md.paths[md.chunk_features]))
    # NER gazetteers: one feature file, and thus one table, per entry.
    if md.use_gazetteer:
        tables.extend(
            utils.load_features_from_file(gaz_file)
            for gaz_file in md.paths[md.gaz_features])

    nn.feature_tables = tables

    logger.info('Done')
    return nn

Пример #22

0

Показать файл

Файл: taggers.py Проект: attardi/nlpnet

def load_network(md):
    """
    Read the saved network for the task described by `md` and return it
    with its feature tables attached.

    SRL tasks other than 'srl_predicates' use ConvolutionalNetwork; every
    other task uses Network. The tables are gathered in this order: type
    features, then caps, prefix, suffix, POS, chunk and gazetteer features,
    each optional group being loaded only if its `md.use_*` flag is true.
    """
    logger = logging.getLogger("Logger")
    is_srl = md.task.startswith('srl') and md.task != 'srl_predicates'

    logger.info('Loading network')
    net_class = ConvolutionalNetwork if is_srl else Network
    nn = net_class.load_from_file(md.paths[md.network])

    logger.info('Loading features...')
    read_table = utils.load_features_from_file
    feature_tables = [read_table(md.paths[md.type_features])]

    if md.use_caps:
        feature_tables.append(read_table(md.paths[md.caps_features]))

    if md.use_prefix:
        # one table for each prefix size
        for prefix_table in read_table(md.paths[md.prefix_features]):
            feature_tables.append(prefix_table)

    if md.use_suffix:
        feature_tables.append(read_table(md.paths[md.suffix_features]))

    if md.use_pos:
        feature_tables.append(read_table(md.paths[md.pos_features]))

    if md.use_chunk:
        feature_tables.append(read_table(md.paths[md.chunk_features]))

    if md.use_gazetteer:
        # NER gazetteers
        for gaz_file in md.paths[md.gaz_features]:
            feature_tables.append(read_table(gaz_file))

    nn.feature_tables = feature_tables

    logger.info('Done')
    return nn

Пример #23

0

Показать файл

    def get_mean_mood(self):
        """Add DepecheMood-based emotion features to every instance.

        If a cached feature file for this function already exists it is
        loaded through ``utils.load_features_from_file`` and nothing is
        recomputed. Otherwise each instance's tokens are POS-tagged and
        lemmatized, looked up in ``self.depecheMood`` under the key
        ``"<lemma>#<pos>"``, and the per-emotion scores of the matching
        tokens are summed. Seventeen features are then added per instance:

        * ``<type>_TokenRatio<Emotion>``: summed score / number of tokens
        * ``<type>_EmotionRatio``: matching tokens / number of tokens
        * ``<type>_EmotionRatio<Emotion>``: summed score / matching tokens

        Any ratio whose denominator is zero is reported as ``0.0``. All
        ratios are floats; the emotion-token ratio is computed with true
        division so it is not truncated to 0 or 1 under Python 2 integer
        division. Finally the features are written to the cache file.
        """
        # (key in a DepecheMood entry, suffix used in the feature name),
        # listed in the order in which the features are emitted.
        moods = [
            ("afraid", "Afraid"),
            ("amused", "Amused"),
            ("angry", "Angry"),
            ("annoyed", "Annoyed"),
            ("dont_care", "DontCare"),
            ("happy", "Happy"),
            ("inspired", "Inspired"),
            ("sad", "Sad"),
        ]
        tokenRatioPrefix = self.type + "_TokenRatio"
        emotionRatioPrefix = self.type + "_EmotionRatio"

        featureNames = (
            [tokenRatioPrefix + label for _, label in moods] +
            [emotionRatioPrefix] +
            [emotionRatioPrefix + label for _, label in moods]
        )
        functionName = "get_mean_mood"
        cachePath = self.iC.featurePath + self.modelName + "_" + functionName

        if os.path.isfile(cachePath):
            utils.load_features_from_file(cachePath, self.iC, self.type)
            # Parenthesised single argument prints identically on Python 2
            # and also parses on Python 3.
            print("loaded " + functionName)
            return

        lmtzr = WordNetLemmatizer()

        for instance in self.iC.instances:
            tokens = instance.tokens
            totalTokens = len(tokens)

            # Float accumulators keep every ratio a float, even when no
            # token of the instance is found in the lexicon.
            totals = dict((key, 0.0) for key, _ in moods)
            totalEmotionTokens = 0

            for token, tag in nltk.pos_tag(tokens):
                pos = self.getDepecheMoodPos(tag)
                if pos is None:
                    continue

                if pos == "v":
                    lemma = lmtzr.lemmatize(token, "v")
                else:
                    lemma = lmtzr.lemmatize(token)

                idx = lemma.lower() + "#" + pos

                # Direct membership test: avoids building and scanning the
                # key list for every token.
                if idx not in self.depecheMood:
                    continue

                scores = self.depecheMood[idx]
                totalEmotionTokens += 1
                for key, _ in moods:
                    totals[key] += float(scores[key])

            # Ratios relative to the total number of tokens.
            ratioEmotionTokens = 0.0
            for key, label in moods:
                ratio = 0.0
                if totalTokens > 0:
                    ratio = totals[key] / totalTokens
                instance.addFeature(self.type, tokenRatioPrefix + label,
                                    ratio)

            if totalTokens > 0:
                # float() forces true division; both operands are ints.
                ratioEmotionTokens = float(totalEmotionTokens) / totalTokens
            instance.addFeature(self.type, emotionRatioPrefix,
                                ratioEmotionTokens)

            # Ratios relative to the number of tokens found in the lexicon.
            for key, label in moods:
                ratio = 0.0
                if totalEmotionTokens > 0:
                    ratio = totals[key] / totalEmotionTokens
                instance.addFeature(self.type, emotionRatioPrefix + label,
                                    ratio)

        utils.save_features_to_file(cachePath, featureNames, self.iC,
                                    self.type)

Python load_features_from_file примеры использования