Exemplo n.º 1
0
def main(No_ch=0):
    """Acquire the depression corpus for chunk *No_ch*, build the 0/1
    label matrix, and print the tokenized training corpus.

    Relies on the project-level modules ``dic`` (corpus access) and
    ``norm`` (text normalization).
    """

    def _ingest(tag):
        # Reset the path list, then load and analyze the chunk XML for *tag*.
        dic.chunks_paths = []
        dic.loadchunkXML(tag)
        dic.analyzeChunk(tag, No_ch)

    # --- Corpus acquisition: positive ('dpp') and negative ('dpn') sides ---
    print("Adquisicion de corpus de depresion")
    for tag in ('dpp', 'dpn'):
        _ingest(tag)
    print('Numero de chunks en types ', len(dic.types['dpp']))
    print('Numero de chunks en types ', len(dic.types['dpn']))

    # Merge both polarities into the combined 'dp' class and label it.
    dic.initialize_class_types('dp')
    for tag in ('dpp', 'dpn'):
        dic.appendPost(tag, 'dp')
    print('Numero de instancias en depresion', len(dic.types['dp']['rows']))
    dic.types['dp']['cols'] = dic.fillOnesZeros('dp')
    print('Matriz Y', len(dic.types['dp']['cols']))
    dic.types['dp']['names'] = ['Negative', 'Positive']

    # --- Corpus normalization ---
    norm_train_corpus = norm.parseForTokensFixed(dic.types['dp']['rows'])
    print(norm_train_corpus)
Exemplo n.º 2
0
    chunk.newUser(uid, posts)

# Acquisition of the negative depression chunk ('dpn'); the positive side
# ('dpp') is loaded before this excerpt (its loop was cut off above).
# NOTE(review): No_ch must already be defined earlier in the full script.
dic.chunks_paths = []
dic.loadchunkXML('dpn')
dic.analyzeChunk('dpn', No_ch)
# Register every user of the selected chunk with `chunk` (No_ch appears to be
# 1-based here, hence the -1 index — TODO confirm against the caller).
for v in dic.chunks_paths[No_ch - 1]:
    (uid, posts) = dic.PostForUser(v)
    chunk.newUser(uid, posts)

print('Numero de chunks en types ', len(dic.types['dpp']))
print('Numero de chunks en types ', len(dic.types['dpn']))

# Merge positive and negative posts into the combined 'dp' class.
dic.initialize_class_types('dp')
IDLVdic = {}  # initialized here; not populated within this fragment

dic.appendPost('dpp', 'dp')
dic.appendPost('dpn', 'dp')
chunk.loadVocabulary()
chunk.calcIDLV()
print('Numero de instancias en depresion', len(dic.types['dp']['rows']))

# Build the 0/1 label column (Y) for the 'dp' class.
dic.types['dp']['cols'] = dic.fillOnesZeros('dp')
print(len(dic.types['dp']['cols']))

# Corpus acquisition >>>>>>>> END
# Corpus normalization >>>>>>>>>> START
norm_train_corpus = [
    norm.remove_special_characters(text) for text in dic.types['dp']['rows']
]
train_corpus = [norm.remove_stopwords(text) for text in norm_train_corpus]
# Corpus normalization >>>>>>>>>> END
Exemplo n.º 3
0
def main(No_ch=0):
    """Train and evaluate a BOW Multinomial Naive Bayes model on the
    depression ('dp') and anorexia ('ax') corpora for chunk *No_ch*.

    For each corpus: acquire and label the posts, normalize the text,
    report 10-fold cross-validated metrics, then print the mutual
    information of every feature against the class labels.

    Parameters
    ----------
    No_ch : int
        Chunk number to analyze (project-defined numbering).
    """
    # Local imports kept inside the function, matching the original layout.
    # (Unused tfidf_extractor / nltk / gensim imports removed.)
    from feature_extractor import bow_extractor, bow_extractor_maxdf
    from sklearn.feature_selection import mutual_info_classif
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.model_selection import cross_val_predict

    def _acquire(pos_key, neg_key, class_key, title, instance_label):
        # Corpus acquisition: load + analyze both polarity chunk sets,
        # merge them under *class_key*, and build the Y (0/1) label column.
        print(title)
        for key in (pos_key, neg_key):
            dic.chunks_paths = []
            dic.loadchunkXML(key)
            dic.analyzeChunk(key, No_ch)
        print('Numero de chunks en types ', len(dic.types[pos_key]))
        print('Numero de chunks en types ', len(dic.types[neg_key]))
        dic.initialize_class_types(class_key)
        dic.appendPost(pos_key, class_key)
        dic.appendPost(neg_key, class_key)
        print(instance_label, len(dic.types[class_key]['rows']))
        dic.types[class_key]['cols'] = dic.fillOnesZeros(class_key)
        print('Matriz Y', len(dic.types[class_key]['cols']))
        dic.types[class_key]['names'] = ['Negative', 'Positive']

    def _evaluate(class_key):
        # Normalize, vectorize, cross-validate NB, then rank features by
        # mutual information with the labels.
        corpus = norm.normalize_corpus(dic.types[class_key]['rows'])
        labels = dic.types[class_key]['cols']
        bow_vectorizer, bow_train_features = bow_extractor(corpus)
        feature_names = bow_vectorizer.get_feature_names()
        print('Numero de caracteristicas tomadas en cuenta', len(feature_names))
        nb = MultinomialNB()
        y_predicted = cross_val_predict(nb, bow_train_features, labels, cv=10)
        evaluator.get_metrics(labels, y_predicted)
        bow_vectorizer, bow_train_features = bow_extractor_maxdf(corpus)
        # BUG FIX: refresh the vocabulary after re-vectorizing with max_df
        # filtering — the previous feature_names no longer align with the
        # new feature matrix columns, so zip() would mis-map (and silently
        # truncate) the mutual-information scores.
        feature_names = bow_vectorizer.get_feature_names()
        res = dict(
            zip(
                feature_names,
                mutual_info_classif(bow_train_features,
                                    labels,
                                    discrete_features=True)))
        for feat, score in res.items():
            print(feat, str(score), '\n')

    _acquire('dpp', 'dpn', 'dp',
             "Adquisición de corpus de depresion",
             'Numero de instancias en depresion')
    _evaluate('dp')
    _acquire('axp', 'axn', 'ax',
             "Adquisición de corpus de anorexia",
             'Numero de instancias en anorexia')
    _evaluate('ax')
Exemplo n.º 4
0
    chunk.newUser(uid, posts)

# Acquisition of the negative anorexia chunk ('axn'); the positive side
# ('axp') is loaded before this excerpt (its loop was cut off above).
# NOTE(review): No_ch must already be defined earlier in the full script.
dic.chunks_paths = []
dic.loadchunkXML('axn')
dic.analyzeChunk('axn', No_ch)
# Register every user of the selected chunk with `chunk` (No_ch appears to be
# 1-based here, hence the -1 index — TODO confirm against the caller).
for v in dic.chunks_paths[No_ch - 1]:
    (uid, posts) = dic.PostForUser(v)
    chunk.newUser(uid, posts)

print('Numero de chunks en types ', len(dic.types['axp']))
print('Numero de chunks en types ', len(dic.types['axn']))

IDLVdic = {}  # word -> IDLV score for the top-ranked vocabulary terms
dic.initialize_class_types('ax')

# Merge positive and negative posts into the combined 'ax' class.
dic.appendPost('axp', 'ax')
dic.appendPost('axn', 'ax')
chunk.loadVocabulary()
chunk.calcIDLV()
print('Numero de instancias en depresion: ', len(dic.types['ax']['rows']))
print('Vocabulario en el chunk: ', len(chunk.vocablos.keys()))

# Build the 0/1 label column (Y) for the 'ax' class.
dic.types['ax']['cols'] = dic.fillOnesZeros('ax')
print(len(dic.types['ax']['cols']))

# Rank the vocabulary by IDLV score, highest first. Tuples are unique
# (one per vocabulary key), so sort(reverse=True) is equivalent to the
# original sort-then-reverse.
tfs = [(v.IDLV(0), k) for k, v in chunk.vocablos.items()]
tfs.sort(reverse=True)
# Keep the top 1000 terms. FIX: iterate a slice instead of indexing
# range(0, 1000), which raised IndexError whenever the vocabulary had
# fewer than 1000 entries; behavior is identical otherwise.
for score, word in tfs[:1000]:
    IDLVdic[word] = score

# NOTE(review): .keys() is a live view over IDLVdic, not a frozen list —
# presumably intentional; confirm `norm` only iterates it.
norm.thousand_words = IDLVdic.keys()