Example #1
0
def FuzzyWazzy_SimilarityOverAll(Country, gallery_id):
    """Fuzzy overlap between the Google Vision labels and the entities found in
    the gallery's comments, expressed as overlap(Labels, Comments) / Labels in %."""
    S, Data = Load_GalLery_Textual_Data(Country, gallery_id)
    labels, Data1 = Load_GoogleVision_Labels(Country, gallery_id)

    setA = list(set(x.lower() for x in labels))   # unique, lower-cased labels
    setB = get_entities(S)                        # entities extracted from the comments

    if len(setA) == 0 or len(setB) == 0:
        return 0.0

    # Count label/entity pairs whose fuzzy ratio reaches the 75 threshold.
    overlap = 0
    for l in setA:
        for w in setB:
            if fuzz.ratio(l, w) >= 75:
                overlap += 1

    Similarity = round(float(overlap) / len(setA) * 100., 2)
    return Similarity
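For reference, a minimal self-contained sketch of the fuzzy-overlap idea used above. It assumes fuzz.ratio comes from the fuzzywuzzy package; the project helpers Load_GalLery_Textual_Data, Load_GoogleVision_Labels and get_entities are not shown, so the sketch works on plain token lists, and fuzzy_overlap_percent is a hypothetical name.

from fuzzywuzzy import fuzz  # assumed source of fuzz.ratio

def fuzzy_overlap_percent(labels, entities, threshold=75):
    # Count label/entity pairs whose similarity ratio reaches the threshold,
    # then express the count as a percentage of the number of distinct labels.
    labels = list(set(l.lower() for l in labels))
    if not labels or not entities:
        return 0.0
    overlap = sum(1 for l in labels for w in entities if fuzz.ratio(l, w) >= threshold)
    return round(overlap / len(labels) * 100., 2)

# 'beach' matches 'beach' (ratio 100); 'sky' matches nothing -> prints 50.0
print(fuzzy_overlap_percent(['Beach', 'Sky'], ['beach', 'sea side', 'seaside']))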
Example #2
0
def Envy_Sentiments():
    Galeries_Matrix = np.array(galeries).reshape(len(Countries), 10)
    LuxuryList = [item for sublist in Luxury for item in sublist]

    PSentiments = []   # mean sentiment of luxury galleries with a non-negative score
    NSentiments = []   # mean sentiment of luxury galleries with a negative score

    PComments = []     # number of comments for the positive galleries
    NComments = []     # number of comments for the negative galleries

    i = 0
    for Country in Countries:
        print(str(i + 1) + ' : ' + Country)
        for j in range(10):
            if Galeries_Matrix[i, j] in LuxuryList:
                Comments, Data = Load_GalLery_Textual_Data(Country, Galeries_Matrix[i, j])
                S = round(mean([float(s) for s in Senti_List(Comments)]), 2)
                if S >= 0:
                    PSentiments.append(S)
                    PComments.append(len(Comments))
                else:
                    NSentiments.append(S)
                    NComments.append(len(Comments))
        i += 1
    return PSentiments, PComments, NSentiments, NComments
Example #3
0
def Luxury_vs_NonLuxury(Sentiment=False):
    Galeries_Matrix = np.array(galeries).reshape(len(Countries), 10)
    LuxuryList = [item for sublist in Luxury for item in sublist]

    NbComments = []
    Groups = []
    Sentiments = []

    i = 0
    for Country in Countries:
        #print(str(i+1) + ' : ' + Country)
        for j in range(10):
            Comments, Data = Load_GalLery_Textual_Data(Country,
                                                       Galeries_Matrix[i, j])
            NbComments.append(len(Comments))
            if Galeries_Matrix[i, j] in LuxuryList:
                Groups.append('Luxury')
            else:
                Groups.append('NonLuxury')
            if Sentiment:
                Sentiments.append(Senti_List(Comments))
        i += 1
    if Sentiment:
        return Groups, NbComments, Sentiments
    else:
        return Groups, NbComments
Example #4
0
def keyWords_Labels_Matching(Country, gallery_id):
    """Fuzzy overlap between the Google Vision labels and the keywords extracted
    from the gallery's comments. Returns three percentages, overlap/|Labels|,
    overlap/|Comments keywords| and overlap/|universe|, plus both token sets."""
    DocList, Data = Load_GalLery_Textual_Data(Country, gallery_id)
    S1, Data1 = Load_GoogleVision_Labels(Country, gallery_id)

    # Lemmatise the comments and join them into one string for keyword extraction.
    data_lemmatized = [w for doc in PrepareData(DocList) for w in doc]
    #print(data_lemmatized)
    fullStr = ' '.join(data_lemmatized)

    #labels = [Preprocessing(x['label']) for x in S1[0]]
    #labels.append(Preprocessing(S1[1]))
    labels = [w for label in PrepareData(S1) for w in label]

    setA = list(set(labels))                 # unique label tokens

    setB = keywords(fullStr).split('\n')     # newline-separated keywords
    setB = [w for docs in PrepareData(setB) for w in docs]

    # Count label/keyword pairs whose fuzzy ratio reaches the 75 threshold.
    overlap = 0
    for l in setA:
        for w in setB:
            if fuzz.ratio(l, w) >= 75:
                overlap += 1

    # Size of the universe: the union of label tokens and keyword tokens.
    universe = []
    uni = list(set(setA) | set(setB))
    for i in range(len(uni)):
        if uni[i] not in universe:
            universe.append(uni[i])
        for j in range(i + 1, len(uni)):
            if fuzz.ratio(uni[i], uni[j]) >= 75 and uni[j] not in universe:
                universe.append(uni[j])
    universe = len(universe)

    labels = round(float(overlap) / len(setA) * 100., 2)
    comments = round(float(overlap) / len(setB) * 100., 2)
    overall = round(float(overlap) / float(universe) * 100., 2)

    print('overlap(Labels,Comments)/Universe(Labels,Comments) = ', overall)

    return labels, comments, overall, setA, setB
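The keywords call above matches the interface of gensim's summarization.keywords (available in gensim before 4.0), which returns a newline-separated string. Independently of that dependency, the three percentages can be sketched on plain token lists, assuming only fuzzywuzzy; overlap_percentages is a hypothetical name.

from fuzzywuzzy import fuzz  # assumed source of fuzz.ratio

def overlap_percentages(label_tokens, keyword_tokens, threshold=75):
    def _pct(num, den):
        return round(num / den * 100., 2) if den else 0.0

    setA = list(set(label_tokens))
    setB = list(set(keyword_tokens))
    overlap = sum(1 for a in setA for b in setB if fuzz.ratio(a, b) >= threshold)
    universe = len(set(setA) | set(setB))
    return _pct(overlap, len(setA)), _pct(overlap, len(setB)), _pct(overlap, universe)

# 'mountain'~'mountains' and 'lake'=='lake' match -> prints (66.67, 66.67, 40.0)
print(overlap_percentages(['mountain', 'lake', 'sky'], ['mountains', 'lake', 'boat']))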
Example #5
0
def Number_of_Comments():
    """Number of comments per gallery, for every country and its 10 galleries."""
    Galeries_Matrix = np.array(galeries).reshape(len(Countries), 10)
    NbComments = []
    i = 0
    for Country in Countries:
        for j in range(10):
            Comments, Data = Load_GalLery_Textual_Data(Country, Galeries_Matrix[i, j])
            NbComments.append(len(Comments))
        i += 1
    return NbComments
Example #6
0
def Sentiments_Analysis():
    Galeries_Matrix = np.array(galeries).reshape(len(Countries), 10)

    Sentiments = []   # mean sentiment score per gallery
    NbComments = []   # number of comments per gallery
    i = 0
    for Country in Countries:
        print(str(i + 1) + ' : ' + Country)
        for j in range(10):
            Comments, Data = Load_GalLery_Textual_Data(Country, Galeries_Matrix[i, j])
            Sentiments.append(round(mean([float(s) for s in Senti_List(Comments)]), 2))
            NbComments.append(len(Comments))
        i += 1
    return Sentiments, NbComments
Example #7
0
def Luxury_vs_users():
    Galeries_Matrix = np.array(galeries).reshape(len(Countries), 10)

    NbComments = []
    i = 0
    for Country in Countries:
        print(str(i + 1) + ' : ' + Country)
        for j in range(10):
            Labels, jData = Load_Google_Labels(Country, Galeries_Matrix[i, j])
            # Note: a gallery is counted once for every one of its labels that
            # appears in Luxurykeys, so multi-label galleries can appear several times.
            for label in Labels:
                if label in Luxurykeys:
                    Comments, Data = Load_GalLery_Textual_Data(
                        Country, Galeries_Matrix[i, j])
                    NbComments.append(len(Comments))

        i += 1
    return NbComments
Example #8
0
def Statistique():
    """Per-country statistics: number of comments per gallery, plus per-comment
    counts of words, emojis, URLs, mentions and symbols (via split_count)."""
    Galeries_Matrix = np.array(galeries).reshape(len(Countries), 10)

    Countries_Comments = {}
    Comments_word_Nb = {}
    #Comments_char_Nb = {}
    Countries_emogi = {}
    Countries_URLS = {}
    Countries_Mentions = {}
    Countries_Symbols = {}

    i = 0
    for Country in Countries:
        NB_Comments = []
        NB_W_Comments = []
        NB_emogi = []
        NB_URLS = []
        NB_Mentions = []
        NB_Symbols = []

        print(str(i + 1) + ' : ' + Country)

        for j in range(10):
            Comments, Data = Load_GalLery_Textual_Data(Country, Galeries_Matrix[i, j])
            NB_Comments.append(len(Comments))
            for Comment in Comments:
                emoji_counter, words_counter, urls_counter, Mentions_counter, Symbols_counter = split_count(Comment)

                NB_W_Comments.append(words_counter)
                NB_emogi.append(emoji_counter)

                NB_URLS.append(urls_counter)
                NB_Mentions.append(Mentions_counter)
                NB_Symbols.append(Symbols_counter)

        Comments_word_Nb[Country] = NB_W_Comments
        Countries_Comments[Country] = NB_Comments
        Countries_emogi[Country] = NB_emogi

        Countries_URLS[Country] = NB_URLS
        Countries_Mentions[Country] = NB_Mentions
        Countries_Symbols[Country] = NB_Symbols

        i += 1
    return Countries_Comments, Comments_word_Nb, Countries_emogi, Countries_URLS, Countries_Mentions, Countries_Symbols
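Statistique returns one dict per statistic, keyed by country, each holding a list of raw counts. A small sketch of how such a per-country list might be summarised; the dict below is toy stand-in data, not real output.

from statistics import mean

# Toy stand-in for one of the dicts returned by Statistique().
Comments_word_Nb = {'Algeria': [12, 7, 30, 4], 'Tunisia': [9, 10, 15]}

# Average number of words per comment, per country.
avg_words = {country: round(mean(counts), 2) for country, counts in Comments_word_Nb.items()}
print(avg_words)   # {'Algeria': 13.25, 'Tunisia': 11.33}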
Example #9
0
def NaturePics_Vs_Comments():
    Galeries_Matrix = np.array(galeries).reshape(len(Countries), 10)
    NatureList = [item for sublist in Nature for item in sublist]

    NaturePics = {}
    NoNaturePics = {}

    i = 0
    for Country in Countries:
        print(str(i + 1) + ' : ' + Country)
        for j in range(10):
            Comments, Data = Load_GalLery_Textual_Data(Country,
                                                       Galeries_Matrix[i, j])
            if Galeries_Matrix[i, j] in NatureList:
                NaturePics[Galeries_Matrix[i, j]] = len(Comments)
            else:
                NoNaturePics[Galeries_Matrix[i, j]] = len(Comments)
        i += 1
    return NaturePics, NoNaturePics
Example #10
0
def LoadTextData(Country, gallery_id):
    S, Data = Load_GalLery_Textual_Data(Country, gallery_id)
    S1, Data1 = Load_GoogleVision_Labels(Country, gallery_id)

    # Preprocess the Google Vision labels.
    labels = [Preprocessing(x['label']) for x in S1[0]]
    labels.append(Preprocessing(S1[1]))

    # Gather all textual pieces of the gallery into a single document list.
    DocList = S[1]
    DocList.append(S[0])
    for s in S[2]:
        DocList.extend(s)

    data_lemmatized = PrepareData(DocList)
    lda_model, id2word, corpus = LDA(data_lemmatized, num_topics=20)  # alternatively: num_topics=len(labels)
    Topic_Words = Topics_Words(lda_model, num_words=len(labels))

    return Topic_Words, labels
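LDA and Topics_Words are project helpers that are not shown here; under the assumption that they wrap gensim, the topic-modelling step looks roughly like the sketch below (the token lists are toy stand-ins for PrepareData() output).

from gensim import corpora
from gensim.models import LdaModel

# Toy stand-in for PrepareData() output: one token list per document.
data_lemmatized = [['beach', 'sunset', 'sea'],
                   ['museum', 'painting', 'art'],
                   ['sea', 'boat', 'beach']]

id2word = corpora.Dictionary(data_lemmatized)
corpus = [id2word.doc2bow(doc) for doc in data_lemmatized]

lda_model = LdaModel(corpus=corpus, id2word=id2word, num_topics=2, random_state=0)

# Top words per topic, analogous to Topics_Words(lda_model, num_words=...).
for topic_id in range(lda_model.num_topics):
    print(topic_id, [word for word, prob in lda_model.show_topic(topic_id, topn=3)])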
Example #11
0
def OverAll_Text_Similarity_DataSet():
    # NOTE: the original snippet began mid-loop; the enclosing scaffold below is an
    # assumption modelled on the other examples, with the function name taken from
    # the commented-out call at the bottom of the file.
    Galeries_Matrix = np.array(galeries).reshape(len(Countries), 10)
    i = 0
    for Country in Countries:
        Similarities = {}
        Slabels = []
        Scomments = []
        Soverall = []
        for j in range(10):
            labels, comments, overall, setA, setB = keyWords_Labels_Matching(Country, Galeries_Matrix[i, j])

            Slabels.append(labels)
            Scomments.append(comments)
            Soverall.append(overall)

        Similarities['labels'] = Slabels
        Similarities['comments'] = Scomments
        Similarities['overall'] = Soverall

        # One JSON file per country, holding the three per-gallery similarity lists.
        with open('LDA Similarities/' + Country + '.json', 'w') as outfile:
            json.dump(Similarities, outfile)
        #break
        i += 1

def Histogramme(Country):
    with open('LDA Similarities/' + Country + '.json') as data_file:
        Data = json.load(data_file)
    #plt.hist(Data['overall'])

    # Bar chart of the per-gallery label similarity for the given country.
    x = np.arange(10)
    plt.bar(x, Data['labels'])
    plt.xticks(x + .2, x)

#OverAll_Text_Similarity_DataSet()
#Histogramme('Algeria')

#labels,comments,overall,setA,setB = keyWords_Labels_Matching('Algeria','x6TwpSQ')
    
S, Data = Load_GalLery_Textual_Data('Algeria', 'x6TwpSQ')
#S1 ,Data1  = Load_GoogleVision_Labels('Algeria','x6TwpSQ')