Exemplo n.º 1
0
 def __init__(self):
     """Build the preprocessing, concept-extraction and clustering helpers.

     Every helper is constructed from the same hard-coded YouTube playlist
     URL. The preprocessor's ``_getResult()`` output is kept as ``bowSet``
     and element 1 of ``_get_videoID_titles()`` is kept as ``titles``.
     """
     playlist = 'https://www.youtube.com/playlist?list=PL8dPuuaLjXtN0ge7yDk_UA0ldZJdhwkoV'
     self.url = playlist

     # Preprocessor first: its result is needed right away as the BoW set.
     preprocessor = CE.Preprocessor(playlist)
     self.Pre = preprocessor
     self.bowSet = preprocessor._getResult()

     self.Con = CE.ConceptExtraction(playlist)
     self.Clu = CL.HClustering(playlist)

     # Only the second element of the returned sequence is stored.
     ids_and_titles = preprocessor._get_videoID_titles()
     self.titles = ids_and_titles[1]
Exemplo n.º 2
0
 def __init__(self, playlistURL):
     """Prepare the preprocessing and concept-extraction helpers for a playlist.

     Args:
         playlistURL: URL handed unchanged to ``CE.Preprocessor`` and
             ``CE.ConceptExtraction``.
     """
     self.playlist_url = playlistURL

     # module1: preprocessing
     preprocessor = CE.Preprocessor(playlistURL)
     self.Pre = preprocessor
     self.video_titles = preprocessor._get_videotitles()

     # module2: concept extraction
     extractor = CE.ConceptExtraction(playlistURL)
     self.Con = extractor

     bag_of_words = preprocessor._getResult()
     self.bowSet = bag_of_words
     self.dict_set = extractor._createDictSet(bag_of_words)
Exemplo n.º 3
0
def main():
    """Extract concepts for each playlist video and write one graph file per video.

    For every URL returned by the preprocessor, the concepts extracted for
    that video are passed to ``DefineDistance.getConceptRelation``; the
    result is serialised with ``MakeGraph.py2json`` and written to
    ``<value of the URL's v= parameter>.json`` inside ``output_dir``.

    Raises:
        IndexError: if a URL has no ``v=`` parameter, or if ``result`` has
            fewer entries than ``origins``.
    """
    playlist_url = 'https://www.youtube.com/playlist?list=PL8dPuuaLjXtN0ge7yDk_UA0ldZJdhwkoV'
    submitCode = "1503584363%7C27ffbb267ba8d3522f0aee70b23c388d"
    output_dir = "./Web/conceptproto/play/static/play/data/"
    defineConcept = DD.DefineDistance(submitCode)
    makeGraph = MG.MakeGraph()

    ## Concept Extraction
    C = CE.ConceptExtraction(playlist_url)
    max_concept, max_weight = 5, 0.07
    result = C._get_onlyConcepts(max_concept, max_weight)
    origins = C.Pre._get_allURLs()

    ## Concept Mapping (concept to its Wikipedia page)
    # e.g. 'inertia'(input) -> https://en.wikipedia.org/wiki/Inertia (output)
    # NOTE(review): wiki_url is not used below; the call is kept as-is in
    # case it has side effects -- confirm whether it can be dropped.
    Cmap = CM.Mapping()
    concept = 'inertia'
    wiki_url = Cmap._mapingConcept2Wiki(concept)

    ## Relation Extraction
    for index, origin in enumerate(origins):
        # File name is the value of the "v=" query parameter plus ".json".
        sourceName = origin.split("v=")[1].split("&")[0] + ".json"
        concepts = result[index]
        print(concepts)
        print(sourceName)
        conceptRelation, All_degree = defineConcept.getConceptRelation(
            concepts)
        print(conceptRelation)

        ## Start Graph
        graphSource = makeGraph.py2json(concepts, conceptRelation, All_degree)
        # Join directory and file name as separate components instead of
        # concatenating them before the call.
        sourceLoc = os.path.join(output_dir, sourceName)
        print(sourceLoc)
        # Explicit encoding so the output does not depend on the platform's
        # default locale.
        with open(sourceLoc, "w", encoding="utf-8") as f:
            f.write(graphSource)
Exemplo n.º 4
0
def testGraph():
    """Generate one graph file per playlist video using live concept relations.

    For every URL returned by the preprocessor, the concepts extracted for
    that video are passed to ``DefineDistance.getConceptRelation``; the
    result is serialised with ``MakeGraph.py2json`` and written to
    ``<value of the URL's v= parameter>.json`` inside ``output_dir``.

    Raises:
        IndexError: if a URL has no ``v=`` parameter, or if ``result`` has
            fewer entries than ``origins``.
    """
    playlistURL = 'https://www.youtube.com/playlist?list=PL8dPuuaLjXtN0ge7yDk_UA0ldZJdhwkoV'
    submitCode = "1503382656%7Ce5c72339e330f6814ae2fe97aa5c6301"
    output_dir = "./Web/conceptproto/play/static/play/data/"

    C = CE.ConceptExtraction(playlistURL)
    max_concept, max_weight = 5, 0.07
    makeGraph = MG.MakeGraph()
    result = C._get_onlyConcepts(max_concept, max_weight)
    origins = C.Pre._get_allURLs()

    # The submit code never changes, so build the relation helper once
    # rather than once per video.
    defineConcept = DD.DefineDistance(submitCode)

    for index, origin in enumerate(origins):
        # File name is the value of the "v=" query parameter plus ".json".
        sourceName = origin.split("v=")[1].split("&")[0] + ".json"
        concepts = result[index]
        print(concepts)
        print(sourceName)

        conceptRelation, All_degree = defineConcept.getConceptRelation(
            concepts)
        print(conceptRelation)

        # Start graph
        graphSource = makeGraph.py2json(concepts, conceptRelation, All_degree)
        # Join directory and file name as separate components instead of
        # concatenating them before the call.
        sourceLoc = os.path.join(output_dir, sourceName)
        print(sourceLoc)
        # Explicit encoding so the output does not depend on the platform's
        # default locale.
        with open(sourceLoc, "w", encoding="utf-8") as f:
            f.write(graphSource)
Exemplo n.º 5
0
    def _linkWord2Lec(self, max_concept, max_weight, bowSet):
        """Group the extracted concept words by the lectures they occur in.

        Args:
            max_concept: forwarded to ``ConceptExtraction._get_conceptWeight``.
            max_weight: forwarded to ``ConceptExtraction._get_conceptWeight``.
            bowSet: forwarded to ``ConceptExtraction._get_conceptWeight``.

        Returns:
            A dict mapping each word to a list of
            ``(value, (lecture_number, lecture_title))`` tuples, where
            lecture numbers start at 1 and follow the order of the
            per-lecture results.
        """
        playlistURL = "https://www.youtube.com/playlist?list=PL8dPuuaLjXtN0ge7yDk_UA0ldZJdhwkoV"
        Con = CE.ConceptExtraction(playlistURL)
        Pre = CE.Preprocessor(playlistURL)
        per_lecture = Con._get_conceptWeight(bowSet, max_concept, max_weight)
        titles = Pre._get_videoID_titles()[1]

        # 1-based lecture number -> title,
        # e.g. {1: 'Motion in a Straight Line', 2: 'Derivatives', ...}
        title_by_number = dict(enumerate(titles, start=1))

        word_to_lectures = {}
        for number, word_values in enumerate(per_lecture, start=1):
            for word, value in word_values:
                # Title lookup stays inside the inner loop so a lecture
                # without any words never triggers it.
                entry = (value, (number, title_by_number[number]))
                word_to_lectures.setdefault(word, []).append(entry)
        return word_to_lectures
Exemplo n.º 6
0
print('\n2) ids>\n', ids)
# 3)docs
print('\n3) docs[0]>\n', docs[0])
# 4)bows
print('\n4) bows[0]>\n', bows[0])

#### 2.Concept Extraction ####
# Values computed in this section:
#     1) dictSet: term-document dictionary (the "Term Frequency" result
#        computed for each document)
#     2) tfidf: final result of running the TF-IDF algorithm
#     3) getConcept: the concept-extraction result
#         - conditions: weight of at least 0.07, at most 5 concepts per lecture

Con = CE.ConceptExtraction(playlist_url)
lecMaxConcept, lecMaxWeight = 5, 0.07
bowSet = Pre._getResult()

dictSet = Con._createDictSet(bowSet)
tfidf = Con._runTfIdf(bowSet)
getConcept = Con._get_conceptWeight(bowSet, lecMaxConcept, lecMaxWeight)

print('\n\n2. Concept Extraction 결과..')
# 1)dicSet
print('\n1) dicSet[0]>\n', dictSet[0])
# 2)tfidf
print('\n2) tfidf[0]>\n', tfidf[0])
# 3)getConcept
# Label fixed from "1)" to "3)" so it matches the numbering above.
print('\n3) getConcept[0]>\n', getConcept[0])