def __init__(self, url=None):
    """Build the preprocessing, concept-extraction and clustering helpers.

    Args:
        url: Playlist URL handed to every helper. When omitted (or None),
            the playlist that used to be hard-coded here is used, so
            existing ``__init__()`` callers behave exactly as before.
    """
    if url is None:
        url = 'https://www.youtube.com/playlist?list=PL8dPuuaLjXtN0ge7yDk_UA0ldZJdhwkoV'
    self.url = url

    self.Pre = CE.Preprocessor(self.url)
    self.bowSet = self.Pre._getResult()
    self.Con = CE.ConceptExtraction(self.url)
    self.Clu = CL.HClustering(self.url)
    # Only the second element of the returned pair is kept.
    # NOTE(review): presumably the pair is (video IDs, titles) - confirm
    # against Preprocessor._get_videoID_titles.
    self.titles = self.Pre._get_videoID_titles()[1]
def __init__(self, playlistURL):
    """Prepare the preprocessor and concept extractor for one playlist.

    Args:
        playlistURL: Playlist URL passed to both ``CE.Preprocessor`` and
            ``CE.ConceptExtraction``; also stored as ``self.playlist_url``.
    """
    self.playlist_url = playlistURL

    # Module 1: preprocessing.
    preprocessor = CE.Preprocessor(playlistURL)
    self.Pre = preprocessor
    self.video_titles = preprocessor._get_videotitles()

    # Module 2: concept extraction.
    extractor = CE.ConceptExtraction(playlistURL)
    self.Con = extractor

    # The dictionary set is derived from the preprocessor's result.
    bow_set = preprocessor._getResult()
    self.bowSet = bow_set
    self.dict_set = extractor._createDictSet(bow_set)
def main():
    """Extract concepts per video and write one graph JSON file per video.

    For every URL returned by ``C.Pre._get_allURLs()`` the concepts at the
    same index of ``C._get_onlyConcepts(...)`` are related to each other via
    ``DD.DefineDistance.getConceptRelation``, turned into a graph description
    by ``MG.MakeGraph.py2json`` and written to
    ``./Web/conceptproto/play/static/play/data/<video id>.json``.

    Raises:
        IndexError: If a URL has no ``v=`` parameter, or if there are fewer
            concept lists than URLs.
        OSError: If an output file cannot be opened for writing.
    """
    playlist_url = 'https://www.youtube.com/playlist?list=PL8dPuuaLjXtN0ge7yDk_UA0ldZJdhwkoV'
    # NOTE(review): hard-coded submit code; it looks like a session token -
    # confirm whether it should come from configuration instead.
    submitCode = "1503584363%7C27ffbb267ba8d3522f0aee70b23c388d"
    output_dir = "./Web/conceptproto/play/static/play/data"

    defineConcept = DD.DefineDistance(submitCode)
    makeGraph = MG.MakeGraph()

    ## Concept Extraction
    C = CE.ConceptExtraction(playlist_url)
    max_concept, max_weight = 5, 0.07
    result = C._get_onlyConcepts(max_concept, max_weight)
    origins = C.Pre._get_allURLs()

    ## Concept Mapping (concept to its Wikipedia page)
    # e.g. 'inertia'(input) -> https://en.wikipedia.org/wiki/Inertia (output)
    Cmap = CM.Mapping()
    concept = 'inertia'
    # The mapping result is not used below; the call is kept because it may
    # have side effects this block cannot see.
    wiki_url = Cmap._mapingConcept2Wiki(concept)

    ## Relation Extraction
    for index, origin in enumerate(origins):
        concepts = result[index]
        # File name is the value of the "v=" query parameter plus ".json".
        sourceName = origin.split("v=")[1].split("&")[0] + ".json"
        print(concepts)
        print(sourceName)

        conceptRelation, All_degree = defineConcept.getConceptRelation(concepts)
        print(conceptRelation)

        ## Start Graph
        graphSource = makeGraph.py2json(concepts, conceptRelation, All_degree)
        # Join directory and file name as separate components; the original
        # concatenated them first, which made os.path.join a no-op.
        sourceLoc = os.path.join(output_dir, sourceName)
        print(sourceLoc)
        # Explicit UTF-8 so the JSON bytes do not depend on the locale.
        with open(sourceLoc, "w", encoding="utf-8") as f:
            f.write(graphSource)
def testGraph():
    """Run the concept-to-graph export for every video of the playlist.

    For every URL returned by ``C.Pre._get_allURLs()`` the concepts at the
    same index of ``C._get_onlyConcepts(...)`` are related via
    ``DD.DefineDistance.getConceptRelation``, converted with
    ``MG.MakeGraph.py2json`` and written to
    ``./Web/conceptproto/play/static/play/data/<video id>.json``.

    Raises:
        IndexError: If a URL has no ``v=`` parameter, or if there are fewer
            concept lists than URLs.
        OSError: If an output file cannot be opened for writing.
    """
    playlistURL = 'https://www.youtube.com/playlist?list=PL8dPuuaLjXtN0ge7yDk_UA0ldZJdhwkoV'
    output_dir = "./Web/conceptproto/play/static/play/data"

    C = CE.ConceptExtraction(playlistURL)
    max_concept, max_weight = 5, 0.07
    makeGraph = MG.MakeGraph()
    result = C._get_onlyConcepts(max_concept, max_weight)
    origins = C.Pre._get_allURLs()

    # Built once instead of once per video: the submit code never changes
    # between iterations.
    # NOTE(review): hard-coded submit code; it looks like a session token -
    # confirm whether it should come from configuration instead.
    submitCode = "1503382656%7Ce5c72339e330f6814ae2fe97aa5c6301"
    defineConcept = DD.DefineDistance(submitCode)

    for index, origin in enumerate(origins):
        concepts = result[index]
        # File name is the value of the "v=" query parameter plus ".json".
        sourceName = origin.split("v=")[1].split("&")[0] + ".json"
        print(concepts)
        print(sourceName)

        conceptRelation, All_degree = defineConcept.getConceptRelation(concepts)
        print(conceptRelation)

        # Start graph
        graphSource = makeGraph.py2json(concepts, conceptRelation, All_degree)
        # Join directory and file name as separate components; the original
        # concatenated them first, which made os.path.join a no-op.
        sourceLoc = os.path.join(output_dir, sourceName)
        print(sourceLoc)
        # Explicit UTF-8 so the JSON bytes do not depend on the locale.
        with open(sourceLoc, "w", encoding="utf-8") as f:
            f.write(graphSource)
def _linkWord2Lec(self, max_concept, max_weight, bowSet):
    """Map every extracted concept word to the lectures it appears in.

    Args:
        max_concept: Forwarded to ``ConceptExtraction._get_conceptWeight``.
        max_weight: Forwarded to ``ConceptExtraction._get_conceptWeight``.
        bowSet: Forwarded to ``ConceptExtraction._get_conceptWeight``.

    Returns:
        dict: ``{word: [(weight, (lecture_number, lecture_title)), ...]}``
        with lecture numbers starting at 1, in lecture order.

    Raises:
        KeyError: If a lecture that has concepts has no matching title.
    """
    playlistURL = "https://www.youtube.com/playlist?list=PL8dPuuaLjXtN0ge7yDk_UA0ldZJdhwkoV"
    extractor = CE.ConceptExtraction(playlistURL)
    preprocessor = CE.Preprocessor(playlistURL)

    weighted_concepts = extractor._get_conceptWeight(bowSet, max_concept, max_weight)
    titles = preprocessor._get_videoID_titles()[1]

    # 1-based lecture number -> title,
    # e.g. {1: 'Motion in a Straight Line', 2: 'Derivatives', 3: 'Integrals', ...}
    title_by_number = dict(enumerate(titles, start=1))

    word_to_lectures = {}
    for lecture_no, concepts in enumerate(weighted_concepts, start=1):
        for word, weight in concepts:
            # The title lookup stays inside the inner loop so a lecture
            # without concepts never needs a title.
            entry = (weight, (lecture_no, title_by_number[lecture_no]))
            word_to_lectures.setdefault(word, []).append(entry)
    return word_to_lectures
# Fragment of a demo script: ids, docs, bows, Pre and playlist_url are
# defined earlier, outside this view.
print('\n2) ids>\n', ids)
# 3) docs
print('\n3) docs[0]>\n', docs[0])
# 4) bows
print('\n4) bows[0]>\n', bows[0])

#### 2. Concept Extraction ####
# Values shown in this section (English rendering of the note kept below):
#   1) dicSet: term-document dictionary (per-document "Term Frequency" result)
#   2) tfidf: final result of running the TF-IDF algorithm
#   3) getConcept: concept-extraction result, restricted to weight >= 0.07
#      and at most 5 concepts per lecture
''' parameters 1) dicSet: Term-Document Dictionary(각 문서의 "Term Frequency" 계산 결과) 2) tfidf: TF-IDF 알고리즘을 계산한 최종 결과 3) *getConcept: 개념추출 결과 - 조건: 가중치(weight) 0.07 이상, 강의 별 최대 컨셉 수 5개 '''
Con = CE.ConceptExtraction(playlist_url)
lecMaxConcept, lecMaxWeight = 5, 0.07
bowSet = Pre._getResult()
dictSet = Con._createDictSet(bowSet)
tfidf = Con._runTfIdf(bowSet)
getConcept = Con._get_conceptWeight(bowSet, lecMaxConcept, lecMaxWeight)

print('\n\n2. Concept Extraction 결과..')
# 1) dicSet
print('\n1) dicSet[0]>\n', dictSet[0])
# 2) tfidf
print('\n2) tfidf[0]>\n', tfidf[0])
# 3) getConcept (the label used to read "1)", a copy-paste slip)
print('\n3) getConcept[0]>\n', getConcept[0])