def getResourceUrl(source):
    '''Return the resource url found in a resource workaround page source.

    Lookup order, first hit wins:
      1. ``href`` of the first ``.resourceworkaround>a`` element;
      2. ``data`` of the first ``object`` element;
      3. ``src`` of the second ``frame`` element.

    source -- page source, handed as-is to scraptools.getElementsFromHTML.

    When no anchor and no ``object`` is found, the second ``frame`` is
    indexed unconditionally, so a page with fewer than two frames makes
    that lookup fail.
    '''
    links = scraptools.getElementsFromHTML(source, '.resourceworkaround>a')
    if links:
        return links[0].get('href')

    # No direct link: the resource is probably embedded in the page
    embedded = scraptools.getElementsFromHTML(source, 'object')
    if embedded:
        return embedded[0].get('data')

    # Some other type of container.
    # NOTE(review): index 1 selects the second frame; presumably the first
    # frame is not the resource itself -- confirm against a real page.
    frames = scraptools.getElementsFromHTML(source, 'frame')
    return frames[1].get('src')
def getResourceUrl(source):
    """Return the resource url found in a resource workaround page source.

    Lookup order, first hit wins:
      1. ``href`` of the first ``.resourceworkaround>a`` element;
      2. ``data`` of the first ``object`` element;
      3. ``src`` of the second ``frame`` element.

    source -- page source, handed as-is to scraptools.getElementsFromHTML.

    When no anchor and no ``object`` is found, the second ``frame`` is
    indexed unconditionally, so a page with fewer than two frames makes
    that lookup fail.
    """
    anchors = scraptools.getElementsFromHTML(source, ".resourceworkaround>a")
    if anchors:
        return anchors[0].get("href")

    # No direct link: the resource is probably embedded in the page
    objects = scraptools.getElementsFromHTML(source, "object")
    if objects:
        return objects[0].get("data")

    # Some other type of container.
    # NOTE(review): index 1 selects the second frame; presumably the first
    # frame is not the resource itself -- confirm against a real page.
    frames = scraptools.getElementsFromHTML(source, "frame")
    return frames[1].get("src")
def extractResources(self):
    '''Appends a Resource for every link found on this folder's page.

    Fetches ``self.url`` through the ``connection`` object in scope, selects
    every ``#region-main a`` element, reports how many were found on stdout
    and appends ``Resource(href, text)`` for each one to ``self.resources``.
    ``self.resources`` is not reset here, so it must already exist.
    '''
    pageSource = connection.getUrlData(self.url)
    resourceElems = scraptools.getElementsFromHTML(pageSource, '#region-main a')
    # A single pre-formatted argument gives the same output under the
    # Python 2 print statement and the Python 3 print function; the former
    # multi-item print statement was a SyntaxError on Python 3.
    print('%d files found in folder %s' % (len(resourceElems), self.folderName))
    for link in resourceElems:
        self.resources.append(Resource(link.get('href'), link.text))
def extractResources(self):
    '''Extracts the resources from a course page.

    Fetches ``self.pageUrl`` through the ``connection`` object in scope and
    rebuilds ``self.resources`` from scratch:
      * one ``Resource(url, instanceName)`` per ``.resource`` element;
      * one ``ResourceFolder(connection, url, instanceName)`` per
        ``.folder`` element.
    The (url, instanceName) pair of each element comes from
    ``self.getUrlAndInstanceName``. Both counts are reported on stdout.
    '''
    pageSource = connection.getUrlData(self.pageUrl)

    resourceElems = scraptools.getElementsFromHTML(pageSource, '.resource')
    # A single pre-formatted argument gives the same output under the
    # Python 2 print statement and the Python 3 print function; the former
    # multi-item print statement was a SyntaxError on Python 3.
    print('%d Direct resources found' % len(resourceElems))
    self.resources = []
    for element in resourceElems:
        url, instanceName = self.getUrlAndInstanceName(element)
        self.resources.append(Resource(url, instanceName))

    # Look for folders
    folderElems = scraptools.getElementsFromHTML(pageSource, '.folder')
    print('%d Folders found' % len(folderElems))
    for folder in folderElems:
        url, instanceName = self.getUrlAndInstanceName(folder)
        self.resources.append(ResourceFolder(connection, url, instanceName))
def __init__(self, moodleConnection):
    '''Builds ``self.coursePages`` from the connection's main page.

    Every ``.course_title a`` element of ``moodleConnection.main_page``
    whose text starts with a course sigle yields one
    ``MoodleCoursePage(moodleConnection, href, sigle)``; the others are
    ignored.
    '''
    # Find course boxes
    courseLinks = scraptools.getElementsFromHTML(moodleConnection.main_page,
                                                 '.course_title a')

    # Sigle: 1-4 capitals, optional dash, optional group of 3 capitals,
    # then 3-4 digits and an optional trailing capital.
    lettersPart = '[A-Z]{1,4}-?([A-Z]{3})?'
    digitsPart = '[0-9]{3,4}[A-Z]?'
    siglePattern = re.compile('(' + lettersPart + digitsPart + ')')

    self.coursePages = []
    for link in courseLinks:
        found = siglePattern.match(link.text)
        if found is None:
            # Not a course box with a valid sigle
            continue
        sigle = found.group(1)
        pageUrl = link.get('href')
        self.coursePages.append(
            MoodleCoursePage(moodleConnection, pageUrl, sigle))
def __init__(self, moodleConnection):
    """Builds ``self.coursePages`` from the connection's main page.

    Candidates are the ``.course_title a`` elements of
    ``moodleConnection.main_page``. A candidate is kept when its text
    starts with a course sigle AND either
      * no extra command-line argument was given (automatic discovery), or
      * ``isInArgv(courseDescription)`` is true (manual selection).
    Each kept candidate yields
    ``MoodleCoursePage(moodleConnection, href, sigle)``.
    """
    pageSource = moodleConnection.main_page
    # Find course boxes
    elems = scraptools.getElementsFromHTML(pageSource, ".course_title a")

    # Sigle: 1-4 capitals, optional dash, optional group of 3 capitals,
    # then 3-4 digits and an optional trailing capital.
    genieRe = "[A-Z]{1,4}-?([A-Z]{3})?"
    numRe = "[0-9]{3,4}[A-Z]?"
    sigleRe = re.compile("(" + genieRe + numRe + ")")

    # Only the script name in argv means automatic course discovery.
    autoDiscovery = len(sys.argv) <= 1

    self.coursePages = []
    for e in elems:
        courseDescription = e.text
        match = sigleRe.match(courseDescription)
        if match is None:
            # No sigle can be extracted. The former condition
            # `A and match or isInArgv(...)` let an argv-selected title
            # through without a match, and match.group(1) then raised
            # AttributeError.
            continue
        # Automatic course discovery or Check sigles in args (manual)
        if autoDiscovery or isInArgv(courseDescription):
            sigle = match.group(1)
            pageUrl = e.get("href")
            self.coursePages.append(
                MoodleCoursePage(moodleConnection, pageUrl, sigle)
            )
def __init__(self, moodleConnection):
    '''Builds ``self.coursePages`` from the connection's main page.

    Candidates are the ``.course_title a`` elements of
    ``moodleConnection.main_page``. A candidate is kept when its text
    starts with a course sigle AND either
      * no extra command-line argument was given (automatic discovery), or
      * ``isInArgv(courseDescription)`` is true (manual selection).
    Each kept candidate yields
    ``MoodleCoursePage(moodleConnection, href, sigle)``.
    '''
    pageSource = moodleConnection.main_page
    # Find course boxes
    elems = scraptools.getElementsFromHTML(pageSource, '.course_title a')

    # Sigle: 1-4 capitals, optional dash, optional group of 3 capitals,
    # then 3-4 digits and an optional trailing capital.
    genieRe = '[A-Z]{1,4}-?([A-Z]{3})?'
    numRe = '[0-9]{3,4}[A-Z]?'
    sigleRe = re.compile('(' + genieRe + numRe + ')')

    # Only the script name in argv means automatic course discovery.
    autoDiscovery = len(sys.argv) <= 1

    self.coursePages = []
    for e in elems:
        courseDescription = e.text
        match = sigleRe.match(courseDescription)
        if match is None:
            # No sigle can be extracted. The former condition
            # `A and match or isInArgv(...)` let an argv-selected title
            # through without a match, and match.group(1) then raised
            # AttributeError.
            continue
        # Automatic course discovery or Check sigles in args (manual)
        if autoDiscovery or isInArgv(courseDescription):
            sigle = match.group(1)
            pageUrl = e.get('href')
            self.coursePages.append(
                MoodleCoursePage(moodleConnection, pageUrl, sigle))