def spoj(page, link, uniqueId):
    """Scrape a SPOJ problem page into a record and pass it to ``util.loadData``.

    Example page: http://www.spoj.com/problems/TTABLE/

    The text of ``div#problem-body`` is split into lines. A line that is
    exactly ``Input``, ``Output`` or ``Example`` opens a section of that name;
    everything before the first such line is stored as ``Description``.
    Time and memory limits are read from the rows of ``table#problem-meta``.

    Args:
        page: Parsed page exposing ``find`` (presumably a BeautifulSoup
            document -- confirm against the caller).
        link: URL of the problem, stored under ``"URL"``.
        uniqueId: Key under which the record is handed to ``util.loadData``.

    Raises:
        AttributeError: If one of the required lookups (``div.prob``, the
            title, the body, the meta table) yields ``None``.
    """
    section_headers = ("Input", "Output", "Example")
    record = {}

    # Container of the problem description.
    problem = page.find("div", {"class": "prob"})
    record["Title"] = problem.find("h2", {"id": "problem-name"}).text

    # Problem tags (div#problem-tags) are not part of the stored record, so
    # they are not looked up; a page without that div must not abort the
    # scrape.

    # Contents: collect lines per section and join once, so every stored
    # section has no trailing newline.
    body = problem.find("div", {"id": "problem-body"})
    section = "Description"
    lines = []
    for line in body.text.split("\n"):
        if line in section_headers:
            record[section] = "\n".join(lines)
            section = line
            lines = []
        else:
            lines.append(line)
    record[section] = "\n".join(lines)

    # Time and memory limit.
    info = page.find("div", {"class": "col-lg-4 col-md-4"})
    table = info.find("table", {"id": "problem-meta"})
    for row in table.findAll("tr"):
        row_text = row.text
        if "Time" in row_text:
            key = "Time Limit"
        elif "Memory" in row_text:
            key = "Memory Limit"
        else:
            continue
        # A row may mention "Time"/"Memory" without being a "... limit:" row
        # (e.g. inside a user name); skip it instead of failing on parts[1].
        parts = row_text.split("limit:")
        if len(parts) > 1:
            record[key] = parts[1]

    record["Problem"] = util.getText(problem) + "\n" + util.getText(table)
    record["URL"] = link

    util.loadData({uniqueId: record})
def leetcode(page, link, uniqueId):
    """Scrape a LeetCode problem page into a record and pass it to ``util.loadData``.

    Example page:
    https://leetcode.com/problems/minimum-time-difference/description/

    The text of the question description is split into lines and grouped
    into sections: ``Description`` (everything before the first marker),
    ``Note`` (opened by a line containing ``Note:``), ``Example`` (opened by
    a line containing ``Example``; further such lines inside the section are
    dropped) and ``Follow up`` (opened by a line containing ``Follow up:``).
    Every section is stored without a trailing newline.

    Args:
        page: Parsed page exposing ``find`` and ``title`` (presumably a
            BeautifulSoup document -- confirm against the caller).
        link: URL of the problem, stored under ``"URL"``.
        uniqueId: Key under which the record is handed to ``util.loadData``.

    Raises:
        AttributeError: If the description container, the page title or the
            ``question-title clearfix`` div is ``None``.
    """
    record = {}

    body = page.find("div", {"class": "question-description"})
    if body is None:
        # Alternative class name tried when the first lookup finds nothing.
        body = page.find("div", {"class": "question-description__3U1T"})

    record["Title"] = page.title.text.replace(" - LeetCode", "")

    section = "Description"
    lines = []
    for line in body.text.split("\n"):
        if "Note:" in line:
            record[section] = "\n".join(lines)
            section = "Note"
            # Text on the same line as the marker becomes the first line of
            # the note. Keeping it as its own list entry stops the next line
            # from being glued onto it.
            if len(line.replace("Note:", "")) > 1:
                lines = [line.replace("Note: ", "")]
            else:
                lines = []
        elif "Example" in line:
            if section == "Example":
                # Headers such as "Example 2:" inside the section are dropped.
                continue
            record[section] = "\n".join(lines)
            section = "Example"
            lines = []
        elif "Follow up:" in line:
            record[section] = "\n".join(lines)
            section = "Follow up"
            lines = []
        else:
            lines.append(line)
    # The last section is flushed the same way as the others, so it no longer
    # carries a trailing newline that the earlier sections lack.
    record[section] = "\n".join(lines)

    heading = page.find("div", {"class": "question-title clearfix"})
    record["Problem"] = heading.text + "\n" + body.text
    record["URL"] = link

    util.loadData({uniqueId: record})
def a2oj(page, link, uniqueId):
    """Scrape an A2 Online Judge problem page and pass the record to ``util.loadData``.

    Example page: https://a2oj.com/p?ID=134

    The ``div`` elements inside ``div#page`` are searched with
    ``util.findIndex`` for the section headings, and ``util.getInfo`` is
    called with consecutive heading positions to obtain each section.
    The time limit comes from the first table row of the last ``div`` whose
    text contains ``Time Limit:``.

    Args:
        page: Parsed page exposing ``find`` and ``title`` (presumably a
            BeautifulSoup document -- confirm against the caller).
        link: URL of the problem, stored under ``"URL"``.
        uniqueId: Key under which the record is handed to ``util.loadData``.
    """
    container = page.find("div", {"id": "page"})
    problem_name = page.title.text.replace(" - A2 Online Judge", "")
    blocks = container.findAll("div")

    # Locate every section heading, in the order the sections are expected.
    headings = (
        "Problem Statement:",
        "Input Format:",
        "Output Format:",
        "Sample Input:",
        "Sample Output:",
        "Notes:",
    )
    (
        statement_at,
        input_at,
        output_at,
        sample_in_at,
        sample_out_at,
        notes_at,
    ) = [util.findIndex(blocks, heading) for heading in headings]

    description = util.getInfo(blocks, statement_at, input_at)
    input_text = util.getInfo(blocks, input_at, output_at)
    output_text = util.getInfo(blocks, output_at, sample_in_at)
    sample_in = util.getInfo(blocks, sample_in_at, sample_out_at)
    # The last two sections have no following heading to bound them, so a
    # fixed window of two positions is requested instead.
    sample_out = util.getInfo(blocks, sample_out_at, sample_out_at + 2)
    samples = sample_in + sample_out

    if notes_at is None:
        notes = ""
    else:
        notes = util.getInfo(blocks, notes_at, notes_at + 2)

    time_limit = ""
    for row in blocks[-1].findAll("tr"):
        row_text = row.text
        if "Time Limit:" in row_text:
            time_limit = row_text.replace("Time Limit:", "").replace("\n", "")
            break

    record = {
        "URL": link,
        "Title": problem_name,
        "Description": description,
        "Input Description": input_text,
        "Output Description": output_text,
        "Example": samples,
        "Notes": notes,
        "Time Limit": time_limit,
        "Problem": util.getText(container),
    }
    util.loadData({uniqueId: record})
def codeforces(page, link, uniqueId):
    """Scrape a Codeforces problem page and pass the record to ``util.loadData``.

    Example page: http://codeforces.com/problemset/problem/27/C

    This is best-effort: if anything goes wrong while reading the page or
    storing the record, a warning with the traceback is logged and the
    problem is skipped. ``KeyboardInterrupt`` and ``SystemExit`` are not
    intercepted.

    Args:
        page: Parsed page exposing ``find`` (presumably a BeautifulSoup
            document -- confirm against the caller).
        link: URL of the problem, stored under ``"URL"``.
        uniqueId: Key under which the record is handed to ``util.loadData``.

    Returns:
        None, whether or not the problem could be stored.
    """
    import logging

    try:
        # Container of the problem description.
        statement = page.find("div", {"class": "problem-statement"})

        problem_name = statement.find("div", {"class": "title"}).text

        time_div = statement.find("div", {"class": "time-limit"})
        problem_time = time_div.text.replace("time limit per test", "")

        memory_div = statement.find("div", {"class": "memory-limit"})
        problem_memory = memory_div.text.replace("memory limit per test", "")

        # NOTE(review): the description is taken to be the 11th <div> of the
        # statement; this depends on the exact page markup -- confirm it
        # still holds.
        description = statement.findAll("div")[10].text

        # The fixed-length slices presumably drop the leading "Input" /
        # "Output" heading text of each specification -- TODO confirm.
        input_spec = statement.find("div", {"class": "input-specification"})
        problem_input = "" if input_spec is None else input_spec.text[5:]

        output_spec = statement.find("div", {"class": "output-specification"})
        problem_output = "" if output_spec is None else output_spec.text[6:]

        samples = ""
        sample_test = statement.find("div", {"class": "sample-test"})
        if sample_test is not None:
            sample_inputs = sample_test.findAll("div", {"class": "input"})
            sample_outputs = sample_test.findAll("div", {"class": "output"})
            # Pair each input with its output; an unmatched trailing block
            # is ignored rather than discarding the whole problem.
            samples = "".join(
                util.treatStr(sample_in) + "\n"
                + util.treatStr(sample_out) + "\n\n"
                for sample_in, sample_out in zip(sample_inputs, sample_outputs)
            )

        note = statement.find("div", {"class": "note"})
        problem_note = "" if note is None else note.text

        record = {
            "URL": link,
            "Title": problem_name,
            "Description": description,
            "Input Description": problem_input,
            "Output Description": problem_output,
            "Example": samples,
            "Time Limit": problem_time,
            "Memory Limit": problem_memory,
            "Note": problem_note,
            "Problem": util.getText(statement),
        }
        util.loadData({uniqueId: record})
    except Exception:
        # Keep going with the next problem, but leave a trace of the failure.
        logging.getLogger(__name__).warning(
            "codeforces: could not scrape %s", link, exc_info=True
        )
def timus(page, link, uniqueId):
    """Scrape a Timus Online Judge problem page and pass the record to ``util.loadData``.

    Example page: http://acm.timus.ru/problem.aspx?space=1&num=1328

    The texts of the ``div``/``h3`` elements of ``div#problem_text`` (all but
    the last one) are searched with ``util.findTextIndex`` for the
    ``Input``, ``Output``, ``Sample``/``Samples`` and ``Notes`` headings, and
    ``util.getTextInfo`` is called with the positions found to obtain each
    section. Sections whose heading is missing are stored as ``""``.

    Args:
        page: Parsed page exposing ``find`` and ``title`` (presumably a
            BeautifulSoup document -- confirm against the caller).
        link: URL of the problem, stored under ``"URL"``.
        uniqueId: Key under which the record is handed to ``util.loadData``.
    """
    problem = page.find("div", {"class": "problem_content"})
    problem_name = page.title.text.replace(" @ Timus Online Judge", "")

    # First line of the limits block is the time limit, second the memory
    # limit; a block with a single line leaves the memory limit empty.
    limits_div = problem.find("div", {"class": "problem_limits"})
    limit_lines = util.treatStr(limits_div).split("\n")
    problem_time = limit_lines[0].replace("Time limit: ", "")
    problem_memory = ""
    if len(limit_lines) > 1:
        problem_memory = limit_lines[1].replace("Memory limit: ", "")

    body = page.find("div", {"id": "problem_text"})
    texts = [node.text for node in body.findAll(["div", "h3"])[:-1]]

    input_at = util.findTextIndex(texts, "Input")
    output_at = util.findTextIndex(texts, "Output")
    sample_at = util.findTextIndex(texts, "Sample")
    end = len(texts)
    if sample_at is None:
        sample_at = util.findTextIndex(texts, "Samples")
    notes_at = util.findTextIndex(texts, "Notes")

    input_text = ""
    output_text = ""
    notes = ""

    if input_at is None:
        description = util.getTextInfo(texts, 0, sample_at)
    else:
        description = util.getTextInfo(texts, 0, input_at)
        input_text = util.getTextInfo(texts, input_at + 1, output_at)

        # The output section runs up to the next heading that exists:
        # samples, then notes, then the end of the text. Without an Output
        # heading there is nothing to slice.
        if output_at is not None:
            if sample_at is not None:
                output_end = sample_at
            elif notes_at is not None:
                output_end = notes_at
            else:
                output_end = end
            output_text = util.getTextInfo(texts, output_at + 1, output_end)

    if notes_at is not None:
        # Notes are read from just after the sample heading; when the page
        # has no sample heading, start just after the Notes heading instead
        # of failing on ``None + 1``.
        notes_start = (sample_at if sample_at is not None else notes_at) + 1
        notes = util.getTextInfo(texts, notes_start, end)

    samples = ""
    sample_table = body.find("table", {"class": "sample"})
    if sample_table is not None:
        headers = sample_table.findAll("th")
        cells = sample_table.findAll("pre")
        # First header followed by the even-positioned <pre> cells, second
        # header followed by the odd-positioned ones.
        samples = (
            headers[0].text + "\n" + util.getEvenText(cells)
            + headers[1].text + "\n" + util.getOddText(cells)
        )

    record = {
        "URL": link,
        "Title": problem_name,
        "Description": description,
        "Input Description": input_text,
        "Output Description": output_text,
        "Example": samples,
        "Notes": notes,
        "Time Limit": problem_time,
        "Memory Limit": problem_memory,
        "Problem": util.getText(problem),
    }
    util.loadData({uniqueId: record})
def wcipeg(page, link, uniqueId):
    """Scrape a PEG Judge (wcipeg.com) problem page and pass the record to ``util.loadData``.

    Example page: https://wcipeg.com/problem/dt16l1p1

    The ``h3``/``p``/``pre``/``li``/``table`` elements of ``div#descContent``
    are walked in order. Each ``h3`` opens a section named after its text,
    except that ``Sample Input`` / ``Sample Output`` headings are folded into
    a single ``Example`` section. Text of all other elements is appended to
    the current section; text before the first heading is the
    ``Description``. Limits are read from the fourth ``<p>`` of
    ``div#descSidebar``.

    Args:
        page: Parsed page exposing ``find`` and ``title`` (presumably a
            BeautifulSoup document -- confirm against the caller).
        link: URL of the problem, stored under ``"URL"``.
        uniqueId: Key under which the record is handed to ``util.loadData``.
    """
    content = page.find("div", {"id": "descContent"})
    sidebar = page.find("div", {"id": "descSidebar"})
    record = {}

    problem_name = page.title.text.replace("PEG Judge - ", "")

    # Contents.
    nodes = content.findAll(["h3", "p", "pre", "li","table"])
    if not nodes:
        # None of the expected tags: fall back to every descendant but the
        # first.
        nodes = content.findAll()[1:]
    elif nodes[0].name in ("h3", "table"):
        # In some cases the first element is the name of the university or
        # something similar.
        nodes = nodes[1:]

    section = "Description"
    chunks = []  # pieces of the current section, joined with "\n" on flush
    seen_input = False
    seen_output = False

    for node in nodes:
        if node.name != "h3":
            chunks.append(node.text)
            continue

        heading = node.text

        # Stop at the second heading mentioning "Input" or "Output".
        # NOTE(review): "Sample Input" / "Sample Output" also contain these
        # words, so they count towards the limit as well -- confirm that
        # this is intended.
        if "Input" in heading:
            if seen_input:
                break
            seen_input = True
        if "Output" in heading:
            if seen_output:
                break
            seen_output = True

        if "Sample Input" in heading:
            if section == "Example":
                chunks.append(heading)
            else:
                record[section] = "\n".join(chunks)
                section = "Example"
                chunks = ["Sample Input "]
        elif "Sample Output" in heading:
            chunks.append(heading)
        else:
            record[section] = "\n".join(chunks)
            section = heading
            chunks = []

    record[section] = "\n".join(chunks)

    limits_text = sidebar.findAll("p")[3].text
    problem_time = util.findBetween(limits_text, "Time Limit: ", "\n")
    problem_memory = util.findBetween(limits_text, "Memory Limit: ", "\n")

    record["Title"] = problem_name
    record["Time Limit"] = problem_time
    record["Memory Limit"] = problem_memory
    record["Problem"] = util.getText(content) + "\n" + util.getText(sidebar)
    record["URL"] = link

    util.loadData({uniqueId: record})