def create_puzzle(title, url, tag, is_meta=False):
    """Create a Puzzle row for a scraped puzzle unless one already exists.

    When no puzzle with this exact ``title`` and ``url`` is stored yet, the
    puzzle page is fetched and searched for a "Check answer" link. That link
    (resolved against ``url``) is stored as the puzzle's ``checkAnswerLink``;
    an empty string is stored when the page does not contain exactly one
    such link.

    Args:
        title: Puzzle title as scraped from the overview page.
        url: URL of the puzzle page.
        tag: Value used to look up the round ``Tag`` by its ``name``.
        is_meta: When true, the puzzle is additionally tagged 'metas'.

    Returns:
        None.
    """
    # Use exists() rather than get(): get() raises MultipleObjectsReturned
    # when duplicate rows match, which would abort the caller's whole run,
    # and the fetched object was never used anyway.
    if Puzzle.objects.filter(title=title, url=url).exists():
        return

    # Look for the Submit Answer link on the puzzle page.
    puzzle_page = puzzlelogin.fetch_with_single_login(url)
    doc = etree.HTML(puzzle_page)
    answer_links = doc.xpath("//div[@id='submit']/a[text()='Check answer']")
    if len(answer_links) == 1:
        check_answer_link = urlparse.urljoin(url, answer_links[0].get('href'))
    else:
        # Zero or several candidates: store no link rather than guess.
        check_answer_link = ''

    try:
        puzzle_object = Puzzle.objects.create(
            title=title, url=url, checkAnswerLink=check_answer_link)
        puzzle_object.tags.add(Tag.objects.get(name=tag))
        if is_meta:
            puzzle_object.tags.add(Tag.objects.get(name='metas'))
        print("Created puzzle (%s, %s, %s)" % (title, url, check_answer_link))
    except django.db.utils.IntegrityError:
        # puzzle already exists (race)
        pass
def handle(self, *args, **kwargs):
    """Scrape the molasses.holiday puzzle overview and register its puzzles.

    Round header links are walked first, but nothing is created for them:
    the meta-puzzle creation is commented out. Afterwards every puzzle link
    on the page is handed to ``create_puzzle`` together with the tag of its
    round. A round that has no ``AutoTag`` entry yet gets a ``Tag`` and an
    ``AutoTag`` via ``get_or_create``; a newly created tag is also added to
    the 'unsolved rounds' and 'all rounds' tag lists.
    """
    overview_url = 'https://molasses.holiday/puzzle'
    print("Beginning puzzlescrape run at " + datetime.now().isoformat())
    page = puzzlelogin.fetch_with_single_login(overview_url)
    doc = etree.HTML(page)

    for round_link in doc.xpath("//div[@class='round-list-header']/a"):
        rnd_name = round_link.text
        if rnd_name is None:
            continue
        # title, url and tag would only be consumed by the disabled
        # create_puzzle call at the end of this loop body.
        title = rnd_name + ' Meta'
        url = 'https://molasses.holiday' + round_link.get('href')
        try:
            tag = AutoTag.objects.get(html_name=rnd_name).tag
        except AutoTag.DoesNotExist:
            continue
            # NOTE(review): the rest of this handler is unreachable because
            # of the `continue` above; it looks like round auto-creation was
            # switched off here on purpose -- confirm before removing.
            tag, created = Tag.objects.get_or_create(
                name=html_to_tag(rnd_name))
            auto_tag, _ = AutoTag.objects.get_or_create(
                html_name=rnd_name, tag=tag)
            if created:
                add_tag_to_taglist(tag, 'unsolved rounds')
                add_tag_to_taglist(tag, 'all rounds')
        # no round metas
        # create_puzzle(title=title, url=url, tag=tag, is_meta=True)

    for puzzle_link in doc.xpath("//div[@class='puzzle-list-item']/a"):
        title = puzzle_link.text
        url = 'https://molasses.holiday' + puzzle_link.get('href')
        # The round name is read from the first child of the first child of
        # the link's grandparent element.
        container = puzzle_link.getparent().getparent()
        rnd = container.getchildren()[0].getchildren()[0].text
        try:
            tag = AutoTag.objects.get(html_name=rnd).tag
        except AutoTag.DoesNotExist:
            # First puzzle seen for this round: register the round tag.
            tag, created = Tag.objects.get_or_create(name=html_to_tag(rnd))
            AutoTag.objects.get_or_create(html_name=rnd, tag=tag)
            if created:
                add_tag_to_taglist(tag, 'unsolved rounds')
                add_tag_to_taglist(tag, 'all rounds')
        create_puzzle(title=title, url=url, tag=tag)

    print("Finished puzzlescrape run")
def handle(self, *args, **kwargs):
    """Scrape the starrats.org puzzle overview and register its puzzles.

    Every ``//section/h2/a`` link is treated as a round: a ``Tag`` named
    after the link text is fetched or created, and a newly created tag is
    added to the 'unsolved rounds' and 'all rounds' tag lists. Each ``<a>``
    found in the element that follows the round's ``<h2>`` is then passed to
    ``create_puzzle`` with the round name as its tag.

    Links that cannot be turned into a round or puzzle (no direct text, no
    ``href``, no element after the header) are skipped instead of aborting
    the whole run.
    """
    overview_url = 'https://www.starrats.org/puzzles'
    print("Beginning puzzlescrape run at " + datetime.now().isoformat())
    text = puzzlelogin.fetch_with_single_login(overview_url)
    doc = etree.HTML(text)

    for rnd in doc.xpath("//section/h2/a"):
        rnd_tag = rnd.text
        if rnd_tag is None:
            # Header link without direct text: there is no round name to
            # tag with.
            continue
        tag_obj, created = Tag.objects.get_or_create(name=rnd_tag)
        if created:
            add_tag_to_taglist(tag_obj, 'unsolved rounds')
            add_tag_to_taglist(tag_obj, 'all rounds')

        # Metas are listed as ordinary puzzles, so no separate meta puzzle
        # is created for the round and every puzzle gets is_meta=False.
        puzzle_container = rnd.getparent().getnext()
        if puzzle_container is None:
            # The <h2> has no following sibling, so the round lists nothing.
            continue
        for puzzle in puzzle_container.iter('a'):
            href = puzzle.get('href')
            if puzzle.text is None or href is None:
                print("Skipping puzzle link without text or href (%s)" % href)
                continue
            create_puzzle(puzzle.text.strip(), href, rnd_tag, is_meta=False)

    print("Finished puzzlescrape run")
def handle(self, *args, **kwargs):
    """Register every puzzle listed on the molasses.holiday overview page.

    The round headers are visited first; with the meta-puzzle call commented
    out, that pass creates nothing. The puzzle pass then calls
    ``create_puzzle`` for each puzzle link, tagging it with its round's tag.
    Rounds without an ``AutoTag`` entry get a ``Tag``/``AutoTag`` through
    ``get_or_create``, and a freshly created tag is appended to the
    'unsolved rounds' and 'all rounds' tag lists.
    """
    site_root = 'https://molasses.holiday'
    overview_url = 'https://molasses.holiday/puzzle'
    print("Beginning puzzlescrape run at " + datetime.now().isoformat())
    doc = etree.HTML(puzzlelogin.fetch_with_single_login(overview_url))

    # Pass 1: round headers.
    header_links = doc.xpath("//div[@class='round-list-header']/a")
    for header in header_links:
        if header.text is None:
            continue
        rnd_name = header.text
        # Only the commented-out create_puzzle call below would use these.
        title = rnd_name + ' Meta'
        url = site_root + header.get('href')
        try:
            auto_tag = AutoTag.objects.get(html_name=rnd_name)
        except AutoTag.DoesNotExist:
            continue
            # NOTE(review): statements from here to the end of the handler
            # can never run because they follow `continue`; presumably kept
            # as scaffolding -- confirm before deleting.
            tag, created = Tag.objects.get_or_create(
                name=html_to_tag(rnd_name))
            auto_tag, _ = AutoTag.objects.get_or_create(
                html_name=rnd_name, tag=tag)
            if created:
                add_tag_to_taglist(tag, 'unsolved rounds')
                add_tag_to_taglist(tag, 'all rounds')
        else:
            tag = auto_tag.tag
        # no round metas
        # create_puzzle(title=title, url=url, tag=tag, is_meta=True)

    # Pass 2: individual puzzles.
    item_links = doc.xpath("//div[@class='puzzle-list-item']/a")
    for item in item_links:
        title = item.text
        url = site_root + item.get('href')
        # Grandparent -> first child -> first child holds the round name.
        round_header = item.getparent().getparent().getchildren()[0]
        rnd = round_header.getchildren()[0].text
        try:
            auto_tag = AutoTag.objects.get(html_name=rnd)
        except AutoTag.DoesNotExist:
            tag, created = Tag.objects.get_or_create(name=html_to_tag(rnd))
            auto_tag, _ = AutoTag.objects.get_or_create(
                html_name=rnd, tag=tag)
            if created:
                add_tag_to_taglist(tag, 'unsolved rounds')
                add_tag_to_taglist(tag, 'all rounds')
        else:
            tag = auto_tag.tag
        create_puzzle(title=title, url=url, tag=tag)

    print("Finished puzzlescrape run")
def handle(self, *args, **kwargs):
    """Poll the answer page of every unsolved puzzle and record the results.

    For each puzzle that is not yet solved and has an answer-check URL, the
    page is fetched, the puzzle's ``QueuedAnswer`` rows are cleared, and the
    page is scanned line by line:

    * a "Solved! Answer:" line stores the answer and marks the puzzle solved;
    * the "In the Queue" / "Previous Answers Submitted" headings select how
      the following table cells are interpreted;
    * every second ``<td>`` cell is recorded as a queued or wrong answer,
      depending on the current section.
    """
    # NOTE(review): these markers are compared character-for-character, so
    # their exact whitespace matters -- confirm against a live answer page.
    solved_prefix = ' Solved! Answer: <b>'
    solved_suffix = '</b><br>'
    queue_heading = ' <h3>In the Queue:</h3>'
    wrong_heading = ' <h3>Previous Answers Submitted:</h3>'
    non_answer_prefix = '\t <td>'
    cell_suffix = '</td>'

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Beginning answerscrape run at " + datetime.now().isoformat())
    for puzzle in Puzzle.objects.all().order_by('id'):
        if puzzle.status == solved_status:
            continue
        answer_url = puzzle.checkAnswerLink
        # Skip both None and '' (stored when no check-answer link was found)
        # instead of fetching an empty URL.
        if not answer_url:
            continue
        text = puzzlelogin.fetch_with_single_login(answer_url)
        # Queued answers are rebuilt from scratch on every run.
        QueuedAnswer.objects.filter(puzzle=puzzle).delete()
        mode = 'none'
        skip = True
        for line in text.split('\n'):
            if line.startswith(solved_prefix) and line.endswith(solved_suffix):
                answer = line[len(solved_prefix):len(line) - len(solved_suffix)]
                print('SOLVED ' + puzzle.title + ' = ' + answer)
                puzzle.answer = answer
                puzzle.status = solved_status
                puzzle.save()
            if line == queue_heading:
                mode = 'queued'
            if line == wrong_heading:
                mode = 'wrong'
            if not line.startswith(non_answer_prefix):
                continue
            if skip:
                # Every other match is a header which we ignore
                skip = False
                continue
            skip = True
            rest = line[len(non_answer_prefix):]
            if rest.endswith(cell_suffix):
                rest = rest[:-len(cell_suffix)]
            if mode == 'queued':
                print('QUEUED ' + puzzle.title + ' = ' + rest)
                QueuedAnswer.objects.get_or_create(puzzle=puzzle, answer=rest)
            if mode == 'wrong' and puzzle.status != solved_status:
                print('WRONG ' + puzzle.title + ' = ' + rest)
                PuzzleWrongAnswer.objects.get_or_create(
                    puzzle=puzzle, answer=rest)
    print("Finished answerscrape run")
def create_puzzle(title, url, tag, is_meta=False):
    """Store a newly scraped puzzle, doing nothing if it is already known.

    A puzzle counts as known when a ``Puzzle`` with the same ``title`` and
    ``url`` exists. Otherwise the puzzle page is fetched and its single
    "Check answer" link, resolved against ``url``, becomes the new row's
    ``checkAnswerLink`` (``''`` when there is not exactly one such link).

    Args:
        title: Puzzle title taken from the overview page.
        url: URL of the puzzle page.
        tag: Value matched against ``Tag.name`` to find the round tag.
        is_meta: Also attach the 'metas' tag when true.

    Returns:
        None.
    """
    # An existence check is all that is needed here. The previous get()
    # bound an unused local and raised MultipleObjectsReturned as soon as
    # two matching rows existed, stopping the whole scrape.
    already_known = Puzzle.objects.filter(title=title, url=url).exists()
    if already_known:
        return

    # Look for Submit Answer link
    page_tree = etree.HTML(puzzlelogin.fetch_with_single_login(url))
    candidates = page_tree.xpath(
        "//div[@id='submit']/a[text()='Check answer']")
    # Only an unambiguous match is trusted; anything else stores ''.
    check_answer_link = ''
    if len(candidates) == 1:
        check_answer_link = urlparse.urljoin(url, candidates[0].get('href'))

    try:
        puzzle_object = Puzzle.objects.create(
            title=title, url=url, checkAnswerLink=check_answer_link)
        puzzle_object.tags.add(Tag.objects.get(name=tag))
        if is_meta:
            puzzle_object.tags.add(Tag.objects.get(name='metas'))
        print("Created puzzle (%s, %s, %s)" % (title, url, check_answer_link))
    except django.db.utils.IntegrityError:
        # puzzle already exists (race)
        pass
def handle(self, *args, **kwargs):
    """Scrape starrats.org submission pages for puzzles lacking an answer.

    For every puzzle without a stored answer whose URL starts with the
    starrats puzzle prefix, the matching embed/submit page is fetched and
    decoded as UTF-8, and the puzzle's ``QueuedAnswer`` rows are cleared.
    The page is then read line by line: the latest answer cell is
    remembered and passed to ``handle_wrong_answer`` or
    ``handle_correct_answer`` when an "Incorrect" or "Correct!" cell is seen.
    """
    puzzle_prefix = 'https://www.starrats.org/puzzle/'
    answer_prefix = '<td class="answer">'
    answer_suffix = '</td>'
    incorrect_cell = '<td class="incorrect">Incorrect</td>'
    correct_cell = '<td class="correct">Correct!</td>'

    print("Beginning answerscrape run at " + datetime.now().isoformat())
    for puzzle in Puzzle.objects.all().order_by('id'):
        if puzzle.answer:
            # answer already in database
            continue
        if not puzzle.url.startswith(puzzle_prefix):
            continue
        answer_url = ('https://www.starrats.org/embed/submit/puzzle/'
                      + puzzle.url[len(puzzle_prefix):])
        print(answer_url)
        raw_page = puzzlelogin.fetch_with_single_login(answer_url)
        text = raw_page.decode('utf-8')
        QueuedAnswer.objects.filter(puzzle=puzzle).delete()

        # Most recent answer cell seen; the verdict cell refers to it.
        cur_answer = None
        for raw_line in text.split('\n'):
            line = raw_line.lstrip()
            if line.startswith(answer_prefix) and line.endswith(answer_suffix):
                cur_answer = line[len(answer_prefix):
                                  len(line) - len(answer_suffix)]
            if line == incorrect_cell:
                self.handle_wrong_answer(puzzle, cur_answer)
            if line == correct_cell:
                self.handle_correct_answer(puzzle, cur_answer)
    print("Finished answerscrape run")