def update_questions(self):
    """
    Update `self.questions` with the questions shown on this profile.

    Every category ('Ethics', 'Sex', 'Religion', 'Lifestyles', 'Dating',
    'Other') is requested page by page from
    `http://www.okcupid.com/profile/<self.name>/questions`. Each question
    div found is turned into a `ProfileQuestion` built from its text, the
    list of possible answers, the profile's answer, its explanation, this
    profile, the category and the two approval flags
    (`you_approve`, `they_approve`: True, False, or None when no answer
    text is present).

    Questions whose text is already present in `self.questions` are
    skipped. Note that this can take a while due to OKCupid displaying
    only ten questions on each page, potentially requiring a large number
    of requests to the server.
    """
    question_id_re = re.compile(r'question_(\d+)')
    url = 'http://www.okcupid.com/profile/{0}/questions'.format(self.name)
    # Texts already stored; kept in a set so the duplicate check does not
    # rescan the whole question list for every parsed question.
    known_texts = {q.text for q in self.questions}

    for category in ['Ethics', 'Sex', 'Religion', 'Lifestyles',
                     'Dating', 'Other']:
        # Paging restarts for every category; carrying the page offset
        # over from the previous category would skip its first pages.
        count = 0
        while True:
            questions_data = {
                'low': 1 + 10*count,
                category: '1',
            }
            questions_request = self._session.post(url, data=questions_data)
            tree = html.fromstring(questions_request.content.decode('utf8'))

            for div in tree.iter('div'):
                id_match = question_id_re.match(div.attrib.get('id', ''))
                if not id_match:
                    continue
                number = id_match.group(1)

                text = helpers.replace_chars(
                    div.xpath(".//p[@class = 'qtext']")[0].text)
                answer_eles = div.xpath(
                    ".//input[contains(@id,'question_{0}_qans')]".format(
                        number))
                answers = [ele.attrib['value'] for ele in answer_eles]

                user_answer_ele = div.xpath(
                    ".//span[@id = 'answer_viewer_{0}']".format(number))[0]
                # `.text` may be None for an empty element (the explanation
                # lookup below guards against the same thing), so fall
                # back to an empty answer instead of crashing on strip().
                user_answer = (user_answer_ele.text or '').strip()

                they_approve = None
                if user_answer_ele.attrib.get('class') == 'not_accepted':
                    they_approve = False
                elif user_answer:
                    they_approve = True

                answer_target = div.xpath(
                    ".//span[@id = 'answer_target_{0}']".format(number))[0]
                # NOTE(review): like `they_approve`, this is keyed on
                # `user_answer` being non-empty -- confirm that is intended.
                you_approve = None
                if answer_target.attrib.get('class') == 'not_accepted':
                    you_approve = False
                elif user_answer:
                    you_approve = True

                explanation = div.xpath(
                    ".//span[@id = 'note_target_{0}']".format(number))[0].text
                if explanation is None:
                    explanation = ''
                else:
                    explanation = helpers.replace_chars(explanation.strip())

                if text not in known_texts:
                    self.questions.append(
                        ProfileQuestion(text, answers, user_answer,
                                        explanation, self, category,
                                        you_approve, they_approve))
                    known_texts.add(text)

            # Stop when there is no usable "Next" link on this page.
            next_links = tree.xpath("//a[text() = 'Next']")
            if not next_links or 'href' not in next_links[0].attrib:
                break
            count += 1
def update_questions(self):
    """
    Update `self.questions` with `Question` instances.

    Pages of `http://www.okcupid.com/profile/<self.name>/questions` are
    requested one after another, using the number of questions parsed so
    far as the offset. For every question div, a `Question` is built from
    its text, the stripped answer text and the stripped explanation note
    (empty string when there is none) and appended to `self.questions`.

    Note that this can take a while due to OKCupid displaying only ten
    questions on each page, potentially requiring a large number of
    requests to the server.
    """
    url = 'http://www.okcupid.com/profile/{0}/questions'.format(self.name)
    question_number = 0
    while True:
        questions_data = {
            'low': 1 + question_number,
        }
        get_questions = self._session.post(url, data=questions_data)
        tree = html.fromstring(get_questions.content.decode('utf8'))
        next_wrapper = tree.xpath("//li[@class = 'next']")

        # Keep only divs whose id has digits after its first nine
        # characters. Built as a new list: removing items from a list
        # while iterating over it skips the element after each removal.
        question_wrappers = [
            div for div in tree.xpath("//div[contains(@id, 'question_')]")
            if div.attrib['id'][9:].isdigit()
        ]

        for div in question_wrappers:
            question_number += 1
            text = helpers.replace_chars(
                div.xpath(".//div[@class = 'qtext']/p/text()")[0])
            user_answer = div.xpath(
                ".//span[contains(@id, 'answer_target_')]/text()")[0].strip()
            explanation = ''
            explanation_span = div.xpath(".//span[@class = 'note']")
            if explanation_span and explanation_span[0].text is not None:
                explanation = explanation_span[0].text.strip()
            self.questions.append(Question(text, user_answer, explanation))

        # Stop on the last page. Also stop when a page yielded no
        # questions: the offset would not advance, so the very same
        # request would be repeated forever.
        if not next_wrapper or not question_wrappers:
            break
def update_questions(self):
    """
    Update `self.questions` with a sequence of question objects, whose
    properties can be found in objects.py.

    Pages of `http://www.okcupid.com/profile/<self.username>/questions`
    are requested one after another, using the number of questions parsed
    so far as the offset. For every question div, a `Question` is built
    from its text, the text of the answer marked as 'mine' and the
    explanation (empty string when there is none) and appended to
    `self.questions`.

    Note that this can take a while due to OKCupid displaying only ten
    questions on each page, potentially requiring a large number of
    requests.
    """
    url = 'http://www.okcupid.com/profile/{0}/questions'.format(
        self.username)
    question_number = 0
    while True:
        questions_data = {
            'low': 1 + question_number,
        }
        get_questions = self._session.post(url, data=questions_data)
        tree = html.fromstring(get_questions.content.decode('utf8'))
        next_wrapper = tree.xpath("//li[@class = 'next']")

        # Get a list of each question div wrapper, ignore the first
        # because it's an unanswered question
        candidates = tree.xpath("//div[contains(@id, 'question_')]")[1:]
        # Keep only divs whose id has digits after its first nine
        # characters. Built as a new list: removing items from a list
        # while iterating over it skips the element after each removal.
        question_wrappers = [
            div for div in candidates
            if div.attrib['id'][9:].isdigit()
        ]

        for div in question_wrappers:
            question_number += 1
            text = helpers.replace_chars(
                div.xpath(".//div[@class = 'qtext']/p/text()")[0])
            user_answer = div.xpath(
                ".//li[contains(@class, 'mine')]/text()")[0]
            explanation = ''
            explanation_p = div.xpath(".//p[@class = 'value']")
            if explanation_p and explanation_p[0].text is not None:
                explanation = explanation_p[0].text
            self.questions.append(Question(text, user_answer, explanation))

        # Stop on the last page. Also stop when a page yielded no
        # questions: the offset would not advance, so the very same
        # request would be repeated forever.
        if not next_wrapper or not question_wrappers:
            break
def update_questions(self):
    """
    Update `self.questions` with a sequence of question objects, whose
    properties can be found in objects.py.

    Pages of `http://www.okcupid.com/profile/<self.username>/questions`
    are requested one after another. Every question div is parsed into
    its text, a mapping of answer id -> answer text, the list of
    acceptable answers, an importance label and an explanation, and a
    `UserQuestion` is appended to `self.questions` unless a question with
    the same text is already stored. The very first question div
    encountered is parsed but never stored.

    The importance label is None when the page reports an importance
    code outside '1'..'5'.

    Note that this can take a while due to OKCupid displaying only ten
    questions on each page, potentially requiring a large number of
    requests.
    """
    importance_labels = {
        '5': 'Irrelevant',
        '4': 'A little important',
        '3': 'Somewhat important',
        '2': 'Very important',
        '1': 'Mandatory',
    }
    question_id_re = re.compile(r'question_(\d+)')
    answer_id_re = re.compile(r'self_answers_\d+_(\d+)')
    url = 'http://www.okcupid.com/profile/{0}/questions'.format(
        self.username)
    # Texts already stored; kept in a set so the duplicate check does not
    # rescan the whole question list for every parsed question.
    known_texts = {q.text for q in self.questions}

    count = 0
    question_number = 0
    while True:
        questions_data = {
            'low': 1 + 10*count,
        }
        get_questions = self._session.post(url, data=questions_data)
        tree = html.fromstring(get_questions.content.decode('utf8'))

        for div in tree.iter('div'):
            id_match = question_id_re.match(div.attrib.get('id', ''))
            if not id_match:
                continue
            question_number += 1
            number = id_match.group(1)

            text = helpers.replace_chars(
                div.xpath(".//p[@class = 'qtext']")[0].text)
            answer_eles = div.xpath(".//li")
            # Use a dictionary/regex for the answer values
            # because occasionally the numbers are not sequential
            answers = {}
            for ele in answer_eles:
                value = answer_id_re.match(ele.attrib['id']).group(1)
                answers[value] = ele.text
            # `.get` so an <li> without a class attribute simply does not
            # count as acceptable instead of raising KeyError.
            acceptable_answers = [
                ele.text for ele in answer_eles
                if ele.attrib.get('class') in (' match', 'mine match')
            ]

            importance_no = div.xpath(
                ".//input[@id = 'question_{0}_importance']/@value".format(
                    number))[0]
            # A lookup (rather than an if/elif chain without a fallback)
            # guarantees an unknown code never reuses the label left over
            # from the previous question.
            importance = importance_labels.get(importance_no)

            explanation = ''
            explanation_p = div.xpath(".//p[@class = 'explanation']")
            if explanation_p[0].text is not None:
                explanation = explanation_p[0].text

            answer_int = int(div.xpath(
                ".//input[@id = 'question_{0}_answer']/@value".format(
                    number))[0])

            # The first question div overall is skipped -- presumably it
            # is not one of the user's answered questions; TODO confirm.
            if question_number > 1 and text not in known_texts:
                user_answer = answers[str(answer_int)]
                self.questions.append(
                    UserQuestion(text, answers, user_answer, explanation,
                                 self, acceptable_answers, importance))
                known_texts.add(text)

        # Stop when there is no usable "Next" link on this page.
        next_links = tree.xpath("//a[text() = 'Next']")
        if not next_links or 'href' not in next_links[0].attrib:
            break
        count += 1
def update_questions(self):
    """
    Update `self.questions` with the questions shown on this profile.

    Every category ('Ethics', 'Sex', 'Religion', 'Lifestyles', 'Dating',
    'Other') is requested page by page from
    `http://www.okcupid.com/profile/<self.name>/questions`. Each question
    div found is turned into a `ProfileQuestion` built from its text, the
    list of possible answers, the profile's answer, its explanation, this
    profile, the category and the two approval flags
    (`you_approve`, `they_approve`: True, False, or None when no answer
    text is present).

    Questions whose text is already present in `self.questions` are
    skipped. Note that this can take a while due to OKCupid displaying
    only ten questions on each page, potentially requiring a large number
    of requests to the server.
    """
    question_id_re = re.compile(r'question_(\d+)')
    url = 'http://www.okcupid.com/profile/{0}/questions'.format(self.name)
    # Texts already stored; kept in a set so the duplicate check does not
    # rescan the whole question list for every parsed question.
    known_texts = {q.text for q in self.questions}

    for category in [
            'Ethics', 'Sex', 'Religion', 'Lifestyles', 'Dating', 'Other'
    ]:
        # Paging restarts for every category; carrying the page offset
        # over from the previous category would skip its first pages.
        count = 0
        while True:
            questions_data = {
                'low': 1 + 10 * count,
                category: '1',
            }
            questions_request = self._session.post(url, data=questions_data)
            tree = html.fromstring(questions_request.content.decode('utf8'))

            for div in tree.iter('div'):
                id_match = question_id_re.match(div.attrib.get('id', ''))
                if not id_match:
                    continue
                number = id_match.group(1)

                text = helpers.replace_chars(
                    div.xpath(".//p[@class = 'qtext']")[0].text)
                answer_eles = div.xpath(
                    ".//input[contains(@id,'question_{0}_qans')]".format(
                        number))
                answers = [ele.attrib['value'] for ele in answer_eles]

                user_answer_ele = div.xpath(
                    ".//span[@id = 'answer_viewer_{0}']".format(number))[0]
                # `.text` may be None for an empty element (the explanation
                # lookup below guards against the same thing), so fall
                # back to an empty answer instead of crashing on strip().
                user_answer = (user_answer_ele.text or '').strip()

                they_approve = None
                if user_answer_ele.attrib.get('class') == 'not_accepted':
                    they_approve = False
                elif user_answer:
                    they_approve = True

                answer_target = div.xpath(
                    ".//span[@id = 'answer_target_{0}']".format(number))[0]
                # NOTE(review): like `they_approve`, this is keyed on
                # `user_answer` being non-empty -- confirm that is intended.
                you_approve = None
                if answer_target.attrib.get('class') == 'not_accepted':
                    you_approve = False
                elif user_answer:
                    you_approve = True

                explanation = div.xpath(
                    ".//span[@id = 'note_target_{0}']".format(number))[0].text
                if explanation is None:
                    explanation = ''
                else:
                    explanation = helpers.replace_chars(explanation.strip())

                if text not in known_texts:
                    self.questions.append(
                        ProfileQuestion(text, answers, user_answer,
                                        explanation, self, category,
                                        you_approve, they_approve))
                    known_texts.add(text)

            # Stop when there is no usable "Next" link on this page.
            next_links = tree.xpath("//a[text() = 'Next']")
            if not next_links or 'href' not in next_links[0].attrib:
                break
            count += 1
def update_questions(self):
    """
    Update `self.questions` with a sequence of question objects, whose
    properties can be found in objects.py.

    Pages of `http://www.okcupid.com/profile/<self.username>/questions`
    are requested one after another. Every question div is parsed into
    its text, a mapping of answer id -> answer text, the list of
    acceptable answers, an importance label and an explanation, and a
    `UserQuestion` is appended to `self.questions` unless a question with
    the same text is already stored. The very first question div
    encountered is parsed but never stored.

    The importance label is None when the page reports an importance
    code outside '1'..'5'.

    Note that this can take a while due to OKCupid displaying only ten
    questions on each page, potentially requiring a large number of
    requests.
    """
    importance_labels = {
        '5': 'Irrelevant',
        '4': 'A little important',
        '3': 'Somewhat important',
        '2': 'Very important',
        '1': 'Mandatory',
    }
    question_id_re = re.compile(r'question_(\d+)')
    answer_id_re = re.compile(r'self_answers_\d+_(\d+)')
    url = 'http://www.okcupid.com/profile/{0}/questions'.format(
        self.username)
    # Texts already stored; kept in a set so the duplicate check does not
    # rescan the whole question list for every parsed question.
    known_texts = {q.text for q in self.questions}

    count = 0
    question_number = 0
    while True:
        questions_data = {
            'low': 1 + 10 * count,
        }
        get_questions = self._session.post(url, data=questions_data)
        tree = html.fromstring(get_questions.content.decode('utf8'))

        for div in tree.iter('div'):
            id_match = question_id_re.match(div.attrib.get('id', ''))
            if not id_match:
                continue
            question_number += 1
            number = id_match.group(1)

            text = helpers.replace_chars(
                div.xpath(".//p[@class = 'qtext']")[0].text)
            answer_eles = div.xpath(".//li")
            # Use a dictionary/regex for the answer values
            # because occasionally the numbers are not sequential
            answers = {}
            for ele in answer_eles:
                value = answer_id_re.match(ele.attrib['id']).group(1)
                answers[value] = ele.text
            # `.get` so an <li> without a class attribute simply does not
            # count as acceptable instead of raising KeyError.
            acceptable_answers = [
                ele.text for ele in answer_eles
                if ele.attrib.get('class') in (' match', 'mine match')
            ]

            importance_no = div.xpath(
                ".//input[@id = 'question_{0}_importance']/@value".format(
                    number))[0]
            # A lookup (rather than an if/elif chain without a fallback)
            # guarantees an unknown code never reuses the label left over
            # from the previous question.
            importance = importance_labels.get(importance_no)

            explanation = ''
            explanation_p = div.xpath(".//p[@class = 'explanation']")
            if explanation_p[0].text is not None:
                explanation = explanation_p[0].text

            answer_int = int(div.xpath(
                ".//input[@id = 'question_{0}_answer']/@value".format(
                    number))[0])

            # The first question div overall is skipped -- presumably it
            # is not one of the user's answered questions; TODO confirm.
            if question_number > 1 and text not in known_texts:
                user_answer = answers[str(answer_int)]
                self.questions.append(
                    UserQuestion(text, answers, user_answer, explanation,
                                 self, acceptable_answers, importance))
                known_texts.add(text)

        # Stop when there is no usable "Next" link on this page.
        next_links = tree.xpath("//a[text() = 'Next']")
        if not next_links or 'href' not in next_links[0].attrib:
            break
        count += 1