Exemplo n.º 1
0
def get_basic_info(root=None):
  """Read sex and birthdate from the "basic-info" section under `root`.

  :param root: Object exposing Selenium's ``find_element(by, value)``; the
    element with id "basic-info" is looked up beneath it. When None, the
    defaults are returned.
  :return: Tuple ``(sex, birthdate)``. ``sex`` is u'H' for the text
    'Hombre', u'M' for 'Mujer' and u'?' otherwise. ``birthdate`` is whatever
    ``parse_date`` returns, or None when the field is missing or its format
    is not recognised.
  """
  sex = u'?'
  birthdate = None

  if root is None:
    # Nothing to search in; report it like a missing section instead of
    # failing with AttributeError on None.find_element.
    print("[ERROR]: Could not find 'basic-info' section")
    return sex, birthdate

  try:
    basic_info = root.find_element(By.ID, "basic-info")
  except NoSuchElementException:
    print("[ERROR]: Could not find 'basic-info' section")
    return sex, birthdate

  # The leading "." keeps each XPath relative to `basic_info`; a bare "//"
  # would search the whole document regardless of the element it is called on.
  try:
    sex_elem = basic_info.find_element(
      By.XPATH, ".//div[@title='Sexo']/table/tbody/tr/td[2]/div"
    )
    sex = {'Hombre': u'H', 'Mujer': u'M'}.get(sex_elem.text, u'?')
  except NoSuchElementException:
    print("[ERROR]: No information about gender")

  try:
    birthdate_elem = basic_info.find_element(
      By.XPATH, ".//div[@title='Fecha de nacimiento']/table/tbody/tr/td[2]/div"
    )
    birthdate = parse_date(birthdate_elem.text)
  except NoSuchElementException:
    print("[ERROR]: No information about birthdate")
  except UnrecognizedDateFormatError as e:
    print("[ERROR]: Unrecognized date format for '{0}'".format(e.date))

  return sex, birthdate
Exemplo n.º 2
0
def group_by_period(queryset, column, period, **annotate):
    """
    Aggregate a queryset per date period.

    The rows are grouped by `column` truncated to `period`, annotated with
    the given expressions and returned in ascending period order.

    :param queryset: Original queryset
    :type queryset: django.db.QuerySet
    :param column: Column for grouping
    :type column: str
    :param period: Period for grouping ('year', 'month', 'day')
    :type period:  str
    :param annotate: Dict for `.annotate()`
    :type annotate: dict[str,str]
    :return: OrderedDict of period -> annotate columns
    :rtype: collections.OrderedDict
    """

    # Based on http://stackoverflow.com/a/8746532/51685

    # SQL expression that truncates `column` to the requested period.
    truncation_sql = connection.ops.date_trunc_sql(period, column)

    # Columns to read back: the period itself plus every annotation.
    output_columns = ["period_group"]
    output_columns.extend(annotate.keys())

    rows = queryset.extra({"period_group": truncation_sql})
    rows = rows.values("period_group")
    rows = rows.annotate(**annotate)
    rows = rows.order_by("period_group")
    rows = rows.values(*output_columns)

    grouped = OrderedDict()
    for row in rows:
        # The period becomes the key; what is left in the row is the value.
        period_key = parse_date(row.pop("period_group"))
        grouped[period_key] = row
    return grouped
Exemplo n.º 3
0
 def test_dates(self):
     """Every supported spelling of 1 January 2009 parses to that date."""
     expected = datetime.date(2009, 1, 1)
     samples = (
         '01/01/09',
         '1/1/09',
         '1/1/2009',
         'Jan 1, 09',
         'Jan 1, 2009',
         'January 1, 2009',
         '2009-01-01',
         '1.1.09',
     )
     for text in samples:
         parsed = parse_date(text)
         # The input string is passed as the failure message.
         self.assertNotEqual(parsed, None, text)
         if parsed is None:
             continue
         self.assertEqual(parsed.date(), expected, text)
Exemplo n.º 4
0
 def _set_datetime_value(self, new_value):
     """Store `new_value` on this object according to the attribute's type.

     For a DATETIME attribute, `new_value` must be a ``datetime.datetime``;
     it is stored as-is, together with ``calendar.timegm`` of its time tuple
     as the numeric value and its ISO format as the string value.

     For a DATE attribute, `new_value` is passed through ``parse_date``; the
     result is stored as a datetime at midnight, with its ordinal as the
     numeric value and its ISO format as the string value.

     Any other attribute type leaves the object untouched.

     :param new_value: Value to assign.
     :raises TypeError: if the attribute is DATETIME and `new_value` is not
       a ``datetime.datetime``.
     """
     if self.attribute.type == AttributeType.DATETIME:
         # Just store datetimes
         if not isinstance(new_value, datetime.datetime):
             # Wrap in a 1-tuple: a bare tuple on the right of % would be
             # unpacked as the format arguments instead of being shown.
             raise TypeError("Can't assign %r to DATETIME attribute" % (new_value,))
         self.datetime_value = new_value
         # NOTE(review): timetuple() ignores tzinfo, so for an aware datetime
         # this is not the UTC epoch value -- confirm whether
         # utctimetuple() is what is wanted here.
         self.numeric_value = calendar.timegm(self.datetime_value.timetuple())
         self.untranslated_string_value = self.datetime_value.isoformat()
     elif self.attribute.type == AttributeType.DATE:
         # Store dates as "date at midnight"
         parsed_date = parse_date(new_value)
         self.datetime_value = datetime.datetime.combine(date=parsed_date, time=datetime.time())
         self.numeric_value = parsed_date.toordinal()  # Store date ordinal as numeric value
         self.untranslated_string_value = parsed_date.isoformat()  # Store date ISO format as string value
Exemplo n.º 5
0
  def parse(self, response):
    """Scrape one profile page into a FacebookUser and related items.

    Yields a FacebookCity for the current city and for the home town when
    their links are found. It then yields either the finished user (when the
    remaining depth is 0) or a Request for the user's friends page that
    carries the user and the decremented depth in its ``meta``.

    Nothing is yielded when the page has no title to take the name from.

    :param response: Response of the profile page. ``response.meta`` may
      carry the remaining depth under 'level'; otherwise
      ``FacebookSpider.max_depth`` is used.
    :raises Exception: if the exploration depth cannot be determined.
    """
    user = FacebookUser()

    # Get the ID
    user['id'] = FacebookSpider.id_from_url(response.url).decode('utf-8')

    # Get the name; a profile without one is skipped altogether
    titles = response.xpath('//title/text()').extract()
    if not titles:
      self.log("Impossible to determine name of user. Skipping", log.CRITICAL)
      return
    user['name'] = titles[0]
    root = response.xpath(
      "(//div[@id='root' and descendant::div[@id='contact-info']])[1]"
    )

    # All XPaths applied to a sub-selector below start with "." so that they
    # stay inside that node; a bare "//" would search the whole document.

    # Get the picture
    user['image_urls'] = []
    # NOTE(review): pictures are only collected when the name contains
    # 'anabel' -- this looks like a leftover debugging filter; confirm.
    if 'anabel' in user['name'].lower():
      pictures = root.xpath(
        ".//img[parent::a[contains(@href, 'photo.php')]]/@src"
      ).extract()
      if pictures:
        user['image_urls'].append(pictures[0])

    living_info = root.xpath("div/div[@id='living']")

    # Get location
    location = self._extract_city(living_info, 'Ciudad actual')
    if location is None:
      user['location_id'] = None
    else:
      user['location_id'] = location['id']
      yield location

    # Get birthplace
    birthplace = self._extract_city(living_info, 'Ciudad de origen')
    if birthplace is None:
      user['birthplace_id'] = None
    else:
      user['birthplace_id'] = birthplace['id']
      yield birthplace

    basic_info = root.xpath("div/div[@id='basic-info']")

    # Get sex
    sex_texts = basic_info.xpath(
      ".//tr[ancestor::div[@title='Sexo']]/td[2]/div/text()"
    ).extract()
    sex_label = sex_texts[0] if sex_texts else None
    user['sex'] = {'Hombre': u'H', 'Mujer': u'M'}.get(sex_label, u'?')

    # Get birthdate
    birthdate_texts = basic_info.xpath(
      ".//tr[ancestor::div[@title='Fecha de nacimiento']]/td[2]/div/text()"
    ).extract()
    try:
      user['birthdate'] = parse_date(birthdate_texts[0])
    except (UnrecognizedDateFormatError, IndexError):
      user['birthdate'] = None

    # Get friends URL
    self.friends_url = FacebookSpider.friends_url(user['id'])
    user['friends'] = []

    # Set exploration depth level
    try:
      if 'level' in response.meta:
        level = response.meta['level']
      else:
        level = int(FacebookSpider.max_depth)
    except (AttributeError, TypeError, ValueError):
      raise Exception("Could not determine the desired exploration depth level")

    # Check if we continue going down the tree or not
    if level == 0:
      yield user
    else:
      request = Request(url=self.friends_url, callback=self.parse_friends)
      request.meta['user'] = user
      request.meta['level'] = level - 1
      yield request

  def _extract_city(self, living_info, title):
    """Build a FacebookCity from the profile link inside the div titled `title`.

    :param living_info: Selector(s) to search in.
    :param title: Value of the ``title`` attribute of the enclosing div.
    :return: A FacebookCity with 'id' and 'name' set, or None when the link,
      its text, or the 'id' query parameter of its href is missing.
    """
    link = living_info.xpath(
      ".//a[ancestor::div[@title='{0}'] and contains(@href, '{1}')]"
      .format(title, FacebookSpider.path_profile)
    )
    try:
      href = link.xpath("@href").extract()[0]
      name = link.xpath("text()").extract()[0]
      # A missing 'id' parameter raises KeyError here; it is handled like a
      # missing link rather than crashing the callback.
      city_id = parse_qs(urlparse(href).query)['id'][0]
    except (IndexError, KeyError):
      return None

    city = FacebookCity()
    city['id'] = '/' + city_id
    city['name'] = name
    return city
Exemplo n.º 6
0
 def test_times(self):
     """24-hour and am/pm spellings of 13:17 on 1 January 2009 parse alike."""
     expected = datetime.datetime(2009, 1, 1, 13, 17)
     for text in ('1/1/09 13:17', '1/1/09 1:17 pm'):
         parsed = parse_date(text)
         # The input string is passed as the failure message.
         self.assertNotEqual(parsed, None, text)
         if parsed is None:
             continue
         self.assertEqual(parsed, expected)