Exemplo n.º 1
0
def parse_fast(instr, maxrecs, progress):
  """Rebuild a FootprintFeed XML string from the raw feed text in instr.

  Fast parser that doesn't check correctness, i.e. the input must be
  pre-checked by the caller.  Elements are located with non-greedy regex
  scans for the bare tags <FeedInfo>, <Organization> and
  <VolunteerOpportunity>; each hit is parsed on its own, given default
  values, and re-serialized.

  Args:
    instr: feed text to scan.
    maxrecs: maximum number of opportunities to emit; a value that is
      not > 0 means no limit.
    progress: currently unused; kept so existing callers keep working.

  Returns:
    Tuple (xml_string, numorgs, numopps).  numopps counts only the
    opportunities actually written to xml_string.
  """
  # Compile each pattern once up front instead of inline in the loop header.
  feedinfo_re = re.compile('<FeedInfo>.+?</FeedInfo>', re.DOTALL)
  org_re = re.compile('<Organization>.+?</Organization>', re.DOTALL)
  opp_re = re.compile(
      '<VolunteerOpportunity>.+?</VolunteerOpportunity>', re.DOTALL)

  numorgs = numopps = 0
  outstr_list = ['<?xml version="1.0" ?>',
                 '<FootprintFeed schemaVersion="0.1">']

  # note: processes Organizations first, so ID lookups work
  for match in feedinfo_re.finditer(instr):
    node = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
    xmlh.set_default_value(node, node.firstChild, "feedID", "0")
    set_default_time_elem(node, node.firstChild, "createdDateTime")
    outstr_list.append(xmlh.prettyxml(node, True))

  outstr_list.append('<Organizations>')
  for match in org_re.finditer(instr):
    node = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
    numorgs += 1
    outstr_list.append(xmlh.prettyxml(node, True))
  outstr_list.append('</Organizations>')

  outstr_list.append('<VolunteerOpportunities>')
  for match in opp_re.finditer(instr):
    # Check the cap before parsing/counting so the returned numopps equals
    # the number of records emitted (counting first reported one too many).
    if maxrecs > 0 and numopps >= maxrecs:
      break
    opp = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
    numopps += 1

    # Original author's note: these set_default_* helpers don't do anything
    # if the field doesn't already exist.  NOTE(review): confirm in xmlh.
    xmlh.set_default_value(opp, opp, "volunteersNeeded", -8888)
    xmlh.set_default_value(opp, opp, "paid", "No")
    xmlh.set_default_value(opp, opp, "sexRestrictedTo", "Neither")
    xmlh.set_default_value(opp, opp, "language", "English")
    set_default_time_elem(opp, opp, "lastUpdated")
    set_default_time_elem(opp, opp, "expires",
                          xmlh.current_ts(DEFAULT_EXPIRATION))

    # Best-effort lookups: any failure is treated as "no such elements",
    # but KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
      opplocs = opp.getElementsByTagName("location")
    except Exception:
      opplocs = []
    for loc in opplocs:
      xmlh.set_default_value(opp, loc, "virtual", "No")
      xmlh.set_default_value(opp, loc, "country", "US")

    try:
      dttms = opp.getElementsByTagName("dateTimeDurations")
    except Exception:
      dttms = []
    for dttm in dttms:
      xmlh.set_default_value(opp, dttm, "iCalRecurrence", "")
      # getElementsByTagName returns a (possibly empty) list, never None,
      # so comparing it to None could never select "Yes"; test emptiness.
      has_times = (dttm.getElementsByTagName("startTime") or
                   dttm.getElementsByTagName("endTime"))
      # NOTE(review): set_default_time_elem also applies an olsonTZ
      # attribute to what set_default_value returns -- confirm that is
      # wanted on timeFlexible.
      set_default_time_elem(opp, dttm, "timeFlexible",
                            "No" if has_times else "Yes")
      xmlh.set_default_value(opp, dttm, "openEnded", "No")

    try:
      time_elems = (list(opp.getElementsByTagName("startTime")) +
                    list(opp.getElementsByTagName("endTime")))
    except Exception:
      time_elems = []
    for el in time_elems:
      xmlh.set_default_attr(opp, el, "olsonTZ", "America/Los_Angeles")

    outstr_list.append(xmlh.prettyxml(opp, True))

  outstr_list.append('</VolunteerOpportunities>')
  outstr_list.append('</FootprintFeed>')
  return "".join(outstr_list), numorgs, numopps
def parse_fast(instr, maxrecs, progress):
    """Rebuild a FootprintFeed XML string from the raw feed text in instr.

    Fast parser that doesn't check correctness, i.e. the input must be
    pre-checked by the caller.  Elements are located with non-greedy regex
    scans for the bare tags <FeedInfo>, <Organization> and
    <VolunteerOpportunity>; each hit is parsed on its own, given default
    values, and re-serialized.

    Args:
        instr: feed text to scan.
        maxrecs: maximum number of opportunities to emit; a value that is
            not > 0 means no limit.
        progress: currently unused; kept so existing callers keep working.

    Returns:
        Tuple (xml_string, numorgs, numopps).  numopps counts only the
        opportunities actually written to xml_string.
    """
    # Compile each pattern once up front instead of inline in the loop header.
    feedinfo_re = re.compile('<FeedInfo>.+?</FeedInfo>', re.DOTALL)
    org_re = re.compile('<Organization>.+?</Organization>', re.DOTALL)
    opp_re = re.compile('<VolunteerOpportunity>.+?</VolunteerOpportunity>',
                        re.DOTALL)

    numorgs = numopps = 0
    outstr_list = [
        '<?xml version="1.0" ?>',
        '<FootprintFeed schemaVersion="0.1">',
    ]

    # note: processes Organizations first, so ID lookups work
    for match in feedinfo_re.finditer(instr):
        node = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
        xmlh.set_default_value(node, node.firstChild, "feedID", "0")
        set_default_time_elem(node, node.firstChild, "createdDateTime")
        outstr_list.append(xmlh.prettyxml(node, True))

    outstr_list.append('<Organizations>')
    for match in org_re.finditer(instr):
        node = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
        numorgs += 1
        outstr_list.append(xmlh.prettyxml(node, True))
    outstr_list.append('</Organizations>')

    outstr_list.append('<VolunteerOpportunities>')
    for match in opp_re.finditer(instr):
        # Check the cap before parsing/counting so the returned numopps
        # equals the number of records emitted (counting first reported
        # one too many whenever the cap was hit).
        if maxrecs > 0 and numopps >= maxrecs:
            break
        opp = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
        numopps += 1

        # Original author's note: these set_default_* helpers don't do
        # anything if the field doesn't already exist.
        # NOTE(review): confirm in xmlh.
        xmlh.set_default_value(opp, opp, "volunteersNeeded", -8888)
        xmlh.set_default_value(opp, opp, "paid", "No")
        xmlh.set_default_value(opp, opp, "sexRestrictedTo", "Neither")
        xmlh.set_default_value(opp, opp, "language", "English")
        set_default_time_elem(opp, opp, "lastUpdated")
        set_default_time_elem(opp, opp, "expires",
                              xmlh.current_ts(DEFAULT_EXPIRATION))

        # Best-effort lookups: any failure is treated as "no such
        # elements", but KeyboardInterrupt/SystemExit are not swallowed.
        try:
            opplocs = opp.getElementsByTagName("location")
        except Exception:
            opplocs = []
        for loc in opplocs:
            xmlh.set_default_value(opp, loc, "virtual", "No")
            xmlh.set_default_value(opp, loc, "country", "US")

        try:
            dttms = opp.getElementsByTagName("dateTimeDurations")
        except Exception:
            dttms = []
        for dttm in dttms:
            xmlh.set_default_value(opp, dttm, "iCalRecurrence", "")
            # getElementsByTagName returns a (possibly empty) list, never
            # None, so comparing it to None could never select "Yes";
            # test for emptiness instead.
            starts = dttm.getElementsByTagName("startTime")
            ends = dttm.getElementsByTagName("endTime")
            if not starts and not ends:
                flexible = "Yes"
            else:
                flexible = "No"
            # NOTE(review): set_default_time_elem also applies an olsonTZ
            # attribute to what set_default_value returns -- confirm that
            # is wanted on timeFlexible.
            set_default_time_elem(opp, dttm, "timeFlexible", flexible)
            xmlh.set_default_value(opp, dttm, "openEnded", "No")

        try:
            time_elems = list(opp.getElementsByTagName("startTime"))
            time_elems.extend(opp.getElementsByTagName("endTime"))
        except Exception:
            time_elems = []
        for el in time_elems:
            xmlh.set_default_attr(opp, el, "olsonTZ", "America/Los_Angeles")

        outstr_list.append(xmlh.prettyxml(opp, True))

    outstr_list.append('</VolunteerOpportunities>')
    outstr_list.append('</FootprintFeed>')
    return "".join(outstr_list), numorgs, numopps
Exemplo n.º 3
0
def set_default_time_elem(parent, entity, tagname, timest=None):
  """footprint macro: default a time field and tag it with a timezone.

  Calls xmlh.set_default_value(parent, entity, tagname, timest), then
  xmlh.set_default_attr on its result to default the "olsonTZ" attribute
  to "America/Los_Angeles".

  Args:
    parent, entity, tagname: forwarded unchanged to xmlh.set_default_value.
    timest: default value for the field.  When None (the default),
      xmlh.current_ts() is evaluated at call time.
  """
  if timest is None:
    # A default of xmlh.current_ts() in the signature is evaluated only
    # once, when the function is defined, so later calls would reuse a
    # stale timestamp.  Resolve it per call instead.
    timest = xmlh.current_ts()
  cdt = xmlh.set_default_value(parent, entity, tagname, timest)
  xmlh.set_default_attr(parent, cdt, "olsonTZ", "America/Los_Angeles")
def set_default_time_elem(parent, entity, tagname, timest=None):
    """footprint macro: default a time field and tag it with a timezone.

    Calls xmlh.set_default_value(parent, entity, tagname, timest), then
    xmlh.set_default_attr on its result to default the "olsonTZ" attribute
    to "America/Los_Angeles".

    Args:
        parent, entity, tagname: forwarded unchanged to
            xmlh.set_default_value.
        timest: default value for the field.  When None (the default),
            xmlh.current_ts() is evaluated at call time.
    """
    if timest is None:
        # A default of xmlh.current_ts() in the signature is evaluated
        # only once, when the function is defined, so later calls would
        # reuse a stale timestamp.  Resolve it per call instead.
        timest = xmlh.current_ts()
    cdt = xmlh.set_default_value(parent, entity, tagname, timest)
    xmlh.set_default_attr(parent, cdt, "olsonTZ", "America/Los_Angeles")