Example #1
def parse(instr, maxrecs, progress):
    """return FPXML given ServeNet data"""
    numorgs = numopps = 0
    # rewrite namespaced tags like <db:foo> as <db_foo> so the simple
    # parser can handle them
    instr = re.sub(r'<(/?db):', r'<\1_', instr)
    opps = re.findall(r'<VolunteerOpportunity>.+?</VolunteerOpportunity>',
                      instr, re.DOTALL)
    volopps = ""
    for i, oppstr in enumerate(opps):
        #if progress and i > 0 and i % 250 == 0:
        #  print str(datetime.now())+": ", i, " opportunities processed."
        if maxrecs > 0 and i > maxrecs:
            break
        xmlh.print_rps_progress("opps", progress, i, maxrecs)

        item = xmlh.simple_parser(oppstr, known_elnames, progress=False)

        orgid = register_org(item)

        # logoURL -- sigh, this is for the opportunity not the org
        volopps += '<VolunteerOpportunity>'
        volopps += xmlh.output_val('volunteerOpportunityID', str(i))
        volopps += xmlh.output_val('sponsoringOrganizationID', str(orgid))
        volopps += xmlh.output_node('volunteerHubOrganizationID', item,
                                    "LocalID")
        volopps += xmlh.output_node('title', item, "Title")
        volopps += xmlh.output_node('abstract', item, "Description")
        volopps += xmlh.output_node('description', item, "Description")
        volopps += xmlh.output_node('detailURL', item, "DetailURL")
        volopps += xmlh.output_val('volunteersNeeded', "-8888")

        try:
            oppdates = item.getElementsByTagName("OpportunityDate")
        except Exception:
            oppdates = []

        if len(oppdates) > 1:
            print datetime.now(), \
                "parse_servenet.py: only 1 OpportunityDate supported."
            # use the first date rather than dropping the record
            oppdate = oppdates[0]
        elif len(oppdates) == 0:
            oppdate = None
        else:
            oppdate = oppdates[0]
        volopps += '<dateTimeDurations><dateTimeDuration>'

        if oppdate:
            volopps += xmlh.output_val('openEnded', 'No')
            volopps += xmlh.output_val(
                'duration',
                'P%s%s' % (xmlh.get_tag_val(oppdate, "DurationQuantity"),
                           xmlh.get_tag_val(oppdate, "DurationUnit")))
            volopps += xmlh.output_val('commitmentHoursPerWeek', '0')
            volopps += xmlh.output_node('startDate', oppdate, "StartDate")
            volopps += xmlh.output_node('endDate', oppdate, "EndDate")
        else:
            volopps += xmlh.output_val('openEnded', 'Yes')
            volopps += xmlh.output_val('commitmentHoursPerWeek', '0')
        volopps += '</dateTimeDuration></dateTimeDurations>'

        volopps += '<locations>'
        try:
            opplocs = item.getElementsByTagName("Location")
        except Exception:
            opplocs = []
        for opploc in opplocs:
            volopps += '<location>'
            virtual_tag = opploc.getElementsByTagName("Virtual")
            if virtual_tag and xmlh.get_tag_val(
                    opploc, "Virtual").lower() == "yes":
                volopps += xmlh.output_val('virtual', 'Yes')
            else:
                volopps += xmlh.output_node('region', opploc,
                                            "StateOrProvince")
                volopps += xmlh.output_node('country', opploc, "Country")
                volopps += xmlh.output_node('postalCode', opploc,
                                            "ZipOrPostalCode")
            volopps += '</location>'
        volopps += '</locations>'
        volopps += '<categoryTags/>'
        volopps += '</VolunteerOpportunity>'
        numopps += 1

    # convert to footprint format
    outstr = '<?xml version="1.0" ?>'
    outstr += '<FootprintFeed schemaVersion="0.1">'
    outstr += '<FeedInfo>'
    outstr += xmlh.output_val('providerID', providerID)
    outstr += xmlh.output_val('providerName', providerName)
    outstr += xmlh.output_val('feedID', feedID)
    outstr += xmlh.output_val('createdDateTime', xmlh.current_ts())
    outstr += xmlh.output_val('providerURL', providerURL)
    outstr += xmlh.output_val('description', feedDescription)
    # TODO: capture ts -- use now?!
    outstr += '</FeedInfo>'

    # hardcoded: Organization
    outstr += '<Organizations>'
    for key in ORGS:
        outstr += ORGS[key]
        numorgs += 1
    outstr += '</Organizations>'
    outstr += '<VolunteerOpportunities>'
    outstr += volopps
    outstr += '</VolunteerOpportunities>'
    outstr += '</FootprintFeed>'

    #outstr = re.sub(r'><([^/])', r'>\n<\1', outstr)
    return outstr, numorgs, numopps
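
This parser, like the two examples below, leans on a small xmlh helper module (output_val, output_node, get_tag_val, simple_parser) that is not shown here. The following is a minimal sketch of how those helpers presumably behave, inferred from the call sites above -- the real module in the original codebase may differ:

# Hypothetical stand-ins for the xmlh helpers used above, inferred from
# their call sites -- not the original implementations.
import xml.sax.saxutils
from xml.dom import minidom

def output_val(tagname, val):
    # emit <tagname>value</tagname> with the value XML-escaped
    return '<%s>%s</%s>' % (tagname, xml.sax.saxutils.escape(str(val)), tagname)

def get_tag_val(node, tagname):
    # text of the first <tagname> descendant, or "" if missing/empty
    elems = node.getElementsByTagName(tagname)
    if not elems or not elems[0].firstChild:
        return ""
    return elems[0].firstChild.nodeValue.strip()

def output_node(tagname, node, srctag):
    # re-emit the value of <srctag> under the output name <tagname>
    return output_val(tagname, get_tag_val(node, srctag))

def simple_parser(instr, known_elnames=None, progress=False):
    # parse one XML snippet to a DOM node; the extra args are unused here
    return minidom.parseString(instr)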
Example #4
def parse(instr, maxrecs, progress):
  """return FPXML given craigslist data"""
  if CL_LATLONGS is None:
    load_craigslist_latlongs()
  xmlh.print_progress("loading craigslist crawler output...")
  crawl_craigslist.parse_cache_file(instr, listings_only=True)
  xmlh.print_progress("loaded " + str(len(crawl_craigslist.pages)) +
                      " craigslist pages.")

  # convert to footprint format
  outstr = '<?xml version="1.0" ?>'
  outstr += '<FootprintFeed schemaVersion="0.1">'
  outstr += '<FeedInfo>'
  outstr += xmlh.output_val('providerID', "105")
  outstr += xmlh.output_val('providerName', "craigslist")
  outstr += xmlh.output_val('feedID', "craigslist")
  outstr += xmlh.output_val('createdDateTime', xmlh.current_ts())
  outstr += xmlh.output_val('providerURL', "http://www.craigslist.org/")
  outstr += '</FeedInfo>'

  numorgs = numopps = 0

  # no "organization" in craigslist postings
  outstr += '<Organizations>'
  outstr += '<Organization>'
  outstr += '<organizationID>0</organizationID>'
  outstr += '<nationalEIN></nationalEIN>'
  outstr += '<name></name>'
  outstr += '<missionStatement></missionStatement>'
  outstr += '<description></description>'
  outstr += '<location>'
  outstr += xmlh.output_val("city", "")
  outstr += xmlh.output_val("region", "")
  outstr += xmlh.output_val("postalCode", "")
  outstr += '</location>'
  outstr += '<organizationURL></organizationURL>'
  outstr += '<donateURL></donateURL>'
  outstr += '<logoURL></logoURL>'
  outstr += '<detailURL></detailURL>'
  outstr += '</Organization>'
  numorgs += 1
  outstr += '</Organizations>'

  skipped_listings = {}
  skipped_listings["body"] = skipped_listings["title"] = \
      skipped_listings["not-ok"] = 0
  outstr += '<VolunteerOpportunities>'
  for i, url in enumerate(crawl_craigslist.pages):
    page = crawl_craigslist.pages[url]

    ok = extract(page, "it's OK to distribute this "+
                 "charitable volunteerism opportunity")
    if ok == "":
      skipped_listings["not-ok"] += 1
      continue

    title = extract(page, "<title>(.+?)</title>")
    if title == "":
      skipped_listings["title"] += 1
      continue

    body = extract(page, '<div id="userbody">(.+?)<')
    if len(body) < 25:
      skipped_listings["body"] += 1
      continue

    item_id = extract(url, "/vol/(.+?)[.]html$")
    locstr = extract(page, "Location: (.+?)<")
    datestr = extract(page, "Date: (.+?)<")
    ts = dateutil.parser.parse(datestr)
    datetimestr = ts.strftime("%Y-%m-%dT%H:%M:%S")
    datestr = ts.strftime("%Y-%m-%d")

    if maxrecs > 0 and i > maxrecs:
      break
    xmlh.print_rps_progress("opps", progress, i, maxrecs)
    if progress and i > 0 and i % 250 == 0:
      msg = "skipped " + str(skipped_listings["title"]+skipped_listings["body"])
      msg += " listings ("+str(skipped_listings["title"]) + " for no-title and "
      msg += str(skipped_listings["body"]) + " for short body and "
      msg += str(skipped_listings["not-ok"]) + " for no-redistrib)"
      xmlh.print_progress(msg)
      #print "---"
      #print "title:",title
      #print "loc:",locstr
      #print "date:",datestr
      #print "body:",body[0:100]

    # craigslist is full of weird escapes-- strip them
    body = re.sub(r'&[a-z]+;', '', body)
    title = re.sub(r'&[a-z]+;', '', title)
    locstr = re.sub(r'&[a-z]+;', '', locstr)
    outstr += '<VolunteerOpportunity>'
    outstr += '<volunteerOpportunityID>%s</volunteerOpportunityID>' % (item_id)
    outstr += '<sponsoringOrganizationIDs><sponsoringOrganizationID>0</sponsoringOrganizationID></sponsoringOrganizationIDs>'
    outstr += '<volunteerHubOrganizationIDs><volunteerHubOrganizationID>0</volunteerHubOrganizationID></volunteerHubOrganizationIDs>'
    outstr += '<title>%s</title>' % (title)
    outstr += '<detailURL>%s</detailURL>' % (url)
    # avoid CDATA in body...
    esc_body = xml.sax.saxutils.escape(body)
    esc_body100 = xml.sax.saxutils.escape(body[0:100])
    outstr += '<description>%s</description>' % (esc_body)
    outstr += '<abstract>%s</abstract>' % (esc_body100 + "...")
    outstr += '<lastUpdated>%s</lastUpdated>' % (datetimestr)
    # TODO: expires
    # TODO: synthesize location from metro...
    outstr += '<locations><location>'
    outstr += '<name>%s</name>' % (xml.sax.saxutils.escape(locstr))
    # what about the few that do geocode?
    lat, lng = "", ""
    try:
      domain, unused = url.split("vol/")
      lat, lng = CL_LATLONGS[domain].split(",")
    except (ValueError, KeyError):
      # no latlong known for this page -- ignore for now
      #print url
      #continue
      pass
    outstr += '<latitude>%s</latitude>' % (lat)
    outstr += '<longitude>%s</longitude>' % (lng)
    outstr += '</location></locations>'
    #outstr += '<locations><location>'
    #outstr += '<city>%s</city>' % (
    #outstr += '<region>%s</region>' % (
    #outstr += '</location></locations>'
    outstr += '<dateTimeDurations><dateTimeDuration>'
    outstr += '<openEnded>No</openEnded>'
    outstr += '<startDate>%s</startDate>' % (datestr)
    # TODO: endDate = startDate + N=14 days?
    # TODO: timezone???
    #outstr += '<endDate>%s</endDate>' % (
    outstr += '</dateTimeDuration></dateTimeDurations>'
    # TODO: categories???
    #outstr += '<categoryTags>'
    outstr += '</VolunteerOpportunity>'
    numopps += 1
  outstr += '</VolunteerOpportunities>'
  outstr += '</FootprintFeed>'

  #outstr = re.sub(r'><([^/])', r'>\n<\1', outstr)
  return outstr, numorgs, numopps
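
Example #4 also depends on an extract() helper that is not shown. From its call sites (single-capture-group patterns, a group-less redistribution check, and "" signalling no match), it presumably behaves roughly like this sketch; the real crawler utility may differ:

# Hypothetical sketch of extract(), inferred from the call sites above.
import re

def extract(instr, pattern):
    # return the first capture group when the pattern has one, else the
    # whole match; "" when the pattern does not match at all
    match = re.search(pattern, instr, re.DOTALL)
    if not match:
        return ""
    if match.groups():
        return match.group(1)
    return match.group(0)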
Example #5
def parse(instr, maxrecs, progress):
    """return FPXML given usaservice data"""
    # TODO: progress
    known_elnames = [
        'channel',
        'db:abstract',
        'db:address',
        'db:attendee_count',
        'db:categories',
        'db:city',
        'db:country',
        'db:county',
        'db:dateTime',
        'db:event',
        'db:eventType',
        'db:guest_total',
        'db:host',
        'db:latitude',
        'db:length',
        'db:longitude',
        'db:rsvp',
        'db:scheduledTime',
        'db:state',
        'db:street',
        'db:title',
        'db:venue_name',
        'db:zipcode',
        'description',
        'docs',
        'guid',
        'item',
        'language',
        'link',
        'pubDate',
        'rss',
        'title',
    ]

    # convert to footprint format
    s = '<?xml version="1.0" ?>'
    s += '<FootprintFeed schemaVersion="0.1">'
    s += '<FeedInfo>'
    # TODO: assign provider IDs?
    s += '<providerID>101</providerID>'
    s += '<providerName>usaservice.org</providerName>'
    s += '<feedID>1</feedID>'
    s += '<createdDateTime>%s</createdDateTime>' % xmlh.current_ts()
    s += '<providerURL>http://www.usaservice.org/</providerURL>'
    s += '<description>Syndicated events</description>'
    # TODO: capture ts -- use now?!
    s += '</FeedInfo>'

    numorgs = numopps = 0
    # hardcoded: Organization
    s += '<Organizations>'
    s += '<Organization>'
    s += '<organizationID>0</organizationID>'
    s += '<nationalEIN></nationalEIN>'
    s += '<name></name>'
    s += '<missionStatement></missionStatement>'
    s += '<description></description>'
    s += '<location><city></city><region></region><postalCode></postalCode></location>'
    s += '<organizationURL></organizationURL>'
    s += '<donateURL></donateURL>'
    s += '<logoURL></logoURL>'
    s += '<detailURL></detailURL>'
    s += '</Organization>'
    numorgs += 1
    s += '</Organizations>'

    s += '<VolunteerOpportunities>'

    instr = re.sub(r'<(/?db):', r'<\1_', instr)
    for i, line in enumerate(instr.splitlines()):
        if maxrecs > 0 and i > maxrecs:
            break
        xmlh.print_rps_progress("opps", progress, i, maxrecs)
        item = xmlh.simple_parser(line, known_elnames, progress=False)

        # unmapped: db_rsvp  (seems to be same as link, but with #rsvp at end of url?)
        # unmapped: db_county  (seems to be empty)
        # unmapped: attendee_count
        # unmapped: guest_total
        # unmapped: db_title   (dup of title, above)
        s += '<VolunteerOpportunity>'
        s += '<volunteerOpportunityID>%s</volunteerOpportunityID>' % (
            xmlh.get_tag_val(item, "guid"))
        # hardcoded: sponsoringOrganizationID
        s += '<sponsoringOrganizationIDs><sponsoringOrganizationID>0</sponsoringOrganizationID></sponsoringOrganizationIDs>'
        # hardcoded: volunteerHubOrganizationID
        s += '<volunteerHubOrganizationIDs><volunteerHubOrganizationID>0</volunteerHubOrganizationID></volunteerHubOrganizationIDs>'
        s += '<title>%s</title>' % (xmlh.get_tag_val(item, "title"))
        s += '<abstract>%s</abstract>' % (xmlh.get_tag_val(item, "abstract"))
        s += '<volunteersNeeded>-8888</volunteersNeeded>'

        dbscheduledTimes = item.getElementsByTagName("db_scheduledTime")
        if dbscheduledTimes.length != 1:
            print datetime.now(), \
                "parse_usaservice: only 1 db_scheduledTime supported."
            return None
        dbscheduledTime = dbscheduledTimes[0]
        s += '<dateTimeDurations><dateTimeDuration>'
        length = xmlh.get_tag_val(dbscheduledTime, "db_length")
        if length == "" or length == "-1":
            s += '<openEnded>Yes</openEnded>'
        else:
            s += '<openEnded>No</openEnded>'
        date, time = xmlh.get_tag_val(dbscheduledTime,
                                      "db_dateTime").split(" ")
        s += '<startDate>%s</startDate>' % (date)
        # TODO: timezone???
        s += '<startTime>%s</startTime>' % (time)
        s += '</dateTimeDuration></dateTimeDurations>'

        dbaddresses = item.getElementsByTagName("db_address")
        if dbaddresses.length != 1:
            print datetime.now(), \
                "parse_usaservice: only 1 db_address supported."
            return None
        dbaddress = dbaddresses[0]
        s += '<locations><location>'
        s += '<name>%s</name>' % (xmlh.get_tag_val(item, "db_venue_name"))
        s += '<streetAddress1>%s</streetAddress1>' % (xmlh.get_tag_val(
            dbaddress, "db_street"))
        s += '<city>%s</city>' % (xmlh.get_tag_val(dbaddress, "db_city"))
        s += '<region>%s</region>' % (xmlh.get_tag_val(dbaddress, "db_state"))
        s += '<country>%s</country>' % (xmlh.get_tag_val(
            dbaddress, "db_country"))
        s += '<postalCode>%s</postalCode>' % (xmlh.get_tag_val(
            dbaddress, "db_zipcode"))
        s += '<latitude>%s</latitude>' % (xmlh.get_tag_val(
            item, "db_latitude"))
        s += '<longitude>%s</longitude>' % (xmlh.get_tag_val(
            item, "db_longitude"))
        s += '</location></locations>'

        event_type = xmlh.get_tag_val(item, "db_eventType")
        s += '<categoryTags><categoryTag>%s</categoryTag></categoryTags>' % (
            event_type)

        s += '<contactName>%s</contactName>' % xmlh.get_tag_val(
            item, "db_host")
        s += '<detailURL>%s</detailURL>' % (xmlh.get_tag_val(item, "link"))
        s += '<description>%s</description>' % (xmlh.get_tag_val(
            item, "description"))
        pubdate = xmlh.get_tag_val(item, "pubDate")
        if re.search("[0-9][0-9] [A-Z][a-z][a-z] [0-9][0-9][0-9][0-9]",
                     pubdate):
            # TODO: parse() is ignoring timezone...
            ts = dateutil.parser.parse(pubdate)
            pubdate = ts.strftime("%Y-%m-%dT%H:%M:%S")
        s += '<lastUpdated>%s</lastUpdated>' % (pubdate)
        s += '</VolunteerOpportunity>'
        numopps += 1

    s += '</VolunteerOpportunities>'
    s += '</FootprintFeed>'
    #s = re.sub(r'><([^/])', r'>\n<\1', s)
    return s, numorgs, numopps
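
All three parsers share the same contract -- parse(instr, maxrecs, progress) returns (outstr, numorgs, numopps) -- so a driver can treat them interchangeably. A minimal driver sketch follows; the module name parse_usaservice and the file names are illustrative, not taken from the original pipeline:

# Illustrative driver -- module and file names here are assumptions.
import parse_usaservice  # any module exposing parse(instr, maxrecs, progress)

def run(infile, outfile, maxrecs=0, progress=True):
    # read raw provider data, convert to FPXML, write it out
    instr = open(infile).read()
    outstr, numorgs, numopps = parse_usaservice.parse(instr, maxrecs, progress)
    open(outfile, "w").write(outstr)
    return numorgs, numopps

if __name__ == "__main__":
    numorgs, numopps = run("usaservice.xml", "usaservice-fpxml.xml")
    print "wrote", numorgs, "orgs and", numopps, "opps"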