Example #1
def project(request, abstractid):
    LogRequest(request, "project: " + str(abstractid))
    try:
        project = NSFProject.objects.get(awardID = abstractid)
    except:
        msg = "No project found for award ID: " + str(abstractid) 
        LogWarning(msg)
        return render_to_response('error.html',
                                  {'message': msg,
                                   'menu': generate_error_menu(request),
                                   },
                                  context_instance=RequestContext(request))
                                 
    pis = project.getPIs() 

    collabs = uniq([c.project2 for c in CollabProjects.objects.filter(project1 = project)])
    collabpis = uniq([(p.investigator, collab)
                      for collab in collabs
                      for p in ProjectPIs.objects.filter(project=collab)])
    collabpis.sort(key=lambda r: r[0].lastname)

    amount = format_amount(project.amount)

    return render_to_response(
        'project.html',
        {'project': project, 
         'menu': generate_menu(request),
         'amount': amount,
         'pis': pis, 'collabs': collabs, 'collabpis': collabpis},
        context_instance=RequestContext(request)
    )
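
Note: every example on this page relies on a small uniq helper (imported as utils.uniq in several projects) that removes duplicates from a sequence. The helper itself is not shown in any listing; a minimal, order-preserving sketch of what such a function commonly looks like is given below. This is an assumption for illustration only, not the implementation used by these projects (some of which appear to return a generator, e.g. list(uniq(statements)) in Example #16).

def uniq(items):
    # Hypothetical sketch: drop duplicates while preserving first-seen order.
    # Assumes the elements are hashable, as they are in the examples on this page.
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result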
Example #2
def project(request, abstractid):
    LogRequest(request, "project: " + str(abstractid))
    try:
        project = NSFProject.objects.get(awardID=abstractid)
    except:
        msg = "No project found for award ID: " + str(abstractid)
        LogWarning(msg)
        return render_to_response('error.html', {
            'message': msg,
            'menu': generate_error_menu(request),
        },
                                  context_instance=RequestContext(request))

    pis = project.getPIs()

    collabs = uniq(
        [c.project2 for c in CollabProjects.objects.filter(project1=project)])
    collabpis = uniq([(p.investigator, collab) for collab in collabs
                      for p in ProjectPIs.objects.filter(project=collab)])
    collabpis.sort(key=lambda r: r[0].lastname)

    amount = format_amount(project.amount)

    return render_to_response('project.html', {
        'project': project,
        'menu': generate_menu(request),
        'amount': amount,
        'pis': pis,
        'collabs': collabs,
        'collabpis': collabpis
    },
                              context_instance=RequestContext(request))
Example #3
def write_mutatee_cmakelists(directory, info, platform):
   
   cmakelists = open(directory + "/cmake-mutatees.txt", "w")

   compilers = info['compilers']
   mutatees = info['mutatees']
   comps = utils.uniq(map(lambda m: m['compiler'], mutatees))
   pname = os.environ.get('PLATFORM')
   modules = utils.uniq(map(lambda t: t['module'], info['tests']))

   print_src_lists(mutatees, platform, info, directory)
   print_compiler_cmakefiles(mutatees, platform, info, cmakelists, directory)
Example #4
def write_mutatee_cmakelists(directory, info, platform):
   
   cmakelists = open(directory + "/cmake-mutatees.txt", "w")

   compilers = info['compilers']
   mutatees = info['mutatees']
   comps = utils.uniq(map(lambda m: m['compiler'], mutatees))
   pname = os.environ.get('PLATFORM')
   modules = utils.uniq(map(lambda t: t['module'], info['tests']))

   print_src_lists(mutatees, platform, info, directory)
   print_compiler_cmakefiles(mutatees, platform, info, cmakelists, directory)
Example #5
File: views.py Project: matveym/passwords
def home(request):
    if not _is_logged_in(request):
        return redirect(users.create_login_url('/'))

    sites = all_sites()
    logins = sorted(uniq([site.login for site in sites if site.login]))
    passwords = sorted(uniq([site.password for site in sites if site.password]))
    return render(request, 'passwords.html', {
        'sites': sites,
        'sites_json': json.dumps(_sites_dict(sites)),
        'logins_json': json.dumps(logins),
        'passwords_json': json.dumps(passwords),
        'logout_url': users.create_logout_url('/'),
        })
Example #6
File: views.py Project: matveym/passwords
def _refresh_sites(request):
    sites = all_sites()
    sites_html = render(request, '_sites.html', {
        'sites': sites
        }).content

    logins = sorted(uniq([site.login for site in sites if site.login]))
    passwords = sorted(uniq([site.password for site in sites if site.password]))

    return json.dumps({
            'sites_html': sites_html,
            'sites': _sites_dict(sites),
            'logins': logins,
            'passwords': passwords,
            })
Example #7
def gooddsspfilename(filename, dsspdirList=None):
    # first check if pdbid is really a file
    if os.path.exists(filename):
        return filename

    # extract the pdbid from the filename
    if dsspdirList is None:
        dsspdirList = _DSSP_DIR
    if type(dsspdirList) == str:
        dsspdirList = dsspdirList.split(os.pathsep)
    pdbid = pdbidFromFilename(filename)
    pdbidl = pdbid.lower()
    branch = pdbidl[1:3]

    # generate filename variants
    basenames = [
        x % vars()
        for x in ("%(filename)s", "%(pdbid)s", "%(pdbidl)s", "pdb%(pdbidl)s")
    ]
    basenames = uniq(basenames)
    extensions = ("", ".dssp", ".DSSP")
    compressions = ("", ".gz", ".Z")

    # generate subdirectory locations
    subdirs = []
    for dsspdir in dsspdirList:
        innerSubdirs = [
            x % vars()
            for x in ("", "%(branch)s", "%(dsspdir)s",
                      os.path.join("%(dsspdir)s", "%(branch)s"),
                      os.path.join("%(dsspdir)s", "divided", "%(branch)s"),
                      os.path.join("%(dsspdir)s", "data", "structures",
                                   "divided", "pdb", "%(branch)s"))
        ]
        subdirs.extend(innerSubdirs)
    subdirs = uniq(subdirs)

    # search tree
    for subdir in subdirs:
        for cmp in compressions:
            for base in basenames:
                for ext in extensions:
                    filename = os.path.join(subdir,
                                            "%(base)s%(ext)s%(cmp)s" % vars())
                    if os.path.exists(filename):
                        return filename

    return None
Example #8
def generateCollaborators(institution):
    pis = NSFInvestigator.objects.filter(satc=True).extra(
        select={
            'lower_name': 'lower(lastname)'
        }).order_by('lower_name', 'firstname').filter(attendee=True)

    for pi in pis:
        line = pi.email + ", "

        piprojects = ProjectPIs.objects.filter(investigator=pi)
        projects = sorted(uniq([p.project for p in piprojects if p.project]),
                          key=lambda proj: proj.startDate)

        collaborators = sorted(uniq([
            p.investigator for proj in projects
            for p in ProjectPIs.objects.filter(project=proj)
        ] + [
            p.investigator for proj in projects for collab in
            [c.project2 for c in CollabProjects.objects.filter(project1=proj)]
            for p in ProjectPIs.objects.filter(project=collab)
        ]),
                               key=lambda pi: pi.lastname)

        if institution:
            institutions = pi.getInstitutions()
            for institution in institutions:
                pis = uniq([
                    ppi.investigator for ppi in ProjectPIs.objects.filter(
                        institution=institution)
                ])
                pis = [icollab for icollab in pis if pi.attendee]
                if pi in pis:
                    pis.remove(pi)
                else:
                    pass  # LogWarning("Not a self institution collaborator! " + pi.fullDisplay())

                collaborators += pis

        if pi in collaborators:
            collaborators.remove(pi)
        else:
            pass  # print "Not self-collaborator: " + pi.email

        line += ', '.join([
            collaborator.email for collaborator in collaborators
            if collaborator.attendee
        ])
        print line
Example #9
def get_all_mutatee_sources(groupable, module, info):
	return utils.uniq(reduce(lambda a, b: set(a) | set(b),
		(map(lambda m: m['preprocessed_sources'],
		filter(lambda m: m['name'] != 'none'
			and is_valid_test(m) == 'true' and is_groupable(m, info) == groupable and get_module(m) == module,
			info['mutatees']))),
		[]))
Example #10
def institutions(request, max_page=MAX_PAGE):
    title = 'Institutions'
    projects, explanation = NSFProject.selectProjectsFromRequest(request)
    institutions = uniq([project.institution for project in projects])
    institutions.sort(key=lambda inst: inst.name)
    explanation = str(len(institutions)) + " institutions hosting " + explanation 

    if max_page < len(institutions) < 1.85 * max_page:
        max_page = 2 * max_page # don't paginate 

    paginator = Paginator(institutions, max_page)
    page = request.GET.get('page')

    if page == 'all':
        showinstitutions = institutions
    else:
        try:
            showinstitutions = paginator.page(page)
        except PageNotAnInteger:
            showinstitutions = paginator.page(1)
        except EmptyPage:
            showinstitutions = paginator.page(paginator.num_pages)

    return render_to_response(
        'institutions.html',
        {'title': title,
         'menu': generate_menu(request),
         'explanation': explanation,
         'paginate': len(institutions) > len(showinstitutions),
         'institutions': showinstitutions },
        context_instance=RequestContext(request))
Example #11
def generate_institution_graph(projects):
    edges = []
    institutions = []
    for project in projects:
        cols = []
        institution = project.institution
        if not institution in institutions:
            institutions.append(institution)
        collabs = uniq([c.project2 for c in CollabProjects.objects.filter(project1 = project)])
        for collab in collabs:
            collabinst = collab.institution
            if not collabinst in institutions:
                institutions.append(collabinst)
            cols.append(institutions.index(collabinst))

        val = 1
        val += int(project.amount / 1000000)
        if project.satc:
            val *= 2
        edges += make_edges(cols, val)

    edges = merge_edges(edges)
    json_obj = {'nodes': [], 'links': []}

    for institution in institutions:
        desc = {'name': institution.showName(), 'group': 1, 'instid': institution.id }
        json_obj['nodes'].append(desc)

    for edge in edges:
        desc = {"source": edge[0], "target": edge[1], "value": edge[2]}
        json_obj['links'].append(desc)

    json_str = json.dumps(json_obj)
    return json_str
Example #12
 def group_textlines(self, laparams, lines):
     plane = Plane(self.bbox)
     plane.extend(lines)
     boxes = {}
     for line in lines:
         neighbors = line.find_neighbors(plane, laparams.line_margin)
         if line not in neighbors: continue
         members = []
         for obj1 in neighbors:
             members.append(obj1)
             if obj1 in boxes:
                 members.extend(boxes.pop(obj1))
         if isinstance(line, LTTextLineHorizontal):
             box = LTTextBoxHorizontal()
         else:
             box = LTTextBoxVertical()
         for obj in uniq(members):
             box.add(obj)
             boxes[obj] = box
     done = set()
     for line in lines:
         if line not in boxes: continue
         box = boxes[line]
         if box in done:
             continue
         done.add(box)
         if not box.is_empty():
             yield box
     return
Example #13
 def get_textboxes(self, laparams, lines):
     plane = Plane(lines)
     boxes = {}
     for line in lines:
         neighbors = line.find_neighbors(plane, laparams.line_margin)
         assert line in neighbors, line
         members = []
         for obj1 in neighbors:
             members.append(obj1)
             if obj1 in boxes:
                 members.extend(boxes.pop(obj1))
         if isinstance(line, LTTextLineHorizontal):
             box = LTTextBoxHorizontal()
         else:
             box = LTTextBoxVertical()
         for obj in uniq(members):
             box.add(obj)
             boxes[obj] = box
     done = set()
     for line in lines:
         box = boxes[line]
         if box in done: continue
         done.add(box)
         if not box.is_empty():
             yield box
     return
Example #14
def nomination_ids_for(congress, options={}):
    nomination_ids = []

    page = page_for(congress, options)
    if not page:
        logging.error("Couldn't download page for %d congress" % congress)
        return None

    # extract matching links
    doc = html.document_fromstring(page)
    raw_nomination_ids = doc.xpath('//div[@id="content"]/p[2]/a/text()')
    nomination_ids = []

    for raw_id in raw_nomination_ids:
        pieces = raw_id.split(' ')

        # ignore these
        if raw_id in ["PDF", "Text", "split into two or more parts"]:
            pass
        elif len(pieces) < 2:
            logging.error("Bad nomination ID detected: %s" % raw_id)
            return None
        else:
            nomination_ids.append(pieces[1])

    return utils.uniq(nomination_ids)
Example #15
 def get_textboxes(self, laparams, lines):
     plane = Plane(lines)
     boxes = {}
     for line in lines:
         neighbors = line.find_neighbors(plane, laparams.line_margin)
         assert line in neighbors, line
         members = []
         for obj1 in neighbors:
             members.append(obj1)
             if obj1 in boxes:
                 members.extend(boxes.pop(obj1))
         if isinstance(line, LTTextLineHorizontal):
             box = LTTextBoxHorizontal()
         else:
             box = LTTextBoxVertical()
         for obj in uniq(members):
             box.add(obj)
             boxes[obj] = box
     done = set()
     for line in lines:
         box = boxes[line]
         if box in done: continue
         done.add(box)
         yield box.analyze(laparams)
     return
Example #16
def edit_statements_page(title, csv_file, username, summary=None):
    """
    Edit a wiki page of suggested statements.
    """

    rows = csv.reader(csv_file)
    (header, property) = next(rows)
    assert header == "qid"

    statements = []
    for (qid, value) in rows:
        statements.append((qid, property, value))
    statements.sort()
    statements = list(uniq(statements))

    lines = []
    for (entity, property, value) in statements:
        lines.append("* {{Statement|" + entity + "|" + property + "|" +
                     str(value) + "}}")

    if statements:
        lines.append("")
        lines.append(
            wikitext.link("Add via QuickStatements",
                          quickstatements_url(statements)))

    lines.append("")
    text = "\n".join(lines)

    return page.edit(title, text, username, summary)
Example #17
def institutions(request, max_page=MAX_PAGE):
    title = 'Institutions'
    projects, explanation = NSFProject.selectProjectsFromRequest(request)
    institutions = uniq([project.institution for project in projects])
    institutions.sort(key=lambda inst: inst.name)
    explanation = str(
        len(institutions)) + " institutions hosting " + explanation

    if max_page < len(institutions) < 1.85 * max_page:
        max_page = 2 * max_page  # don't paginate

    paginator = Paginator(institutions, max_page)
    page = request.GET.get('page')

    if page == 'all':
        showinstitutions = institutions
    else:
        try:
            showinstitutions = paginator.page(page)
        except PageNotAnInteger:
            showinstitutions = paginator.page(1)
        except EmptyPage:
            showinstitutions = paginator.page(paginator.num_pages)

    return render_to_response(
        'institutions.html', {
            'title': title,
            'menu': generate_menu(request),
            'explanation': explanation,
            'paginate': len(institutions) > len(showinstitutions),
            'institutions': showinstitutions
        },
        context_instance=RequestContext(request))
Example #18
def index(options={}):
    concordance = defaultdict(list)
    files = [
        x for x in os.listdir(os.getcwd() + "/data/")
        if re.sub("\d+\.json", "", x) == ""
    ]
    if options.get('limit', False):
        files = files[:options.get('limit')]

    for file in files:
        sermon = json.load(open(os.getcwd() + "/data/" + file, 'r'))
        words = uniq(
            re.findall(r"\b[A-z]+\b", sermon['opening'].replace('\n',
                                                                ' ').lower()))
        '''
        if options.get("uniques", False):
            words = uniq(re.findall(r"\b[A-z]+\b", sermon['opening'].replace('\n', ' ').lower()))
        else:
            words = re.findall(r"\b[A-z]+\b", sermon['opening'].replace('\n', ' ').lower())
        '''
        for word in words:
            if len(word) > 2:
                concordance[word].append(file.replace('.json', ''))

    write(json.dumps(concordance, sort_keys=True, indent=2),
          os.getcwd() + "/src/data/index.json")
    write(json.dumps(concordance, sort_keys=True),
          os.getcwd() + "/src/data/index.min.json")
Example #19
def get_all_mutatee_sources(groupable, module, info):
	return utils.uniq(reduce(lambda a, b: set(a) | set(b),
		(map(lambda m: m['preprocessed_sources'],
		filter(lambda m: m['name'] != 'none'
			and is_valid_test(m) == 'true' and is_groupable(m, info) == groupable and get_module(m) == module,
			info['mutatees']))),
		[]))
Example #20
def getCollaborators(projects):
    collaborators = sorted(uniq([
        p.investigator for proj in projects
        for p in ProjectPIs.objects.filter(project=proj)
    ] + [
        p.investigator for proj in projects for collab in
        [c.project2 for c in CollabProjects.objects.filter(project1=proj)]
        for p in ProjectPIs.objects.filter(project=collab)
    ]),
                           key=lambda pi: pi.lastname)
    return collaborators
Example #21
def getCollaborators(projects):
    collaborators = sorted(uniq(
            [p.investigator
             for proj in projects
             for p in ProjectPIs.objects.filter(project=proj)]
            +
            [p.investigator
             for proj in projects
             for collab in [c.project2 for c in CollabProjects.objects.filter(project1 = proj)]
             for p in ProjectPIs.objects.filter(project=collab)]),
                           key = lambda pi: pi.lastname)
    return collaborators
Example #22
def generateCollaborators(institution):
    pis = NSFInvestigator.objects.filter(satc=True).extra(select={'lower_name': 'lower(lastname)'}).order_by('lower_name', 'firstname').filter(attendee=True)

    for pi in pis:
        line = pi.email + ", "

        piprojects = ProjectPIs.objects.filter(investigator=pi)
        projects = sorted(uniq([p.project for p in piprojects if p.project]), 
                          key=lambda proj: proj.startDate)

        collaborators = sorted(uniq(
                [p.investigator
                 for proj in projects
                 for p in ProjectPIs.objects.filter(project=proj)]
                +
                [p.investigator
                 for proj in projects
                 for collab in [c.project2 for c in CollabProjects.objects.filter(project1 = proj)]
                 for p in ProjectPIs.objects.filter(project=collab)]),
                               key = lambda pi: pi.lastname)

        if institution:
            institutions = pi.getInstitutions()
            for institution in institutions:
                pis = uniq([ppi.investigator for ppi in ProjectPIs.objects.filter(institution=institution)])
                pis = [icollab for icollab in pis if pi.attendee]
                if pi in pis:
                    pis.remove(pi)
                else:
                    pass # LogWarning("Not a self institution collaborator! " + pi.fullDisplay())

                collaborators += pis
                
        if pi in collaborators:
            collaborators.remove(pi)
        else:
            pass # print "Not self-collaborator: " + pi.email

        line += ', '.join([collaborator.email for collaborator in collaborators if collaborator.attendee])
        print line
Example #23
def bill_ids_for(congress, options):
  bill_ids = []

  bill_type = options.get('bill_type', None)
  if bill_type:
    bill_types = [bill_type]
  else:
    bill_types = utils.thomas_types.keys()

  for bill_type in bill_types:
    
    # match only links to landing pages of this bill type
    # it shouldn't catch stray links outside of the confines of the 100 on the page,
    # but if it does, no big deal
    link_pattern = "^\s*%s\d+\s*$" % utils.thomas_types[bill_type][1]

    # loop through pages and collect the links on each page until 
    # we hit a page with < 100 results, or no results
    offset = 0
    while True:
      # download page, find the matching links
      page = utils.download(
        page_for(congress, bill_type, offset),
        page_cache_for(congress, bill_type, offset),
        options.get('force', False))

      if not page:
        logging.error("Couldn't download page with offset %i, aborting" % offset)
        return None

      # extract matching links
      doc = html.document_fromstring(page)
      links = doc.xpath(
        "//a[re:match(text(), '%s')]" % link_pattern, 
        namespaces={"re": "http://exslt.org/regular-expressions"})

      # extract the bill ID from each link
      for link in links:
        code = link.text.lower().replace(".", "").replace(" ", "")
        bill_ids.append("%s-%s" % (code, congress))

      if len(links) < 100:
        break

      offset += 100

      # sanity check, while True loops are dangerous
      if offset > 100000:
        break

  return utils.uniq(bill_ids)
Example #24
def mat_h2_plus_old(bond_length, bspline_set, l_list):
    """ gives hamiltonian matrix and overlap matrix of hydrogem molecule ion

    Parameters
    ----------
    bond_length : Double
         bond length of hydrogen molecular ion
    bspline_set : BSplineSet
    l_list: list of non-negative integers
         list of angular quantum numbers to use

    Returns
    -------
    h_mat : numpy.ndarray
        hamiltonian matrix
    s_mat : numpy.ndarray
        overlap matrix
    """

    # compute r1 matrix  (B_i|O|B_j)
    # (-1/2 d^2/dr^2, 1, 1/r^2, {s^L/g^{L+1} | L<-l_list})
    rs = bspline_set.xs
    d2_rmat = bspline_set.d2_mat()
    r2_rmat = bspline_set.v_mat(1.0/(rs*rs))
    s_rmat = bspline_set.s_mat()
    tmp_L_list = uniq(flatten([ls_non_zero_YYY(L1, L2)
                               for L1 in l_list for L2 in l_list]))
    en_r1mat_L = {}
    for L in tmp_L_list:
        en_r1mat_L[L] = bspline_set.en_mat(L, bond_length/2.0)

    # compute r1Y matrix (B_iY_L1M1|O|B_jY_L2M2)
    def one_block(L1, L2):
        v = -2.0*sum([sqrt(4.0*pi/(2*L+1)) *
                      y1mat_Yqk((L1, 0), (L, 0), (L2, 0)) *
                      en_r1mat_L[L]
                      for L in ls_non_zero_YYY(L1, L2)])
        if L1 == L2:
            L = L1
            t = -0.5 * d2_rmat + L*(L+1)*0.5*r2_rmat
            return t+v
        else:
            return v

    H_mat = bmat([[one_block(L1, L2)
                   for L1 in l_list]
                  for L2 in l_list])
    S_mat = bmat([[s_rmat if L1 == L2 else None
                   for L1 in l_list]
                  for L2 in l_list])
    return (H_mat, S_mat)
Example #25
 def getScopFamily(self, id, chainid=None, resnum=None):
     if len(id) == 4:
         families = []
         for entry in self.entriesByPdbid.get(id, []):
             if chainid is not None:
                 # check chainid
                 pass
             if resnum is not None:
                 # check resnum
                 pass
             families.append(entry.scopfamily)
         families = sorted(uniq(families))
         return ';'.join(families)
     return ''
Example #26
def nomination_ids_for(congress, options={}):
    nomination_ids = []

    page = page_for(congress)
    if not page:
        logging.error("Couldn't download page for %d congress" % congress)
        return None

    # extract matching links
    doc = html.document_fromstring(page)
    nomination_ids = doc.xpath('//div[@id="content"]/p[2]/a/text()')
    nomination_ids = [x.split(' ')[1] for x in nomination_ids]

    return utils.uniq(nomination_ids)
Example #27
def vote_ids_for_house(congress, session_year, options):
    vote_ids = []

    index_page = "http://clerk.house.gov/evs/%s/index.asp" % session_year
    group_page = r"ROLL_(\d+)\.asp"
    link_pattern = r"http://clerk.house.gov/cgi-bin/vote.asp\?year=%s&rollnumber=(\d+)" % session_year

    # download index page, find the matching links to the paged listing of votes
    page = utils.download(
        index_page,
        "%s/votes/%s/pages/house.html" % (congress, session_year),
        options)

    if not page:
        logging.error("Couldn't download House vote index page, skipping")
        return None

    # extract matching links
    doc = html.document_fromstring(page)
    links = doc.xpath(
        "//a[re:match(@href, '%s')]" % group_page,
        namespaces={"re": "http://exslt.org/regular-expressions"})

    for link in links:
        # get some identifier for this inside page for caching
        grp = re.match(group_page, link.get("href")).group(1)

        # download inside page, find the matching links
        page = utils.download(
            urllib.parse.urljoin(index_page, link.get("href")),
            "%s/votes/%s/pages/house_%s.html" % (congress, session_year, grp),
            options)

        if not page:
            logging.error("Couldn't download House vote group page (%s), aborting" % grp)
            continue

        doc = html.document_fromstring(page)
        votelinks = doc.xpath(
            "//a[re:match(@href, '%s')]" % link_pattern,
            namespaces={"re": "http://exslt.org/regular-expressions"})

        for votelink in votelinks:
            num = re.match(link_pattern, votelink.get("href")).group(1)
            vote_id = "h" + num + "-" + str(congress) + "." + session_year
            if not should_process(vote_id, options):
                continue
            vote_ids.append(vote_id)

    return utils.uniq(vote_ids)
Example #28
def nomination_ids_for(congress, options = {}):  
  nomination_ids = []

  page = page_for(congress)
  if not page:
    logging.error("Couldn't download page for %d congress" % congress)
    return None

  # extract matching links
  doc = html.document_fromstring(page)
  nomination_ids = doc.xpath('//div[@id="content"]/p[2]/a/text()')
  nomination_ids = [x.split(' ')[1] for x in nomination_ids]

  return utils.uniq(nomination_ids)
Example #29
def vote_ids_for_house(congress, session_year, options):
    vote_ids = []

    index_page = "http://clerk.house.gov/evs/%s/index.asp" % session_year
    group_page = r"ROLL_(\d+)\.asp"
    link_pattern = r"http://clerk.house.gov/cgi-bin/vote.asp\?year=%s&rollnumber=(\d+)" % session_year

    # download index page, find the matching links to the paged listing of votes
    page = utils.download(
        index_page,
        "%s/votes/%s/pages/house.html" % (congress, session_year),
        options)

    if not page:
        logging.error("Couldn't download House vote index page, aborting")
        return None

    # extract matching links
    doc = html.document_fromstring(page)
    links = doc.xpath(
        "//a[re:match(@href, '%s')]" % group_page,
        namespaces={"re": "http://exslt.org/regular-expressions"})

    for link in links:
        # get some identifier for this inside page for caching
        grp = re.match(group_page, link.get("href")).group(1)

        # download inside page, find the matching links
        page = utils.download(
            urlparse.urljoin(index_page, link.get("href")),
            "%s/votes/%s/pages/house_%s.html" % (congress, session_year, grp),
            options)

        if not page:
            logging.error("Couldn't download House vote group page (%s), aborting" % grp)
            continue

        doc = html.document_fromstring(page)
        votelinks = doc.xpath(
            "//a[re:match(@href, '%s')]" % link_pattern,
            namespaces={"re": "http://exslt.org/regular-expressions"})

        for votelink in votelinks:
            num = re.match(link_pattern, votelink.get("href")).group(1)
            vote_id = "h" + num + "-" + str(congress) + "." + session_year
            if not should_process(vote_id, options):
                continue
            vote_ids.append(vote_id)

    return utils.uniq(vote_ids)
Example #30
def mat_h2_plus(bond_length, bspline_set, l_list):
    """ gives hamiltonian matrix and overlap matrix of hydrogem molecule ion

    Parameters
    ----------
    bond_length : Double
         bond length of hydrogen molecular ion
    bspline_set : BSplineSet
    l_list: list of non-negative integers
         list of angular quantum numbers to use

    Returns
    -------
    h_mat : numpy.ndarray
        hamiltonian matrix
    s_mat : numpy.ndarray
        overlap matrix
    """

    # compute r1 matrix  (B_i|O|B_j)
    # (-1/2 d^2/dr^2, 1, 1/r^2, {s^L/g^{L+1} | L<-l_list})
    rs = bspline_set.xs
    d2_rmat = bspline_set.d2_mat()
    r2_rmat = bspline_set.v_mat(1.0/(rs*rs))
    s_rmat = bspline_set.s_mat()
    tmp_L_list = uniq(flatten([ls_non_zero_YYY(L1, L2)
                               for L1 in l_list for L2 in l_list]))
    en_r1mat_L = {}
    for L in tmp_L_list:
        en_r1mat_L[L] = bspline_set.en_mat(L, bond_length/2.0)

    # compute y1 matrix (Y_L1|P_L(w_A)|Y_L2)
    en_y1mat_L = {}
    for L in tmp_L_list:
        en_y1mat_L[L] = coo_matrix([[np.sqrt(4.0*np.pi/(2*L+1)) *
                                     y1mat_Yqk((L1, 0), (L, 0), (L2, 0))
                                     for L1 in l_list]
                                    for L2 in l_list])

    LL_y1mat = coo_matrix(np.diag([1.0*L*(L+1) for L in l_list]))
    diag_y1mat = coo_matrix(np.diag([1 for L in l_list]))

    # compute r1y1 matrix
    h_r1y1mat = (synthesis_mat(-0.5*d2_rmat, diag_y1mat) +
                 synthesis_mat(+0.5*r2_rmat, LL_y1mat) - 2.0 *
                 sum([synthesis_mat(en_r1mat_L[L], en_y1mat_L[L])
                      for L in tmp_L_list]))
    s_r1y1mat = synthesis_mat(s_rmat, diag_y1mat)

    return (h_r1y1mat, s_r1y1mat)
Example #31
def generate_institution_graph(projects):
    edges = []
    institutions = []
    for project in projects:
        cols = []
        institution = project.institution
        if not institution in institutions:
            institutions.append(institution)
        collabs = uniq([
            c.project2 for c in CollabProjects.objects.filter(project1=project)
        ])
        for collab in collabs:
            collabinst = collab.institution
            if not collabinst in institutions:
                institutions.append(collabinst)
            cols.append(institutions.index(collabinst))

        val = 1
        val += int(project.amount / 1000000)
        if project.satc:
            val *= 2
        edges += make_edges(cols, val)

    edges = merge_edges(edges)
    json_obj = {'nodes': [], 'links': []}

    for institution in institutions:
        desc = {
            'name': institution.showName(),
            'group': 1,
            'instid': institution.id
        }
        json_obj['nodes'].append(desc)

    for edge in edges:
        desc = {"source": edge[0], "target": edge[1], "value": edge[2]}
        json_obj['links'].append(desc)

    json_str = json.dumps(json_obj)
    return json_str
Example #32
def index(options = {}):
    concordance = defaultdict(list)
    files = [x for x in os.listdir(os.getcwd() + "/data/") if re.sub("\d+\.json", "", x) == ""]
    if options.get('limit', False):
        files = files[:options.get('limit')]

    for file in files:
        sermon = json.load(open(os.getcwd() + "/data/" + file, 'r'))
        words = uniq(re.findall(r"\b[A-z]+\b", sermon['opening'].replace('\n', ' ').lower()))
                
        '''
        if options.get("uniques", False):
            words = uniq(re.findall(r"\b[A-z]+\b", sermon['opening'].replace('\n', ' ').lower()))
        else:
            words = re.findall(r"\b[A-z]+\b", sermon['opening'].replace('\n', ' ').lower())
        '''
        for word in words:
            if len(word) > 2:
                concordance[word].append(file.replace('.json', ''))
                
    write(json.dumps(concordance, sort_keys=True, indent=2), os.getcwd() + "/src/data/index.json")
    write(json.dumps(concordance, sort_keys=True), os.getcwd() + "/src/data/index.min.json")
Example #33
def displayPI(request, pi):
    piprojects = ProjectPIs.objects.filter(investigator=pi)
    projects = sorted(uniq([p.project for p in piprojects if p.project]),
                      key=lambda proj: proj.startDate)
    totalawarded = format_amount(sum([project.amount for project in projects]))

    collaborators = getCollaborators(projects)
    try:
        collaborators.remove(pi)
    except:
        LogWarning("Not a self-collaborator: " + pi.fullDisplay())

    institutions = pi.getInstitutions()

    return render_to_response('pi.html', {
        'pi': pi,
        'menu': generate_menu(request),
        'totalawarded': totalawarded,
        'institutions': institutions,
        'projects': projects,
        'collaborators': collaborators
    },
                              context_instance=RequestContext(request))
Example #34
def displayPI(request, pi):
    piprojects = ProjectPIs.objects.filter(investigator=pi)
    projects = sorted(uniq([p.project for p in piprojects if p.project]), key=lambda proj: proj.startDate)
    totalawarded = format_amount(sum([project.amount for project in projects]))

    collaborators = getCollaborators(projects)
    try:
        collaborators.remove(pi)
    except:
        LogWarning("Not a self-collaborator: " + pi.fullDisplay())

    institutions = pi.getInstitutions()

    return render_to_response(
        'pi.html',
        {'pi': pi, 
         'menu': generate_menu(request),
         'totalawarded': totalawarded,
         'institutions': institutions,
         'projects': projects, 
         'collaborators': collaborators},
        context_instance=RequestContext(request)
    )
Example #35
def bill_ids_for(congress, options, bill_states={}):

    # override if we're actually using this method to get amendments
    doing_amendments = options.get('amendments', False)

    bill_ids = []

    bill_type = options.get(
        'amendment_type' if doing_amendments else 'bill_type', None)
    if bill_type:
        bill_types = [bill_type]
    else:
        bill_types = utils.thomas_types.keys()

    for bill_type in bill_types:

        # This sub is re-used for pulling amendment IDs too.
        if (bill_type in ('samdt', 'hamdt', 'supamdt')) != doing_amendments:
            continue

        # match only links to landing pages of this bill type
        # it shouldn't catch stray links outside of the confines of the 100 on the page,
        # but if it does, no big deal
        link_pattern = "^\s*%s\d+\s*$" % utils.thomas_types[bill_type][1]

        # loop through pages and collect the links on each page until
        # we hit a page with < 100 results, or no results
        offset = 0
        while True:
            # download page, find the matching links
            page = utils.download(page_for(congress, bill_type, offset),
                                  page_cache_for(congress, bill_type, offset),
                                  options)

            if not page:
                logging.error(
                    "Couldn't download page with offset %i, aborting" % offset)
                return None

            # extract matching links
            doc = html.document_fromstring(page)
            links = doc.xpath(
                "//a[re:match(text(), '%s')]" % link_pattern,
                namespaces={"re": "http://exslt.org/regular-expressions"})

            # extract the bill ID from each link
            for link in links:
                code = link.text.lower().replace(".", "").replace(" ", "")
                bill_id = "%s-%s" % (code, congress)

                if options.get("fast", False):
                    fast_cache_path = utils.cache_dir(
                    ) + "/" + bill_info.bill_cache_for(bill_id,
                                                       "search_result.html")
                    old_state = utils.read(fast_cache_path)

                    # Compare all of the output in the search result's <p> tag, which
                    # has last major action, number of cosponsors, etc. to a cache on
                    # disk to see if any major information about the bill changed.
                    parent_node = link.getparent(
                    )  # the <p> tag containing the whole search hit
                    parent_node.remove(
                        parent_node.xpath("b")[0]
                    )  # remove the <b>###.</b> node that isn't relevant for comparison
                    new_state = etree.tostring(
                        parent_node)  # serialize this tag

                    if old_state == new_state:
                        logging.info("No change in search result listing: %s" %
                                     bill_id)
                        continue

                    bill_states[bill_id] = new_state

                bill_ids.append(bill_id)

            if len(links) < 100:
                break

            offset += 100

            # sanity check, while True loops are dangerous
            if offset > 100000:
                break

    return utils.uniq(bill_ids)
Example #36
def print_mutators_list(out, mutator_dict, test_dict, info, platform):
    LibSuffix = platform['filename_conventions']['library_suffix']
    ObjSuffix = platform['filename_conventions']['object_suffix']

    out.write(
        "######################################################################\n"
    )
    out.write("# A list of all the mutators to be compiled\n")
    out.write(
        "######################################################################\n\n"
    )

    module_list = []
    for t in test_dict:
        module_list.append(t['module'])
    module_set = set(module_list)

    for m in module_set:
        out.write("\n")
        out.write("include_directories (\"..src/%s\")\n" % m)
        out.write("set (%s_MUTATORS\n" % (m))
        module_tests = filter(lambda t: m == t['module'], test_dict)
        module_mutators = map(lambda t: t['mutator'], module_tests)
        for t in utils.uniq(module_mutators):
            out.write("\t%s\n" % (t))
        out.write(")\n\n")
        out.write("set (%s_OBJS_ALL_MUTATORS\n" % (m))
        for t in utils.uniq(module_mutators):
            out.write("\t%s%s\n" % (t, ObjSuffix))
        out.write(")\n\n")

# We're doing this cmake list style, so we need multiple iterations
# since cmake doesn't support structs
# Iteration 1: print the list of libraries
    out.write("set (MUTATOR_NAME_LIST\n")
    for m in mutator_dict:
        out.write("\t%s\n" % m['name'])
    out.write("\t)\n\n")

    # Iteration 2: The appropriate module library for each mutator
    out.write("set (MUTATOR_MODULE_LIB_LIST\n")
    for m in mutator_dict:
        # Module info is stored with the "test" dictionary, not the
        # "mutator" dictionary
        tests = filter(lambda t: t['mutator'] == m['name'], test_dict)
        modules = map(lambda t: t['module'], tests)
        if (len(utils.uniq(modules)) != 1):
            print "ERROR: multiple modules for test " + m['name']
            raise
        module = modules.pop()
        out.write("\ttest%s\n" % module)
        # Keep this so we can provide source directories
        m['module'] = module
    out.write("\t)\n\n")

    # Iteration 3: print the list of sources for these libraries. Sources
    # must be singular (so, really, 'source')
    out.write("set (SRC src)\n")
    out.write("set (MUTATOR_SOURCE_LIST\n")
    for m in mutator_dict:
        if (len(m['sources']) != 1):
            print "ERROR: multiple sources for test " + m['name']
            raise
        out.write("\t${SRC}/%s/%s\n" % (m['module'], m['sources'][0]))
    out.write("\t)\n\n")

    # Now, iterate over these lists in parallel with a CMake foreach
    # statement to build the add_library directive
    out.write("foreach (val RANGE %d)\n" % (len(mutator_dict) - 1))
    out.write("\tlist (GET MUTATOR_NAME_LIST ${val} lib)\n")
    out.write("\tlist (GET MUTATOR_SOURCE_LIST ${val} source)\n")
    out.write("\tlist (GET MUTATOR_MODULE_LIB_LIST ${val} comp_dep)\n")
    out.write("\tset(SKIP FALSE)\n")
    out.write("\tforeach (dep ${comp_dep})\n")
    out.write("\t\tif(NOT TARGET ${dep})\n")
    out.write("\t\t\tset(SKIP TRUE)\n")
    out.write("\t\tendif()\n")
    out.write("\tendforeach()\n")
    out.write("\tif(NOT SKIP)\n")
    out.write("\t\tadd_library (${lib} ${source})\n")
    out.write(
        "\t\ttarget_link_libraries (${lib} ${comp_dep} ${LIBTESTSUITE})\n")
    out.write("\t\tinstall (TARGETS ${lib} \n")
    out.write("\t\t         RUNTIME DESTINATION ${INSTALL_DIR}\n")
    out.write("\t\t         LIBRARY DESTINATION ${INSTALL_DIR})\n")
    out.write("\tendif()\n")
    out.write("endforeach()\n\n")
Example #37
def project_pis(projects):
    return sorted(uniq([ppi.investigator for ppi in itertools.chain.from_iterable([project.getPIs() for project in projects])]),key=lambda r: r.lastname + ' ' + r.firstname)
Example #38
def project_pis(projects):
    return sorted(uniq([
        ppi.investigator for ppi in itertools.chain.from_iterable(
            [project.getPIs() for project in projects])
    ]),
                  key=lambda r: r.lastname + ' ' + r.firstname)
Example #39
File: main.py Project: davidcl/procmail-py
import os
import time
from glob import iglob
import mailbox

# procmail-py - Email content and spam filtering
# MIT License
# © 2012 Noah K. Tilton <*****@*****.**>

from config import BASE_MAILDIR, MY_DOMAINS, addresses, mark_read
from spam import spamc, blacklisted
from utils import mv, spammy_spamc, mark_as_read, uniq

INBOXDIR            = os.path.join(BASE_MAILDIR, "INBOX")
maildirs_on_disk    = [os.path.basename(dir) for dir in iglob(os.path.join(BASE_MAILDIR, "*"))]
maildirs_in_file    = addresses.values() # <- some of these may not exist
maildirs            = uniq(maildirs_on_disk + maildirs_in_file)
mailboxes           = dict((d, mailbox.Maildir(os.path.join(BASE_MAILDIR, d), create=True)) for d in maildirs)


# N.B.: the order of the following filters matters.  note the return
# statements.  this short-circuiting is desirable, but has to be done
# carefully to avoid double-booking mails.
def filter(args):
    try:
        key, message = args

        # BLACKLISTED WORDS/PHRASES
        if not message.is_multipart():
            # Can't run blacklist logic against multipart messages
            # because random phrases such as "gucci" may show up in
            # base64-encoded strings ... and I'm too lazy to write a
Example #40
def numberOfPrimeFactors(n):
    return len(uniq(primeFactors(n)))
Example #41
File: main.py Project: noah/procmail-py
import os
from glob import iglob
import mailbox

# procmail-py - Email content and spam filtering
# MIT License
# © 2014 Noah K. Tilton <*****@*****.**>

from config import BASE_MAILDIR, MY_DOMAINS, addresses, mark_read
from spam import spamc, blacklisted
from utils import file, spammy_spamc, mark_as_read, uniq

INBOXDIR = os.path.join(BASE_MAILDIR, "INBOX")
maildirs_on_disk = [
    os.path.basename(dir) for dir in iglob(os.path.join(BASE_MAILDIR, "*"))
]
maildirs_in_file = addresses.values()  # <- some of these may not exist
maildirs = uniq(maildirs_on_disk + maildirs_in_file)
mailboxes = dict(
    (d, mailbox.Maildir(os.path.join(BASE_MAILDIR, d), create=True))
    for d in maildirs)


# N.B.: the order of the following filters matters.  note the return
# statements.  this short-circuiting is desirable, but has to be done
# carefully to avoid double-booking mails.
def mfilter(args):
    try:
        key, message = args

        # BLACKLISTED WORDS/PHRASES
        if not message.is_multipart():
            # Can't run blacklist logic against multipart messages
Example #42
File: bills.py Project: TTREN/congress
def bill_ids_for(congress, options, bill_states={}):

    # override if we're actually using this method to get amendments
    doing_amendments = options.get('amendments', False)

    bill_ids = []

    bill_type = options.get('amendment_type' if doing_amendments else 'bill_type', None)
    if bill_type:
        bill_types = [bill_type]
    else:
        bill_types = utils.thomas_types.keys()

    for bill_type in bill_types:

        # This sub is re-used for pulling amendment IDs too.
        if (bill_type in ('samdt', 'hamdt', 'supamdt')) != doing_amendments:
            continue

        # match only links to landing pages of this bill type
        # it shouldn't catch stray links outside of the confines of the 100 on the page,
        # but if it does, no big deal
        link_pattern = "^\s*%s\d+\s*$" % utils.thomas_types[bill_type][1]

        # loop through pages and collect the links on each page until
        # we hit a page with < 100 results, or no results
        offset = 0
        while True:
            # download page, find the matching links
            page = utils.download(
                page_for(congress, bill_type, offset),
                page_cache_for(congress, bill_type, offset),
                options)

            if not page:
                logging.error("Couldn't download page with offset %i, aborting" % offset)
                return None

            # extract matching links
            doc = html.document_fromstring(page)
            links = doc.xpath(
                "//a[re:match(text(), '%s')]" % link_pattern,
                namespaces={"re": "http://exslt.org/regular-expressions"})

            # extract the bill ID from each link
            for link in links:
                code = link.text.lower().replace(".", "").replace(" ", "")
                bill_id = "%s-%s" % (code, congress)

                if options.get("fast", False):
                    fast_cache_path = utils.cache_dir() + "/" + bill_info.bill_cache_for(bill_id, "search_result.html")
                    old_state = utils.read(fast_cache_path)

                    # Compare all of the output in the search result's <p> tag, which
                    # has last major action, number of cosponsors, etc. to a cache on
                    # disk to see if any major information about the bill changed.
                    parent_node = link.getparent()  # the <p> tag containing the whole search hit
                    parent_node.remove(parent_node.xpath("b")[0])  # remove the <b>###.</b> node that isn't relevant for comparison
                    new_state = etree.tostring(parent_node)  # serialize this tag

                    if old_state == new_state:
                        logging.info("No change in search result listing: %s" % bill_id)
                        continue

                    bill_states[bill_id] = new_state

                bill_ids.append(bill_id)

            if len(links) < 100:
                break

            offset += 100

            # sanity check, while True loops are dangerous
            if offset > 100000:
                break

    return utils.uniq(bill_ids)
Example #43
def print_mutators_list(out, mutator_dict, test_dict, info, platform):
	LibSuffix = platform['filename_conventions']['library_suffix']
	ObjSuffix = platform['filename_conventions']['object_suffix']


	out.write("######################################################################\n")
	out.write("# A list of all the mutators to be compiled\n")
	out.write("######################################################################\n\n")

	module_list = []
	for t in test_dict:
		module_list.append(t['module'])
	module_set = set(module_list)

	for m in module_set:
		out.write("\n")
                out.write("include_directories (\"..src/%s\")\n" % m)
                out.write("set (%s_MUTATORS " % (m))
		module_tests = filter(lambda t: m == t['module'], test_dict)
		module_mutators = map(lambda t: t['mutator'], module_tests)
		for t in utils.uniq(module_mutators):
			out.write("%s " % (t))
		out.write(")\n\n")
		out.write("set (%s_OBJS_ALL_MUTATORS " % (m))
		for t in utils.uniq(module_mutators):
			out.write("%s%s " % (t, ObjSuffix))
		out.write(")\n\n")


        # We're doing this cmake list style, so we need multiple iterations 
        # since cmake doesn't support structs
        # Iteration 1: print the list of libraries
        out.write("set (MUTATOR_NAME_LIST\n")
	for m in mutator_dict:
           out.write("\t%s\n" % m['name'])
        out.write("\t)\n\n")

        # Iteration 2: The appropriate module library for each mutator
        out.write("set (MUTATOR_MODULE_LIB_LIST\n")
        for m in mutator_dict:
           # Module info is stored with the "test" dictionary, not the 
           # "mutator" dictionary
           tests = filter(lambda t: t['mutator'] == m['name'], test_dict)
           modules = map(lambda t: t['module'], tests)
           if (len(utils.uniq(modules)) != 1):
              print "ERROR: multiple modules for test " + m['name']
              raise
           module = modules.pop()
           out.write("\ttest%s\n" % module)
           # Keep this so we can provide source directories
           m['module'] = module
        out.write("\t)\n\n")

        # Iteration 3: print the list of sources for these libraries. Sources
        # must be singular (so, really, 'source')
        out.write("set (SRC src)\n")
        out.write("set (MUTATOR_SOURCE_LIST\n")
        for m in mutator_dict:
           if (len(m['sources']) != 1):
              print "ERROR: multiple sources for test " + m['name']
              raise
           out.write("\t${SRC}/%s/%s\n" % (m['module'], m['sources'][0]))
        out.write("\t)\n\n")

        # Now, iterate over these lists in parallel with a CMake foreach
        # statement to build the add_library directive
        out.write("foreach (val RANGE %d)\n" % (len(mutator_dict) - 1))
        out.write("\tlist (GET MUTATOR_NAME_LIST ${val} lib)\n")
        out.write("\tlist (GET MUTATOR_SOURCE_LIST ${val} source)\n")
        out.write("\tlist (GET MUTATOR_MODULE_LIB_LIST ${val} comp_dep)\n")
	out.write("\tset(SKIP FALSE)\n")
	out.write("\tforeach (dep ${comp_dep})\n")
	out.write("\t\tif(NOT TARGET ${dep})\n")
	out.write("\t\t\tset(SKIP TRUE)\n")
	out.write("\t\tendif()\n")
	out.write("\tendforeach()\n")
	out.write("\tif(NOT SKIP)\n")
        out.write("\t\tadd_library (${lib} ${source})\n")
        out.write("\t\ttarget_link_libraries (${lib} ${comp_dep} ${LIBTESTSUITE})\n")
	out.write("\t\tinstall (TARGETS ${lib} \n")
	out.write("\t\t         RUNTIME DESTINATION ${INSTALL_DIR}\n")
	out.write("\t\t         LIBRARY DESTINATION ${INSTALL_DIR})\n")
	out.write("\tendif()\n")
        out.write("endforeach()\n\n")
Example #44
#!/usr/bin/python
# ADIC client program
#
# usage is the same as for "adiC" with additional
# required option "--username", e.g.:
# adic_client.py --username=nobody -vd gradient f.c
#
import sys, glob, socket
from ADIC import ADIC_Client
from utils import uniq, get_username, include_files, get_server, string

adic = ADIC_Client()
username, args = get_username(sys.argv[1:])
options,files = adic.check_options(args)
files = reduce(lambda x, y: x+y, map(glob.glob, files), [])  # expand unix wildcards
files = uniq(include_files(files,adic.LanguageClass))  # add include files
try:
    host,port = get_server("r_adic")
except socket.error:
    sys.exit(1)
else:
    print adic.submit_request(host,port,username,string.join(options),files)

Example #45
#!/usr/bin/python
# ADIC client program
#
# usage is the same as for "adiC" with additional
# required option "--username", e.g.:
# adic_client.py --username=nobody -vd gradient f.c
#
import sys, glob, socket
from ADIC import ADIC_Client
from utils import uniq, get_username, include_files, get_server, string

adic = ADIC_Client()
username, args = get_username(sys.argv[1:])
options, files = adic.check_options(args)
files = reduce(lambda x, y: x + y, map(glob.glob, files), [])  # expand unix wildcards
files = uniq(include_files(files, adic.LanguageClass))  # add include files
try:
    host, port = get_server("r_adic")
except socket.error:
    sys.exit(1)
else:
    print adic.submit_request(host, port, username, string.join(options),
                              files)
Example #46
def collect(options = {}):
    #landing page with links to all guest prayers
    page  = fromstring(download('http://chaplain.house.gov/chaplaincy/guest_chaplains.html'))
    links = uniq(page.xpath("//td/a/@href"))
    limit = options.get("limit", False)
    if limit:
        links = links[:limit]
    
    for link in links:
        try:
            uid = link.split('id=')[1]
        except Exception, e:
            print e
            continue
        html = fromstring(download('http://chaplain.house.gov/chaplaincy/' + link, uid + '.html'))
        info = {}
        info['name'] = html.xpath("//h2/text()")[0]     
        
        #get h3 pairings, guess whether a church is listed based on number of hits
        hs = html.xpath("//h3/text()")
        if len(hs) > 1:
            info['church'] = hs[0].strip()
            info['location'] = hs[1].strip()
        else:
            info['location'] = hs[0].strip()

        # get boldface pairings
        for pair in html.xpath('//strong'):
            if pair.tail:
                label, data = pair.text.replace(':', '').strip(), pair.tail.strip()
                info[label.lower().split(" ")[0]] = data
            elif pair.getnext().tag == 'a':
                label, data = pair.text.replace(':', '').strip(), pair.getnext().xpath("text()")[0].strip()
                info[label.lower().split(" ")[0]] = data
    
        # add h4/p pairings
        for pair in html.xpath('//h4'):
            if pair.getnext().tag == 'p':
                label, data = pair.text.replace(':', '').strip(), '\n'.join([x.strip() for x in pair.getnext().xpath("text()")])
                info[label.lower().split(" ")[0]] = data
        if "one" in info:
            info["introduction"] = info["one"]
            info.pop("one")

        #sessions
        info["session"] = int(math.floor((int(info["date"].split("/")[-1]) - 1789) / 2) + 1)
        info['uid'] = uid
        info['member'] = {}
        
        #get bioguide match for sponsor
        if 'sponsor' in info:
            #fix a recurring typo on House Chaplain website            
            info['member'] = {}
            info['sponsor'] = info['sponsor'].replace("Rep. Rep.", "Rep.")
            pieces = re.search("\s(.+?), \(([A-Z])-([A-Z]{2})\)", info['sponsor']).groups()
            info['member']['name'] = pieces[0]
            info['member']['party'] = pieces[1]
            info['member']['state'] = pieces[2]
            member_info = lookup(info['member']['name'], info['session'], info['member']['state'], 'house')
            
            if member_info['status'] == 'Found':
                #use name info from API instead since it's more canonical 
                if not member_info['middle_name']:
                    member_info['middle_name'] = ''
                info['member']['name'] = member_info["first_name"] + " " + member_info['middle_name'] + " " + member_info['last_name']
                info['member']['name'] = info['member']['name'].replace("  ", " ")
                info['member']['state'] = member_info["state"]
                info['member']['bioguide'] = member_info['id']
            else:
                print member_info['status'], info['member']['name']
                print "Unable to find %s (%d) in the NYT API" % (info['sponsor'], info['session'])                  
                info['member']['bioguide'] = None
            info.pop("sponsor")
        write(json.dumps(info, indent=2), os.getcwd() + "/data/" +  uid + ".json")