Exemplo n.º 1
0
 def get_teachers(self):
     '''
     Retrieves the teachers from teilar.gr
     The output is dictionary with the following structure:
     teachers_from_teilar = {'url': ['name', 'email', 'department']}
     '''
     teachers_from_teilar = {}
     for pid in range(400):
         # Dirty workaround: pid 386 is a teacher with no matching
         # department, probably because they are testing something.
         if pid == 386:
             continue
         # Fetch each teacher's profile page; name, email and
         # department are scraped from the HTML output.
         url = 'http://www.teilar.gr/person.php?pid=%s' % pid
         soup = BeautifulSoup(teilar_anon_login(url))
         name = None
         email = None
         department = None
         try:
             name = soup.findAll('td', 'BlackText11Bold')[1].contents[0].strip()
         except IndexError:
             # No teacher behind this pid; move on to the next one
             continue
         cells = soup.findAll('td', 'BlackText11')
         try:
             email = cells[5].a.contents[0].split(' ')[0].strip()
         except AttributeError:
             # The email is not wrapped in an <a> tag for some profiles
             try:
                 email = cells[5].contents[0].strip()
             except IndexError:
                 pass
         except IndexError:
             pass
         try:
             # The string replace keeps the department name in track
             # with dionysos.teilar.gr
             department = cells[2].contents[0].strip().replace(u'Τεχν.', u'Τεχνολογίας')
         except IndexError:
             pass
         teachers_from_teilar[url] = [name, email, department]
     return teachers_from_teilar
 def get_faculties(self):
     '''
     Retrieves the faculties from eclass.teilar.gr
     The output is dictionary with the following structure:
     faculties_from_eclass = {'url': ['name', 'code']}
     '''
     faculties_from_eclass = {}
     output = teilar_anon_login('http://openclass.teilar.gr/modules/auth/listfaculte.php')
     for cell in BeautifulSoup(output).table('td'):
         link = cell.a
         url = 'http://openclass.teilar.gr/modules/auth/' + link.get('href')
         name = link.contents[0].strip()
         # The faculty code is printed inside parentheses in a <small> tag
         code = cell.small.contents[0].split(')')[0].replace('(', '').strip()
         faculties_from_eclass[url] = [name, code]
     return faculties_from_eclass
Exemplo n.º 3
0
 def get_departments(self):
     '''
     Retrieves the departments from teilar.gr
     The output is dictionary with the following structure:
     departments_from_teilar = {'url': 'name'}
     '''
     output = teilar_anon_login('http://www.teilar.gr/schools.php')
     links = BeautifulSoup(output).find_all('a', 'BlueText')
     # The string replace keeps the department names in track with
     # dionysos.teilar.gr
     return dict(
         ('http://www.teilar.gr/' + link.get('href'),
          link.contents[0].replace(u'Τεχν.', u'Τεχνολογίας'))
         for link in links)
 def get_lessons(self, faculties_from_db_q):
     '''
     Retrieves the lessons from eclass.teilar.gr
     The output is dictionary with the following structure:
     lessons_from_eclass = {'url': ['name', 'teacher', 'faculty', 'ltype'] }

     Fixes over the previous revision:
     - the table loop now covers all three lesson-type tables t0/t1/t2
       (it was range(2), so the 'Other' table was never read)
     - the 'Other' label was assigned with '==' (a no-op comparison)
       instead of '='
     - the empty-type check tested a BeautifulSoup object, which is
       always truthy; it now tests the find_all() result itself
     '''
     # Lessons are grouped in three tables by type:
     # t0 = Undergraduate, t1 = Graduate, t2 = Other
     ltype_names = {
         0: u'Προπτυχιακό',
         1: u'Μεταπτυχιακό',
         2: u'Άλλο',
     }
     lessons_from_eclass = {}
     for faculty in faculties_from_db_q:
         output = teilar_anon_login(faculty.url)
         soup = BeautifulSoup(output)
         for i in range(3):
             tables = soup.find_all('table', id='t%s' % i)
             if not tables:
                 # This lesson type does not exist for the faculty
                 continue
             group = BeautifulSoup(str(tables))
             ltype = ltype_names[i]
             all_lessons = group.find_all('tr', 'even') + group.find_all('tr', 'odd')
             for lesson in all_lessons:
                 # The course code is printed inside parentheses in a
                 # <small> tag and doubles as the URL path component
                 code = lesson.small.contents[0].replace('(', '').replace(')', '')
                 url = u'http://openclass.teilar.gr/courses/%s/' % code
                 try:
                     name = lesson.a.contents[0].strip()
                 except AttributeError:
                     # Inactive lessons have no link around the name
                     name = lesson.find_all('td')[1].contents[0].strip()
                 try:
                     teacher = lesson.find_all('td')[2].contents[0].strip()
                 except IndexError:
                     teacher = None
                 lessons_from_eclass[url] = [unicode(name), unicode(teacher), faculty.name, ltype]
     return lessons_from_eclass
Exemplo n.º 5
0
def library(request):
    '''
    Perform search in library.teilar.gr and print the results

    Renders library.html with:
    - form:    the library search form (bound on GET, unbound otherwise)
    - msg:     a no-results message, or None
    - results: a list of [title, authors, editor, city] entries
    '''
    msg = None
    results = []
    if request.method == 'GET':
        form = LibraryForm(request.GET)
        if form.is_valid():
            # NOTE(review): the session id is hardcoded in the URL and the
            # search term is not URL-encoded — confirm the library service
            # tolerates both.
            url = 'http://hermes.lib.teilar.gr/ipac20/ipac.jsp?session=A26772NR74250.24315&menu=search&aspect=subtab22&npp=40&ipp=20&spp=20&profile=multbl--1&ri=&term=%s&index=.GEN&x=0&y=0&aspect=subtab22' % str(request.GET.get('search'))
            output = teilar_anon_login(url)
            soup = BeautifulSoup(output).find_all('table')[24]
            temp_a_mediumboldanchor = soup.find_all('a', 'mediumBoldAnchor')
            temp_td = soup.find_all('td')
            # Each record occupies 10 <td> cells in the results table;
            # the metadata of the first record starts at cell 5.
            i = 5
            for item in temp_a_mediumboldanchor:
                title = item.contents[0]
                '''
                The authors are in <i> tags. Take the list of them by
                taking a list of the contents of <i> tags, and then
                join the list with commas for prettier output
                '''
                authors = []
                tmp_authors = temp_td[i].find_all('i')
                for author in tmp_authors:
                    authors.append(author.contents[0].replace(',', '').strip())
                authors = ', '.join(authors)
                # The cell after the authors holds 'city : editor'
                editor = temp_td[i+1].contents[0].contents[0].split(' : ')[1]
                city = temp_td[i+1].contents[0].contents[0].split(' : ')[0]
                i += 10
                results.append([title, authors, editor, city])
            if not results:
                msg = 'Δεν υπάρχουν αποτελέσματα'
    else:
        # Bugfix: this used to instantiate SearchForm(), which belongs to
        # a different view; the template expects the library search form.
        form = LibraryForm()
    return render_to_response('library.html', {
            'form': form,
            'msg': msg,
            'results': results,
        }, context_instance = RequestContext(request))
Exemplo n.º 6
0
 def get_teilar(self):
     '''
     Grab announcements from the following websites and
     put them in separate custom RSS files:
     - http://teilar.gr/news.php?cid=1
     - http://teilar.gr/news.php?cid=2
     - http://teilar.gr/news.php?cid=5
     - http://teilar.gr/news.php?cid=6
     - http://teilar.gr/tmimatanews.php

     Fixes over the previous revision:
     - mimetypes.type_map does not exist; the correct attribute is
       mimetypes.types_map, keyed by '.<extension>', so enclosures
       were silently never attached
     - a failed announcement parse now skips the item instead of
       reusing stale (or unbound) title/pubdate/... variables
     '''
     # Integer keys are news.php category ids; the string key is the
     # departments' news page which lives under a different URL.
     rss_filenames = {
         1: 'general.rss',
         2: 'teilar_ann.rss',
         5: 'council.rss',
         6: 'committee.rss',
         'tmimatanews.php': 'departments.rss',
     }
     for cid, rss_name in rss_filenames.iteritems():
         custom_rss = self.initialize_rss_file()
         if type(cid) == int:
             output = teilar_anon_login('http://www.teilar.gr/news.php?cid=%s' % cid)
         else:
             output = teilar_anon_login('http://www.teilar.gr/%s' % cid)
         soup = BeautifulSoup(output)
         # Default to no items so a failed page parse below cannot
         # leave announcements_all unbound
         announcements_all = []
         try:
             announcements_all = soup.find_all('table')[17].find_all('a', 'BlackText11')[:10]
         except Exception as error:
             logger_syslog.error(error, extra = log_extra_data())
             logger_mail.exception(error)
         for item in announcements_all:
             # Get inside the announcement to get the rest of the info
             ann_url = 'news_detail.php?nid=' + item['href'].split('nid=')[1]
             if type(cid) != int:
                 ann_url = 'tmimata/' + ann_url
             output = teilar_anon_login('http://www.teilar.gr/%s' % ann_url)
             soup = BeautifulSoup(output)
             try:
                 if type(cid) != int:
                     # The replace keeps the department name in track
                     # with dionysos.teilar.gr
                     creator = soup.find('span', 'OraTextBold').contents[0].split(' >')[0].replace(u'Τεχν.', u'Τεχνολογίας')
                 else:
                     creator = None
                 temp_td_oratext = soup.find_all('td', 'OraText')
                 # Publication date is printed as dd/mm/yyyy
                 pubdate = temp_td_oratext[0].contents[0].split('/')
                 pubdate = date(int(pubdate[2]), int(pubdate[1]), int(pubdate[0]))
                 title = temp_td_oratext[1].contents[0]
                 description = soup.find('td', 'BlackText11').contents[0]
                 try:
                     enclosure_link = soup.find('a', 'BlackText11Bold')['href']
                     mimetypes.init()
                     # types_map is keyed by '.<ext>'; an unknown type
                     # raises KeyError and we attach no enclosure
                     enclosure_mimetype = mimetypes.types_map['.' + enclosure_link.split('.')[-1]]
                     enclosure = feedgenerator.Enclosure(enclosure_link, 'Unknown', enclosure_mimetype)
                 except Exception:
                     enclosure = None
             except Exception as error:
                 logger_syslog.error(error, extra = log_extra_data('http://teilar.gr' + ann_url))
                 logger_mail.exception(error)
                 # Cannot build a complete RSS item; skip this announcement
                 continue
             self.add_rss_item(custom_rss, title, 'http://teilar.gr/' + ann_url, pubdate, description, creator, enclosure)
         self.write_rss_file(custom_rss, rss_name)
     return
Exemplo n.º 7
0
 def get_teachers(self):
     '''
     Grab announcements from all the teachers, and put them in
     a custom RSS file.

     Fixes over the previous revision:
     - 'mimetypes = init()' referenced an undefined name and rebound
       the mimetypes module; it is now the intended mimetypes.init()
     - mimetypes.type_map does not exist; the correct attribute is
       mimetypes.types_map, keyed by '.<extension>', so enclosures
       were silently never attached
     - a failed parse now skips the teacher/announcement instead of
       reusing stale (or unbound) variables
     '''
     custom_rss = self.initialize_rss_file()
     output = teilar_anon_login('http://www.teilar.gr/profannnews.php')
     soup = BeautifulSoup(output)
     # Default to no items so a failed page parse below cannot leave
     # announcements_all unbound
     announcements_all = []
     try:
         announcements_all = soup.find_all('a', 'BlackText11')
     except Exception as error:
         logger_syslog.error(error, extra = log_extra_data())
         logger_mail.exception(error)
     authors = {}
     for item in announcements_all:
         '''
         The teacher's announcements are all under one page instead
         of being each one in separate page. We count in the combined
         page how many times a teacher's name is mentioned, and we
         parse the same number of the teacher's top announcements.
         The results are kept in a dictionary with the following structure:
         authors = {'url': number_of_announcements}
         '''
         url = item['href']
         authors[url] = authors.get(url, 0) + 1
     for url, number in authors.iteritems():
         # Get inside the teacher's page which contains all the
         # announcements
         output = teilar_anon_login('http://www.teilar.gr/%s' % url)
         soup = BeautifulSoup(output)
         try:
             author_name = soup.find('td', 'BlueTextBold').i.contents[0]
         except Exception as error:
             logger_syslog.error(error, extra = log_extra_data(url))
             logger_mail.exception(error)
             # Without the author we cannot attribute any item; skip
             # this teacher entirely
             continue
         # Select only the number of announcements we want
         announcements_all = []
         try:
             announcements_all = soup.find_all('td', 'LineDownDots')[0:number]
         except Exception as error:
             logger_syslog.error(error, extra = log_extra_data(url))
             logger_mail.exception(error)
         for announcement in announcements_all:
             # Parse data from each announcement
             try:
                 temp_td_blacktext11 = announcement.find_all('td', 'BlackText11')
                 title = temp_td_blacktext11[0].b.contents[0]
                 # Publication date is printed as dd/mm/yyyy
                 pubdate = announcement.find('td', 'OraText').contents[0].split('/')
                 pubdate = date(int(pubdate[2]), int(pubdate[1]), int(pubdate[0]))
                 description = temp_td_blacktext11[1]
                 try:
                     enclosure_link = announcement.find('a', 'OraText')['href']
                     mimetypes.init()
                     # types_map is keyed by '.<ext>'; an unknown type
                     # raises KeyError and we attach no enclosure
                     enclosure_mimetype = mimetypes.types_map['.' + enclosure_link.split('.')[-1]]
                     enclosure = feedgenerator.Enclosure(enclosure_link, 'Unknown', enclosure_mimetype)
                 except Exception:
                     enclosure = None
             except Exception as error:
                 logger_syslog.error(error, extra = log_extra_data(author_name))
                 logger_mail.exception(error)
                 # Cannot build a complete RSS item; skip this announcement
                 continue
             self.add_rss_item(custom_rss, title, 'http://teilar.gr/' + url, pubdate, description, author_name, enclosure)
     self.write_rss_file(custom_rss, 'teachers.rss')
     return