Пример #1
0
 def handle_data(self, data):
     """Route one chunk of text to the header counter and the lesson builder.

     While ``self.is_header`` is positive every chunk advances it; the chunk
     that brings it to 9 is parsed as the semester date range.  While
     ``self.immolation`` is set, chunks fill, in order, the subject, teacher,
     rooms and address of ``self.lesson``; once all four are filled the
     lesson is persisted and a new one is started.

     :param data: raw text passed in by the HTML parser (stripped here).
     """
     if self.is_header > 0:
         self.is_header += 1
     text = data.strip()
     if self.immolation:
         # the four chunks of a lesson are numbered 0..3
         if self.dataLine <= 3:
             # a lone '/' separates several teachers: step back so that the
             # separator and the following name are appended to the teacher
             continuation = text == '/'
             if continuation:
                 self.dataLine -= 1
             slot = self.dataLine
             if slot == 0:
                 self.lesson.subject = text.upper()
                 print(self.lesson.subject)  # DEBUG
             elif slot == 1:
                 self.lesson.teacher = self.lesson.teacher + text.upper()
                 if continuation:
                     # stay on the teacher slot for the next chunk
                     self.dataLine -= 1
                 print(self.lesson.teacher)  # DEBUG
             elif slot == 2:
                 self.lesson.rooms = text.upper()
                 print(self.lesson.rooms)  # DEBUG
             elif slot == 3:
                 self.lesson.address = text.upper()
                 print(self.lesson.address)  # DEBUG
             self.dataLine += 1
         # all four chunks collected: store the lesson and start a new one
         if self.dataLine == 4:
             print("--")  # DEBUG
             finished = self.lesson
             finished.semesterStartDate = self.semesterStartDate
             finished.semesterEndDate = self.semesterEndDate
             self.dataLine = 0
             finished.persist()
             self.lesson = Lesson()
     if self.is_header == 9:  # the chunk holding the semester dates
         # the text looks like ": <weekday> <day> <month> <year> - <weekday> ..."
         parts = text.split(sep="-")
         parse = datetime.datetime.strptime
         self.semesterStartDate = parse(parts[0], ": %A %d %B %Y ")
         self.semesterEndDate = parse(parts[1], " %A %d %B %Y")
         self.is_header = 0
Пример #2
0
 def __init__(self):  # Override here
     HTMLParser.__init__(self)
     # --- parsing state ---
     # flag read by handle_data: lesson fields are collected only while set
     self.immolation = False
     # counter of header chunks; 0 means the header is not being counted
     self.is_header = 0
     # index of the next lesson field to fill
     self.dataLine = 0
     # --- lesson being assembled ---
     # time of the first lesson hour; start hours are offsets from it
     self.firstHour = datetime.datetime.strptime("08:30", "%H:%M")
     # the lesson database entity
     self.lesson = Lesson()
     # --- semester range ---
     # both start out as the datetime class itself, used as a placeholder
     self.semesterStartDate = self.semesterEndDate = datetime.datetime
Пример #3
0
 def __init__(self):  # Override here
     """Initialise the base parser and reset every piece of parsing state."""
     HTMLParser.__init__(self)
     # the datetime class stands in until real semester dates are assigned
     placeholder = datetime.datetime
     self.semesterStartDate = placeholder
     self.semesterEndDate = placeholder
     # start time of the first lesson hour of the day; needed to turn hour
     # offsets into start times
     self.firstHour = datetime.datetime.strptime("08:30", "%H:%M")
     # line counter used while parsing the header data
     self.is_header = 0
     # counter of the lesson fields already read
     self.dataLine = 0
     self.immolation = False
     # database entity that receives the parsed fields
     self.lesson = Lesson()
Пример #4
0
 def handle_data(self, data):
     # print('tag '+data) #DEBUG
     if self.is_header > 0:
         self.is_header += 1
         # print('Pippo '+str(self.is_header)) # DEBUG
     data = data.strip()
     # needed to manage multiple teacher lines
     is_append = False
     if self.immolation:
         # count the 4 lines of data for the lesson (subject, teacher, rooms, address)
         if self.dataLine <= 3:
             # pass #DEBUG
             if data == '/':
                 self.dataLine -= 1
                 is_append = True
             if self.dataLine == 0:
                 self.lesson.subject = data.upper()
                 print(self.lesson.subject) #DEBUG
             if self.dataLine == 1:
                 self.lesson.teacher = self.lesson.teacher+data.upper()
                 if is_append == True:
                     self.dataLine -=1
                     is_append = False
                 print(self.lesson.teacher) #DEBUG
             if self.dataLine == 2:
                 self.lesson.rooms = data.upper()
                 print(self.lesson.rooms) #DEBUG
             if self.dataLine == 3:
                 self.lesson.address = data.upper()
                 print(self.lesson.address) #DEBUG
             # print(data.upper())  # DEBUG
             self.dataLine += 1
         # if is the last line
         if self.dataLine == 4:
             print("--")  # DEBUG
             self.lesson.semesterStartDate = self.semesterStartDate
             self.lesson.semesterEndDate = self.semesterEndDate
             self.dataLine = 0
             self.lesson.persist()
     #         reset the entity
             self.lesson = Lesson()
     if self.is_header == 9:  # find the correct line
         # print(data.upper()) #DEBUG
         # extract the data information and foreach date create the datetime object
         line = data.split(sep="-")
         self.semesterStartDate = datetime.datetime.strptime(line[0], ": %A %d %B %Y ")
         self.semesterEndDate = datetime.datetime.strptime(line[1], " %A %d %B %Y")
         # print(self.semesterStartDate) #DEBUG
         # print(self.semesterEndDate) #DEBUG
         self.is_header = 0
Пример #5
0
class MLStripper(HTMLParser):
    """HTML parser that extracts lessons from a timetable page.

    Tables carrying an ``id`` of the form ``<prefix>_<day>_<hour>`` open a
    lesson cell; the four text chunks found inside it (subject, teacher,
    rooms, address) are stored on a ``Lesson`` which is then persisted.
    The semester date range is read from the text that follows a ``td``
    having an attribute value of ``ttTitleTD``.
    """

    def __init__(self):  # Override here
        super().__init__()
        # True while the parser is inside a table recognised as a lesson cell
        self.immolation = False
        # is the line counter for parsing the header data
        self.is_header = 0
        # is the time of the first hour of lessons. Mandatory to calculate
        # the start hour from offsets
        self.firstHour = datetime.datetime.strptime("08:30", "%H:%M")
        # the lesson database entity
        self.lesson = Lesson()
        # index (0..3) of the next lesson field to fill
        self.dataLine = 0
        # placeholders (the datetime class itself) until the header is parsed
        self.semesterStartDate = datetime.datetime
        self.semesterEndDate = datetime.datetime

    @staticmethod
    def _parse_cell_id(table_id):
        """Return ``(day, hour_offset)`` for a lesson-cell id, else ``None``.

        The id is split on ``'_'``; the second part is the day (kept as a
        string) and the third part is the integer hour offset.  ``None`` is
        returned for a missing value, too few parts or a non-numeric hour.
        """
        if not table_id:
            return None
        coords = table_id.split(sep='_')
        if len(coords) < 3:
            return None
        try:
            return coords[1], int(coords[2])
        except ValueError:
            return None

    def handle_starttag(self, tag, attrs):
        """Detect lesson-cell tables and the semester header cell.

        :param tag: lower-cased tag name.
        :param attrs: list of ``(name, value)`` pairs; ``value`` may be None.
        """
        if tag == 'table':
            for name, value in attrs:
                # stop scanning as soon as the cellTabs class is met
                if name == 'class' and value == 'cellTabs':
                    break
                if name == 'id':
                    cell = self._parse_cell_id(value)
                    if cell is None:
                        # a table whose id is not <prefix>_<day>_<hour> is
                        # not a lesson cell: ignore it instead of crashing
                        continue
                    day, hour_offset = cell
                    self.immolation = True
                    self.lesson.day = day
                    # calculate the correct start hour for the lesson as an
                    # offset in hours from the first lesson hour
                    dt = self.firstHour + datetime.timedelta(hours=hour_offset)
                    self.lesson.hour = dt.time()
        elif tag == 'td':
            for _name, value in attrs:
                # find the header with the information regarding the
                # semester (any attribute whose value is ttTitleTD counts)
                if value == 'ttTitleTD':
                    # count the lines of the header
                    self.is_header += 1

    def handle_endtag(self, tag):
        """Leave lesson-cell mode when a table is closed."""
        if tag == 'table':
            self.immolation = False

    def handle_data(self, data):
        """Feed one text chunk to the header counter and the lesson builder.

        :param data: raw text from the parser; it is stripped before use.
        :raises IndexError, ValueError: when the header chunk does not have
            the ``": <start> - <end>"`` layout expected by ``strptime``.
        """
        if self.is_header > 0:
            self.is_header += 1
        data = data.strip()
        if self.immolation:
            # count the 4 lines of data for the lesson
            # (subject, teacher, rooms, address)
            if self.dataLine <= 3:
                # a lone '/' separates multiple teachers: step back so the
                # separator and the next name are appended to the teacher
                is_append = data == '/'
                if is_append:
                    self.dataLine -= 1
                if self.dataLine == 0:
                    self.lesson.subject = data.upper()
                    print(self.lesson.subject)  # DEBUG
                elif self.dataLine == 1:
                    self.lesson.teacher = self.lesson.teacher + data.upper()
                    if is_append:
                        # keep the next chunk on the teacher field
                        self.dataLine -= 1
                    print(self.lesson.teacher)  # DEBUG
                elif self.dataLine == 2:
                    self.lesson.rooms = data.upper()
                    print(self.lesson.rooms)  # DEBUG
                elif self.dataLine == 3:
                    self.lesson.address = data.upper()
                    print(self.lesson.address)  # DEBUG
                self.dataLine += 1
            # if is the last line
            if self.dataLine == 4:
                print("--")  # DEBUG
                self.lesson.semesterStartDate = self.semesterStartDate
                self.lesson.semesterEndDate = self.semesterEndDate
                self.dataLine = 0
                self.lesson.persist()
                # reset the entity
                self.lesson = Lesson()
        if self.is_header == 9:  # find the correct line
            # extract the date information and create a datetime per date
            line = data.split(sep="-")
            self.semesterStartDate = datetime.datetime.strptime(
                line[0], ": %A %d %B %Y ")
            self.semesterEndDate = datetime.datetime.strptime(
                line[1], " %A %d %B %Y")
            self.is_header = 0
Пример #6
0
class MLStripper(HTMLParser):
    """Timetable page parser that builds and persists ``Lesson`` entities.

    A ``table`` start tag whose ``id`` is ``<prefix>_<day>_<hour>`` starts a
    lesson cell: the day and the start time (``firstHour`` plus ``<hour>``
    hours) are stored on the current lesson, and the next four text chunks
    become its subject, teacher, rooms and address.  Text following a ``td``
    with an attribute value of ``ttTitleTD`` is counted to locate the
    semester date range.
    """

    def __init__(self):  # Override here
        super().__init__()
        # set while inside a lesson-cell table, cleared on every </table>
        self.immolation = False
        # is the line counter for parsing the header data
        self.is_header = 0
        # is the time of the first hour of lessons. Mandatory to calculate
        # the start hour from offsets
        self.firstHour = datetime.datetime.strptime("08:30", "%H:%M")
        # the lesson database entity
        self.lesson = Lesson()
        # number of lesson fields read so far for the current lesson
        self.dataLine = 0
        # hold the datetime class as a placeholder until the header is read
        self.semesterStartDate = datetime.datetime
        self.semesterEndDate = datetime.datetime

    def handle_starttag(self, tag, attrs):
        """React to ``table`` (lesson cell) and ``td`` (header) start tags.

        :param tag: lower-cased tag name.
        :param attrs: ``(name, value)`` pairs; a valueless attribute has
            ``None`` as its value.
        """
        if tag == 'table':
            for attr_name, attr_value in attrs:
                if attr_name == 'class' and attr_value == 'cellTabs':
                    # stop looking at this tag's remaining attributes
                    break
                if attr_name == 'id':
                    # Id format: <prefix>_<day>_<hour>.  Validate it before
                    # touching any state so that tables with a missing or
                    # unrelated id are skipped rather than raising.
                    try:
                        _prefix, day, hour = (attr_value or '').split(
                            sep='_')[:3]
                        start = self.firstHour + datetime.timedelta(
                            hours=int(hour))
                    except ValueError:
                        # too few parts, or the hour is not an integer
                        continue
                    self.immolation = True
                    self.lesson.day = day
                    # start time of the lesson, derived from the hour offset
                    self.lesson.hour = start.time()
        elif tag == 'td':
            for _attr_name, attr_value in attrs:
                # find the header with the information regarding the
                # semester; the value is matched on any attribute
                if attr_value == 'ttTitleTD':
                    # count the lines of the header
                    self.is_header += 1

    def handle_endtag(self, tag):
        """Stop collecting lesson fields when any table is closed."""
        if tag == 'table':
            self.immolation = False

    def handle_data(self, data):
        """Consume a text chunk for the header counter and lesson fields.

        :param data: raw text from the parser; surrounding blanks are
            removed before it is used.
        :raises IndexError, ValueError: if the header chunk cannot be split
            and parsed as two dates.
        """
        if self.is_header > 0:
            self.is_header += 1
        text = data.strip()
        if self.immolation:
            # the lesson is made of 4 chunks: subject, teacher, rooms, address
            if self.dataLine <= 3:
                # '/' between teacher names: go back one field so that the
                # separator and the following name extend the teacher
                appending = text == '/'
                if appending:
                    self.dataLine -= 1
                slot = self.dataLine
                if slot == 0:
                    self.lesson.subject = text.upper()
                    print(self.lesson.subject)  # DEBUG
                elif slot == 1:
                    self.lesson.teacher = self.lesson.teacher + text.upper()
                    if appending:
                        # the name after the separator is a teacher too
                        self.dataLine -= 1
                    print(self.lesson.teacher)  # DEBUG
                elif slot == 2:
                    self.lesson.rooms = text.upper()
                    print(self.lesson.rooms)  # DEBUG
                elif slot == 3:
                    self.lesson.address = text.upper()
                    print(self.lesson.address)  # DEBUG
                self.dataLine += 1
            # every field has been read: store the lesson and start over
            if self.dataLine == 4:
                print("--")  # DEBUG
                self.lesson.semesterStartDate = self.semesterStartDate
                self.lesson.semesterEndDate = self.semesterEndDate
                self.dataLine = 0
                self.lesson.persist()
                # reset the entity
                self.lesson = Lesson()
        if self.is_header == 9:  # find the correct line
            # split the date range and build a datetime for each side
            line = text.split(sep="-")
            self.semesterStartDate = datetime.datetime.strptime(
                line[0], ": %A %d %B %Y ")
            self.semesterEndDate = datetime.datetime.strptime(
                line[1], " %A %d %B %Y")
            self.is_header = 0