def handle_data(self, data):
    """Consume one chunk of text found between two tags.

    The chunk is interpreted in up to two ways, depending on parser state:

    * While ``self.immolation`` is set, chunks are assigned in order to the
      current lesson: subject, teacher, rooms, address (``self.dataLine``
      0 to 3). A chunk that is exactly ``'/'`` is treated as a separator
      between several teachers and is appended to the teacher field together
      with the chunk that follows it. After the fourth chunk the lesson
      receives the semester dates, ``persist()`` is called on it, and a
      fresh ``Lesson()`` replaces it.
    * While ``self.is_header`` is positive it is advanced by one per chunk;
      the chunk on which it reaches 9 is parsed as
      ``": <weekday> <day> <month> <year> - <weekday> <day> <month> <year>"``
      into ``self.semesterStartDate`` / ``self.semesterEndDate``.

    Args:
        data: Raw text handed over by the HTML parser; it is stripped of
            surrounding whitespace before use.

    Raises:
        IndexError: If the header chunk contains no ``-`` separator.
        ValueError: If either half of the header chunk does not match the
            expected date format.
    """
    # Once the header has been seen, every text chunk advances the counter.
    if self.is_header > 0:
        self.is_header += 1

    data = data.strip()

    if self.immolation:
        if self.dataLine <= 3:
            # A lone '/' separates several teachers: step back one slot so
            # the separator itself lands in the teacher field.
            is_append = data == '/'
            if is_append:
                self.dataLine -= 1

            if self.dataLine == 0:
                self.lesson.subject = data.upper()
                print(self.lesson.subject)  # DEBUG trace, kept as-is
            elif self.dataLine == 1:
                # `or ''` tolerates a lesson whose teacher is still None.
                self.lesson.teacher = (self.lesson.teacher or '') + data.upper()
                if is_append:
                    # Step back once more so that the next chunk is appended
                    # to the teacher field as well.
                    self.dataLine -= 1
                print(self.lesson.teacher)  # DEBUG trace, kept as-is
            elif self.dataLine == 2:
                self.lesson.rooms = data.upper()
                print(self.lesson.rooms)  # DEBUG trace, kept as-is
            elif self.dataLine == 3:
                self.lesson.address = data.upper()
                print(self.lesson.address)  # DEBUG trace, kept as-is

            self.dataLine += 1

        # All four chunks collected: store the lesson and start a new one.
        if self.dataLine == 4:
            print("--")  # DEBUG trace, kept as-is
            self.lesson.semesterStartDate = self.semesterStartDate
            self.lesson.semesterEndDate = self.semesterEndDate
            self.dataLine = 0
            self.lesson.persist()
            self.lesson = Lesson()

    # The ninth header chunk carries the semester start and end dates.
    if self.is_header == 9:
        line = data.split(sep="-")
        self.semesterStartDate = datetime.datetime.strptime(
            line[0], ": %A %d %B %Y ")
        self.semesterEndDate = datetime.datetime.strptime(
            line[1], " %A %d %B %Y")
        self.is_header = 0
def __init__(self):
    """Prepare the parser state used while scanning a timetable page."""
    # Let the base HTML parser set up its own machinery first.
    HTMLParser.__init__(self)

    # Flag and counters driven by the tag / data callbacks.
    self.immolation = False  # TODO: document what this flag means
    self.is_header = 0       # line counter used while parsing the header
    self.dataLine = 0

    # Time of the first lesson hour of the day; start hours are computed
    # as offsets from it.
    first_hour = datetime.datetime.strptime("08:30", "%H:%M")
    self.firstHour = first_hour

    # Placeholders (the datetime class itself, not an instance).
    self.semesterStartDate = datetime.datetime
    self.semesterEndDate = datetime.datetime

    # The lesson database entity currently being filled in.
    self.lesson = Lesson()
def handle_data(self, data):
    """Handle one text chunk reported by the HTML parser.

    Depending on parser state the chunk feeds the lesson being built, the
    semester header, or neither:

    * With ``self.immolation`` set, successive chunks fill the lesson's
      subject, teacher, rooms and address (``self.dataLine`` 0 to 3). A
      chunk equal to ``'/'`` is a separator between several teachers; it
      and the following chunk are appended to the teacher field. Once four
      chunks have been taken, the semester dates are copied onto the
      lesson, ``persist()`` is called on it and a new ``Lesson()`` is
      started.
    * With ``self.is_header`` positive, the counter grows by one per chunk;
      when it reaches 9 the chunk is split on ``-`` and both halves are
      parsed as dates into ``self.semesterStartDate`` and
      ``self.semesterEndDate``, after which the counter is reset to 0.

    Args:
        data: Text between tags; surrounding whitespace is stripped.

    Raises:
        IndexError: If the header chunk has no ``-`` in it.
        ValueError: If a header half does not match the date format.
    """
    # Every chunk after the header marker counts as one header line.
    if self.is_header > 0:
        self.is_header += 1

    data = data.strip()

    if self.immolation:
        if self.dataLine <= 3:
            # '/' marks an additional teacher: move back one slot so the
            # separator is written into the teacher field.
            is_append = data == '/'
            if is_append:
                self.dataLine -= 1

            if self.dataLine == 0:
                self.lesson.subject = data.upper()
                print(self.lesson.subject)  # DEBUG trace, kept as-is
            elif self.dataLine == 1:
                # `or ''` keeps concatenation working if teacher is None.
                self.lesson.teacher = (self.lesson.teacher or '') + data.upper()
                if is_append:
                    # Move back again: the next chunk is the extra teacher
                    # name and must be appended too.
                    self.dataLine -= 1
                print(self.lesson.teacher)  # DEBUG trace, kept as-is
            elif self.dataLine == 2:
                self.lesson.rooms = data.upper()
                print(self.lesson.rooms)  # DEBUG trace, kept as-is
            elif self.dataLine == 3:
                self.lesson.address = data.upper()
                print(self.lesson.address)  # DEBUG trace, kept as-is

            self.dataLine += 1

        # Fourth chunk consumed: finalize this lesson and reset the entity.
        if self.dataLine == 4:
            print("--")  # DEBUG trace, kept as-is
            self.lesson.semesterStartDate = self.semesterStartDate
            self.lesson.semesterEndDate = self.semesterEndDate
            self.dataLine = 0
            self.lesson.persist()
            self.lesson = Lesson()

    # Header line number nine holds "start - end" of the semester.
    if self.is_header == 9:
        line = data.split(sep="-")
        self.semesterStartDate = datetime.datetime.strptime(
            line[0], ": %A %d %B %Y ")
        self.semesterEndDate = datetime.datetime.strptime(
            line[1], " %A %d %B %Y")
        self.is_header = 0
class MLStripper(HTMLParser):
    """HTML parser that extracts lessons and semester dates from a page.

    Lesson cells are ``<table>`` elements carrying an ``id`` attribute; the
    text chunks inside them fill a ``Lesson`` entity which is persisted once
    four chunks have been read. The semester start and end dates are read
    from the text following a ``<td>`` that has an attribute whose value is
    ``ttTitleTD``.
    """

    def __init__(self):
        """Initialise the base parser and the extraction state."""
        super().__init__()
        # True between the start tag of a lesson table and the next
        # </table>.
        self.immolation = False
        # Line counter for parsing the header data.
        self.is_header = 0
        # Time of the first lesson hour; start hours are offsets from it.
        self.firstHour = datetime.datetime.strptime("08:30", "%H:%M")
        # The lesson database entity currently being filled in.
        self.lesson = Lesson()
        # Index of the next lesson field (0 subject .. 3 address).
        self.dataLine = 0
        # Placeholders (the datetime class itself) until the header is parsed.
        self.semesterStartDate = datetime.datetime
        self.semesterEndDate = datetime.datetime

    def handle_starttag(self, tag, attrs):
        """React to an opening tag.

        Args:
            tag: Lower-cased tag name.
            attrs: List of ``(name, value)`` attribute pairs.

        Raises:
            IndexError: If a lesson table id has fewer than three
                ``_``-separated parts.
            ValueError: If the third part of the id is not an integer.
        """
        if tag == 'table':
            # Tables of class 'cellTabs' are not lesson cells. Check this
            # before looking at the id so the result does not depend on the
            # order in which the attributes appear in the tag.
            if any(name == 'class' and value == 'cellTabs'
                   for name, value in attrs):
                return
            for name, value in attrs:
                if name == 'id':
                    self.immolation = True
                    # The id is split on '_'; part 1 is used as the day and
                    # part 2 as the hour offset from firstHour.
                    # NOTE(review): original note says days are coded 0-6
                    # (0 = Monday) and hours 0-9 — confirm against the page.
                    coords = value.split(sep='_')
                    self.lesson.day = coords[1]
                    start = self.firstHour + datetime.timedelta(
                        hours=int(coords[2]))
                    self.lesson.hour = start.time()
        elif tag == 'td':
            for attr in attrs:
                # The header cell with the semester information starts the
                # header line counter.
                if attr[1] == 'ttTitleTD':
                    self.is_header += 1

    def handle_endtag(self, tag):
        """Leave lesson mode when any table is closed."""
        if tag == 'table':
            self.immolation = False

    def handle_data(self, data):
        """Consume one chunk of text found between two tags.

        Inside a lesson table, chunks fill subject, teacher, rooms and
        address in that order; a ``'/'`` chunk and the one after it are
        appended to the teacher field. After four chunks the lesson gets the
        semester dates, is persisted, and a new ``Lesson()`` is started.
        While the header counter is running, the chunk on which it reaches
        9 is parsed into the semester start and end dates.

        Args:
            data: Text between tags; surrounding whitespace is stripped.

        Raises:
            IndexError: If the header chunk contains no ``-``.
            ValueError: If a header half does not match the date format.
        """
        if self.is_header > 0:
            self.is_header += 1

        data = data.strip()

        if self.immolation:
            if self.dataLine <= 3:
                # A lone '/' separates several teachers: step back one slot
                # so the separator lands in the teacher field.
                is_append = data == '/'
                if is_append:
                    self.dataLine -= 1

                if self.dataLine == 0:
                    self.lesson.subject = data.upper()
                    print(self.lesson.subject)  # DEBUG trace, kept as-is
                elif self.dataLine == 1:
                    # `or ''` tolerates a teacher that is still None.
                    self.lesson.teacher = (
                        (self.lesson.teacher or '') + data.upper())
                    if is_append:
                        # Step back again so the next chunk is appended to
                        # the teacher field as well.
                        self.dataLine -= 1
                    print(self.lesson.teacher)  # DEBUG trace, kept as-is
                elif self.dataLine == 2:
                    self.lesson.rooms = data.upper()
                    print(self.lesson.rooms)  # DEBUG trace, kept as-is
                elif self.dataLine == 3:
                    self.lesson.address = data.upper()
                    print(self.lesson.address)  # DEBUG trace, kept as-is

                self.dataLine += 1

            # All four chunks collected: store the lesson, start a new one.
            if self.dataLine == 4:
                print("--")  # DEBUG trace, kept as-is
                self.lesson.semesterStartDate = self.semesterStartDate
                self.lesson.semesterEndDate = self.semesterEndDate
                self.dataLine = 0
                self.lesson.persist()
                self.lesson = Lesson()

        # The ninth header chunk carries "start - end" of the semester.
        if self.is_header == 9:
            line = data.split(sep="-")
            self.semesterStartDate = datetime.datetime.strptime(
                line[0], ": %A %d %B %Y ")
            self.semesterEndDate = datetime.datetime.strptime(
                line[1], " %A %d %B %Y")
            self.is_header = 0
class MLStripper(HTMLParser):
    """HTML parser that extracts lessons and semester dates from a page.

    A ``<table>`` with an ``id`` attribute is treated as a lesson cell: the
    text chunks inside it fill a ``Lesson`` entity, which is persisted after
    four chunks. The semester start and end dates come from the text that
    follows a ``<td>`` having an attribute whose value is ``ttTitleTD``.
    """

    def __init__(self):
        """Initialise the base parser and the extraction state."""
        super().__init__()
        # True between the start tag of a lesson table and the next
        # </table>.
        self.immolation = False
        # Line counter for parsing the header data.
        self.is_header = 0
        # Time of the first lesson hour; start hours are offsets from it.
        self.firstHour = datetime.datetime.strptime("08:30", "%H:%M")
        # The lesson database entity currently being filled in.
        self.lesson = Lesson()
        # Index of the next lesson field (0 subject .. 3 address).
        self.dataLine = 0
        # Placeholders (the datetime class itself) until the header is parsed.
        self.semesterStartDate = datetime.datetime
        self.semesterEndDate = datetime.datetime

    def handle_starttag(self, tag, attrs):
        """React to an opening tag.

        Args:
            tag: Lower-cased tag name.
            attrs: List of ``(name, value)`` attribute pairs.

        Raises:
            IndexError: If a lesson table id has fewer than three
                ``_``-separated parts.
            ValueError: If the third part of the id is not an integer.
        """
        if tag == 'table':
            # 'cellTabs' tables are not lesson cells. Decide this before
            # reading the id so attribute order in the tag does not matter.
            is_tab_table = any(
                name == 'class' and value == 'cellTabs'
                for name, value in attrs
            )
            if is_tab_table:
                return
            for name, value in attrs:
                if name == 'id':
                    self.immolation = True
                    # The id is split on '_'; part 1 is used as the day and
                    # part 2 as the hour offset from firstHour.
                    # NOTE(review): original note says days are coded 0-6
                    # (0 = Monday) and hours 0-9 — confirm against the page.
                    coords = value.split(sep='_')
                    self.lesson.day = coords[1]
                    start = self.firstHour + datetime.timedelta(
                        hours=int(coords[2]))
                    self.lesson.hour = start.time()
        elif tag == 'td':
            for attr in attrs:
                # The header cell with the semester information starts the
                # header line counter.
                if attr[1] == 'ttTitleTD':
                    self.is_header += 1

    def handle_endtag(self, tag):
        """Leave lesson mode when any table is closed."""
        if tag == 'table':
            self.immolation = False

    def handle_data(self, data):
        """Consume one chunk of text found between two tags.

        Inside a lesson table, chunks fill subject, teacher, rooms and
        address in that order; a ``'/'`` chunk and the one after it are
        appended to the teacher field. After four chunks the lesson gets the
        semester dates, is persisted, and a new ``Lesson()`` is started.
        While the header counter is running, the chunk on which it reaches
        9 is parsed into the semester start and end dates.

        Args:
            data: Text between tags; surrounding whitespace is stripped.

        Raises:
            IndexError: If the header chunk contains no ``-``.
            ValueError: If a header half does not match the date format.
        """
        if self.is_header > 0:
            self.is_header += 1

        data = data.strip()

        if self.immolation:
            if self.dataLine <= 3:
                # A lone '/' separates several teachers: step back one slot
                # so the separator lands in the teacher field.
                is_append = data == '/'
                if is_append:
                    self.dataLine -= 1

                if self.dataLine == 0:
                    self.lesson.subject = data.upper()
                    print(self.lesson.subject)  # DEBUG trace, kept as-is
                elif self.dataLine == 1:
                    # `or ''` tolerates a teacher that is still None.
                    self.lesson.teacher = (
                        (self.lesson.teacher or '') + data.upper())
                    if is_append:
                        # Step back again so the next chunk is appended to
                        # the teacher field as well.
                        self.dataLine -= 1
                    print(self.lesson.teacher)  # DEBUG trace, kept as-is
                elif self.dataLine == 2:
                    self.lesson.rooms = data.upper()
                    print(self.lesson.rooms)  # DEBUG trace, kept as-is
                elif self.dataLine == 3:
                    self.lesson.address = data.upper()
                    print(self.lesson.address)  # DEBUG trace, kept as-is

                self.dataLine += 1

            # All four chunks collected: store the lesson, start a new one.
            if self.dataLine == 4:
                print("--")  # DEBUG trace, kept as-is
                self.lesson.semesterStartDate = self.semesterStartDate
                self.lesson.semesterEndDate = self.semesterEndDate
                self.dataLine = 0
                self.lesson.persist()
                self.lesson = Lesson()

        # The ninth header chunk carries "start - end" of the semester.
        if self.is_header == 9:
            line = data.split(sep="-")
            self.semesterStartDate = datetime.datetime.strptime(
                line[0], ": %A %d %B %Y ")
            self.semesterEndDate = datetime.datetime.strptime(
                line[1], " %A %d %B %Y")
            self.is_header = 0