def badge(request, **kwargs):
    """Serve a badge image (sample, tiny, or full certified) as an HTTP response.

    NOTE: a downloaded image is part of the current page, meaning that the
    coding keys should NOT be rotated; this is achieved by passing "NOT"
    as test code.
    """
    context = generic_context(request, 'NOT')
    try:
        # badges are visible to students and instructors only
        if not (has_role(context, 'Student') or has_role(context, 'Instructor')):
            raise Exception('No access')
        # when a sample badge is asked for, render it in the requested color
        # (image only, not certified)
        color_nr = kwargs.get('bc', '')
        if color_nr:
            return render_sample_badge(int(color_nr))
        # otherwise, a hex-encoded badge ID is needed; verify that it is valid
        # NOTE: since keys have not been rotated, use the ENcoder to decode!
        badge_id = decode(kwargs.get('hex', ''), context['user_session'].encoder)
        presto_badge = PrestoBadge.objects.get(pk=badge_id)
        # for speed, the reward gallery requests tiny images (80x80 px)
        if kwargs.get('tiny', ''):
            return render_tiny_badge_image(presto_badge)
        # otherwise render the certified badge image
        return render_certified_badge(presto_badge)
    except Exception as e:
        log_message('ERROR while rendering badge: %s' % str(e), context['user'])
        # fall back to the generic "not found" image
        with open(os.path.join(settings.IMAGE_DIR, 'not-found.png'), "rb") as f:
            return HttpResponse(f.read(), content_type="image/png")
def render_certified_badge(badge):
    """Render a certified badge as a PNG image with embedded signed data.

    The badge properties are serialized to a '0'/'1' bit string, signed with
    a salted PBKDF2-HMAC-SHA256 hash, and steganographically written into the
    image's noise pixels: bits 0-255 hold the 256-bit signature, bits 256-269
    hold the 14-bit payload length, and the payload follows from position 270
    (cf. verify_certified_image).

    Returns an HttpResponse containing the PNG, or None on error (logged).
    """
    # capture and log any error that may occur
    try:
        # get the badge image for the owner type
        if badge.participant:
            img = participant_badge_image(badge.attained_level,
                                          badge.course.badge_color)
        elif badge.referee:
            img = referee_badge_image(badge.attained_level,
                                      badge.course.badge_color)
        else:
            raise ValueError('Undefined badge owner')
        # spray about 50% of the image with noise
        pix = img.load()
        for y in range(0, BADGE_HEIGHT):
            for x in range(0, BADGE_WIDTH):
                if randint(0, 1):
                    wipe_bit(pix, x, y)
        # get the badge as dictionary
        bd = badge.as_dict()
        # encode it as a string of '0' and '1'
        data_bits = dict_to_binary(bd)
        # throw an exception if data to be stored exceeds 16 thousand bits
        bit_count = len(data_bits)
        if bit_count > MAX_DATA_BITS:
            # FIX: the division MUST be parenthesized -- '%' and '//' have
            # equal precedence and associate left, so the original expression
            # floor-divided the formatted STRING by 8, raising a TypeError
            raise ValueError('Badge data exceeds %d bytes' % (MAX_DATA_BITS // 8))
        # compute a hash (for validation purposes) and render it as string of '0' and '1'
        bits_hash = hash_to_binary(hexlify(pbkdf2_hmac("sha256", data_bits,
            settings.BADGE_SALT, settings.BADGE_ITERATIONS)))
        # throw an exception if hash is not exactly 256 bits
        if len(bits_hash) != 256:
            raise ValueError('Badge signature is not 256 bits')
        # write the hash in the first 256 "random" pixels
        for i, b in enumerate(bits_hash):
            code_bit(pix, i, b)
        # write the bit string length in the next 14 pixels (2^14 - 1 = 16383 suffices)
        bc_bits = format(bit_count, '014b')
        for i, b in enumerate(bc_bits):
            code_bit(pix, 256 + i, b)
        # then write the badge data bits
        for i, b in enumerate(data_bits):
            code_bit(pix, 270 + i, b)
        # update the rendering parameters
        badge.time_last_rendered = timezone.now()
        badge.rendering_count += 1
        badge.save()
        log_message('Rendered this badge: ' + unicode(badge))
        # output image to browser (do NOT save it as a file)
        response = HttpResponse(content_type='image/png')
        img.save(response, 'PNG')
        return response
    except Exception as e:
        log_message('Failed to render badge: ' + str(e))
        return None
def scan_one_assignment():
    """Pick one uploaded-but-unscanned assignment (if any) and scan it."""
    # NOTE: the two lines below show how to clear scan results for a specific relay
    # Assignment.objects.filter(participant__estafette__id=27, leg__number=1).update(
    #     time_scanned=DEFAULT_DATE, scan_result=0)
    try:
        # select (at most) one real-participant assignment that has been
        # uploaded but not scanned, and that is not a clone
        pending = Assignment.objects.exclude(
            time_uploaded__lte=DEFAULT_DATE
        ).filter(
            time_scanned__lte=DEFAULT_DATE,
            clone_of=None,
            participant__student__dummy_index=0
        ).values_list('id', flat=True)[:1]
        if pending:
            scan_assignment(pending[0])
    except Exception as e:
        # catch errors to log them (should not be visible to participant)
        log_message('ERROR during single scan: %s' % str(e))
def ascii_from_doc(path, text_to_ignore=[]):
    # Extract the text of a document as plain ASCII for plagiarism scanning,
    # followed by a "tell-tale" list of image fingerprints and creation date.
    # NOTE(review): only the .docx branch is visible in this chunk; the .pdf
    # and .xlsx branches follow elsewhere in this function.
    # NOTE(review): text_to_ignore is a mutable default argument; it is only
    # read here, so this is harmless, but None + guard would be safer.
    ext = os.path.splitext(path)[1].lower()
    if ext == '.docx':
        f = open(path, 'rb')
        doc = Document(f)
        f.close()
        text = []
        for par in doc.paragraphs:
            # first convert non-breaking spaces to normal ones, and then reduce all whitespace to 1 space
            text.append(' '.join(
                par.text.replace(unichr(160), ' ').strip().split()))
        # convert to ASCII to get rid of special chars like curled quotes
        ascii = (' '.join([s for s in text])).encode('ascii', 'ignore')
        # strip all fragments the caller wants excluded from matching
        for t in text_to_ignore:
            ascii = ascii.replace(t.encode('ascii', 'ignore'), '')
        # NOTE: Word files are also scanned for the images they contain (in xl/media),
        #       and for their creation date (from docProps/core)
        # NOTE: we signal this the start of the "tell-tale list" with a separator
        ascii += TELLTALE_SEPARATOR
        error = ''
        try:
            with ZipFile(path, 'r') as zf:
                # append for each image file a line "<extension>=<file size>+<CRC>"
                for i in zf.infolist():
                    if i.filename[:11] == 'word/media/':
                        ascii += '\n%s=%d+%d' % (os.path.splitext(
                            i.filename)[1].lower(), i.file_size, i.CRC)
                    elif i.filename == 'docProps/core.xml':
                        try:
                            # try to extract the creation time stamp
                            core = zf.read(i)
                            #log_message('%d bytes read from core' % len(core))
                            p = core.index(TIME_CREATED_TAG) + len(
                                TIME_CREATED_TAG)
                            cdt = core[p:p + 16]
                            # ignore the presto "undefined" date
                            if cdt != '2001-01-01T00:00':
                                ascii += '\ncreated=%s' % cdt
                        except Exception:
                            # ignore any errors while checking core.xml
                            pass
        except Exception, e:
            error = str(e)
        if error:
            log_message('WARNING: Failed to scan file %s as ZIP archive\n%s'
                % (path, error))
        return ascii
def scan(request, **kwargs):
    # Run one assignment scan, then clean up duplicate assignments.
    # NOTE(review): no return statement is visible in this chunk; the response
    # using `content` is presumably produced further down in the file.
    content = 'One scan completed'
    try:
        scan_one_assignment()
        # below is a provisionary patch to remedy double assignments
        # NOTE: the uniqueness constraint on Assignment is NOT effectuated because
        #       the "cloneOf" foreign key field can be NULL (a Django-specific issue)
        doubles = Assignment.objects.filter(is_rejected=False, clone_of__isnull=True
            ).values('participant__id', 'leg__id'
            ).annotate(same_leg_cnt=Count('id')
            ).values('participant__id', 'leg__id', 'same_leg_cnt'
            ).order_by().filter(same_leg_cnt__gt=1)
        for d in doubles:
            # newest assignment first, so a_set.first() is the duplicate
            a_set = Assignment.objects.filter(participant__id=d['participant__id'],
                leg__id=d['leg__id']).order_by('-id')
            # double-check that there is a duplicate assignment
            if len(a_set) > 1:
                # get the latest one
                a = a_set.first()
                # also get the one to keep
                b = a_set[1]
                # log that we found a duplicate
                log_message('WATCHDOG: Found duplicate assignment #%d-%s%d--%s'
                    % (a.id, a.case.letter, a.leg.number,
                       a.participant.student.dummy_name()))
                log_message('-- original assignment: #%d-%s%d--%s'
                    % (b.id, b.case.letter, b.leg.number,
                       b.participant.student.dummy_name()))
                # also log the name of the relay
                log_message('-- Relay: %s' % a.participant.estafette.title())
                # ensure that there are NO uploads (because these CASCADE delete)
                puc = ParticipantUpload.objects.filter(assignment=a).count()
                if puc > 0:
                    log_message('-- NOT deleted because it has associated uploads')
                else:
                    try:
                        # NOTE: deleting duplicate will set its predecessor's successor field to NULL
                        a.delete()
                        log_message('-- duplicate now deleted')
                        # NOTE: we must now restore predecessor's successor field!!
                        b.predecessor.successor = b
                        b.predecessor.save()
                        log_message('-- predecessor-successor restored')
                    except:
                        # NOTE(review): bare except also swallows SystemExit and
                        # KeyboardInterrupt; `except Exception:` would be safer
                        # signal that assignment ID is a foreign key of some other record
                        log_message('-- NOT deleted (probably related assignments or reviews)')
    except Exception, e:
        content = 'ERROR during scan: %s' % str(e)
def developer(request, **kwargs): context = generic_context(request) # check whether user can have developer role if not change_role(context, 'Developer'): return render(request, 'presto/forbidden.html', context) # check whether a template must be deleted if kwargs.get('action', '') == 'delete-template': try: h = kwargs.get('hex', '') context = generic_context(request, h) etid = decode(h, context['user_session'].decoder) et = EstafetteTemplate.objects.get(pk=etid) log_message('Deleting template %s' % et.name, context['user']) et.delete() except Exception, e: report_error(request, context, e) return render(request, 'presto/error.html', context)
def scan_N_assignments(n): a = Assignment.objects.exclude(time_uploaded__lte=DEFAULT_DATE).filter( time_scanned__lte=DEFAULT_DATE, clone_of=None).values_list('id', flat=True) print "%d assignments still need scanning" % a.count() if a: n = min(n, a.count()) else: print "No scan needed" return i = 0 while i < n: i += 1 print "Scanning #%d of %d" % (i, n) try: scan_assignment(a[i - 1]) except Exception, e: # catch errors to log them (should not be visible to participant) log_message('ERROR during single scan: %s' % str(e))
def log_file(request, **kwargs):
    # Display (part of) a presto log file; administrators only.
    # NOTE(review): no success-path return is visible in this chunk; the
    # response using `content` presumably follows further down in the file.
    ymd = kwargs.get('date', '')
    if ymd == '':
        # default to today's log file
        ymd = timezone.now().strftime('%Y%m%d')
    context = generic_context(request)
    try:
        log_message('Viewing log file %s' % ymd, context['user'])
        if not has_role(context, 'Administrator'):
            raise IOError('No permission to view log files')
        path = os.path.join(settings.LOG_DIR, 'presto-%s.log' % ymd)
        with codecs.open(path, 'r', encoding='utf8') as log:
            content = log.read()
        lines = kwargs.get('lines', '')
        pattern = unquote(kwargs.get('pattern', '')).decode('utf8')
        if lines:
            # show last N lines
            # NOTE(review): [int(lines):] drops the FIRST int(lines) lines;
            # "last N" would be [-int(lines):] unless callers pass a negative
            # count -- TODO confirm against the URL configuration
            content = '\n'.join(content.split('\n')[int(lines):])
        elif pattern:
            # show pattern-matching lines, separated by blank line
            content = '\n\n'.join(re.findall('^.*' + pattern + '.*$',
                content, re.MULTILINE))
    except IOError, e:
        # NOTE(review): report_error is called as (context, e) here but as
        # (request, context, e) in developer() -- verify the expected arity
        report_error(context, e)
        return render(request, 'presto/error.html', context)
def course(request, **kwargs):
    # Course view: resolves the course from the hex parameter and verifies
    # that the user is an instructor in (or the manager of) that course.
    # NOTE(review): this chunk only shows the access check; the view logic
    # presumably continues further down in the file.
    h = kwargs.get('hex', '')
    act = kwargs.get('action', '')
    context = generic_context(request, h)
    # check whether user can view this course
    try:
        cid = decode(h, context['user_session'].decoder)
        if act == 'delete-relay':
            # in this case, the course relay ID is passed as hex
            ce = CourseEstafette.objects.get(pk=cid)
            c = ce.course
        else:
            # otherwise the course ID
            c = Course.objects.get(pk=cid)
        # ensure that user is instructor in the course
        if not (c.manager == context['user']
                or c.instructors.filter(id=context['user'].id)):
            log_message('ACCESS DENIED: Invalid course parameter',
                context['user'])
            return render(request, 'presto/forbidden.html', context)
    except Exception, e:
        report_error(context, e)
        return render(request, 'presto/error.html', context)
def picture_queue(request, **kwargs):
    # Picture queue view: resolves either a queue picture (delete/get actions)
    # or a course from the hex parameter, then verifies that the user is an
    # instructor in that course.
    # NOTE(review): this chunk only shows the access check; the view logic
    # presumably continues further down in the file.
    h = kwargs.get('hex', '')
    act = kwargs.get('action', '')
    # check whether user can view this course
    try:
        if act in ['delete', 'get']:
            # NOTE: when getting a picture, the coding keys should NOT be rotated
            context = generic_context(request, 'NOT')
            # and the day code should be used to decode the hexed queue picture ID
            qpid = decode(h, day_code(PQ_DAY_CODE))
            qp = QueuePicture.objects.get(pk=qpid)
            c = qp.course
        else:
            # the hex code should be a course ID, and key rotation should proceed as usual
            context = generic_context(request, h)
            cid = decode(h, context['user_session'].decoder)
            c = Course.objects.get(pk=cid)
        # always ensure that the user is instructor in the course
        if not (c.manager == context['user']
                or c.instructors.filter(id=context['user'].id)):
            log_message('ACCESS DENIED: Invalid course parameter',
                context['user'])
            return render(request, 'presto/forbidden.html', context)
    except Exception, e:
        report_error(context, e)
        return render(request, 'presto/error.html', context)
ext = os.path.splitext(p.name)[1] # NOTE: mime dict may need to be extended mime = { '.jpg': 'jpeg', '.png': 'png', } w = FileWrapper(file(p.path, 'rb')) return HttpResponse(w, 'image/' + mime.get(ext, '*')) # check mail server for new pictures for this course try: mailbox = poplib.POP3(settings.PICTURE_QUEUE_SERVER) mailbox.user(settings.PICTURE_QUEUE_MAIL) mailbox.pass_(settings.PICTURE_QUEUE_PWD) msg_count = len(mailbox.list()[1]) log_message('Picture queue found %d message(s) in pq mailbox' % msg_count, context['user']) for i in range(msg_count): response, msg_as_list, size = mailbox.retr(i + 1) msg = email.message_from_string('\r\n'.join(msg_as_list)) sender = email.utils.parseaddr(msg['From'].strip()) subject = msg['Subject'].strip() # convert the Received field to a timezone-aware datetime object received = msg['Received'].split(';') t = email.utils.parsedate_tz(received[-1].strip()) time_received = timezone.localtime(datetime(*t[0:6], tzinfo=tzutc())) # see if any course matches the course code from the subject if subject: c_code = Course.objects.filter(code__iexact=subject) if c_code: c_code = c_code.first().code else:
def clear_metadata(src, dst):
    """Copy document src to dst with identifying metadata removed.

    Supports .docx, .pptx, .pdf and .xlsx files; the extension of dst is
    assumed to be in lower case. Text metadata fields are blanked, date
    fields are set to DEFAULT_DATE, and the revision is reset. Errors are
    logged, not raised.
    """
    src = settings.LEADING_SLASH + src
    dst = settings.LEADING_SLASH + dst
    ext = os.path.splitext(dst)[1]  # assumed to be in lower case!
    # core properties that must be blanked for Office documents
    meta_fields = [
        'author', 'category', 'comments', 'content_status', 'identifier',
        'keywords', 'last_modified_by', 'language', 'subject', 'title',
        'version'
    ]
    try:
        if ext in ['.docx']:
            # use a context manager so the source file is closed even on error
            with open(src, 'rb') as f:
                doc = Document(f)
            for meta_field in meta_fields:
                setattr(doc.core_properties, meta_field, '')
            setattr(doc.core_properties, 'created', DEFAULT_DATE)
            setattr(doc.core_properties, 'modified', DEFAULT_DATE)
            setattr(doc.core_properties, 'last_printed', DEFAULT_DATE)
            setattr(doc.core_properties, 'revision', 1)
            doc.save(dst)
            clean_xml_in_zip(dst)
        elif ext in ['.pptx']:
            prs = Presentation(src)
            for meta_field in meta_fields:
                setattr(prs.core_properties, meta_field, '')
            setattr(prs.core_properties, 'created', DEFAULT_DATE)
            setattr(prs.core_properties, 'modified', DEFAULT_DATE)
            setattr(prs.core_properties, 'last_printed', DEFAULT_DATE)
            setattr(prs.core_properties, 'revision', 1)
            prs.save(dst)
            clean_xml_in_zip(dst)
        elif ext == '.pdf':
            # FIX: context managers ensure both file handles are also closed
            # when an exception occurs (they used to leak in that case)
            with open(src, 'rb') as fin:
                inp = PdfFileReader(fin)
                outp = PdfFileWriter()
                # copy all pages, then blank the document info fields
                for page in range(inp.getNumPages()):
                    outp.addPage(inp.getPage(page))
                infoDict = outp._info.getObject()
                infoDict.update({
                    NameObject('/Title'): createStringObject(u''),
                    NameObject('/Author'): createStringObject(u''),
                    NameObject('/Subject'): createStringObject(u''),
                    NameObject('/Creator'): createStringObject(u'')
                })
                with open(dst, 'wb') as fout:
                    outp.write(fout)
        elif ext == '.xlsx':
            file_to_clear = 'docProps/core.xml'
            # create a copy of the Excel file while "cleaning" docProps/core.xml
            with ZipFile(src, 'r') as src_zip:
                with ZipFile(dst, 'w') as dst_zip:
                    dst_zip.comment = src_zip.comment  # preserve the comment (if any)
                    for item in src_zip.infolist():
                        if item.filename == file_to_clear:
                            # read the XML tree from the file and blank each
                            # metadata element (bounded repetition guards
                            # against pathological inputs)
                            xml = src_zip.read(item.filename)
                            xml = re.sub(r'<dc:title>[^<]{1,1000}</dc:title>',
                                '<dc:title></dc:title>', xml)
                            xml = re.sub(r'<dc:subject>[^<]{1,500}</dc:subject>',
                                '<dc:subject></dc:subject>', xml)
                            xml = re.sub(r'<dc:creator>[^<]{1,300}</dc:creator>',
                                '<dc:creator></dc:creator>', xml)
                            xml = re.sub(r'<dc:description>[^<]{1,2500}</dc:description>',
                                '<dc:description></dc:description>', xml)
                            xml = re.sub(r'<cp:keywords>[^<]{1,1000}</cp:keywords>',
                                '<cp:keywords></cp:keywords>', xml)
                            xml = re.sub(r'<cp:lastModifiedBy>[^<]{1,300}</cp:lastModifiedBy>',
                                '<cp:lastModifiedBy></cp:lastModifiedBy>', xml)
                            xml = re.sub(r'<cp:category>[^<]{1,300}</cp:category>',
                                '<cp:category></cp:category>', xml)
                            xml = re.sub(r'<cp:contentStatus>[^<]{1,100}</cp:contentStatus>',
                                '<cp:contentStatus></cp:contentStatus>', xml)
                            # FIX: the replacement used to be '</cp:revision'
                            # (missing '>'), which produced malformed XML
                            xml = re.sub(r'<cp:revision>[^<]{1,10}</cp:revision>',
                                '<cp:revision></cp:revision>', xml)
                            # replace all date-time fields with the default date
                            xml = re.sub(r':W3CDTF">[^<]{1,25}</dcterms:',
                                ':W3CDTF">2001-01-01T00:00:00Z</dcterms:', xml)
                            dst_zip.writestr(item, xml)
                        else:
                            # copy all other archive members unchanged
                            dst_zip.writestr(item, src_zip.read(item.filename))
    except Exception as e:
        log_message('Exception while removing metadata from a %s file: %s'
            % (ext, str(e)))
def verify_certified_image(img):
    """Verify a certified badge image and return its PrestoBadge record.

    Decodes the steganographically coded data written by render_certified_badge
    (bits 0-255: 256-bit signature; bits 256-269: 14-bit payload length;
    payload from bit 270), checks its integrity against the signature, and
    validates every property against the badge record in the database.

    Returns the verified PrestoBadge (with its verification counters updated),
    or False on any validation error (the error is logged).
    """
    try:
        # make pixels accessible as pix[x, y]
        pix = img.load()
        w, h = img.size
        # check for appropriate dimensions
        # NOTE(review): message assumes BADGE_WIDTH == BADGE_HEIGHT == 256
        if h != BADGE_HEIGHT or w != BADGE_WIDTH:
            raise ValueError('Badge should be 256x256 pixels')
        # get the 256-bit signature
        signature = ''.join([test_bit(pix, i) for i in range(0, 256)])
        # get the length of the "payload" coded in the next 14 bits
        bits = ''.join([test_bit(pix, i) for i in range(256, 270)])
        bit_count = int(bits, 2)
        if bit_count > MAX_DATA_BITS:
            raise ValueError('Invalid data size (%d)' % bit_count)
        # get the actual data
        bits = ''.join([test_bit(pix, i) for i in range(270, 270 + bit_count)])
        # check integrity of bits
        bits_hash = hash_to_binary(hexlify(pbkdf2_hmac("sha256", bits,
            settings.BADGE_SALT, settings.BADGE_ITERATIONS)))
        if bits_hash != signature:
            raise ValueError('Corrupted badge data')
        # decode the bits
        bd = binary_to_dict(bits)
        # see if the standard badge properties exist
        mf = list(set(['ID', 'CC', 'CN', 'AL', 'TI', 'PR', 'FN', 'EM'])
                  - set(bd.keys()))
        if mf:
            raise ValueError('Incomplete data (missing: %s)' % ', '.join(mf))
        # see if the badge exists in the database
        # NOTE: use filter instead of get so that we can generate our own error message
        b = PrestoBadge.objects.filter(pk=bd['ID'])
        if b.count() == 0:
            raise ValueError('Unmatched badge ID')
        # get the first element (should be the only one)
        b = b.first()
        # determine holder and project relay name
        # FIX: use if/elif/else like render_certified_badge does; the original
        # two independent if's let a referee record override the participant
        # data, and left u and pr undefined (NameError) when neither was set
        if b.participant:
            u = b.participant.student.user
            pr = b.participant.estafette.estafette.name
        elif b.referee:
            u = b.referee.user
            pr = b.referee.estafette_leg.template.name
        else:
            raise ValueError('Undefined badge owner')
        # see if badge data match those in database
        if prefixed_user_name(u) != bd['FN']:
            raise ValueError('Holder name (%s) does not match "%s"'
                % (bd['FN'], prefixed_user_name(u)))
        if u.email != bd['EM']:
            raise ValueError('Holder e-mail address (%s) does not match "%s"'
                % (bd['EM'], u.email))
        if b.course.code != bd['CC']:
            raise ValueError('Course code (%s) does not match "%s"'
                % (bd['CC'], b.course.code))
        if b.course.name != bd['CN']:
            raise ValueError('Course name (%s) does not match "%s"'
                % (bd['CN'], b.course.name))
        if pr != bd['PR']:
            raise ValueError('Project relay name (%s) does not match "%s"'
                % (bd['PR'], pr))
        if b.attained_level != bd['AL']:
            raise ValueError('Attained level (%d) should have been %d'
                % (bd['AL'], b.attained_level))
        # update badge verification parameters
        b.time_last_verified = timezone.now()
        b.verification_count += 1
        b.save()
        # return the badge object
        return b
    except Exception as e:
        log_message('Failed to validate badge: ' + str(e))
        return False
def progress(request, **kwargs):
    # Serve the progress bar chart for a relay as a PNG image; when requested
    # by a participant, overlay markers for that participant's own uploads
    # and final reviews.
    # NOTE: a downloaded image is part the current page, meaning that the coding keys
    # should NOT be rotated; this is achieved by passing "NOT" as test code.
    context = generic_context(request, 'NOT')
    try:
        # check whether user can have student role
        if not (has_role(context, 'Student') or has_role(context, 'Instructor')):
            raise Exception('No access')
        h = kwargs.get('hex', '')
        # verify that hex code is valid
        # NOTE: since keys have not been rotated, use the ENcoder here!
        oid = decode(h, context['user_session'].encoder)
        # check whether oid indeed refers to an existing participant or course estafette
        p_or_ce = kwargs.get('p_or_ce', '')
        if p_or_ce == 'p':
            p = Participant.objects.get(pk=oid)
            ce = p.estafette
        else:
            p = None
            ce = CourseEstafette.objects.get(pk=oid)
        # get the basic bar chart
        img = update_progress_chart(ce)
        # if image requested by a participant, add orange markers for his/her uploads
        if p:
            draw = ImageDraw.Draw(img)
            # get a font (merely to draw nicely anti-aliased circular outlines)
            fnt = ImageFont.truetype(
                os.path.join(settings.FONT_DIR, 'segoeui.ttf'), 25)
            # calculate how many seconds of estafette time is represented by one bar
            time_step = int(
                (ce.end_time - ce.start_time).total_seconds() / BAR_CNT) + 1
            # get the number of registered participants (basis for 100%)
            # NOTE(review): if p_count is 0 the division below raises
            # ZeroDivisionError, which falls through to the outer except
            p_count = Participant.objects.filter(estafette=ce).count()
            # get leg number and upload time all uploaded assignments for this participant
            a_list = Assignment.objects.filter(participant=p).filter(
                time_uploaded__gt=DEFAULT_DATE).filter(
                clone_of__isnull=True).values('leg__number', 'time_uploaded')
            for a in a_list:
                # get the number of assignments submitted earlier
                cnt = Assignment.objects.filter(
                    participant__estafette=ce
                    ).filter(leg__number=a['leg__number']).filter(
                    time_uploaded__gt=DEFAULT_DATE).filter(
                    clone_of__isnull=True).exclude(
                    time_uploaded__gt=a['time_uploaded']).count()
                # bar index on the time axis for this upload
                bar = int(
                    (a['time_uploaded'] - ce.start_time).total_seconds()
                    / time_step)
                # NOTE(review): under Python 2, 250 * cnt / p_count is integer
                # division, so round() has no effect -- presumably intended
                perc = round(250 * cnt / p_count)
                x = V_AXIS_X + bar * BAR_WIDTH
                y = H_AXIS_Y - perc - 5
                # mark uploads with orange & white outline (10 pixels diameter)
                draw.ellipse([x, y, x + 10, y + 10],
                    fill=(236, 127, 44, 255), outline=None)
                # draw white letter o to produce neat circular outline
                draw.text((x - 1.5, y - 14.5), 'o', font=fnt,
                    fill=(255, 255, 255, 255))
            # get nr and submission time for this participant's final reviews
            nr_of_steps = ce.estafette.template.nr_of_legs()
            r_set = PeerReview.objects.filter(reviewer=p).filter(
                assignment__leg__number=nr_of_steps).filter(
                time_submitted__gt=DEFAULT_DATE).values(
                'reviewer__id', 'time_submitted').order_by('reviewer__id',
                'time_submitted')
            r_index = 0
            for r in r_set:
                r_index += 1
                # get the number of final reviews submitted earlier
                cnt = PeerReview.objects.filter(reviewer__estafette=ce).filter(
                    assignment__leg__number=nr_of_steps).filter(
                    time_submitted__gt=DEFAULT_DATE).exclude(
                    time_submitted__gt=r['time_submitted']).values(
                    'reviewer__id', 'time_submitted').order_by(
                    'reviewer__id', 'time_submitted').annotate(
                    rev_cnt=Count('reviewer_id')).filter(
                    rev_cnt=r_index).count()
                # bar index on the time axis for this review
                bar = int(
                    (r['time_submitted'] - ce.start_time).total_seconds()
                    / time_step)
                perc = round(250 * cnt / p_count)
                x = V_AXIS_X + bar * BAR_WIDTH
                y = H_AXIS_Y - perc - 5
                # mark final reviews with orange
                draw.ellipse([x, y, x + 10, y + 10],
                    fill=(236, 127, 44, 255), outline=None)
                # draw black letter o to produce neat circular outline
                draw.text((x - 1.5, y - 14.5), 'o', font=fnt,
                    fill=(0, 0, 0, 255))
        # output image to browser (do NOT save it as a file)
        response = HttpResponse(content_type='image/png')
        img.save(response, 'PNG')
        return response
    except Exception, e:
        log_message('ERROR while generating progress chart: %s' % str(e),
            context['user'])
        # fall back to the generic "not found" image
        with open(os.path.join(settings.IMAGE_DIR, 'not-found.png'), "rb") as f:
            return HttpResponse(f.read(), content_type="image/png")
def scan_assignment(aid):
    # Scan the assignment with primary key aid for plagiarism.
    # NOTE(review): this chunk ends inside the "resume" logic; the actual
    # scanning work continues further down in the file.
    # track time needed for this (partial) scan
    start_time = time.time()
    # get the assignment to be scanned
    a = Assignment.objects.select_related('participant__student').get(pk=aid)
    author = a.participant.student.dummy_name()
    # ignore if no uploaded work or clone
    if a.time_uploaded == DEFAULT_DATE or a.clone_of:
        return (0, '')
    upl_dir = os.path.join(settings.MEDIA_ROOT, a.participant.upload_dir)
    # directory may not exist yet (typically because relay template has no required files)
    if not os.path.exists(upl_dir):
        os.mkdir(upl_dir)
    # prepare to use two text files: one for progress and draft report, one for complete report
    report_path = os.path.join(upl_dir,
        'scan_%s%d.txt' % (a.case.letter, a.leg.number))
    progress_path = os.path.join(
        upl_dir, 'progress_%s%d.txt' % (a.case.letter, a.leg.number))
    # if database record shows completed scan, check if report exists
    if a.time_scanned != DEFAULT_DATE:
        # if report indeed exists, read it, get the max. percentage, and return its contents
        if os.path.isfile(report_path):
            content = unicode(open(report_path, 'r').read(), errors='ignore')
            return (a.scan_result, markdown(content).replace(
                '<h2>',
                '<h2 style="color: %s">' % status_color(a.scan_result), 1))
    # if progress file exists, resume the scan
    resuming = False
    if os.path.isfile(progress_path):
        try:
            # progress data is a serialized dict (see the fields read below)
            data = loads(
                unicode(open(progress_path, 'r').read(), errors='ignore'))
            # get time since data was saved
            t_diff = round(time.time() - data['start'])
            # first verify that assignment IDs match
            if data['aid'] != a.id:
                log_message(
                    'ERROR: Resuming scan ID mismatch (got #%d while expecting #%d)'
                    % (data['aid'], a.id))
            # resume only if partial scan is less than 15 minutes old AND not busy
            elif t_diff < 900 and not ('busy' in data):
                # restore "legitimate source" ID list from data
                prid_list = data['prids']
                # restore "strings to ignore"
                to_ignore = data['ignore']
                # get min and max percentages
                min_perc = data['min_perc']
                max_perc = data['max_perc']
                # restore scan report list
                fsr = data['fsr']
                fs_cnt = data['fs_cnt']
                # restore "assignments to scan" ID list and record list
                said_list = data['saids']
                sa_dict = {}
                for sa in Assignment.objects.filter(
                        id__in=said_list).select_related(
                        'participant__student'):
                    sa_dict[sa.id] = {
                        'id': sa.id,
                        'leg': sa.leg.number,
                        'author': sa.participant.student.dummy_name(),
                        'uploaded': sa.time_uploaded
                    }
                # get the IDs of file uploads already scanned
                spuid_list = data['spuids']
                log_message(
                    'Resuming scan of #%d by %s (%d seconds ago; %d scanned)'
                    % (data['aid'], data['author'], t_diff, len(spuid_list)))
                # NOTE: set resuming to TRUE to indicate successful resume
                resuming = True
            else:
                log_message('ABANDONED scan of #%d by %s (%d seconds ago)'
                    % (a.id, author, t_diff))
        except Exception, e:
            # log error and then ignore it
            log_message('WARNING: Ignoring resume failure: %s' % str(e))
course=c, estafette=Estafette.objects.get(pk=eid), suffix=request.POST.get('suffix', ''), start_time=datetime.strptime(request.POST.get('starts', ''), '%Y-%m-%d %H:%M'), deadline=datetime.strptime(request.POST.get('deadline', ''), '%Y-%m-%d %H:%M'), review_deadline=datetime.strptime( request.POST.get('revsdue', ''), '%Y-%m-%d %H:%M'), end_time=datetime.strptime(request.POST.get('ends', ''), '%Y-%m-%d %H:%M'), questionnaire_template=QuestionnaireTemplate.objects.get( pk=qid), final_reviews=int(request.POST.get('reviews', ''))) ce.save() log_message('Added new estafette to course', context['user']) except Exception, e: report_error(context, e) return render(request, 'presto/error.html', context) # add course properties that need conversion to context context['course'] = { 'object': c, 'start': c.language.fdate(c.start_date), 'end': c.language.fdate(c.end_date), 'manager': prefixed_user_name(c.manager), 'owned':
def scan_report(text, req_file, path, aid, author, upload_time, related,
                min_length, text_to_ignore=None):
    """Compare submitted text against the document at path.

    Scans for matching "tell-tale" lines (image fingerprints etc.) and for
    matching text runs of at least min_length characters (via SequenceMatcher).
    Fragments in text_to_ignore are excluded from the match count.

    Returns a tuple (percentage, report): the match percentage (negated when
    the scanned document is related work, 0 when a related match is below 80%)
    and a Markdown report describing the match.
    """
    # FIX: avoid a mutable default argument ([]) shared between all calls
    if text_to_ignore is None:
        text_to_ignore = []
    # NOTE: we also scan for matching "tell-tales"
    tell_tales = ''
    tt_percent = 0
    # test whether a "tell-tale" scan is needed
    if TELLTALE_SEPARATOR in text:
        matches = []
        parts = text.split(TELLTALE_SEPARATOR)
        # separate the text content from the "tell-tale" list
        if len(parts) == 1:
            # text only contained "tell-tales" (typical for .XLSX files)
            text = ''
            pairs = parts[0].strip()
        else:
            text = parts[0].strip()
            pairs = parts[1].strip()
        # check if there are any "tell-tales"
        pairs = pairs.split('\n')
        if len(pairs) == 0:
            tell_tales = '_(No tell-tales in file `%s`)_' % os.path.basename(
                path)
        else:
            # get the text content from the file to scan
            scan_text = ascii_from_doc(path)
            # return warning report if no "tell-tales" detected
            if TELLTALE_SEPARATOR not in scan_text:
                tell_tales = (
                    '_**WARNING:** File `%s` did not scan as OpenDocument._'
                    % os.path.basename(path))
            else:
                # parse tell tales (these all have form "key|value")
                parts = scan_text.split(TELLTALE_SEPARATOR)
                # separate the text content from the "tell-tale" list
                if len(parts) == 1:
                    # text only contained "tell-tales" (typical for .XLSX files)
                    scan_text = ''
                    scan_pairs = parts[0].strip()
                else:
                    scan_text = parts[0].strip()
                    scan_pairs = parts[1].strip()
                scan_pairs = scan_pairs.split('\n')
                # collect matching pairs
                for p in scan_pairs:
                    if p and p in pairs:
                        matches.append(p)
                # report matching "tell-tales" (if any)
                if len(matches) > 0:
                    tt_percent = int(100 * len(matches) / len(pairs))
                    tell_tales = 'Tell-tales: %d%% match (%s)' % (
                        tt_percent, ', '.join(matches))
    # now scan for text matches
    l = len(text)
    # assume no match
    n = 0
    matching_text = ''
    percentage = 0
    epolm = 0  # end position of last match
    if l:
        s = SequenceMatcher(None, text, ascii_from_doc(path, text_to_ignore))
        for match in s.get_matching_blocks():
            if match.size >= min_length:
                n += match.size
                # indicate a gap between consecutive matched fragments
                if epolm > 0:
                    matching_text += BLUE_ELLIPSIS % (match.a - epolm)
                epolm = match.a + match.size
                matching_text += text[match.a:match.a
                                      + match.size].decode('utf-8')
    # NOTE: matches that are (only a few characters) longer than an ignorable string
    #       are not ignored; hence we strip "to ignore" text fragments from the report
    mtl = len(matching_text)
    for t in text_to_ignore:
        matching_text = matching_text.replace(t, '')
    # adjust n by subtracting the number of removed characters
    n -= mtl - len(matching_text)
    # report if sufficient matching text or "tell-tales"
    if n > TOTAL_MATCH_THRESHOLD or tt_percent > 0:
        if related:
            author = 'RELATED ' + author
            matching_text = '_(matching text and tell-tales omitted because source is legitimate)_'
            tell_tales = ''
        percentage = int(100 * n / l) if l else 0
        report = (
            '####%d%% text match (%d characters) with `%s` <small>(submitted on %s by %s as `%s`)</small>\n'
            % (percentage, n, req_file, upload_time, author,
               os.path.basename(path))) + '<small>' + matching_text + '</small>'
        # append "tell-tale" report if matching tell-tales were found
        if tt_percent > 0:
            report += tell_tales
            percentage = max(percentage, tt_percent)
        # NOTE: a very large match (80% or more) with related work may indicate NO own contribution,
        #       hence such submissions are not "cleared" by setting percentage to 0
        if related and percentage < 80:
            # return 0 as percentage, as this match is not to be considered as plagiarism
            percentage = 0
    else:
        report = 'NO text match with `%s` (%s)' % (os.path.basename(path),
                                                   author)
        if related:
            report += ' _(NOTE: despite being **related** work!)_ '
    # only log suspect scans
    if ((percentage > SUSPICION_THRESHOLD or n > 500)
            and not related) or percentage >= 80:
        log_message(
            '-- %d%% (%d characters) match with %s submitted on %s by %s (#%d)'
            % (percentage, n, req_file, upload_time, author, aid))
    # indicate matches with related work as a negative percentage
    if related:
        percentage = -percentage
    return (percentage, report)
# get the image data as raw IO bytes bio = BytesIO(a2b_base64(b64[1])) bio.seek(0) # this data should then be readable for PIL img = Image.open(bio) # verify that the image is a valid badge badge = verify_certified_image(img) # if so, return the validation plus the badge properties if badge: jd['r'] = 'VALID' jd.update(badge.as_dict()) # also add the learning goals for the completed step jd['lg'] = EstafetteLeg.objects.filter(number=badge.attained_level, template__id=jd['TID']).first().learning_objectives # only log successful authentications, as failures log their error message log_message('Badge authenticated for %s' % jd['FN']) elif a == 'authenticate letter': # by default, assume the authentication code is NOT valid jd['r'] = 'INVALID' c = request.POST.get('c', '') # authentication codes are 32 digit hexadecimal numbers if len(c) == 32: # verify that letter exists loa = LetterOfAcknowledgement.objects.filter(authentication_code=c) # if so, return the validation plus the LoA properties if loa: jd['r'] = 'VALID' loa = loa.first() jd.update(loa.as_dict()) loa.verification_count += 1 loa.time_last_verified = timezone.now()
if error: log_message('WARNING: Failed to scan file %s as ZIP archive\n%s' % (path, error)) return ascii elif ext == '.pdf': ascii = '' try: # extract text from PDF as 7-bit ASCII (note: this also removes 'hard' spaces) ascii = check_output(['pdftotext', '-enc', 'ASCII7', path, '-']) # remove sequences of 3+ periods (typically occur in table of contents) ascii = ' '.join(ascii.replace('...', '').strip().split()) # remove text to ignore for t in text_to_ignore: ascii = ascii.replace(t.encode('ascii', 'ignore'), '') except Exception, e: log_message('ERROR: Failed to execute pdftotext for file %s\n%s' % (path, str(e))) return ascii elif ext == '.xlsx': # NOTE: Excel files are not scanned for text, but -- similar to Word files -- for the images # they contain (in xl/media), for their shared strings (in xl/sharedStrings.xml), # and for their creation date (from docProps/core) # NOTE: we signal this the start of the "tell-tale list" with a separator ascii = TELLTALE_SEPARATOR error = '' try: with ZipFile(path, 'r') as zf: # append for each image file a line "<extension>=<file size>+<CRC>" for i in zf.infolist(): if i.filename[:9] == 'xl/media/': ascii += '\n%s=%d+%d' % (os.path.splitext( i.filename)[1].lower(), i.file_size, i.CRC)
def ack_letter(request, **kwargs):
    """Render a Letter of Acknowledgement (LoA) as a PDF attachment.

    kwargs['hex'] must hold the session-encoded LetterOfAcknowledgement ID.
    Returns an HttpResponse pushing the PDF as attachment, the "forbidden"
    page when the user lacks the student role, or the generic error page
    when decoding/rendering fails.
    """
    # NOTE: downloading a file opens a NEW browser tab/window, meaning that
    # the coding keys should NOT be rotated; this is achieved by passing "NOT" as test code.
    context = generic_context(request, 'NOT')
    # check whether user can have student role
    if not has_role(context, 'Student'):
        return render(request, 'presto/forbidden.html', context)
    try:
        h = kwargs.get('hex', '')
        # verify that letter exists
        # NOTE: since keys have not been rotated, use the ENcoder here!
        lid = decode(h, context['user_session'].encoder)
        # get letter properties
        loa = LetterOfAcknowledgement.objects.get(id=lid)
        # update fields, but do not save yet because errors may still prevent effective rendering
        loa.time_last_rendered = timezone.now()
        loa.rendering_count += 1
        # get the dict with relevant LoA properties in user-readable form
        rd = loa.as_dict()
        # create letter as PDF
        pdf = MyFPDF()
        pdf.add_font('DejaVu', '', DEJAVU_FONT, uni=True)
        pdf.add_font('DejaVu', 'I', DEJAVU_OBLIQUE_FONT, uni=True)
        pdf.add_font('DejaVu', 'B', DEJAVU_BOLD_FONT, uni=True)
        pdf.add_font('DejaVu', 'BI', DEJAVU_BOLD_OBLIQUE_FONT, uni=True)
        pdf.add_page()
        # see whether course has a description; if so, make a reference to page 2
        # and prepare the text for this page 2
        if rd['CD']:
            see_page_2 = ' described on page 2'
        else:
            see_page_2 = ''
        # NOTE: if the RID entry (= the referee ID) equals zero, the letter is a participant LoA!
        if rd['RID'] == 0:
            pdf.letter_head(rd['AC'], rd['DI'],
                'Acknowledgement of Project Relay completion')
            # add the participant acknowledgement text to the letter
            text = ''.join([
                'To whom it may concern,\n\n',
                'With this letter, DelftX, an on-line learning initiative of Delft University of',
                ' Technology through edX, congratulates ', rd['FN'], '* for having completed',
                ' the project relay ', rd['PR'], ' offered as part of the online course ',
                rd['CN'], see_page_2, '.\n\n',
                'A project relay comprises a series of steps: assignments that follow on from',
                ' each other. In each step, participants must first peer review, appraise, and',
                ' then build on the preceding step submitted by another participant.\n\n',
                'The project relay ', rd['PR'], ' comprised ', rd['SL'],
                ', where each step posed an intellectual challenge that will have required',
                ' several hours of work. DelftX appreciates in particular the contribution that',
                ' participants make to the learning of other participants by giving feedback',
                ' on their work.\n\n\n',
                rd['SN'], '\n', rd['SP']
                ])
        else:
            pdf.letter_head(rd['AC'], rd['DI'],
                'Project Relay Referee Letter of Acknowledgement')
            # adapt some text fragments to attribute values
            cases = plural_s(rd['ACC'], 'appeal case')
            hours = plural_s(rd['XH'], 'hour')
            # average appreciation is scaled between -1 and 1
            if rd['AA'] > 0:
                appr = ' The participants involved in the appeal were appreciative of the arbitration.'
            elif rd['AA'] < -0.5:
                appr = ' Regrettably, the participants involved in the appeal were generally not appreciative of the arbitration.'
            else:
                appr = ''
            if rd['DFC'] == rd['DLC']:
                period = 'On ' + rd['DLC']
            else:
                period = 'In the period between %s and %s' % (rd['DFC'], rd['DLC'])
            # add the referee acknowledgement text to the letter
            text = ''.join([
                'To whom it may concern,\n\n',
                'With this letter, DelftX, an on-line learning initiative of Delft University of Technology',
                ' through edX, wishes to express its gratitude for the additional efforts made by ',
                rd['FN'], '* while participating in the project relay ', rd['PR'],
                ' offered as part of the online course ', rd['CN'], see_page_2, '.\n\n',
                'A project relay comprises a series of steps: assignments that follow on from each other. ',
                'In each step, participants must first peer review, appraise, and then build on the ',
                'preceding step submitted by another participant. Participant ', rd['FN'],
                ' has not only completed the course, but also passed the referee test for ',
                rd['SL'], ' of the ', rd['PR'],
                ' project relay. This implies having a better command of the subject',
                ' taught than regular participants.\n\n',
                'Referees arbitrate appeal cases, i.e., situations where the reviewed participant ',
                'disagrees with the reviewer\'s critique and/or appraisal. ', period, ', participant ',
                rd['FN'], ' has arbitrated on ', cases, '. This corresponds to approximately ',
                hours, ' of work.', appr,
                '\n\nThe role of referee is indispensable to run project ',
                'relays on a large scale. DelftX therefore greatly values participants volunteering to ',
                'act as such, since it requires significant effort on top of the regular assignments.\n\n\n',
                rd['SN'], '\n', rd['SP']
                ])
        pdf.main_text(text)
        # add footnote with disclaimer
        pdf.footnote(rd['EM'])
        if see_page_2:
            pdf.page_2(rd)
        # set document properties
        if rd['RID'] == 0:
            task = 'completing a project relay'
        else:
            task = 'work as project relay referee'
        pdf.set_properties(rd['AC'], task, rd['FN'], rd['RC'], rd['TLR'])
        # output to temporary file
        # FIX: mkstemp() returns (fd, path); the descriptor must be closed
        # explicitly, or one OS file descriptor leaks per rendered letter
        fd, temp_file = mkstemp()
        os.close(fd)
        pdf.output(temp_file, 'F')
        log_message('Rendering acknowledgement letter for %s' % rd['PR'],
            context['user'])
        # push the PDF as attachment to the browser
        # NOTE(review): temp_file is never removed; unlinking here while
        # FileWrapper still streams it is only safe on POSIX -- TODO confirm
        # whether a cleanup job reaps the temp directory
        w = FileWrapper(file(temp_file, 'rb'))
        response = HttpResponse(w, content_type='application/pdf')
        response['Content-Disposition'] = 'attachment; filename="presto-LoA.pdf"'
        # now we can assume that the PDF will appear, so the updated letter data can be saved
        loa.save()
        return response
    except Exception as e:
        report_error(context, e)
        return render(request, 'presto/error.html', context)
def download(request, **kwargs):
    """Serve a case attachment or (anonymized) assignment upload as a download.

    kwargs['hex'] holds the session-encoded object ID; kwargs['file_name'] is
    either 'case' (case attachment), 'all-zipped' (all uploads as one ZIP), or
    a required-file name. kwargs['work'] == 'pre' marks a predecessor download;
    kwargs['dwnldr'] == 'ref' marks a referee download (registers first view).
    Returns an HttpResponse attachment, the "forbidden" page when the user has
    neither student nor instructor role, or the generic error page on failure.
    """
    # NOTE: downloading a file opens a NEW browser tab/window, meaning that
    # the coding keys should NOT be rotated; this is achieved by passing "NOT" as test code.
    context = generic_context(request, 'NOT')
    # check whether user can have student or instructor role
    is_instructor = has_role(context, 'Instructor')
    if not (has_role(context, 'Student') or is_instructor):
        return render(request, 'presto/forbidden.html', context)
    try:
        h = kwargs.get('hex', '')
        # verify hex key
        # NOTE: since keys have not been rotated, use the ENcoder here!
        aid = decode(h, context['user_session'].encoder)
        file_name = kwargs.get('file_name', '')
        # file_name = 'case' indicates a download request for a case attachment
        if file_name == 'case':
            ec = EstafetteCase.objects.get(pk=aid)
            # FIX: identity test with None (was `== None`)
            if ec.upload is None:
                raise ValueError('No attachment file for this case')
            f = ec.upload.upload_file
            ext = os.path.splitext(f.name)[1]
            w = FileWrapper(file(f.path, 'rb'))
            response = HttpResponse(w, 'application/octet-stream')
            response['Content-Disposition'] = (
                'attachment; filename="attachment-case-%s%s"' % (ec.letter, ext))
            return response
        # no case attachment? then the download request must concern an assignment
        work = kwargs.get('work', '')
        dwnldr = kwargs.get('dwnldr', '')
        # verify that download is for an existing assignment
        log_message('Looking for assignment #%d' % aid, context['user'])
        a = Assignment.objects.get(pk=aid)
        # get the list of participant uploads for this assignment (or its clone original)
        # and also the full path to the upload directory
        if a.clone_of:
            original = a.clone_of
            # in case a clone was cloned, keep looking until the "true" original has been found
            while original.clone_of:
                original = original.clone_of
            pul = ParticipantUpload.objects.filter(assignment=original)
            upl_dir = os.path.join(settings.MEDIA_ROOT, original.participant.upload_dir)
        else:
            pul = ParticipantUpload.objects.filter(assignment=a)
            upl_dir = os.path.join(settings.MEDIA_ROOT, a.participant.upload_dir)
        log_message('Upload dir = ' + upl_dir, context['user'])
        # create an empty temporary dir to hold copies of uploaded files;
        # removal is best-effort (the dir may not exist yet), but
        # FIX: catch only OSError instead of a bare except
        temp_dir = os.path.join(upl_dir, 'temp')
        try:
            rmtree(temp_dir)
        except OSError:
            pass
        os.mkdir(temp_dir)
        log_message('TEMP dir: ' + temp_dir, context['user'])
        if file_name == 'all-zipped':
            pr_work = 'pr-step%d%s' % (a.leg.number, a.case.letter)
            zip_dir = os.path.join(temp_dir, pr_work)
            os.mkdir(zip_dir)
            # copy the upladed files to the temporary dir ...
            for pu in pul:
                real_name = os.path.join(upl_dir,
                    os.path.basename(pu.upload_file.name))
                # ... under their formal name, not their actual
                ext = os.path.splitext(pu.upload_file.name)[1].lower()
                formal_name = os_path(
                    os.path.join(zip_dir, pu.file_name) + ext)
                if is_instructor:
                    log_message(
                        'Copying %s "as is" to ZIP as %s' % (real_name, formal_name),
                        context['user'])
                    # NOTE: for instructors, do NOT anonymize the document
                    copy2(real_name, formal_name)
                else:
                    log_message(
                        'Copy-cleaning %s to ZIP as %s' % (real_name, formal_name),
                        context['user'])
                    # strip author data from file and write it to the "work" dir
                    clear_metadata(real_name, formal_name)
            # compress the files into a single zip file
            zip_file = make_archive(zip_dir, 'zip', temp_dir, pr_work)
            response = HttpResponse(FileWrapper(file(zip_file, 'rb')),
                content_type='application/zip')
            response['Content-Disposition'] = (
                'attachment; filename="%s.zip"' % pr_work)
            # always record download in database
            UserDownload.objects.create(user=context['user_session'].user,
                assignment=a)
            # only change time_first_download if it concerns a predecessor's work!
            if work == 'pre' and a.time_first_download == DEFAULT_DATE:
                a.time_first_download = timezone.now()
            a.time_last_download = timezone.now()
            a.save()
            return response
        else:
            # check whether file name is on "required files" list
            fl = a.leg.file_list()
            rf = False
            for f in fl:
                if f['name'] == file_name:
                    rf = f
            if not rf:
                raise ValueError('Unknown file name: %s' % file_name)
            # find the corresponding upload
            pul = pul.filter(file_name=rf['name'])
            if not pul:
                raise ValueError('File "%s" not found' % rf['name'])
            pu = pul.first()
            # the real file name should not be known to the user
            real_name = os.path.join(upl_dir,
                os.path.basename(pu.upload_file.name))
            ext = os.path.splitext(pu.upload_file.name)[1]
            # the formal name is the requested file field plus the document's extension
            formal_name = os_path(os.path.join(temp_dir, pu.file_name) + ext)
            if is_instructor:
                log_message(
                    'Copying %s "as is" to ZIP as %s' % (real_name, formal_name),
                    context['user'])
                # NOTE: for instructors, do NOT anonymize the document
                copy2(real_name, formal_name)
            else:
                # strip author data from the file
                log_message(
                    'Copy-cleaning %s to %s' % (real_name, formal_name),
                    context['user'])
                clear_metadata(real_name, formal_name)
            mime = {
                '.pdf': 'application/pdf',
                '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
                }
            w = FileWrapper(file(settings.LEADING_SLASH + formal_name, 'rb'))
            # FIX: look up MIME type case-insensitively and fall back to
            # octet-stream instead of raising KeyError (which sent the user
            # to the generic error page) for unlisted extensions
            response = HttpResponse(w,
                content_type=mime.get(ext.lower(), 'application/octet-stream'))
            response['Content-Disposition'] = (
                'attachment; filename="%s-%d%s%s"'
                % (file_name, a.leg.number, a.case.letter, ext))
            # always record download in database
            UserDownload.objects.create(user=context['user_session'].user,
                assignment=a)
            # only change time_first_download if it concerns a predecessor's work!
            if work == 'pre' and a.time_first_download == DEFAULT_DATE:
                a.time_first_download = timezone.now()
            a.time_last_download = timezone.now()
            a.save()
            # if work is downloaded for the first time by a referee, this should be registered
            if dwnldr == 'ref':
                ap = Appeal.objects.filter(review__assignment=a).first()
                if not ap:
                    raise ValueError('Appeal not found')
                if ap.time_first_viewed == DEFAULT_DATE:
                    ap.time_first_viewed = timezone.now()
                    ap.save()
                    log_message('First view by referee: ' + unicode(ap),
                        context['user'])
            return response
    except Exception as e:
        report_error(context, e)
        return render(request, 'presto/error.html', context)