def handle_page_added(project, added_version, page_path, page_simple_path):
    """Register a page that first appeared in *added_version*.

    Creates the Page row, an 'added' PageChange audit record, and one
    Paragraph row (1-based ordinal) per paragraph split out of the file,
    then runs page.check() to verify the stored paragraphs reproduce the
    file content.

    project          -- owning Project instance
    added_version    -- Version in which the page was added
    page_path        -- filesystem path of the page's source file
    page_simple_path -- project-relative path stored on the Page
    """
    page = Page.objects.create(project=project, path=page_simple_path,
                               current_version=added_version)
    print("created page '%s' for path '%s' at version '%s'" % (
        page.slug, page.path, added_version.revision))
    page_change = PageChange.objects.create(version=added_version, page=page,
                                            action='added')
    print("created page_change '%s' for page '%s'" % (
        page_change.action, page_change.page.path, ))
    file_handler = get_file_handler(page)
    # 'with' closes the source file promptly instead of leaking the handle.
    with open(page_path) as page_file:
        page_content = page_file.read()
    paragraphs = file_handler.split(page_content, file_name=page_path)
    for ordinal, paragraph_content in enumerate(paragraphs):
        # Ordinals are stored 1-based in the database.
        Paragraph.objects.create(page=page, current_version=added_version,
                                 original=paragraph_content,
                                 ordinal=ordinal + 1)
    page.check()
    print("")
def check(self):
    """Validate this page's paragraphs for its current version.

    Verifies the stored ordinals form a contiguous 1..n run, then asks the
    file handler to compare the concatenated paragraphs against the file
    on disk.

    Raises:
        ValidationError: when the ordinals are not contiguous (and
        presumably from file_handler.compare() on a content mismatch —
        TODO confirm against the handler implementation).
    """
    file_handler = get_file_handler(self)
    paragraphs_in_db = Paragraph.objects.filter(
        page=self,
        current_version=self.current_version).order_by('ordinal')
    for i, paragraph in enumerate(paragraphs_in_db):
        if i + 1 != paragraph.ordinal:
            raise ValidationError(
                u"page '%s' ordinals are not continuous in db\n\n%s\n\n" % (
                    self.path,
                    ",".join(str(o) for o in paragraphs_in_db.values_list(
                        'ordinal', flat=True))))
    # 'with' closes the file promptly instead of leaking the handle.
    with open(os.path.join(self.project.doc_path, self.path)) as page_file:
        page_content = page_file.read()
    file_handler.compare(page_content, file_handler.concat(paragraphs_in_db),
                         self)
def write_page(self):
    """Write the translated page under DOCS_DIR/<project>/<language>/.

    Concatenates, in ordinal order, the translation (or the original text
    when untranslated) of every current paragraph, encodes the result as
    UTF-8 and overwrites the target file.
    """
    export_dir = os.path.join(settings.DOCS_DIR, self.project.slug,
                              settings.TRANSLATION_LANGUAGE)
    paragraph_objects = self.current_paragraphs.order_by('ordinal')
    translations = [p.original_or_translation for p in paragraph_objects]
    file_handler = get_file_handler(self)
    file_content = file_handler.concat_translations(translations)
    file_content = file_content.encode('utf-8')
    # Build the content first, then open: the old file is no longer
    # truncated if concatenation fails, and 'with' guarantees the handle
    # is closed even if write() raises.
    with open(os.path.join(export_dir, self.path), 'w') as page_file:
        page_file.write(file_content)
def write_page(self):
    """Export this page's translated content to the docs directory.

    Target path is DOCS_DIR/<project slug>/<TRANSLATION_LANGUAGE>/<path>.
    Paragraphs fall back to their original text when no translation
    exists (original_or_translation).
    """
    export_dir = os.path.join(settings.DOCS_DIR, self.project.slug,
                              settings.TRANSLATION_LANGUAGE)
    ordered_paragraphs = self.current_paragraphs.order_by('ordinal')
    translations = [p.original_or_translation for p in ordered_paragraphs]
    file_handler = get_file_handler(self)
    file_content = file_handler.concat_translations(translations)
    # Encode once at the I/O boundary; then write under 'with' so the
    # file is closed (and not left truncated-but-open) on any error.
    encoded = file_content.encode('utf-8')
    with open(os.path.join(export_dir, self.path), 'w') as page_file:
        page_file.write(encoded)
def handle_page_added(project, added_version, page_path, page_simple_path):
    """Create the DB records for a page newly added in *added_version*.

    Steps: create the Page, record an 'added' PageChange, split the file
    into paragraphs via the page's file handler, store each paragraph with
    a 1-based ordinal, and finally validate with page.check().
    """
    page = Page.objects.create(project=project,
                               path=page_simple_path,
                               current_version=added_version)
    print("created page '%s' for path '%s' at version '%s'" % (
        page.slug, page.path, added_version.revision))
    page_change = PageChange.objects.create(version=added_version,
                                            page=page,
                                            action='added')
    print("created page_change '%s' for page '%s'" % (
        page_change.action, page_change.page.path, ))
    file_handler = get_file_handler(page)
    # Read under 'with' so the file handle is not leaked.
    with open(page_path) as source_file:
        page_content = source_file.read()
    paragraphs = file_handler.split(page_content, file_name=page_path)
    for ordinal, paragraph_content in enumerate(paragraphs):
        # DB ordinals are 1-based.
        Paragraph.objects.create(page=page,
                                 current_version=added_version,
                                 original=paragraph_content,
                                 ordinal=ordinal + 1)
    page.check()
    print("")
def check(self):
    """Consistency check for this page at its current version.

    1. The paragraphs' ordinals must be exactly 1..n with no gaps.
    2. The file on disk must match the concatenation of the stored
       paragraphs (delegated to file_handler.compare()).

    Raises:
        ValidationError: on a gap or duplicate in the ordinal sequence.
    """
    file_handler = get_file_handler(self)
    paragraphs_in_db = Paragraph.objects.filter(
        page=self,
        current_version=self.current_version).order_by('ordinal')
    for position, paragraph in enumerate(paragraphs_in_db):
        if position + 1 != paragraph.ordinal:
            ordinals = ",".join(
                str(o) for o in
                paragraphs_in_db.values_list('ordinal', flat=True))
            raise ValidationError(
                u"page '%s' ordinals are not continuous in db\n\n%s\n\n" % (
                    self.path, ordinals))
    # Read under 'with' so the file handle is not leaked.
    with open(os.path.join(self.project.doc_path, self.path)) as page_file:
        page_content = page_file.read()
    file_handler.compare(page_content,
                         file_handler.concat(paragraphs_in_db),
                         self)
def handle_page_modified(project, before_version, modified_version, page_path,
                         page_simple_path):
    """Carry a page's paragraphs forward from *before_version* to
    *modified_version*, reusing history wherever possible.

    Matching runs in three passes over the freshly split paragraphs:
      1. exact content match against the previous version's paragraphs —
         the existing row is simply moved to the new version;
      2. fuzzy match by shared stripped lines above ``min_similarity`` —
         a new Paragraph is created pointing at its history_paragraph and
         the latest translation is copied over;
      3. anything left is stored as a brand-new Paragraph.

    Raises:
        ValidationError: if any paragraph remains unhandled, or the
        paragraph count in the file differs from the count in the DB.
    """
    page = Page.objects.get(project=project, path=page_simple_path,
                            current_version=before_version)
    print("found page '%s' for path '%s' at before_version '%s'" % (
        page.slug, page.path, page.current_version.revision))
    page.current_version = modified_version
    page.save()
    page_change = PageChange.objects.create(version=modified_version,
                                            page=page, action='modified')
    print("created page_change '%s' for page '%s'" % (
        page_change.action, page_change.page.path, ))
    file_handler = get_file_handler(page)
    # 'with' closes the source file promptly instead of leaking the handle.
    with open(page_path) as page_file:
        page_content = page_file.read()
    paragraphs = file_handler.split(page_content, file_name=page_path)
    # Each entry: [content, 1-based ordinal, matched history row, handled?]
    paragraphs_list = []
    matched_paragraphs = []
    for ordinal, paragraph_content in enumerate(paragraphs):
        # N = number of identical paragraphs seen before this one, so that
        # repeated content maps onto distinct history rows.
        N = paragraphs[:ordinal].count(paragraph_content)
        try:
            history_paragraph = Paragraph.objects.filter(
                page=page, current_version=before_version,
                original=paragraph_content)[N]
            if history_paragraph not in matched_paragraphs:
                matched_paragraphs.append(history_paragraph)
            else:
                # Already claimed by an earlier paragraph — no exact match.
                history_paragraph = None
        except IndexError:
            history_paragraph = None
        paragraphs_list.append(
            [paragraph_content, ordinal + 1, history_paragraph, False])
    print('len(paragraphs_list) %s' % len(
        [p for p in paragraphs_list if not p[3]]))
    # Pass 1: exact matches — move the history row onto the new version.
    for p in paragraphs_list:
        if p[2] and not p[3]:
            print('updating history_paragraph %s' % (p[2].id))
            p[2].ordinal = p[1]
            p[2].current_version = modified_version
            p[2].save()
            p[3] = True
    print('len(paragraphs_list) %s' % len(
        [p for p in paragraphs_list if not p[3]]))
    # Pass 2: fuzzy matches on the set of stripped lines.
    for p in paragraphs_list:
        if not p[3]:
            paragraph_content_set = set(
                line.strip() for line in p[0].decode('utf-8').splitlines())
            if not paragraph_content_set:
                # An all-blank paragraph would divide by zero below; let
                # pass 3 store it as new content instead.
                continue
            for p_in_db in Paragraph.objects.filter(
                    page=page, current_version=before_version,
                    modified_paragraph=None):
                p_in_db_content_set = set(
                    line.strip() for line in p_in_db.original.splitlines())
                similarity = 1.0 * len(
                    paragraph_content_set & p_in_db_content_set) / len(
                    paragraph_content_set)
                if similarity > min_similarity:
                    paragraph = Paragraph.objects.create(
                        page=page, current_version=modified_version,
                        original=p[0], ordinal=p[1],
                        history_paragraph=p_in_db)
                    # Reuse the previous translation so translators don't
                    # start from scratch on a lightly edited paragraph.
                    history_translation = p_in_db.latest_translation
                    if history_translation:
                        Translation.objects.create(
                            paragraph=paragraph,
                            history_translation=history_translation,
                            translator=history_translation.translator,
                            content=history_translation.content,
                            ip=history_translation.ip,
                        )
                    print('similarity %s' % similarity)
                    print('paragraph %s(ord:%s) found history_paragraph '
                          '%s(ord:%s)' % (
                              paragraph.id, paragraph.ordinal,
                              paragraph.history_paragraph.id,
                              paragraph.history_paragraph.ordinal))
                    page_change.paragraphs.add(paragraph)
                    p[3] = True
                    break
    print('len(paragraphs_list) %s' % len(
        [p for p in paragraphs_list if not p[3]]))
    # Pass 3: whatever is left is genuinely new content.
    for p in paragraphs_list:
        if not p[3]:
            paragraph = Paragraph.objects.create(
                page=page, current_version=modified_version, original=p[0],
                ordinal=p[1])
            page_change.paragraphs.add(paragraph)
            p[3] = True
    unhandled_length = len([p for p in paragraphs_list if not p[3]])
    print('len(paragraphs_list) %s' % unhandled_length)
    if unhandled_length != 0:
        raise ValidationError(u"paragraphs %s didn't handled " % (u",".join(
            [str(p[1]) for p in paragraphs_list if not p[3]]), ))
    paragraphs_count_in_db = Paragraph.objects.filter(
        page=page, current_version=modified_version).count()
    if len(paragraphs) != paragraphs_count_in_db:
        raise ValidationError(
            u"paragraphs count in file '%s' diff from count in db %s " % (
                len(paragraphs), paragraphs_count_in_db))
    page.check()
    print("")
def handle_page_modified(project, before_version, modified_version, page_path, page_simple_path):
    """Migrate a page's paragraphs from before_version to modified_version.

    Three matching passes over the re-split file:
      1. exact content match -> the history Paragraph row is moved to the
         new version in place;
      2. similarity match (fraction of shared stripped lines above
         min_similarity) -> new Paragraph linked to its history row, with
         the latest translation copied over;
      3. leftovers -> stored as brand-new Paragraphs.
    Raises ValidationError if anything is left unhandled or the file/DB
    paragraph counts diverge.
    """
    page = Page.objects.get(project = project, path = page_simple_path, current_version = before_version)
    print "found page '%s' for path '%s' at before_version '%s'" % (page.slug, page.path, page.current_version.revision)
    page.current_version = modified_version
    page.save()
    page_change = PageChange.objects.create(version = modified_version, page = page, action = 'modified')
    print "created page_change '%s' for page '%s'" % (page_change.action, page_change.page.path, )
    file_handler = get_file_handler(page)
    # NOTE(review): file handle is never closed — leaks until GC.
    page_content = open(page_path).read()
    paragraphs = file_handler.split(page_content, file_name = page_path)
    # Each entry: [content, 1-based ordinal, matched history row or None,
    # handled flag].
    paragraphs_list = []
    matched_paragraphs = []
    for ordinal, paragraph_content in enumerate(paragraphs):
        # N = how many identical paragraphs precede this one, so repeated
        # content maps onto distinct history rows.
        N = paragraphs[:ordinal].count(paragraph_content)
        # print 'N %s' % (N)
        try:
            history_paragraph = Paragraph.objects.filter(page = page, current_version = before_version, original = paragraph_content)[N]
            if history_paragraph not in matched_paragraphs:
                # print 'found history_paragraph %s' % (history_paragraph.id)
                matched_paragraphs.append(history_paragraph)
            else:
                # Already claimed by an earlier paragraph — treat as no match.
                history_paragraph = None
        except IndexError:
            history_paragraph = None
        paragraphs_list.append([paragraph_content, ordinal+ 1, history_paragraph, False]) #(paragraph_content, ordinal+ 1, history_paragraph, 'handled?')
    print 'len(paragraphs_list) %s' % len([p for p in paragraphs_list if not p[3]])
    # Pass 1: exact matches — move the history row to the new version.
    for i, p in enumerate(paragraphs_list):
        if p[2] and not p[3]:
            print 'updating history_paragraph %s' % (p[2].id)
            p[2].ordinal = p[1]
            p[2].current_version = modified_version
            p[2].save()
            p[3] = True
    print 'len(paragraphs_list) %s' % len([p for p in paragraphs_list if not p[3]])
    # Pass 2: fuzzy matches by the set of stripped lines.
    for i, p in enumerate(paragraphs_list):
        if not p[3]:
            paragraph_content_set = set(line.strip() for line in p[0].decode('utf-8').splitlines())
            for p_in_db in Paragraph.objects.filter(page = page, current_version = before_version, modified_paragraph = None):
                p_in_db_content_set = set(line.strip() for line in p_in_db.original.splitlines())
                # NOTE(review): divides by len(paragraph_content_set) —
                # ZeroDivisionError if the paragraph strips to nothing.
                similarity = 1.0 * len(paragraph_content_set&p_in_db_content_set) / len(paragraph_content_set)
                if similarity > min_similarity:
                    paragraph = Paragraph.objects.create(page = page, current_version = modified_version, original = p[0], ordinal = p[1], history_paragraph = p_in_db)
                    # Carry the previous translation forward so the lightly
                    # edited paragraph isn't retranslated from scratch.
                    history_translation = p_in_db.latest_translation
                    if history_translation:
                        new_translation = Translation.objects.create(paragraph = paragraph, history_translation = history_translation, translator = history_translation.translator, content = history_translation.content, ip = history_translation.ip, )
                    print 'similarity %s' % similarity
                    print 'paragraph %s(ord:%s) found history_paragraph %s(ord:%s)' % (paragraph.id, paragraph.ordinal, paragraph.history_paragraph.id, paragraph.history_paragraph.ordinal)
                    page_change.paragraphs.add(paragraph)
                    p[3] = True
                    break
    print 'len(paragraphs_list) %s' % len([p for p in paragraphs_list if not p[3]])
    # Pass 3: everything left is genuinely new content.
    for i, p in enumerate(paragraphs_list):
        if not p[3]:
            paragraph = Paragraph.objects.create(page = page, current_version = modified_version, original = p[0], ordinal = p[1])
            page_change.paragraphs.add(paragraph)
            p[3] = True
    unhandled_length = len([p for p in paragraphs_list if not p[3]])
    print 'len(paragraphs_list) %s' % unhandled_length
    if unhandled_length != 0:
        raise ValidationError, u"paragraphs %s didn't handled " % (u",".join([str(p[1]) for p in paragraphs_list if not p[3]]), )
    # Final invariant: every file paragraph must exist in the DB at the
    # new version, no more and no fewer.
    paragraphs_count_in_db = Paragraph.objects.filter(page = page, current_version = modified_version).count()
    if len(paragraphs) != paragraphs_count_in_db:
        raise ValidationError, u"paragraphs count in file '%s' diff from count in db %s " % (len(paragraphs), paragraphs_count_in_db)
    page.check()
    print