示例#1
0
class Globby_Text_Editor(tk.Frame):
    def __init__(self, parent_widget, settings):
        """Build the editor frame (edit field plus button column).

        Parameter:
        --> parent_widget:
                Tk widget that becomes the parent of this frame

        --> settings:
                object whose attributes `projects_path` and
                `current_project` are read by the file related callbacks
        """
        # some initial values
        # TODO these values are obsolete since Project_Settings covers them
        # --> self.settings.projects_path
        self.hash_opened_filename = None
        self.opened_filename = None
        self.settings = settings

        # Buttons without 'open_tag'/'close_tag': label_button_row() shows
        # 'keytxt' beside the button but does not bind 'hotkey' for them.
        self.edit_button_list = [
            {'text': 'new page', 'cmd': self.on_new_page,
                'keytxt': 'CTRL+n', 'hotkey': '<Control-n>'},
            # was labelled 'CTRL+n' (copy of the entry above) and used
            # '<DELETE>', which is not a valid Tk key name
            {'text': 'del page', 'cmd': self.on_del_page,
                'keytxt': 'DEL', 'hotkey': '<Delete>'},
            {'text': 'save', 'cmd': self.on_save,
                'keytxt': 'CTRL+s', 'hotkey': '<Control-s>'},
            {'text': 'undo', 'cmd': self.on_undo,
                'keytxt': 'CTRL+z', 'hotkey': '<Control-z>'},
            {'text': 'redo', 'cmd': self.on_redo,
                'keytxt': 'CTRL+y', 'hotkey': '<Control-y>'}]

        # Buttons with 'open_tag'/'close_tag': insert wiki markup around the
        # cursor or the selection, the hotkey is bound to the edit field.
        self.syntax_button_list = [
            {'text': '**bold**', 'cmd': self.on_tag_insert, 'open_tag': '**',
                'close_tag': '**', 'keytxt': 'CTRL+b', 'hotkey': '<Control-b>'},
            {'text': '//italic//', 'cmd': self.on_tag_insert, 'open_tag': '//',
                'close_tag': '//', 'keytxt': 'CTRL+i', 'hotkey': '<Control-i>'},
            {'text': '__underline__', 'cmd': self.on_tag_insert, 'open_tag': '__',
                'close_tag': '__', 'keytxt': 'CTRL+u', 'hotkey': '<Control-u>'},
            {'text': '[Link]', 'cmd': self.on_tag_insert, 'open_tag': '[',
                'close_tag': ']', 'keytxt': 'CTRL+l', 'hotkey': '<Control-l>'},
            {'text': '¸¸sub¸¸', 'cmd': self.on_tag_insert, 'open_tag': '¸¸',
                'close_tag': '¸¸', 'keytxt': 'CTRL+d', 'hotkey': '<Control-d>'},
            {'text': '^^upper^^', 'cmd': self.on_tag_insert, 'open_tag': '^^',
                'close_tag': '^^', 'keytxt': 'CTRL+q', 'hotkey': '<Control-q>'},
            {'text': '-~smaller~-', 'cmd': self.on_tag_insert, 'open_tag': '-~',
                'close_tag': '~-', 'keytxt': 'CTRL+w', 'hotkey': '<Control-w>'},
            {'text': '+~bigger~+', 'cmd': self.on_tag_insert, 'open_tag': '+~',
                'close_tag': '~+', 'keytxt': 'CTRL+e', 'hotkey': '<Control-e>'},
            {'text': '~~strike_thru~~', 'cmd': self.on_tag_insert, 'open_tag': '~~',
                'close_tag': '~~', 'keytxt': 'CTRL+t', 'hotkey': '<Control-t>'}]

        # build Widgets
        tk.Frame.__init__(self, parent_widget)
        self.pack(fill=tk.BOTH, expand=tk.YES)

        self.editor()
        self.button_frame()

        # start tracking text changes inside the editfield
        thread.start_new_thread(self.on_txt_changes, ('',))



    def editor(self):
        """Assemble the editing area: a caption label above a scrolled text.

        --> self.txtfrm
                frame holding the caption and the text widget

        --> self.opened_file_label
                caption above the editfield showing the name of the opened
                file; its colour is used to signal text changes

        --> self.text
                the text widget itself

        --> self.hash_opened_filename
                hash of the initial text, reference for change tracking
        """
        self.txtfrm = container = tk.Frame(self)
        container.pack(fill=tk.BOTH, side=tk.LEFT, expand=tk.YES)

        self.opened_file_label = caption = tk.Label(container,
                                                    text="No File chosen")
        caption.pack(fill=tk.X)

        text_options = dict(bg="white", undo=1, maxundo=30, wrap=tk.WORD)
        self.text = edit_field = ScrolledText(container, **text_options)
        edit_field.pack(fill=tk.BOTH, expand=tk.YES, side=tk.LEFT)
        edit_field.insert(1.0, u"Please open a File to edit")

        # reference hash, compared against the live text to detect changes
        self.hash_opened_filename = hash(edit_field.get(1.0, tk.END))

        # give the edit field the focus, cursor in the upper left corner
        edit_field.focus_set()
        edit_field.mark_set(tk.INSERT, '0.0')
        edit_field.see(tk.INSERT)


    def label_button_row(self, parent_widget=None,
                            btnlst=None, start_count=0):
        """Build a 2 column table with a label beside each button in a row.

        For buttons describing a tag ('open_tag' and 'close_tag' present)
        the keyboard sequence is bound to the edit field as well.
        The keyboard sequence text is displayed on the label.

        todo:
            - think about a parameter for the widget to bind the Hotkeys
            - rename to: labled_button_row, draw_labled_button_row

        Parameter:
        --> parent_widget:
                Parent widget to place the table

        --> btnlst:
                Type: List of dicts representing a button
                      (None or an empty list draws nothing)
                Example:
                    {'text':'**bold**',     # displayed on the Button (string)
                    'cmd':self.on_tag_insert,   # command
                    'open_tag':'**',        # chars representing the beginning
                                            # of a tag for inserting (string)
                    'close_tag':'**',       # chars representing the end
                                            # of a tag for inserting (string)
                    'keytxt':'CTRL+b',      # displayed on the Label (string)
                    'hotkey':'<Control-b>'} # keyboard sequence (string)
                Note:
                    The existence of 'open_tag' and 'close_tag' in btnlst
                    decides which command is bound to the Button.
                    If both are there the button (and the hotkey, if given)
                    calls self.on_tag_insert(event, open_tag, close_tag).
                    Otherwise 'cmd' is used and must be a function without
                    parameters; no hotkey is bound in that case.

        --> start_count:
                Type: int

                Description:
                    The table is realized with the tkinter grid layout
                    manager. start_count is the grid row of the first
                    generated button, so the rows can be placed under
                    widgets that already occupy the grid (in Globby_Editor
                    a Tkinter menubutton for file choose / headlines).
        """
        for row, btn in enumerate(btnlst or (), start_count):
            # explicit key test instead of a broad try/except KeyError: a
            # missing 'hotkey' used to create the button a second time
            if 'open_tag' in btn and 'close_tag' in btn:
                command = (lambda e=None, o=btn['open_tag'], c=btn['close_tag']:
                           self.on_tag_insert(e, o, c))
                hotkey = btn.get('hotkey')
                if hotkey:
                    self.text.bind(hotkey, command)
            else:
                command = btn['cmd']

            tk.Button(parent_widget, text=btn['text'], command=command,
                    relief=tk.RIDGE
                    ).grid(column=0, row=row, sticky=tk.W+tk.E)
            tk.Label(parent_widget, text=btn.get('keytxt', ''), relief=tk.FLAT
                    ).grid(column=1, row=row, sticky=tk.W)


    def button_frame(self):
        """Create the frame that stacks the edit buttons above the syntax
        buttons and fill it.
        """
        column = tk.Frame(self)
        self.btnfrm = column
        column.pack(side=tk.LEFT, fill=tk.BOTH)

        # both builders place their widgets into self.btnfrm
        self.edit_buttons()
        self.syntax_buttons()


    def edit_buttons(self):
        """Draw the "Edit Buttons" group: a file chooser menubutton followed
        by the buttons described in self.edit_button_list.
        """
        # labelled frame holding the whole group
        self.efrm = group = tk.LabelFrame(self.btnfrm, text="Edit Buttons")
        group.pack(fill=tk.BOTH, padx=5, pady=5)

        # menubutton with a pulldown menu; the menu entries are rebuilt by
        # gen_file2edit_menu every time the menu is posted
        self.file_open_mbtn = chooser = tk.Menubutton(group, text='Open File')
        self.file_open_menu = tk.Menu(chooser,
                                      postcommand=self.gen_file2edit_menu)
        chooser.config(menu=self.file_open_menu, relief=tk.RIDGE)
        chooser.grid(column=0, row=0, sticky=tk.W+tk.E)

        # label beside the menubutton showing its keyboard shortcut
        self.file_open_lbl = shortcut = tk.Label(group, text='CTRL+o',
                                                 relief=tk.FLAT)
        shortcut.grid(column=1, row=0, sticky=tk.W+tk.E)

        # generated buttons start at grid row 2 (row 1 stays empty)
        self.label_button_row(group, self.edit_button_list, 2)

        # keyboard shortcut: pop the file menu up at the pointer position
        def popup_file_menu(event):
            return self.file_open_menu.tk_popup(event.x_root, event.y_root)

        self.text.bind('<Control-o>', popup_file_menu)


    def gen_file2edit_menu(self):
        """(Re)build the entries of the file chooser menu.

        Lists every *.txt file of the current project folder except
        "menue.txt", sorted by name. Each entry opens its file through
        self.on_open. The method is the postcommand of the menu, so the
        entries are regenerated each time the menu is posted.
        """
        # delete all existing menu entries
        self.file_open_menu.delete(0, tk.END)
        proj_path = os.path.join(self.settings.projects_path,
                                self.settings.current_project)
        print("proj_path %s" % proj_path)
        # os.listdir() returns the names in arbitrary order
        for this_file in sorted(os.listdir(proj_path)):
            name, extension = os.path.splitext(this_file)
            if extension != ".txt" or name == "menue":
                continue
            open_file = os.path.join(proj_path, this_file)
            # bind the path as default argument (late binding of closures)
            do_it = lambda path=open_file: self.on_open(path)
            # the label was the (name, extension) tuple before, which is
            # not a string
            self.file_open_menu.add_command(label=this_file, command=do_it)




    def syntax_buttons(self):
        """Draw the "Syntax Buttons" group: a headline menubutton followed
        by the buttons described in self.syntax_button_list.

        idea: new parameter for on_tag_insert()
            jump_in_between=True/False so a pulldown list for different
            levels of headlines isn't necessary
        """
        # labelled frame holding the whole group
        self.sfrm = group = tk.LabelFrame(self.btnfrm, text="Syntax Buttons")
        group.pack(fill=tk.BOTH, padx=5, pady=5)

        # menubutton with a pulldown menu for the headline syntax
        self.headln_menubtn = chooser = tk.Menubutton(group,
                                                      text='= Headlines =')
        self.headln_menu = tk.Menu(chooser)
        chooser.config(menu=self.headln_menu, relief=tk.RIDGE)

        # one entry per headline level: level n is wrapped in n '=' signs
        for level, entry in enumerate(('h1', 'h2', 'h3', 'h4', 'h5', 'h6'), 1):
            marker = '=' * level
            otag = '\n\n' + marker + ' '
            ctag = ' ' + marker + '\n\n'
            self.headln_menu.add_command(
                label=entry,
                command=lambda event=None, o=otag, c=ctag:
                    self.on_tag_insert(event, o, c))
        chooser.grid(column=0, row=0, sticky=tk.W+tk.E)

        # label beside the menubutton showing its keyboard shortcut
        self.headln_lbl = shortcut = tk.Label(group, text='CTRL+h',
                                              relief=tk.FLAT)
        shortcut.grid(column=1, row=0, sticky=tk.W+tk.E)

        # generate buttons as described in self.syntax_button_list
        self.label_button_row(group, self.syntax_button_list, 1)

        # keyboard shortcut: pop the headline menu up at the pointer position
        def popup_headline_menu(event):
            return self.headln_menu.tk_popup(event.x_root, event.y_root)

        self.text.bind('<Control-h>', popup_headline_menu)


    def on_txt_changes(self, dummy_value=tk.NONE):
        """Track text changes inside the editfield by comparing hash values.

        Endless polling loop, started in its own thread by __init__.
        Every 0.2 seconds the hash of the current text is compared with
        self.hash_opened_filename; the file label is coloured red when they
        differ and black otherwise.

        The loop ends quietly as soon as the widgets can no longer be
        reached instead of dying with a traceback inside the thread.

        new name: visualize_txt_changes???

        Parameter:
        --> dummy_value:
                unused, only there because the thread is started with an
                argument tuple
        """
        while True:
            try:
                new_hash = hash(self.text.get(1.0, tk.END))
                if new_hash != self.hash_opened_filename:
                    colour = "red"      # unsaved changes
                else:
                    colour = "black"    # text equals the reference
                self.opened_file_label.configure(fg=colour)
            except (tk.TclError, RuntimeError):
                # TclError: presumably the widgets were destroyed;
                # RuntimeError: presumably no main loop is serving calls from
                # this thread (TODO confirm) - either way stop tracking
                return
            sleep(0.2)


    def on_open(self, file_to_open=None):
        """- opens a *.txt file (read as utf-8) into the editfield
        - generates a reference hash.
        - Brings the cursor to the upper left and shows this position
          in the textfield

        The file is read completely before any widget is touched, so a
        failing read leaves the current editor content untouched.
        Nothing happens when no path is given.

        Parameter:
        --> file_to_open:
                complete path for file to open
        idea:
            - rename file_to_open to openfile
        """
        if not file_to_open:
            return

        # read first; the with-block closes the file even if read() fails
        with codecs.open(file_to_open, 'r', 'utf-8') as editfile:
            content = editfile.read()

        self.opened_file_to_open = file_to_open
        self.opened_filename = os.path.basename(file_to_open)
        self.opened_file_label.configure(text=file_to_open)

        # write file content into the editfield
        self.text.delete(1.0, tk.END)
        self.text.insert(1.0, content)

        # generate reference hash for a comparison to track text changes
        self.hash_opened_filename = hash(self.text.get(1.0, tk.END))

        self.text.edit_reset()                  # clear tk's undo/redo stacks
        self.text.focus_set()                   # focus to textfield
        self.text.mark_set(tk.INSERT, '0.0')    # place cursor to upper left
        self.text.see(tk.INSERT)                # and display this line


    def on_save(self):
        """Save the currently edited file (utf-8) into the project folder.

        Shows an info dialog when no file has been opened yet.
        The reference hash used for change tracking is only renewed after
        the file has been written successfully.
        """
        if not self.opened_filename:
            showinfo('Globby Text Editor', 'No File to save \n\n'
                    'You need to choose a File before editing')
            return

        print("on_safe_")
        print("  self.opened_filename %s" % self.opened_filename)

        path_to_safe_file = os.path.join(self.settings.projects_path,
                                self.settings.current_project,
                                self.opened_filename)

        # 'end-1c' leaves out the newline the Tk text widget always appends;
        # with tk.END every save/open cycle added one more newline to the file
        with codecs.open(path_to_safe_file, 'w', 'utf-8') as safefile:
            safefile.write(self.text.get(1.0, 'end-1c'))

        # same index range as used by on_txt_changes for the comparison
        self.hash_opened_filename = hash(self.text.get(1.0, tk.END))
        self.text.edit_reset()        # clear tk's undo/redo stacks


    def on_undo(self):
        """Undo the last edit; show an info dialog if there is none."""
        undo = self.text.edit_undo
        try:
            undo()                    # raises TclError on an empty stack
        except tk.TclError:
            showinfo('Globby Text Editor', 'Nothing to undo')


    def on_redo(self):
        """Redo the last undone edit; show an info dialog if there is none."""
        print("redo")
        redo = self.text.edit_redo
        try:
            redo()                    # raises TclError on an empty stack
        except tk.TclError:
            showinfo('Globby Text Editor', 'Nothing to redo')


    def on_new_page(self):
        """ Ask the user to name the new File, create a File containing one
        info line and load it into the Editorwidget.

        Nothing is created when the dialog is cancelled, when the name is
        empty or when a file with that name already exists.

        TODO:   check if Filename contains Specialchars
        """
        print("on_new_page")
        answer = tkSimpleDialog.askstring("New File Name",
                                    "Fill in a new File Name")
        if answer is None:              # dialog was cancelled
            return
        page_name = answer.strip()
        if not page_name:
            showinfo('Globby Text Editor', 'No File Name given')
            return

        current_project = self.settings.current_project
        proj_path = os.path.join(self.settings.projects_path, current_project)
        nfile_name = os.path.join(proj_path, page_name + '.txt')

        # opening with 'w' would silently wipe an existing page
        if os.path.exists(nfile_name):
            showinfo('Globby Text Editor',
                    'A File named "%s" already exists' % page_name)
            return

        infostring1 = u'# Diese Datei wurde automatisch mit '
        infostring2 = u'dem Projekt "%s" erstellt' % current_project
        with codecs.open(nfile_name, 'w', 'utf-8') as nfile:
            nfile.write(infostring1 + infostring2)

        self.on_open(nfile_name)

    def on_del_page(self):
        """Delete the currently opened page from the project folder.

        Asks for confirmation first. After the file has been removed the
        editor is put back into its "no file" state, so a later save cannot
        recreate the deleted page. Shows an info dialog when no file has
        been opened.
        """
        print("del page")
        if not self.opened_filename:
            showinfo('Globby Text Editor', 'No File to delete \n\n'
                    'You need to choose a File first')
            return

        del_file = os.path.join(self.settings.projects_path,
                                self.settings.current_project,
                                self.opened_filename)

        if not askyesno("Do you really want to delete ", del_file):
            return

        os.remove(del_file)
        print("%s geloescht" % del_file)

        # forget the deleted file and empty the edit field
        self.opened_filename = None
        self.opened_file_label.configure(text="No File chosen")
        self.text.delete(1.0, tk.END)
        self.hash_opened_filename = hash(self.text.get(1.0, tk.END))
        self.text.edit_reset()          # clear tk's undo/redo stacks


    def on_tag_insert(self, event=None, open_tag=None, close_tag=None):
        """ inserts a (wiki)tag to the current cursor position.

        If there is no text marked in the editfield, open_tag and close_tag
        are inserted to the current cursor position behind each other and the
        cursor jumps in between.
        Otherwise the marked string is enclosed by open_tag and close_tag and
        replaces the selection. Here the new cursor position is right behind
        the complete inserted string with tags.

        The cursor position is computed with Tk index arithmetic
        ("<index>+<n>c"), so tags containing newlines (headlines) work too.

        idea:
            - new parameter for on_tag_insert()
              jump_in_between=True/False so a pulldown list for different
              levels of headlines isn't necessary
            - rename to: on_insert_tag??

        Parameter:
        --> event                       # keyboard event or None (button/menu)
        --> open_tag                    # string, None counts as ''
        --> close_tag                   # string, None counts as ''

        Returns:
            'break' when called for a keyboard event, so Tk does not run the
            default binding of that key afterwards; None otherwise.
        """
        open_tag = open_tag or ''
        close_tag = close_tag or ''
        text = self.text

        if text.tag_ranges(tk.SEL):
            # a string is selected: replace it by the tagged string
            start = text.index(tk.SEL_FIRST)
            end = text.index(tk.SEL_LAST)
            replacement = open_tag + text.get(start, end) + close_tag
            text.delete(start, end)
            text.insert(start, replacement)
            cursor_offset = len(replacement)    # behind the closing tag
        else:
            # nothing selected: insert both tags, cursor goes in between
            start = text.index(tk.INSERT)
            text.insert(start, open_tag + close_tag)
            cursor_offset = len(open_tag)

        text.mark_set(tk.INSERT, '%s+%dc' % (start, cursor_offset))
        # display this position on the editfield
        text.see(tk.INSERT)

        if event is not None:
            return 'break'
示例#2
0
class ClustererGui(ttk.Frame):
    """GUI to open/save xml/text-files and visualize clustering."""
    def __init__(self, master=None):
        """Build the GUI and read the "params" section of config.cfg."""
        ttk.Frame.__init__(self, master)
        self.grid(sticky=tk.N + tk.S + tk.E + tk.W)
        self.createWidgets()

        # nothing is loaded or processed yet
        for attribute in ('filepath', 'xml_filepath', 'filename',
                          'article_id', 'extraction', 'author_no',
                          'correct', 'result'):
            setattr(self, attribute, None)
        self.colors = []

        parser = ConfigParser.ConfigParser()
        parser.read("config.cfg")
        params = dict(parser.items("params"))

        article_dir = params['test_file_dir']
        self.auto_split_sentences = bool(int(params['auto_split_sentences']))
        # the option is still parsed, but the knee point is
        # currently not supported in GUI-mode
        self.show_knee_point = bool(int(params['show_knee_point']))
        self.show_knee_point = False
        # start directory of the file dialogs
        self.last_dir = article_dir

    def createWidgets(self):
        """Create and lay out all widgets of the GUI."""
        fill_cell = tk.N + tk.S + tk.E + tk.W

        # let the frame grow with the toplevel window
        toplevel = self.winfo_toplevel()
        toplevel.rowconfigure(0, weight=1)
        toplevel.columnconfigure(0, weight=1)

        for index, weight in ((0, 1), (1, 0)):
            self.rowconfigure(index, weight=weight)
        for index, weight in ((0, 1), (1, 0)):
            self.columnconfigure(index, weight=weight)

        # left side: button row above the text area
        left_frame = ttk.Frame(self, relief="raised", borderwidth=1)
        left_frame.grid(row=0, column=0, sticky=fill_cell)
        left_frame.rowconfigure(0, weight=0)
        left_frame.rowconfigure(1, weight=1)
        left_frame.columnconfigure(0, weight=1)

        toolbar = ttk.Frame(left_frame)
        toolbar.grid(row=0, column=0)

        button_specs = (
            ('choose_file_btn', 'choose file...', self.choose_file),
            ('save_file_btn', 'save file...', self.save_file),
            ('extract_feat_btn', 'process', self.start_featureextr_thread),
        )
        for column, (attribute, caption, callback) in enumerate(button_specs):
            button = ttk.Button(toolbar, text=caption, command=callback)
            setattr(self, attribute, button)
            button.grid(row=0, column=column)

        # right side: author distribution above the result summary
        right_frame = ttk.Frame(self)
        right_frame.grid(row=0, column=1, sticky=fill_cell)
        right_frame.rowconfigure(0, weight=1)
        right_frame.rowconfigure(1, weight=1)

        self.distr_entry = ScrolledText(right_frame, width=30, height=30)
        self.distr_entry.grid(row=0, column=0, columnspan=2, sticky=tk.N)

        self.test_entry = ScrolledText(right_frame, width=30)
        self.test_entry.grid(row=1, column=0, columnspan=2, sticky=tk.N)

        # main text area with the tags used for the line prefix and for
        # blank lines
        editor = ScrolledText(left_frame, undo=True, wrap=tk.WORD)
        self.scrolledText = editor
        editor['font'] = ('Helvetica', '12')
        for tag_name, tag_background in (('lines', "#dddddd"),
                                         ('blanks', "#ffffff")):
            editor.tag_configure(tag_name,
                                 background=tag_background,
                                 foreground="black",
                                 font=('Helvetica', 9))
        editor.grid(row=1, column=0, sticky=fill_cell)

        # status bar: progress bar on the left, status text beside it
        status_bar = ttk.Frame(self)
        status_bar.grid(row=1, column=0, columnspan=2, sticky=tk.W)
        status_bar.columnconfigure(0, weight=1, minsize=100)
        status_bar.columnconfigure(1, weight=1)

        self.status = tk.StringVar()
        self.status.set("ready")
        self.status_label = ttk.Label(status_bar, textvariable=self.status)
        self.status_label.grid(row=0, column=1, padx=10)

        self.progressbar = ttk.Progressbar(status_bar,
                                           mode='indeterminate',
                                           length=200)
        self.progressbar.grid(row=0, column=0, padx=3)

    def choose_file(self):
        """Choose text or xml file dialog.

        Loads the chosen file into the text area. For an xml file the raw
        text is first written to a text file (see create_text_fromXML) and
        that text file is loaded. Cancelling the dialog keeps the currently
        loaded file.
        """
        chosen = askopenfilename(initialdir=self.last_dir,
                                 filetypes=(("text and xml files",
                                             ("*.txt", "*.xml")), ))
        if not chosen:
            # cancelled: do not overwrite self.filepath with an empty value
            return

        self.filepath = chosen
        if os.path.splitext(chosen)[1].lower() == ".xml":
            # save raw-text of xml-file to a new file and print it
            self.xml_filepath = chosen
            self.filepath = self.create_text_fromXML()
        else:
            # a plain text file has no xml source; drop the one left over
            # from a previously chosen xml file
            self.xml_filepath = None

        folder, self.filename = os.path.split(self.filepath)
        # the name of the containing folder serves as article id
        self.article_id = os.path.basename(folder)
        self.scrolledText.delete(1.0, tk.END)
        self.print_raw_text()
        self.scrolledText.edit_reset()

    def create_text_fromXML(self):
        """Create text-file out of given xml-file."""
        new_filepath = os.path.splitext(self.filepath)[0] + ".txt"
        with codecs.open(self.filepath, 'r', 'UTF-8') as xml_file:
            xml_tree = etree.parse(xml_file)

        with codecs.open(new_filepath, 'w', 'UTF-8') as newFile:
            first_entry = True
            for entry in xml_tree.getroot():
                if entry.text is not None:
                    if not first_entry:
                        newFile.write("\n\n")
                    else:
                        first_entry = False
                    newFile.write(entry.text)
        return new_filepath

    def save_file(self):
        """Save text-file-dialog.

        Writes the stripped content of the text area to self.filepath
        (UTF-8). When no file path is set yet, a save-as dialog asks for one.
        Returns True after a successful save and None when the dialog was
        cancelled; I/O errors propagate to the caller.
        """
        text = self.scrolledText.get("0.0", tk.END)
        # an empty path counts as "no file", just like None
        if not self.filepath:
            name = asksaveasfilename(initialdir=self.last_dir,
                                     defaultextension=".txt")
            if not name:
                return
            self.filepath = name

        with codecs.open(self.filepath, 'w', 'UTF-8') as newFile:
            newFile.write(text.strip())
        self.scrolledText.edit_reset()

        folder, self.filename = os.path.split(self.filepath)
        # the name of the containing folder serves as article id
        self.article_id = os.path.basename(folder)
        return True

    def start_featureextr_thread(self):
        """Start thread for feature extraction.

        Refuses to start (with a warning dialog) when no saved file is
        loaded or when the text area holds edits made since the undo stack
        was last reset. The status only switches to "processing..." when
        the extraction really starts.
        """
        self.distr_entry.delete(1.0, tk.END)
        if self.filepath is None or self.article_id is None:
            tkMessageBox.showwarning("Save File",
                                     "Save file for feature extraction.")
            return

        # Modification check: an undo only succeeds when there is something
        # on the undo stack; the edit is restored by the redo right away.
        try:
            self.scrolledText.edit_undo()
            self.scrolledText.edit_redo()
        except tk.TclError:
            modified = False
        else:
            modified = True

        if modified:
            tkMessageBox.showwarning("File changed",
                                     "File was changed, please save.")
            return

        self.status.set("processing...")
        self.extraction = clusterer.Clusterer(self.article_id,
                                              self.filepath,
                                              self.xml_filepath,
                                              self.auto_split_sentences,
                                              self.show_knee_point)

        self.ftr_extr_thread = threading.Thread(
            target=self.extract_features)
        self.ftr_extr_thread.daemon = True
        self.progressbar.start()
        self.ftr_extr_thread.start()
        # poll once per second until the thread has finished
        self.after(1000, self.check_feat_thread)

    def check_feat_thread(self):
        """Check if feature extraction thread is still working - if not: visualize cluster-results.

        While the thread is alive the check is rescheduled every second.
        Afterwards one colour per cluster label is generated, the author
        distribution and the coloured text are printed and a summary is
        written to the test entry.
        """
        if self.ftr_extr_thread.is_alive():
            self.after(1000, self.check_feat_thread)
            return

        # stop the animation first so it does not keep running when one of
        # the steps below fails
        self.progressbar.stop()
        self.status.set("ready")

        # generate author-colormap
        labels = set(self.clusters)
        # newer matplotlib versions name the "spectral" map "nipy_spectral"
        colormap = getattr(plt.cm, 'nipy_spectral', None)
        if colormap is None:
            colormap = plt.cm.spectral

        self.colors = []
        if labels:
            label_span = np.max(self.clusters) + 1
            # colours are indexed by label, so the list must reach the
            # highest label even if some labels in between are unused
            self.colors = [None] * max(len(labels), int(label_span))
            for k in labels:
                if k == 0:
                    temp_color = colormap(0.05)
                else:
                    temp_color = colormap(float(k) / label_span)
                self.colors[k] = self.convert_to_hex(temp_color)
        self.configure_colors()

        self.print_author_distr()
        self.print_text()

        summary = ["authors found: {}".format(len(labels)),
                   "\n believe-score: {:.4f}".format(self.believe_score)]
        # evaluation scores are only shown when the extraction delivered a
        # result and the true number of authors
        if self.correct is not None and self.author_no is not None:
            summary.extend([
                "\n\n true number of authors: {}".format(self.author_no),
                "\n precision: {:.4f}".format(self.scores[0]),
                "\n recall: {:.4f}".format(self.scores[1]),
                "\n f1-score: {:.4f}".format(self.scores[2]),
                "\n adjusted-rand-index: {:.4f}".format(self.scores[3]),
            ])
        self.test_entry.delete(1.0, tk.END)
        self.test_entry.insert(tk.INSERT, "".join(summary))

    def extract_features(self):
        """Start feature extraction."""
        self.clusters, self.result, self.author_no, self.believe_score, self.scores = self.extraction.calc_cluster(
        )

        if self.result is not None:
            c = Counter(self.result)
            self.correct = c[True] / sum(c.values()) * 100

    def print_text(self):
        """Print raw text with specified author-colors.

        Every non-blank line gets a prefix "<line number> <cluster> " (tag
        'lines') and is inserted with the cluster label as tag name. Blank
        lines are inserted with the tag 'blanks' and are not counted.
        Lines without a cluster entry are inserted untagged, prefixed with
        the line number only.
        """
        self.scrolledText.delete(1.0, tk.END)

        # index into self.clusters: counts the non-blank lines only
        line_number = 0
        with open(self.filepath) as f:
            for line in f:
                if not line.strip():
                    self.scrolledText.insert(tk.INSERT, line, 'blanks')
                    continue

                try:
                    cluster = str(self.clusters[line_number])
                except IndexError:
                    # more text lines than cluster labels
                    cluster = None

                if cluster is None:
                    prefix = str(line_number) + ' '
                else:
                    prefix = str(line_number) + ' ' + cluster + ' '
                # centre the prefix in a field of (14 - length) characters;
                # a non-positive width would be an invalid format spec
                width = 14 - len(prefix)
                if width > 0:
                    prefix = ('{:^' + str(width) + '}').format(prefix)
                self.scrolledText.insert(tk.INSERT, prefix, 'lines')

                if cluster is None:
                    self.scrolledText.insert(tk.INSERT, line)
                else:
                    self.scrolledText.insert(tk.INSERT, line, cluster)
                line_number += 1

    def print_raw_text(self):
        """Print raw text: insert the file self.filepath line by line."""
        # the with-block closes the file even when an insert fails
        with open(self.filepath) as f:
            for line in f:
                self.scrolledText.insert(tk.INSERT, line)

    def get_distribution(self, l=None):
        """Return Counter with author distribution in percent.

        l is an iterable of cluster labels; self.clusters is used when it
        is None. An empty input yields an empty Counter.
        """
        if l is None:
            l = self.clusters
        counter = Counter(l)
        sum_counter = sum(counter.values())

        # list(counter) works on Python 2 and 3 (iterkeys does not);
        # float() avoids integer division under Python 2
        for key in list(counter):
            counter[key] = float(counter[key]) / sum_counter * 100
        return counter

    def print_author_distr(self):
        """Print author distribution with specified author-colors.

        One line per cluster label, most frequent first, tagged with the
        label so it is shown in the colour of that cluster.
        """
        self.distr_entry.delete(1.0, tk.END)
        distr = self.get_distribution(self.clusters)

        for index, count in distr.most_common():
            # locale.format_string replaces the deprecated locale.format
            percent = locale.format_string(u'%.2f', count)
            author_i = "author " + str(index) + "{:>20}%\n".format(percent)
            self.distr_entry.insert(tk.INSERT, author_i, str(index))

    def convert_to_hex(self, col):
        """Convert a colour tuple to a hex-coded string '#rrggbb'.

        The first three entries of col are scaled by 255 and truncated to
        integers; further entries are ignored.
        """
        red, green, blue = (int(col[channel] * 255) for channel in range(3))
        return '#{r:02x}{g:02x}{b:02x}'.format(r=red, g=green, b=blue)

    def configure_colors(self):
        """Configure author-specific colors for author-distribution and cluster-results."""
        for i, c in enumerate(self.colors):
            self.scrolledText.tag_configure(str(i),
                                            background=c,
                                            foreground="white")
            self.distr_entry.tag_configure(str(i),
                                           background=c,
                                           foreground="white")
class ClustererGui(ttk.Frame):
    """GUI to open/save xml/text-files and visualize clustering."""

    def __init__(self, master=None):
        """Init GUI and read the options of the ``params`` section of ``config.cfg``.

        Reads ``test_file_dir`` (initial directory of the file dialogs),
        ``auto_split_sentences`` and ``show_knee_point``; the latter is
        parsed but then forced to ``False``.

        :param master: parent widget handed to ``ttk.Frame``.
        """
        ttk.Frame.__init__(self, master)
        self.grid(sticky=tk.N+tk.S+tk.E+tk.W)

        self.createWidgets()

        # State of the currently loaded article and of the last clustering run.
        for attribute in ('filepath', 'xml_filepath', 'filename',
                          'article_id', 'extraction', 'author_no',
                          'correct', 'result'):
            setattr(self, attribute, None)
        self.colors = []

        parser = ConfigParser.ConfigParser()
        parser.read("config.cfg")
        options = dict(parser.items("params"))
        article_dir = options['test_file_dir']
        self.auto_split_sentences = bool(int(options['auto_split_sentences']))
        # The option is still parsed, so a missing or malformed entry fails here.
        self.show_knee_point = bool(int(options['show_knee_point']))
        self.show_knee_point = False  # currently not supported in GUI-mode
        self.last_dir = article_dir


    def createWidgets(self):
        """Create and arrange all widgets of the main window.

        Layout: a left frame with the button row and the main text editor,
        a right frame with the distribution and test-result text boxes, and
        a status bar (progress bar and status label) in the bottom row.
        """
        fill_cell = tk.N + tk.S + tk.E + tk.W

        # Let this frame take up extra space of the toplevel window.
        window = self.winfo_toplevel()
        window.rowconfigure(0, weight=1)
        window.columnconfigure(0, weight=1)

        self.rowconfigure(0, weight=1)
        self.rowconfigure(1, weight=0)
        self.columnconfigure(0, weight=1)
        self.columnconfigure(1, weight=0)

        # Left side: buttons on top, editor below (only the editor row grows).
        editor_pane = ttk.Frame(self, relief="raised", borderwidth=1)
        editor_pane.grid(row=0, column=0, sticky=fill_cell)
        editor_pane.rowconfigure(0, weight=0)
        editor_pane.rowconfigure(1, weight=1)
        editor_pane.columnconfigure(0, weight=1)

        toolbar = ttk.Frame(editor_pane)
        toolbar.grid(row=0, column=0)

        button_specs = (
            ('choose_file_btn', 'choose file...', self.choose_file),
            ('save_file_btn', 'save file...', self.save_file),
            ('extract_feat_btn', 'process', self.start_featureextr_thread),
        )
        for column, (attr_name, caption, callback) in enumerate(button_specs):
            button = ttk.Button(toolbar, text=caption, command=callback)
            button.grid(row=0, column=column)
            setattr(self, attr_name, button)

        # Right side: author distribution above, test results below.
        results_pane = ttk.Frame(self)
        results_pane.grid(row=0, column=1, sticky=fill_cell)
        results_pane.rowconfigure(0, weight=1)
        results_pane.rowconfigure(1, weight=1)

        self.distr_entry = ScrolledText(results_pane, width=30, height=30)
        self.distr_entry.grid(row=0, column=0, columnspan=2, sticky=tk.N)

        self.test_entry = ScrolledText(results_pane, width=30)
        self.test_entry.grid(row=1, column=0, columnspan=2, sticky=tk.N)

        # Main editor with the two fixed tags used when printing results.
        small_font = ('Helvetica', 9)
        self.scrolledText = ScrolledText(editor_pane, undo=True, wrap=tk.WORD)
        self.scrolledText['font'] = ('Helvetica', '12')
        self.scrolledText.tag_configure('lines', background="#dddddd",
                                        foreground="black", font=small_font)
        self.scrolledText.tag_configure('blanks', background="#ffffff",
                                        foreground="black", font=small_font)
        self.scrolledText.grid(row=1, column=0, sticky=fill_cell)

        # Bottom row: progress bar (column 0) and status text (column 1).
        status_bar = ttk.Frame(self)
        status_bar.grid(row=1, column=0, columnspan=2, sticky=tk.W)
        status_bar.columnconfigure(0, weight=1, minsize=100)
        status_bar.columnconfigure(1, weight=1)

        self.status = tk.StringVar()
        self.status.set("ready")
        self.status_label = ttk.Label(status_bar, textvariable=self.status)
        self.status_label.grid(row=0, column=1, padx=10)

        self.progressbar = ttk.Progressbar(status_bar, mode='indeterminate',
                                           length=200)
        self.progressbar.grid(row=0, column=0, padx=3)
    
    def choose_file(self):
        """Choose text or xml file dialog.

        For an xml file the raw text is first written to a text file (see
        ``create_text_fromXML``) and that text file becomes the current
        file. Afterwards ``filename`` and ``article_id`` (name of the
        containing directory) are updated and the raw text is shown in the
        editor with an emptied undo stack.
        """
        chosen = askopenfilename(initialdir=self.last_dir, filetypes=(("text and xml files", ("*.txt","*.xml")),))
        if not chosen:
            # Dialog cancelled: keep the current file. Storing the empty
            # result would defeat the ``self.filepath is None`` checks made
            # before saving and processing.
            return
        self.filepath = chosen

        ext = os.path.splitext(self.filepath)[1]
        if ext == ".xml":
            # save raw-text of xml-file to a new file and print it
            self.xml_filepath = self.filepath
            self.filepath = self.create_text_fromXML()

        base, self.filename = os.path.split(self.filepath)
        self.article_id = os.path.split(base)[1]
        self.scrolledText.delete(1.0, tk.END)
        self.print_raw_text()
        self.scrolledText.edit_reset()

    def create_text_fromXML(self):
        """Create text-file out of given xml-file.

        Parses the xml file at ``self.filepath`` and writes the text of
        every direct child of the root element that has text into a file
        with the same name but the extension ``.txt``; the texts are
        separated by one blank line.

        :return: path of the written text file.
        """
        txt_path = os.path.splitext(self.filepath)[0] + ".txt"
        with codecs.open(self.filepath, 'r', 'UTF-8') as source:
            tree = etree.parse(source)

        paragraphs = [child.text for child in tree.getroot()
                      if child.text is not None]
        with codecs.open(txt_path, 'w', 'UTF-8') as target:
            target.write("\n\n".join(paragraphs))
        return txt_path



    def save_file(self):
        """Save the editor content as UTF-8 text, asking for a name if none is set.

        The stripped editor text is written to ``self.filepath``; when no
        file is set yet a save dialog asks for one. After writing, the undo
        stack is emptied and ``filename`` / ``article_id`` (name of the
        containing directory) are updated.

        :return: ``True`` after saving, ``None`` when the dialog was
            cancelled. Errors while writing propagate to the caller.
        """
        text = self.scrolledText.get("0.0", tk.END)
        if self.filepath is None:
            name = asksaveasfilename(initialdir=self.last_dir, defaultextension=".txt")
            if not name:
                return
            self.filepath = name

        with codecs.open(self.filepath, 'w', 'UTF-8') as target:
            target.write(text.strip())
        self.scrolledText.edit_reset()

        directory, self.filename = os.path.split(self.filepath)
        self.article_id = os.path.split(directory)[1]
        return True


    def start_featureextr_thread(self):
        """Start thread for feature extraction.

        Nothing is started (a warning is shown instead) when no file is
        set or when the editor content has unsaved changes. Otherwise a
        ``Clusterer`` is created, ``extract_features`` runs in a daemon
        thread and ``check_feat_thread`` is scheduled to poll it.
        """
        self.distr_entry.delete(1.0, tk.END)
        if self.filepath is None or self.article_id is None:
            tkMessageBox.showwarning(
                "Save File",
                "Save file for feature extraction.")
            return

        # Loading and saving empty the undo stack, so a successful undo means
        # the text was edited since then. The undo is reverted right away.
        try:
            self.scrolledText.edit_undo()
        except tk.TclError:
            # Nothing to undo: the text is unchanged, go on.
            pass
        else:
            self.scrolledText.edit_redo()
            tkMessageBox.showwarning(
                "File changed",
                "File was changed, please save.")
            return

        self.extraction = clusterer.Clusterer(self.article_id, self.filepath, self.xml_filepath, self.auto_split_sentences, self.show_knee_point)

        self.ftr_extr_thread = threading.Thread(target=self.extract_features)
        self.ftr_extr_thread.daemon = True
        # Only announce processing once it really starts; the warnings above
        # must not leave the status bar stuck at "processing...".
        self.status.set("processing...")
        self.progressbar.start()
        self.ftr_extr_thread.start()
        self.after(1000, self.check_feat_thread)

    def check_feat_thread(self):
        """Check if feature extraction thread is still working - if not: visualize cluster-results.

        While the thread is alive the check is rescheduled after one second.
        Afterwards one color per cluster label is generated, the tags are
        configured, the author distribution and the colored text are printed
        and a summary is written to ``self.test_entry``.
        """
        if self.ftr_extr_thread.is_alive():
            self.after(1000, self.check_feat_thread)
            return

        self.status.set("ready")

        # generate author-colormap
        labels = set(self.clusters)
        if labels:
            max_label = max(labels)
            # The label is used as list index, so the list has to reach up to
            # the largest label; sizing it by the number of labels alone
            # breaks for non-contiguous labels such as {0, 2}.
            self.colors = [None] * max(len(labels), int(max_label) + 1)
            for k in labels:
                if k == 0:
                    temp_color = plt.cm.spectral(0.05)
                else:
                    temp_color = plt.cm.spectral(float(k) / (max_label + 1))
                self.colors[k] = self.convert_to_hex(temp_color)
        else:
            self.colors = []
        self.configure_colors()

        self.progressbar.stop()
        self.print_author_distr()
        self.print_text()

        summary = "authors found: {}".format(len(labels))
        summary += "\n believe-score: {:.4f}".format(self.believe_score)
        if self.correct is not None and self.author_no is not None:
            # Evaluation values are only shown when both are available.
            summary += "\n\n true number of authors: {}".format(self.author_no)
            summary += "\n precision: {:.4f}".format(self.scores[0])
            summary += "\n recall: {:.4f}".format(self.scores[1])
            summary += "\n f1-score: {:.4f}".format(self.scores[2])
            summary += "\n adjusted-rand-index: {:.4f}".format(self.scores[3])
        self.test_entry.delete(1.0, tk.END)
        self.test_entry.insert(tk.INSERT, summary)

    def extract_features(self):
        """Start feature extraction.

        Stores the five values returned by ``self.extraction.calc_cluster()``
        in ``clusters``, ``result``, ``author_no``, ``believe_score`` and
        ``scores``. ``self.correct`` becomes the percentage of ``True``
        entries in ``result``, or ``None`` when ``result`` is ``None`` or
        empty.
        """
        (self.clusters, self.result, self.author_no,
         self.believe_score, self.scores) = self.extraction.calc_cluster()

        # Reset first so that the value of an earlier run cannot survive.
        self.correct = None
        if self.result is not None:
            c = Counter(self.result)
            total = sum(c.values())
            if total:
                # float() forces true division (Python 2 would truncate).
                self.correct = float(c[True]) / total * 100

    def print_text(self):
        """Print raw text with specified author-colors.

        Re-reads ``self.filepath`` into the editor. Every non-blank line is
        preceded by a label "<line index> <cluster>" (tag ``'lines'``) and
        tagged with its cluster, so the color configured for that cluster
        applies. Blank lines get the tag ``'blanks'`` and are not counted.
        Non-blank lines without a cluster entry are inserted untagged.
        """
        self.scrolledText.delete(1.0, tk.END)

        # Counts non-blank lines only; this is the index into self.clusters.
        line_number = 0
        with open(self.filepath) as text_file:
            for line in text_file:
                if not line.strip():
                    self.scrolledText.insert(tk.INSERT, line, 'blanks')
                    continue

                try:
                    cluster = str(self.clusters[line_number])
                except IndexError:
                    # More text lines than cluster entries: show the line
                    # without label and color instead of aborting.
                    self.scrolledText.insert(tk.INSERT, line)
                else:
                    line_cluster = str(line_number) + ' ' + cluster + ' '
                    line_cluster = ('{:^'+str(14-len(line_cluster))+'}').format(line_cluster)
                    self.scrolledText.insert(tk.INSERT, line_cluster, 'lines')
                    self.scrolledText.insert(tk.INSERT, line, cluster)
                line_number += 1

    def print_raw_text(self):
        """Print raw text.

        Inserts every line of ``self.filepath`` at the insert cursor of
        ``self.scrolledText`` without a tag.
        """
        # The context manager closes the file even when an insert fails.
        with open(self.filepath) as raw_file:
            for line in raw_file:
                self.scrolledText.insert(tk.INSERT, line)

    def get_distribution(self, l=None):
        """Return Counter with author distribution in percent.

        :param l: iterable of author/cluster labels; ``self.clusters`` is
            used when it is omitted.
        :return: ``Counter`` mapping every label to its share of all labels
            as a float percentage; empty when there are no labels.
        """
        if l is None:
            l = self.clusters
        counter = Counter(l)
        # Dividing by a float avoids Python 2 integer division, which would
        # turn every share below 100% into 0.
        sum_counter = float(sum(counter.values()))

        # A key snapshot works on Python 2 and 3 alike; ``iterkeys`` is
        # Python-2-only.
        for key in list(counter):
            counter[key] = counter[key] / sum_counter * 100
        return counter

    def print_author_distr(self):
        """Print author distribution with specified author-colors.

        Clears ``self.distr_entry`` and writes one line per author, most
        frequent first, tagged with the author index so that the tag
        configuration of that index applies to the line.
        """
        self.distr_entry.delete(1.0, tk.END)
        distr = self.get_distribution(self.clusters)

        for index, count in distr.most_common():
            # locale.format() is deprecated; locale.format_string() gives the
            # same result and is available on Python 2 and 3.
            percent = locale.format_string(u'%.2f', count)
            author_i = "author "+str(index)+"{:>20}%\n".format(percent)
            self.distr_entry.insert(tk.INSERT, author_i, str(index))

    def convert_to_hex(self, col):
        """Convert a color tuple to a hex-coded ``#rrggbb`` string.

        Reads the first three entries of ``col`` (red, green, blue),
        multiplies each by 255 and truncates it to an int.
        Presumably the channels are floats in 0..1 - verify against caller.
        """
        hex_pairs = ['{:02x}'.format(int(col[position] * 255))
                     for position in range(3)]
        return '#' + ''.join(hex_pairs)

    def configure_colors(self):
        """Configure author-specific colors for author-distribution and cluster-results.

        Each entry of ``self.colors`` becomes the background (with white
        foreground) of the tag named after its index, on the main text
        widget and on the distribution widget.
        """
        for position, color in enumerate(self.colors):
            tag = str(position)
            # Main text first, then the distribution box.
            for target in (self.scrolledText, self.distr_entry):
                target.tag_configure(tag, background=color, foreground="white")