Tkinter ScrolledText search() function not working beyond the first line

Question:

I’m creating a tkinter application that highlights misspelled words in a ScrolledText() widget. To find the index of the misspelled word I use the check() function. However, when I move to a new line in the scrolled text, the index returned by check() is stuck on the index of the last word in the previous line.

import nltk
from nltk.corpus import words
from nltk.corpus import wordnet
import re
import tkinter as tk
from tkinter.scrolledtext import ScrolledText


# Fetch the two NLTK corpora that serve as the dictionary.
nltk.download("words")
nltk.download("wordnet")
# Merge both corpora's word lists; "typing" is added by hand.
w= words.words()+list(wordnet.words())
w.append("typing")
word_set= set(w) #set of correctly spelled words from NLTK corpus
# Main window holding a scrolling text area to type into.
root= tk.Tk()
root.geometry("600x600")
text= ScrolledText(root, font = ("Arial", 14))
# Space count seen at the previous check; check() compares against it.
old_spaces= 0

def check(self):
    """Re-tag misspelled words whenever the number of spaces changes.

    Bound to ``<KeyRelease>``; Tkinter passes the event object as the only
    positional argument (named ``self`` here) and it is not used.

    When the space count differs from ``old_spaces``, every tag in the
    widget is deleted and each space-separated word that is not found in
    ``word_set`` gets a red tag named after the word itself.
    """
    global old_spaces
    # old_lines and prev_len are declared but never used in this function.
    global old_lines
    global prev_len
    content= text.get("1.0", tk.END)
    space_count= content.count(" ")

    if space_count != old_spaces:  ##updates ScrolledText tags when spacebar is hit
        old_spaces = space_count
        print(content.split())
        for tag in text.tag_names():  ##resets and deletes all previous tags
            text.tag_delete(tag)


        for word in content.split(" "):
            # Strip every non-word character (punctuation, newlines) before
            # the dictionary lookup; the raw string keeps the \w escape intact.
            if  re.sub(r"[^\w]", "", word.lower()) not in word_set: ##checks if word is spelt correctly

                count_var= tk.StringVar()
                # NOTE: the search always starts at '1.0', so it returns the
                # first match in the widget, wherever the word was typed.
                start_pos= text.search(word, '1.0',  count= count_var)  ##the search function
                end_pos = str(start_pos) + "+" + count_var.get() + "c"
                print(end_pos)
                text.tag_add(word, f"{start_pos}", f"{end_pos}")
                text.tag_config(word, foreground= "red")

                
                    

                                  
# Run check() after every key release inside the text widget.
text.bind("<KeyRelease>",check)
text.pack()


# Enter the Tk event loop.
root.mainloop()

[Tkinter output: "First" in the first line and "Second" in the second line](https://i.stack.imgur.com/HqwbJ.png)

Terminal window output: the word "Second", though on the second line, has the same index as "First".

Asked By: Tushar Rao

||

Answers:

You can get rid of all the globals and the regex. Also, just create one tag and reuse it. You are searching for every word starting from position 1.0, but each search should start from the end position (end_pos) of the previous match.

import nltk
from nltk.corpus import words
from nltk.corpus import wordnet
import re
import tkinter as tk
from tkinter.scrolledtext import ScrolledText


# The corpus downloads are commented out in this version; presumably the
# corpora are already installed locally. Uncomment them if they are not.
#nltk.download("words")
#nltk.download("wordnet")
# Merge both corpora's word lists; "typing" is added by hand.
w= words.words()+list(wordnet.words())
w.append("typing")
word_set= set(w) #set of correctly spelled words from NLTK corpus
# Main window holding a scrolling text area to type into.
root= tk.Tk()
root.geometry("600x600")
text= ScrolledText(root, font = ("Arial", 14))


def check(self):
    """Highlight every misspelled word in the text widget.

    Bound to ``<KeyRelease-Return>`` and ``<KeyRelease-space>``; Tkinter
    passes the event object as the only positional argument (named ``self``
    here) and it is not used.

    The whole buffer is re-read on each call. Each whitespace-separated
    word is located in the widget in order, and the ones whose lower-cased
    form is missing from ``word_set`` receive the ``misspelled`` tag.
    """
    #start over with misspelled tag
    text.tag_delete('misspelled')
    text.tag_configure('misspelled', foreground= "red")

    #split() with no argument breaks on spaces, tabs and newlines alike and
    #never yields empty strings, so nothing empty is looked up or searched
    tokens = text.get('1.0', tk.END).split()
    end_pos = '1.0'
    count_var = tk.StringVar()

    for word in tokens:
        #note that we resume from the end of the previous word instead of
        #'1.0'; every word is located (not only the misspelled ones) so a
        #misspelled word can never match inside an earlier, longer word
        start_pos = text.search(word, end_pos, tk.END, count=count_var)
        if not start_pos:
            #search() returns '' when nothing is found; an empty index
            #would make tag_add() raise, so skip and keep end_pos as it is
            continue
        end_pos = f"{start_pos}+{count_var.get()}c"

        if word.lower() not in word_set:
            text.tag_add('misspelled', start_pos, end_pos)
        
# Run check() only when Return or space is released, i.e. at word boundaries.
for key in ('Return', 'space'):
    text.bind(f"<KeyRelease-{key}>", check)
    
text.pack()
# One shared tag that colours misspelled words red.
text.tag_configure('misspelled', foreground= "red")


# Enter the Tk event loop.
root.mainloop()

This approach is not very efficient, though: every check re-examines the entire contents. Another way to go is to check only the word that was just typed. One way to do this is with a mark.

import nltk
from nltk.corpus import words
from nltk.corpus import wordnet
import re
import tkinter as tk
from tkinter.scrolledtext import ScrolledText


# The corpus downloads are commented out in this version; presumably the
# corpora are already installed locally. Uncomment them if they are not.
#nltk.download("words")
#nltk.download("wordnet")
# Merge both corpora's word lists; "typing" is added by hand.
w= words.words()+list(wordnet.words())
w.append("typing")
word_set= set(w) #set of correctly spelled words from NLTK corpus
# Main window holding a scrolling text area to type into.
root= tk.Tk()
root.geometry("600x600")
text= ScrolledText(root, font = ("Arial", 14))


def check(self):
    """Spell-check only the word that was just typed.

    Bound to ``<KeyRelease-Return>`` and ``<KeyRelease-space>``; Tkinter
    passes the event object as the only positional argument (named ``self``
    here) and it is not used.

    The ``lpos`` mark holds the position where the previous check ended.
    The text between that mark and the character just before the caret is
    taken as the word; if it is not in ``word_set`` it receives the
    ``misspelled`` tag. The mark is then moved to the caret.
    """
    #the word ends just before the space or newline that was just typed
    word_end = f'{tk.INSERT}-1c'

    #get characters from mark to just before last space or return
    word = text.get("lpos", word_end)

    #if that word is not in the word list, tag it
    #(an empty word, e.g. after two spaces in a row, has nothing to tag)
    if word and word.lower() not in word_set:
        text.tag_add('misspelled', "lpos", word_end)

    #move mark to caret position
    text.mark_set('lpos', tk.INSERT)

#check on return and space
for key in ('Return', 'space'):
    text.bind(f"<KeyRelease-{key}>", check)
    
text.pack()

#init mark at first position with gravity set to left so it wont move as you type
text.mark_set('lpos', "1.0") 
text.mark_gravity('lpos', tk.LEFT)

#make 1 tag and reuse it
text.tag_configure('misspelled', foreground= "red")

root.mainloop()

You will need to play with this more to handle situations like backspace, maybe tab, delete and any other situation where the text is changed in a non-linear way.

enter image description here

Answered By: OysterShucker