xlwings find specific char text start and end position and format it

Question:

I have multiple excel files with 12 sheets in each file.

enter image description here

So, in each sheet, I have a fixed text like below – "Project has been blocked"

So, I would like to do the below

a) Find the text "Project has been blocked" wherever it appears and change it is formatting like below (with bold red color) as shown below

b) save the excel file as .xlsx

I tried the below

req_text = "Project has been blocked"

for a_cell in ws.used_range:
        if a_cell.value == req_text:
            print(a_cell.address)
            col = a_cell.address[0]
            ws[col].characters.font.bold = True  #how to get the start and end position of my text
            ws[col].characters.font.color = (255, 0, 0)

But this doesn’t work correctly. As I am unable to get the start and end position of my text.

I expect my output to be like as below

enter image description here

update

files = glob.glob(os.getcwd()+'\sheet_replaced_files\'+'YR2022_*.xlsx')
for old_filename in files:
    old_filename = old_filename.split('sheet_replaced_files\')[1]
    print(old_filename)
    with xw.App(visible=False) as app:
        app.display_alerts = False
        
        def find_next_cell(start_cell):
            found_cell = ws.api.UsedRange.Find(req_text,
                                       After=start_cell,
                                       LookIn=xlValues,
                                       SearchOrder=xlByColumns,
                                       SearchDirection=xlNext,
                                       MatchCase=False)
            return found_cell
        
        workbook = xw.Book(os.getcwd()+'\sheet_replaced_files\'+old_filename)
        ws = workbook.sheets('5) Development')
        # First cell to start searching for req_text
        search_from_cell = ws.api.Range('A1')
        count = 0
        first_search_cell = ''
        


        while True:
            # Search for next cell to update
            update_cell = find_next_cell(search_from_cell)
            print(update_cell) #returns None here
            # Excel search will restart search again from the beginning after the last match
            # is found exit the loop when find the first match again
            if update_cell._inner.Address != first_search_cell:
                print(update_cell._inner.Address)
                # Set the address of the first found cell
                if count == 0:
                    first_search_cell = update_cell._inner.Address
                cell_column = update_cell._inner.Column
                cell_row = update_cell._inner.Row

                text = ws.range(cell_row, cell_column).value
                len_req_text = len(req_text)

                # Create a List of the start position for all instances of the req_text
                # tsi = text position index
                tsi_list = [index for index in range(len(text)) if text.startswith(req_text, index)]

                # Iterate the tsi list
                for i in range(len(tsi_list)):
                    # Get the index of the text position, tps = text position start
                    tps = tsi_list[i]
                    # Use the tps as start of the character position of the req_text
                    # and (tps + length of req_text) for the end character position
                    ws.range(cell_row, cell_column).characters[tps:tps + len_req_text].font.bold = False
                    ws.range(cell_row, cell_column).characters[tps:tps + len_req_text].font.color = (0,255,0)

                search_from_cell = ws.api.Range(update_cell._inner.Address.replace('$', ''))

                count += 1

            else:
                break

        workbook.save(os.getcwd()+'\colored_text\'+old_filename)

Sometimes, I also get an error message like below


----
com_error                                 Traceback (most recent call last)
<timed exec> in <module>

<timed exec> in find_next_cell(start_cell)

~Anaconda3libsite-packagesxlwings_xlwindows.py in __call__(self, *args, **kwargs)
    103         while True:
    104             try:
--> 105                 v = self.__method(*args, **kwargs)
    106                 if isinstance(v, (CDispatch, CoClassBaseClass, DispatchBaseClass)):
    107                     return COMRetryObjectWrapper(v)

C:UsersAksh~1AppDataLocalTempgen_py3.90020813-0000-0000-C000-000000000046x0x1x9.py in Find(self, What, After, LookIn, LookAt, SearchOrder, SearchDirection, MatchCase, MatchByte, SearchFormat)
  33513     def Find(self, What=defaultNamedNotOptArg, After=defaultNamedNotOptArg, LookIn=defaultNamedNotOptArg, LookAt=defaultNamedNotOptArg
  33514             , SearchOrder=defaultNamedNotOptArg, SearchDirection=1, MatchCase=defaultNamedOptArg, MatchByte=defaultNamedOptArg, SearchFormat=defaultNamedOptArg):
> 33515         ret = self._oleobj_.InvokeTypes(398, LCID, 1, (9, 0), ((12, 1), (12, 17), (12, 17), (12, 17), (12, 17), (3, 49), (12, 17), (12, 17), (12, 17)),What
  33516                         , After, LookIn, LookAt, SearchOrder, SearchDirection
  33517             , MatchCase, MatchByte, SearchFormat)

com_error: (-2147352567, 'Exception occurred.', (0, None, None, None, 0, -2147352571), 9)

UPDATE

enter image description here

Asked By: The Great

||

Answers:

I have changed the code to include an Excel search for the text in the used data of the sheet, and then update that cell text with Bold-Red as desired per the original code.
I ended up having to use a while loop for the Excel search and break out when the search looped back to the first found cell. Therefore the code keeps track of the first_search_cell to compare with the next found cell in the while loop.
I’ve left the Excel search variables as constants so you know what the name and value is if you want to change the search options. Obviously you can delete the ones you don’t want or use import from Xlwings constants.
Otherwise its pretty much the same.

...
# Excel Search constants
# class LookAt:
xlPart = 2  # from enum XlLookAt
xlWhole = 1  # from enum XlLookAt
# class FindLookIn:
xlComments = -4144  # from enum XlFindLookIn
xlFormulas = -4123  # from enum XlFindLookIn
xlValues = -4163  # from enum XlFindLookIn
# class SearchOrder:
xlByColumns = 2  # from enum XlSearchOrder
xlByRows = 1  # from enum XlSearchOrder
# class SearchDirection:
xlNext = 1  # from enum XlSearchDirection
xlPrevious = 2  # from enum XlSearchDirection


def find_next_cell(start_cell):
    # Import xlwings constants for search parameters
    from xlwings import constants
    found_cell = ws.api.UsedRange.Find(req_text,
                                       After=start_cell,
                                       LookIn=constants.FindLookIn.xlValues,
                                       SearchOrder=constants.SearchOrder.xlByColumns,
                                       SearchDirection=constants.SearchDirection.xlNext,
                                       MatchCase=False)
    return found_cell


wb = xw.Book('foo.xlsx')
ws = wb.sheets('Sheet1')

req_text = "Project has been blocked"

# First cell to start searching for req_text
search_from_cell = ws.api.Range('A1')

count = 0
first_search_cell = ''
while True:
    # Search for next cell to update
    update_cell = find_next_cell(search_from_cell)

    # Excel search will restart search again from the beginning after the last match
    # is found exit the loop when find the first match again
    if update_cell._inner.Address != first_search_cell:
        print(update_cell._inner.Address)
        # Set the address of the first found cell
        if count == 0:
            first_search_cell = update_cell._inner.Address

        cell_column = update_cell._inner.Column
        cell_row = update_cell._inner.Row

        text = ws.range(cell_row, cell_column).value
        len_req_text = len(req_text)

        # Create a List of the start position for all instances of the req_text
        # tsi = text position index
        tsi_list = [index for index in range(len(text)) if text.startswith(req_text, index)]

        # Iterate the tsi list
        for i in range(len(tsi_list)):
            # Get the index of the text position, tps = text position start
            tps = tsi_list[i]
            # Use the tps as start of the character position of the req_text
            # and (tps + length of req_text) for the end character position
            ws.range(cell_row, cell_column).characters[tps:tps + len_req_text].font.bold = True
            ws.range(cell_row, cell_column).characters[tps:tps + len_req_text].font.color = (255, 0, 0)

        search_from_cell = ws.api.Range(update_cell._inner.Address.replace('$', ''))

        count += 1

    else:
        break

wb.save('foo.xlsx')
...
Answered By: moken