Why are my janitor functions not working in Tkinter?

Question:

I have functions that work perfectly fine in a standard Python script, but when I put them in Tkinter they error out. I can’t seem to find the issue. I’ve tried to write this out as simply as I can. The data that will be pasted into the input will come in this same format.

# Standard Python Code that works:

import pandas as pd
import janitor as jn


# Sample input: 26 newline-separated values, i.e. two records of 13 lines
# each. There is no line break after the final value ("U").
raw_data = """Straight
08/17/2022 8:37PM
A
B
C
Over door
D
E
F
G
H
I
J
K
08/17/2022 8:35PM
L
M
N
Under door
O
P
Q
R
S
T
U"""


def data_clean(data):
    """Parse newline-separated text into a DataFrame and tag rows Over/Under.

    The text is split on line breaks and grouped into consecutive chunks of
    13 lines; each chunk becomes one row. Every column whose name matches
    ``useless`` is dropped, leaving ``col0`` to ``col3``, and an
    ``Over_Under`` column is derived from the contents of ``col3``.

    Args:
        data: Raw text with one field per line, 13 lines per record.

    Returns:
        A DataFrame with the columns ``col0``, ``col1``, ``col2``, ``col3``
        and ``Over_Under``.
    """
    # Split on the newline escape "\n" (not the letter "n") so that every
    # input line becomes exactly one field.
    new_list = data.split("\n")

    # One record is 13 consecutive lines; the last chunk may be shorter.
    data_chunks = [new_list[i:i + 13] for i in range(0, len(new_list), 13)]

    cols = ['useless0', 'col0', 'useless10', 'col1', 'col2', "col3", "useless11", "useless1",
            "useless2", "useless3", 'useless12', "useless13", "useless14"]

    DF = pd.DataFrame(data_chunks)

    DF.columns = cols

    # Keep only the columns whose name does not match "useless".
    DF = DF[DF.columns.drop(list(DF.filter(regex='useless')))]

    # Side Bet: label each row from the text in col3, falling back to "Check".
    DF = jn.case_when(DF,

                      DF['col3'].str.contains("Over"), "Over",
                      DF['col3'].str.contains("Under"), "Under",
                      "Check",

                      column_name='Over_Under')

    return DF

# Run the cleaner on the sample data and show the resulting DataFrame.
df_test = data_clean(raw_data)

print(df_test)

I’ve tried other variations of the code but I still can’t seem to get this jn to work.

# This code throws an error when I input the same data in the input box. 

# I am copy and pasting into the input directly as it appears in raw_data 
# from the working code without the """ """ of course.

import tkinter as tk
from tkinter import ttk
import pandas as pd
import janitor as jn

def data_clean(data):
    """Parse newline-separated text into a DataFrame and tag rows Over/Under.

    The text is split on line breaks and grouped into consecutive chunks of
    13 lines; each chunk becomes one row. Every column whose name matches
    ``useless`` is dropped, leaving ``col0`` to ``col3``, and an
    ``Over_Under`` column is derived from the contents of ``col3``.

    Args:
        data: Raw text with one field per line, 13 lines per record.

    Returns:
        A DataFrame with the columns ``col0``, ``col1``, ``col2``, ``col3``
        and ``Over_Under``.
    """
    # Split on the newline escape "\n" (not the letter "n") so that every
    # input line becomes exactly one field.
    new_list = data.split("\n")

    # One record is 13 consecutive lines; the last chunk may be shorter.
    data_chunks = [new_list[i:i + 13] for i in range(0, len(new_list), 13)]

    cols = ['useless0', 'col0', 'useless10', 'col1', 'col2', "col3", "useless11", "useless1",
            "useless2", "useless3", 'useless12', "useless13", "useless14"]

    DF = pd.DataFrame(data_chunks)

    DF.columns = cols

    # Keep only the columns whose name does not match "useless".
    DF = DF[DF.columns.drop(list(DF.filter(regex='useless')))]

    # Side Bet: label each row from the text in col3, falling back to "Check".
    DF = jn.case_when(DF,

                      DF['col3'].str.contains("Over"), "Over",
                      DF['col3'].str.contains("Under"), "Under",
                      "Check",

                      column_name='Over_Under')

    return DF

def submit_data():
    """Button callback: clean the text currently in the input box and print it.

    Reads the module-level ``data_input`` Text widget, passes its contents to
    ``data_clean`` and prints the resulting DataFrame to stdout.
    """

    # Fetch everything from line 1, character 0 up to the widget's END index.
    data = data_input.get(1.0, tk.END)

    df = data_clean(data)

    print(df)

# Create the main application window.
root = tk.Tk()
root.title("Test")
root.geometry("300x300")

## -- Input Data frame -- ##

# Labelled container that holds the text box and the submit button.
InputFrame = tk.LabelFrame(root, text="Input", height=400, width=100)
InputFrame.grid(row=1, column=1, stick="n")

# Multi-line text box the raw data is pasted into; read by submit_data().
data_input = tk.Text(InputFrame, height=10, width=5)
data_input.pack()

# Submit button: calls submit_data() when clicked.
tk.Button(InputFrame, text="Submit", command=submit_data).pack()

# Hand control to the Tk event loop.
root.mainloop()

Below is the error:

Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Python_Projects\test\venv\lib\site-packages\janitor\functions\case_when.py", line 123, in case_when
    default = default.mask(condition, value)
  File "C:\Python_Projects\test\venv\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
    return func(*args, **kwargs)
  File "C:\Python_Projects\test\venv\lib\site-packages\pandas\core\series.py", line 5584, in mask
    return super().mask(cond, other, inplace, axis, level, errors, try_cast)
  File "C:\Python_Projects\test\venv\lib\site-packages\pandas\core\generic.py", line 9345, in mask
    ~cond,
  File "C:\Python_Projects\test\venv\lib\site-packages\pandas\core\generic.py", line 1522, in __invert__
    new_data = self._mgr.apply(operator.invert)
  File "C:\Python_Projects\test\venv\lib\site-packages\pandas\core\internals\managers.py", line 302, in apply
    applied = b.apply(f, **kwargs)
  File "C:\Python_Projects\test\venv\lib\site-packages\pandas\core\internals\blocks.py", line 402, in apply
    result = func(self.values, **kwargs)
TypeError: bad operand type for unary ~: 'NoneType'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Python\lib\tkinter\__init__.py", line 1892, in __call__
    return self.func(*args)
  File "C:\Python_Projects\test\Test_Shell_BK.py", line 36, in submit_data
    df = data_clean(data)
  File "C:\Python_Projects\test\Test_Shell_BK.py", line 22, in data_clean
    DF = jn.case_when(DF,
  File "C:\Python_Projects\test\venv\lib\site-packages\janitor\functions\case_when.py", line 127, in case_when
    raise ValueError(
ValueError: condition0 and value0 failed to evaluate. Original error message: bad operand type for unary ~: 'NoneType'
Asked By: Fugles

||

Answers:

The problem starts here:

data = data_input.get(1.0, tk.END)

When you enter your raw data here, it will actually add a line break at the end. So, what you are inputting here would be the equivalent of ending your raw_data multiline string as follows:

raw_data = """Straight
08/17/2022 8:37PM
A
...
U
"""

Rather than (as now):

raw_data = """Straight
08/17/2022 8:37PM
A
...
S
T
U"""

This becomes a problem inside data_clean on the first line.

# assign your original `raw_data` to `data`
data = raw_data
new_list = data.split("\n")
print(new_list[-2:])
['T', 'U']

# same, but now with `raw_data` with extra line break at end, leads to:
print(new_list[-3:])
['T', 'U', '']

This leads to problems further down the road, as your df will look like this:

                col0  col1  col2        col3
0  08/17/2022 8:37PM     B     C   Over door
1  08/17/2022 8:35PM     M     N  Under door
2               None  None  None        None

The fix is rather simple: just make sure to get rid of the empty string at the end of your list.

# Split on the newline escape "\n" and drop the final element, which is the
# empty string left behind by the trailing line break.
new_list = data.split("\n")[:-1]

# or if you want both versions to work with the same code snippet, something like:

# Only drop the final element when it really is the empty string, so input
# without a trailing line break is left intact.
temp_list = data.split("\n")
if temp_list[-1] == '':
    new_list = temp_list[:-1]
else:
    new_list = temp_list

Update: as mentioned by @BryanOakley in the comments, the best solution is actually to change:

data = data_input.get(1.0, tk.END)

into:

data = data_input.get(1.0, "end-1c")
Answered By: ouroboros1