Highlight the best value of each row in python pandas to_latex

Question:

I am using pandas to automatically generate tables using to_latex(), and I want to highlight (either bold, italics, etc.) the best value (either max, min) of each row.

Is there a way to do that?

I found this issue on their GitHub; however, no answer was provided and I didn't find a follow-up.

Asked By: lazary

||

Answers:

There is already a pull request that will support this feature in the future. It is expected to be released with pandas 1.3.0, as mentioned in this comment. In the meantime, I came across a solution in another issue that focuses on highlighting the maximum value per column:

from functools import partial

import pandas as pd
import numpy as np


def bold_formatter(x, value, num_decimals=2):
    r"""Format a number for LaTeX, in bold when (almost) identical to a given value.

    The number is always wrapped in ``\num{...}``; when it matches ``value``
    the result is additionally wrapped in ``{\bfseries ...}``.

    Args:
        x: Input number.

        value: Value to compare x with.

        num_decimals: Number of decimals to use for output format.

    Returns:
        String converted output, e.g. ``\num{1.12}`` or
        ``{\bfseries\num{1.12}}``.

    """
    # Raw f-strings are required here: in a normal string literal "\b" is a
    # backspace and "\n" is a newline, which would corrupt "\bfseries"/"\num".
    formatted = rf"\num{{{x:.{num_decimals}f}}}"

    # Consider values equal, when rounded results are equal
    # otherwise, it may look surprising in the table where they seem identical
    if round(x, num_decimals) == round(value, num_decimals):
        return rf"{{\bfseries{formatted}}}"
    return formatted


# Example data: three rows, four numeric columns.
df = pd.DataFrame(
    np.array([
        [1.123456, 2.123456, 3.123456, 4.123456],
        [11.123456, 22.123456, 33.123456, 44.123456],
        [111.123456, 222.123456, 333.123456, 444.123456],
    ]),
    columns=['a', 'b', 'c', 'd'],
)

# Header cells written to the LaTeX table. Raw strings keep the backslashes
# intact: in a normal literal "\v" is a vertical tab and "\s"/"\m" are invalid
# escape sequences.
# NOTE(review): "\seconds" is kept as in the original; confirm the unit macro
# is defined in the target document.
col_names = [r'a in \si{\meter}',
             r'b in \si{\volt}',
             r'c in \si{\seconds}',
             'd']

# Columns to format with maximum condition and 2 floating decimals
max_columns_2f = ['a']

# Columns to format with minimum condition and 2 floating decimals
min_columns_2f = ['b', 'c']

# Columns to format with minimum condition and 4 floating decimals
min_columns_4f = ['d']

# One formatter per column, each bound to that column's best (max or min) value.
fmts_max_2f = {column: partial(bold_formatter, value=df[column].max(), num_decimals=2) for column in max_columns_2f}
fmts_min_2f = {column: partial(bold_formatter, value=df[column].min(), num_decimals=2) for column in min_columns_2f}
fmts_min_4f = {column: partial(bold_formatter, value=df[column].min(), num_decimals=4) for column in min_columns_4f}

fmts = {**fmts_max_2f, **fmts_min_2f, **fmts_min_4f}

# escape=False so the LaTeX commands produced by the formatters and headers
# are written verbatim instead of being escaped.
with open("test_table.tex", "w", encoding="utf-8") as fh:
    df.to_latex(buf=fh,
                index=False,
                header=col_names,
                formatters=fmts,
                escape=False)
Answered By: Gent Rexha

Loop through rows to find the locations of the max values and apply formatting on these specific places.

import numpy as np
import pandas as pd

# generate a dataframe with 10 rows and 4 columns filled with random numbers
df = pd.DataFrame(data=np.random.rand(10, 4), index= [f"row_{i}" for i in range(10)], columns=[f"col_{i}" for i in range(4)])

# apply some formatting for all numbers (optional)
df_s = df.style.format("{:.2f}")

# loop through rows and find which column for each row has the highest value
for row in df.index:
    col = df.loc[row].idxmax()
    # redo formatting for a specific cell
    df_s = df_s.format(lambda x: "\textbf{" + f'{x:.2f}' + "}", subset=(row, col))

print(df_s.to_latex())

Result:

\begin{tabular}{lrrrr}
 & col_0 & col_1 & col_2 & col_3 \\
row_0 & 0.56 & \textbf{0.74} & 0.48 & 0.70 \\
row_1 & 0.22 & 0.02 & 0.08 & \textbf{0.97} \\
row_2 & \textbf{0.80} & 0.26 & 0.39 & 0.30 \\
row_3 & \textbf{0.93} & 0.26 & 0.28 & 0.75 \\
row_4 & 0.39 & \textbf{0.45} & 0.10 & 0.30 \\
row_5 & 0.31 & \textbf{0.73} & 0.19 & 0.45 \\
row_6 & 0.23 & \textbf{0.61} & 0.31 & 0.21 \\
row_7 & 0.27 & 0.38 & 0.64 & \textbf{0.93} \\
row_8 & 0.15 & 0.09 & \textbf{0.48} & 0.44 \\
row_9 & \textbf{0.84} & 0.59 & 0.57 & 0.44 \\
\end{tabular}
Answered By: Lukas

You can use all pandas internal functions:

df = pd.DataFrame(data=[[1,2], [3,4]])
print(df.style.highlight_max(axis=0, props="font-weight:bold;").to_latex(convert_css=True))
\begin{tabular}{lrr}
 & 0 & 1 \\
0 & 1 & 2 \\
1 & \bfseries 3 & \bfseries 4 \\
\end{tabular}

or specify your own latex (command/options) pair:

# Highlight each column's maximum (axis=0) using an explicit LaTeX
# command/options pair instead of CSS.
bold_max = df.style.highlight_max(props="textbf:--rwrap;", axis=0)
print(bold_max.to_latex())
\begin{tabular}{lrr}
 & 0 & 1 \\
0 & 1 & 2 \\
1 & \textbf{3} & \textbf{4} \\
\end{tabular}

(use axis=1 for obtaining maximums in rows)

Answered By: Attack68
Categories: questions Tags: , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.