is there a way to implement pandas wide_to_long in Polars?
Question:
I use Pandas wide to long to stack survey data and it works beautifully with regex and stub names, is this possible to do in Polars ?
e.g. in Pandas –
import pandas as pd

# Survey-style wide data: one height column (`ht_*`) per age group.
df = pd.DataFrame({
    'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
    'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
    'ht_one': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
    'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
})

# Stack the `ht_*` columns into a single `ht` column; the suffix
# ("one"/"two") becomes the new `age` index level.
changed_df = pd.wide_to_long(df,
                             stubnames='ht',
                             i=['famid', 'birth'],
                             j='age',
                             sep='_',
                             suffix=r'\w+')  # was r'w+' — the backslash was lost in transcription
stubnames can take a list as well.
Edit- Added code after taking inspiration from Jqurious –
import pandas as pd
import numpy as np
import polars as pl
import re

# Create age group data
age_groups = np.random.choice(['0-18', '19-35', '36-50', '51-65', '65+'], size=10)
# Create gender data
genders = np.random.choice(['Male', 'Female', 'Other'], size=10)
# Create familiarity and affinity data
fam_aff = np.random.rand(10, 4)
# Create column names
cols = ['Age_group', 'Gender', 'Familiarity_loop1', 'Familiarity_loop2', 'Affinity_loop1', 'Affinity_loop2']
# Combine data into dataframe
data = np.column_stack([age_groups, genders, fam_aff])
df = pd.DataFrame(data=data, columns=cols)
df["unique_records"] = np.arange(len(df))

# was '^.*_loopd': the backslash was lost in transcription, so the literal
# 'd' never matched and every column fell into `sans_loop_list`.
regex_pattern = r'^.*_loop\d'

# get polars DF
pl_df = pl.from_pandas(df)
# get all columns list
col_list = pl_df.columns
loop_list = []       # columns which contain _loop<N>
sans_loop_list = []  # columns which do not
for col in col_list:
    if re.search(regex_pattern, col):
        loop_list.append(col)
    else:
        sans_loop_list.append(col)

# Melt the loop columns into long format, stripping the `_loop<N>` suffix
# so both loops of the same question share one `master_stack` label.
pl_melt_df = (
    pl_df
    .melt(
        id_vars=pl_df.select(sans_loop_list).columns,
        variable_name="master_stack")
    .with_columns(pl.col("master_stack").str.replace(r"_loop\d", ""))  # was r"_loopd"
)
pl_melt_df.pivot(index=sans_loop_list, columns="master_stack", values="value")
I want to see Affinity and Familiarity as their own columns, but I am not able to achieve it.
Edit 2 – Added Polars output and Pandas output
Answers:
It looks like a type of .melt
:
# Melt the `ht_*` columns; every other column becomes an id variable,
# and the stub prefix is stripped so only the age suffix remains.
(df
 .melt(
     id_vars=df.select(pl.exclude(r"^ht_\w+$")).columns,  # was r"^ht_w+$" — backslash lost in transcription
     variable_name="age")
 .with_columns(
     pl.col("age").str.replace(r"^[^_]+_", ""))
)
shape: (18, 4)
┌───────┬───────┬─────┬───────┐
│ famid ┆ birth ┆ age ┆ value │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str ┆ f64 │
╞═══════╪═══════╪═════╪═══════╡
│ 1 ┆ 1 ┆ one ┆ 2.8 │
│ 1 ┆ 2 ┆ one ┆ 2.9 │
│ 1 ┆ 3 ┆ one ┆ 2.2 │
│ 2 ┆ 1 ┆ one ┆ 2.0 │
│ … ┆ … ┆ … ┆ … │
│ 2 ┆ 3 ┆ two ┆ 2.4 │
│ 3 ┆ 1 ┆ two ┆ 3.3 │
│ 3 ┆ 2 ┆ two ┆ 3.4 │
│ 3 ┆ 3 ┆ two ┆ 2.9 │
└───────┴───────┴─────┴───────┘
Update: Showing how to .melt
+ .pivot
as per updated example.
# was r"_loopd+$": restore the lost backslash so `\d+` matches the loop number
suffix = r"_loop\d+$"
id_vars = df.select(pl.exclude("^.+" + suffix)).columns

(df.melt(id_vars)
 .with_columns(pl.col("variable").str.replace(suffix, ""))
 # window-cumcount gives each (variable, occurrence) pair a row id for the pivot index
 .with_columns(row_nr=pl.first().cumcount().over("variable"))
 .pivot(index=id_vars + ["row_nr"], columns="variable", values="value", aggregate_function="first")
)
shape: (20, 6)
┌───────────┬────────┬────────────────┬────────┬──────────────────────┬────────────────────┐
│ Age_group ┆ Gender ┆ unique_records ┆ row_nr ┆ Familiarity ┆ Affinity │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ str ┆ i64 ┆ i64 ┆ str ┆ str │
╞═══════════╪════════╪════════════════╪════════╪══════════════════════╪════════════════════╡
│ 36-50 ┆ Other ┆ 0 ┆ 0 ┆ 0.5569650307413312 ┆ 0.9752962344272071 │
│ 19-35 ┆ Other ┆ 1 ┆ 1 ┆ 0.8723228408633724 ┆ 0.9051378743187902 │
│ 19-35 ┆ Other ┆ 2 ┆ 2 ┆ 0.010929392505575009 ┆ 0.7381748177290146 │
│ 36-50 ┆ Female ┆ 3 ┆ 3 ┆ 0.9782593916079607 ┆ 0.5099868864386063 │
│ … ┆ … ┆ … ┆ … ┆ … ┆ … │
│ 0-18 ┆ Female ┆ 6 ┆ 16 ┆ 0.6795089322714142 ┆ 0.3982539618169999 │
│ 19-35 ┆ Female ┆ 7 ┆ 17 ┆ 0.8168297900583801 ┆ 0.6198522863927297 │
│ 51-65 ┆ Female ┆ 8 ┆ 18 ┆ 0.46387232803532885 ┆ 0.9925845189718061 │
│ 51-65 ┆ Male ┆ 9 ┆ 19 ┆ 0.20514774525608237 ┆ 0.9388295904692754 │
└───────────┴────────┴────────────────┴────────┴──────────────────────┴────────────────────┘
Pivot explanation:
df = pl.DataFrame({"variable": ["familiarity"] * 3 + ["affinity"] * 3, "value": [1, 2, 3, 4, 5, 6]})
shape: (6, 2)
┌─────────────┬───────┐
│ variable ┆ value │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════════════╪═══════╡
│ familiarity ┆ 1 │
│ familiarity ┆ 2 │
│ familiarity ┆ 3 │
│ affinity ┆ 4 │
│ affinity ┆ 5 │
│ affinity ┆ 6 │
└─────────────┴───────┘
We use a window function to generate "row ids" to be used in the pivot index.
This would be what you would use .groupby
+ .cumcount
for in pandas.
>>> df.with_columns(row_nr = pl.first().cumcount().over("variable"))
shape: (6, 3)
┌─────────────┬───────┬────────┐
│ variable ┆ value ┆ row_nr │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════════════╪═══════╪════════╡
│ familiarity ┆ 1 ┆ 0 │
│ familiarity ┆ 2 ┆ 1 │
│ familiarity ┆ 3 ┆ 2 │
│ affinity ┆ 4 ┆ 0 │
│ affinity ┆ 5 ┆ 1 │
│ affinity ┆ 6 ┆ 2 │
└─────────────┴───────┴────────┘
(df.with_columns(row_nr = pl.first().cumcount().over("variable"))
.pivot(index="row_nr", columns="variable", values="value", aggregate_function="first"))
shape: (3, 3)
┌────────┬─────────────┬──────────┐
│ row_nr ┆ familiarity ┆ affinity │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞════════╪═════════════╪══════════╡
│ 0 ┆ 1 ┆ 4 │
│ 1 ┆ 2 ┆ 5 │
│ 2 ┆ 3 ┆ 6 │
└────────┴─────────────┴──────────┘
Here’s an alternative to the .melt
/ .pivot
approach.
df = pl.DataFrame({
"id": ["a", "b", "c", "d"],
"Fam_loop1": [1, 2, 3, 4],
"Aff_loop1": [8, 7, 6, 5],
"Aff_loop2": [4, 3, 2, 1]
})
shape: (4, 4)
┌─────┬───────────┬───────────┬───────────┐
│ id ┆ Fam_loop1 ┆ Aff_loop1 ┆ Aff_loop2 │
│ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 ┆ i64 │
╞═════╪═══════════╪═══════════╪═══════════╡
│ a ┆ 1 ┆ 8 ┆ 4 │
│ b ┆ 2 ┆ 7 ┆ 3 │
│ c ┆ 3 ┆ 6 ┆ 2 │
│ d ┆ 4 ┆ 5 ┆ 1 │
└─────┴───────────┴───────────┴───────────┘
If we fill in missing columns e.g. Fam_loop2
in the above example – we can then create lists of equal size:
[id, id], [fam_loop1, fam_loop2], [aff_loop1, aff_loop2]
Which we can then .explode
to create the long format.
This should be a much more efficient approach, and also works with the Lazy API.
Update: After reading Tricky Long Pivot by Reverse Aggregation transformation (Pandas) I thought it would be nice to allow the prefix or suffix to be used as the resulting column names.
Using pivot=True
would use the suffixes as column names.
def wide_to_long(df, starts_with=None, ends_with=None, pivot=False, pivot_name="prefix"):
    """Reshape a wide Polars DataFrame into long format via list explosion.

    Wide columns are identified by a regex anchored at the start
    (``starts_with``) or at the end (``ends_with``) of the column name;
    exactly one of the two must be given. Missing prefix/suffix
    combinations are filled with typed null columns so that every list
    has equal length before ``.explode``.

    Parameters
    ----------
    df : pl.DataFrame
        Frame containing wide columns such as ``Fam_loop1``, ``Aff_loop2``.
    starts_with : str, optional
        Regex matching the shared prefix of the wide columns.
    ends_with : str, optional
        Regex matching the shared suffix of the wide columns.
    pivot : bool
        If True, use the suffixes (instead of the prefixes) as the
        resulting column names and add a ``pivot_name`` column holding
        the prefixes.
    pivot_name : str
        Name of the column that stores the prefixes when ``pivot=True``.

    Raises
    ------
    ValueError
        If both, or neither, of ``starts_with`` / ``ends_with`` are given.
    """
    if starts_with and ends_with:
        # was the same message as the "neither" branch, which misreported the problem
        raise ValueError("Provide only one of `starts_with` or `ends_with`, not both.")
    if starts_with is None and ends_with is None:
        raise ValueError("Must provide either `starts_with` or `ends_with`.")
    if starts_with:
        pattern = starts_with
        prefix = pl.all().str.extract("(" + pattern + ")")
        suffix = pl.all().str.replace(pattern, "")
    if ends_with:
        pattern = ends_with
        prefix = pl.all().str.replace(pattern, "")
        suffix = pl.all().str.extract("(" + pattern + ")")
    # Work on the column *names* as a one-column frame.
    columns = pl.DataFrame(df.columns, schema=["col"])
    is_wide = pl.all().str.contains(pattern)
    narrow = columns.filter(is_wide.is_not())
    wide = columns.filter(is_wide)
    # Attach prefix / suffix / dtype metadata to each wide column name.
    wide = wide.with_columns(
        prefix=prefix,
        suffix=suffix,
        dtype=pl.all().map_dict(df.schema),
    )
    prefixes = wide.unique(subset="prefix", maintain_order=True)
    suffixes = wide.select(pl.col("suffix").unique(maintain_order=True))
    # Cross-join prefixes x suffixes: the full set of columns that *should* exist.
    combinations = (
        suffixes
        .join(prefixes, how="cross")
        .select(
            col=pl.col("prefix") + pl.col("suffix"),
            dtype="dtype",
        )
    )
    missing = (
        combinations
        .join(columns, how="anti", on=columns.columns)
    )
    # Typed null columns for the missing combinations, so every list we
    # build below has the same length and can be exploded together.
    nulls = (
        pl.lit(None).alias(col).cast(dtype)
        for col, dtype in missing.select("col", "dtype").iter_rows()
    )
    meta = []
    prefix_columns = []
    height = suffixes.height
    names = prefixes.get_column("prefix")
    fmt = "^{}.+$"
    # use suffixes as column names
    if pivot:
        height = prefixes.height
        names = suffixes.to_series()
        fmt = "^.+{}$"
        meta = (
            pl.lit(prefix).alias(f"_{pivot_name}{n}")
            for n, prefix in enumerate(prefixes.get_column("prefix"))
        )
        prefix_columns = [
            # was rf"^_{pivot_name}d+$" — the backslash in `\d+` was lost in transcription
            pl.concat_list(pl.list(rf"^_{pivot_name}\d+$")).alias(pivot_name)
        ]
    # Repeat each id column `height` times so it lines up with the wide lists.
    narrow_columns = (
        pl.concat_list(pl.list(name) for _ in range(height))
        for name in narrow.to_series()
    )
    wide_columns = (
        pl.concat_list(pl.list(fmt.format(name)).alias(name))
        for name in names
    )
    return (
        df.with_columns(nulls)
        .with_columns(meta)
        .select(*narrow_columns, *prefix_columns, *wide_columns)
        .explode(pl.all())
    )
>>> wide_to_long(df, ends_with=r"_loop\d+$")
shape: (8, 3)
┌─────┬──────┬─────┐
│ id ┆ Fam ┆ Aff │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪══════╪═════╡
│ a ┆ 1 ┆ 8 │
│ b ┆ 2 ┆ 7 │
│ c ┆ 3 ┆ 6 │
│ d ┆ 4 ┆ 5 │
│ a ┆ null ┆ 4 │
│ b ┆ null ┆ 3 │
│ c ┆ null ┆ 2 │
│ d ┆ null ┆ 1 │
└─────┴──────┴─────┘
Using a size of 500_000
from your example:
start = time.perf_counter()
melt_pivot(pl_df)
time.perf_counter() - start
# 2.419576150015928
start = time.perf_counter()
wide_to_long(pl_df, ends_with=r"_loop\d+$")
time.perf_counter() - start
# 0.06175561097916216
2.4s
-> 0.06s
I use Pandas wide to long to stack survey data and it works beautifully with regex and stub names, is this possible to do in Polars ?
e.g. in Pandas –
import pandas as pd

# Survey-style wide data: one height column (`ht_*`) per age group.
df = pd.DataFrame({
    'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
    'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
    'ht_one': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
    'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
})

# Stack the `ht_*` columns into a single `ht` column; the suffix
# ("one"/"two") becomes the new `age` index level.
changed_df = pd.wide_to_long(df,
                             stubnames='ht',
                             i=['famid', 'birth'],
                             j='age',
                             sep='_',
                             suffix=r'\w+')  # was r'w+' — the backslash was lost in transcription
stubnames can take a list as well.
Edit- Added code after taking inspiration from Jqurious –
import pandas as pd
import numpy as np
import polars as pl
import re

# Create age group data
age_groups = np.random.choice(['0-18', '19-35', '36-50', '51-65', '65+'], size=10)
# Create gender data
genders = np.random.choice(['Male', 'Female', 'Other'], size=10)
# Create familiarity and affinity data
fam_aff = np.random.rand(10, 4)
# Create column names
cols = ['Age_group', 'Gender', 'Familiarity_loop1', 'Familiarity_loop2', 'Affinity_loop1', 'Affinity_loop2']
# Combine data into dataframe
data = np.column_stack([age_groups, genders, fam_aff])
df = pd.DataFrame(data=data, columns=cols)
df["unique_records"] = np.arange(len(df))

# was '^.*_loopd': the backslash was lost in transcription, so the literal
# 'd' never matched and every column fell into `sans_loop_list`.
regex_pattern = r'^.*_loop\d'

# get polars DF
pl_df = pl.from_pandas(df)
# get all columns list
col_list = pl_df.columns
loop_list = []       # columns which contain _loop<N>
sans_loop_list = []  # columns which do not
for col in col_list:
    if re.search(regex_pattern, col):
        loop_list.append(col)
    else:
        sans_loop_list.append(col)

# Melt the loop columns into long format, stripping the `_loop<N>` suffix
# so both loops of the same question share one `master_stack` label.
pl_melt_df = (
    pl_df
    .melt(
        id_vars=pl_df.select(sans_loop_list).columns,
        variable_name="master_stack")
    .with_columns(pl.col("master_stack").str.replace(r"_loop\d", ""))  # was r"_loopd"
)
pl_melt_df.pivot(index=sans_loop_list, columns="master_stack", values="value")
I want to see Affinity and Familiarity as their own columns, but I am not able to achieve it.
Edit 2 – Added Polars output and Pandas output
It looks like a type of .melt
:
# Melt the `ht_*` columns; every other column becomes an id variable,
# and the stub prefix is stripped so only the age suffix remains.
(df
 .melt(
     id_vars=df.select(pl.exclude(r"^ht_\w+$")).columns,  # was r"^ht_w+$" — backslash lost in transcription
     variable_name="age")
 .with_columns(
     pl.col("age").str.replace(r"^[^_]+_", ""))
)
shape: (18, 4)
┌───────┬───────┬─────┬───────┐
│ famid ┆ birth ┆ age ┆ value │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str ┆ f64 │
╞═══════╪═══════╪═════╪═══════╡
│ 1 ┆ 1 ┆ one ┆ 2.8 │
│ 1 ┆ 2 ┆ one ┆ 2.9 │
│ 1 ┆ 3 ┆ one ┆ 2.2 │
│ 2 ┆ 1 ┆ one ┆ 2.0 │
│ … ┆ … ┆ … ┆ … │
│ 2 ┆ 3 ┆ two ┆ 2.4 │
│ 3 ┆ 1 ┆ two ┆ 3.3 │
│ 3 ┆ 2 ┆ two ┆ 3.4 │
│ 3 ┆ 3 ┆ two ┆ 2.9 │
└───────┴───────┴─────┴───────┘
Update: Showing how to .melt
+ .pivot
as per updated example.
# was r"_loopd+$": restore the lost backslash so `\d+` matches the loop number
suffix = r"_loop\d+$"
id_vars = df.select(pl.exclude("^.+" + suffix)).columns

(df.melt(id_vars)
 .with_columns(pl.col("variable").str.replace(suffix, ""))
 # window-cumcount gives each (variable, occurrence) pair a row id for the pivot index
 .with_columns(row_nr=pl.first().cumcount().over("variable"))
 .pivot(index=id_vars + ["row_nr"], columns="variable", values="value", aggregate_function="first")
)
shape: (20, 6)
┌───────────┬────────┬────────────────┬────────┬──────────────────────┬────────────────────┐
│ Age_group ┆ Gender ┆ unique_records ┆ row_nr ┆ Familiarity ┆ Affinity │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ str ┆ i64 ┆ i64 ┆ str ┆ str │
╞═══════════╪════════╪════════════════╪════════╪══════════════════════╪════════════════════╡
│ 36-50 ┆ Other ┆ 0 ┆ 0 ┆ 0.5569650307413312 ┆ 0.9752962344272071 │
│ 19-35 ┆ Other ┆ 1 ┆ 1 ┆ 0.8723228408633724 ┆ 0.9051378743187902 │
│ 19-35 ┆ Other ┆ 2 ┆ 2 ┆ 0.010929392505575009 ┆ 0.7381748177290146 │
│ 36-50 ┆ Female ┆ 3 ┆ 3 ┆ 0.9782593916079607 ┆ 0.5099868864386063 │
│ … ┆ … ┆ … ┆ … ┆ … ┆ … │
│ 0-18 ┆ Female ┆ 6 ┆ 16 ┆ 0.6795089322714142 ┆ 0.3982539618169999 │
│ 19-35 ┆ Female ┆ 7 ┆ 17 ┆ 0.8168297900583801 ┆ 0.6198522863927297 │
│ 51-65 ┆ Female ┆ 8 ┆ 18 ┆ 0.46387232803532885 ┆ 0.9925845189718061 │
│ 51-65 ┆ Male ┆ 9 ┆ 19 ┆ 0.20514774525608237 ┆ 0.9388295904692754 │
└───────────┴────────┴────────────────┴────────┴──────────────────────┴────────────────────┘
Pivot explanation:
df = pl.DataFrame({"variable": ["familiarity"] * 3 + ["affinity"] * 3, "value": [1, 2, 3, 4, 5, 6]})
shape: (6, 2)
┌─────────────┬───────┐
│ variable ┆ value │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════════════╪═══════╡
│ familiarity ┆ 1 │
│ familiarity ┆ 2 │
│ familiarity ┆ 3 │
│ affinity ┆ 4 │
│ affinity ┆ 5 │
│ affinity ┆ 6 │
└─────────────┴───────┘
We use a window function to generate "row ids" to be used in the pivot index.
This would be what you would use .groupby
+ .cumcount
for in pandas.
>>> df.with_columns(row_nr = pl.first().cumcount().over("variable"))
shape: (6, 3)
┌─────────────┬───────┬────────┐
│ variable ┆ value ┆ row_nr │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════════════╪═══════╪════════╡
│ familiarity ┆ 1 ┆ 0 │
│ familiarity ┆ 2 ┆ 1 │
│ familiarity ┆ 3 ┆ 2 │
│ affinity ┆ 4 ┆ 0 │
│ affinity ┆ 5 ┆ 1 │
│ affinity ┆ 6 ┆ 2 │
└─────────────┴───────┴────────┘
(df.with_columns(row_nr = pl.first().cumcount().over("variable"))
.pivot(index="row_nr", columns="variable", values="value", aggregate_function="first"))
shape: (3, 3)
┌────────┬─────────────┬──────────┐
│ row_nr ┆ familiarity ┆ affinity │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞════════╪═════════════╪══════════╡
│ 0 ┆ 1 ┆ 4 │
│ 1 ┆ 2 ┆ 5 │
│ 2 ┆ 3 ┆ 6 │
└────────┴─────────────┴──────────┘
Here’s an alternative to the .melt
/ .pivot
approach.
df = pl.DataFrame({
"id": ["a", "b", "c", "d"],
"Fam_loop1": [1, 2, 3, 4],
"Aff_loop1": [8, 7, 6, 5],
"Aff_loop2": [4, 3, 2, 1]
})
shape: (4, 4)
┌─────┬───────────┬───────────┬───────────┐
│ id ┆ Fam_loop1 ┆ Aff_loop1 ┆ Aff_loop2 │
│ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 ┆ i64 │
╞═════╪═══════════╪═══════════╪═══════════╡
│ a ┆ 1 ┆ 8 ┆ 4 │
│ b ┆ 2 ┆ 7 ┆ 3 │
│ c ┆ 3 ┆ 6 ┆ 2 │
│ d ┆ 4 ┆ 5 ┆ 1 │
└─────┴───────────┴───────────┴───────────┘
If we fill in missing columns e.g. Fam_loop2
in the above example – we can then create lists of equal size:
[id, id], [fam_loop1, fam_loop2], [aff_loop1, aff_loop2]
Which we can then .explode
to create the long format.
This should be a much more efficient approach, and also works with the Lazy API.
Update: After reading Tricky Long Pivot by Reverse Aggregation transformation (Pandas) I thought it would be nice to allow the prefix or suffix to be used as the resulting column names.
Using pivot=True
would use the suffixes as column names.
def wide_to_long(df, starts_with=None, ends_with=None, pivot=False, pivot_name="prefix"):
    """Reshape a wide Polars DataFrame into long format via list explosion.

    Wide columns are identified by a regex anchored at the start
    (``starts_with``) or at the end (``ends_with``) of the column name;
    exactly one of the two must be given. Missing prefix/suffix
    combinations are filled with typed null columns so that every list
    has equal length before ``.explode``.

    Parameters
    ----------
    df : pl.DataFrame
        Frame containing wide columns such as ``Fam_loop1``, ``Aff_loop2``.
    starts_with : str, optional
        Regex matching the shared prefix of the wide columns.
    ends_with : str, optional
        Regex matching the shared suffix of the wide columns.
    pivot : bool
        If True, use the suffixes (instead of the prefixes) as the
        resulting column names and add a ``pivot_name`` column holding
        the prefixes.
    pivot_name : str
        Name of the column that stores the prefixes when ``pivot=True``.

    Raises
    ------
    ValueError
        If both, or neither, of ``starts_with`` / ``ends_with`` are given.
    """
    if starts_with and ends_with:
        # was the same message as the "neither" branch, which misreported the problem
        raise ValueError("Provide only one of `starts_with` or `ends_with`, not both.")
    if starts_with is None and ends_with is None:
        raise ValueError("Must provide either `starts_with` or `ends_with`.")
    if starts_with:
        pattern = starts_with
        prefix = pl.all().str.extract("(" + pattern + ")")
        suffix = pl.all().str.replace(pattern, "")
    if ends_with:
        pattern = ends_with
        prefix = pl.all().str.replace(pattern, "")
        suffix = pl.all().str.extract("(" + pattern + ")")
    # Work on the column *names* as a one-column frame.
    columns = pl.DataFrame(df.columns, schema=["col"])
    is_wide = pl.all().str.contains(pattern)
    narrow = columns.filter(is_wide.is_not())
    wide = columns.filter(is_wide)
    # Attach prefix / suffix / dtype metadata to each wide column name.
    wide = wide.with_columns(
        prefix=prefix,
        suffix=suffix,
        dtype=pl.all().map_dict(df.schema),
    )
    prefixes = wide.unique(subset="prefix", maintain_order=True)
    suffixes = wide.select(pl.col("suffix").unique(maintain_order=True))
    # Cross-join prefixes x suffixes: the full set of columns that *should* exist.
    combinations = (
        suffixes
        .join(prefixes, how="cross")
        .select(
            col=pl.col("prefix") + pl.col("suffix"),
            dtype="dtype",
        )
    )
    missing = (
        combinations
        .join(columns, how="anti", on=columns.columns)
    )
    # Typed null columns for the missing combinations, so every list we
    # build below has the same length and can be exploded together.
    nulls = (
        pl.lit(None).alias(col).cast(dtype)
        for col, dtype in missing.select("col", "dtype").iter_rows()
    )
    meta = []
    prefix_columns = []
    height = suffixes.height
    names = prefixes.get_column("prefix")
    fmt = "^{}.+$"
    # use suffixes as column names
    if pivot:
        height = prefixes.height
        names = suffixes.to_series()
        fmt = "^.+{}$"
        meta = (
            pl.lit(prefix).alias(f"_{pivot_name}{n}")
            for n, prefix in enumerate(prefixes.get_column("prefix"))
        )
        prefix_columns = [
            # was rf"^_{pivot_name}d+$" — the backslash in `\d+` was lost in transcription
            pl.concat_list(pl.list(rf"^_{pivot_name}\d+$")).alias(pivot_name)
        ]
    # Repeat each id column `height` times so it lines up with the wide lists.
    narrow_columns = (
        pl.concat_list(pl.list(name) for _ in range(height))
        for name in narrow.to_series()
    )
    wide_columns = (
        pl.concat_list(pl.list(fmt.format(name)).alias(name))
        for name in names
    )
    return (
        df.with_columns(nulls)
        .with_columns(meta)
        .select(*narrow_columns, *prefix_columns, *wide_columns)
        .explode(pl.all())
    )
>>> wide_to_long(df, ends_with=r"_loop\d+$")
shape: (8, 3)
┌─────┬──────┬─────┐
│ id ┆ Fam ┆ Aff │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪══════╪═════╡
│ a ┆ 1 ┆ 8 │
│ b ┆ 2 ┆ 7 │
│ c ┆ 3 ┆ 6 │
│ d ┆ 4 ┆ 5 │
│ a ┆ null ┆ 4 │
│ b ┆ null ┆ 3 │
│ c ┆ null ┆ 2 │
│ d ┆ null ┆ 1 │
└─────┴──────┴─────┘
Using a size of 500_000
from your example:
start = time.perf_counter()
melt_pivot(pl_df)
time.perf_counter() - start
# 2.419576150015928
start = time.perf_counter()
wide_to_long(pl_df, ends_with=r"_loop\d+$")
time.perf_counter() - start
# 0.06175561097916216
2.4s
-> 0.06s