Removing certain end-standing values from list in Python
Question:
Is there an elegant Pythonic way to perform something like rstrip()
on a list?
Imagine, I have different lists:
# Sample inputs: every list ends with one or more 'D' items. l3 and l4 also
# contain a 'D' that is not part of the trailing run and must be kept.
l1 = ['A', 'D', 'D']
l2 = ['A', 'D']
l3 = ['D', 'A', 'D', 'D']
l4 = ['A', 'D', 'B', 'D']
I need a function that will remove all end-standing 'D'
elements from a given list (but not those that come before or in between other elements!).
# Print every sample list next to the result of stripping its trailing 'D's.
for mylist in (l1, l2, l3, l4):
    stripped = remove_end_elements(mylist, 'D')
    print(mylist, ' => ', stripped)
So the desired output would be:
['A', 'D', 'D'] => ['A']
['A', 'D'] => ['A']
['D', 'A', 'D', 'D'] => ['D', 'A']
['A', 'D', 'B', 'D'] => ['A','D','B']
One implementation that does the job is this:
def remove_end_elements(mylist, myelement):
    """Return a copy of ``mylist`` without its trailing ``myelement`` items.

    Only the unbroken run of ``myelement`` at the very end is dropped;
    occurrences before or between other elements are kept. The input list
    is not modified.
    """
    end = len(mylist)
    # Walk back from the end while the items still match. Tracking a
    # non-negative stop index (rather than a negative offset) avoids the
    # ``mylist[:0]`` pitfall, which would return an empty list whenever
    # there is nothing to strip.
    while end and mylist[end - 1] == myelement:
        end -= 1
    return mylist[:end]
Is there a more elegant / efficient way to do it?
To answer comment-questions:
- Either returning a new list or modifying the original list is fine (although the above implementation was written with creating a new list in mind).
- The real lists contain multi-character-strings (lines from a text file).
- What I’m actually trying to strip away are lines that fulfill certain criteria for "empty" (no characters OR only whitespace OR only whitespace and commas). I have that check implemented elsewhere.
- There can be an arbitrary number of these empty lines at the end of the list, but in most cases there will be just one.
I timed the different solutions offered so far, with simulated data close to my actual use case, and the actual is_empty_line() function that I’m using:
- Kelly Bundy’s solution: 0.029670200019609183
- Guy’s solution: 0.038380099984351546
- my original solution: 0.03837349999230355
- cards’ solution: 0.0408437000005506
- Timeless’ solution: 0.08083210000768304
Which one performs better does seem to depend on the complexity of the is_empty_line()
function (except for Timeless’ solution, which is consistently slower than everything else, and Kelly Bundy’s solution, which is consistently faster).
Answers:
You can iterate over the list in reverse to find the last element that is not equal to param,
and return a slice of the list up to and including that element
def remove_end_elements(my_list, param):
    """Strip the trailing run of ``param`` from ``my_list``.

    When the list is empty or does not end with ``param`` the very same
    list object is handed back; otherwise a new list is returned.
    """
    if not my_list or my_list[-1] != param:
        return my_list
    # The guard above guarantees at least one trailing match, so the
    # negative slice bound below is never zero.
    trailing = 0
    for item in reversed(my_list):
        if item != param:
            return my_list[:-trailing]
        trailing += 1
    # Every element matched.
    return []
A possible option with slicing :
def rstrip(lst, item="D"):
    """Return a slice of ``lst`` with the trailing run of ``item`` removed.

    A new sequence is always returned; ``lst`` itself is left untouched.
    """
    # Scan stop positions from the back and cut right after the first
    # element that differs from ``item``. This avoids materialising a full
    # list of (index, value) pairs just to look at the last few items.
    for upto in range(len(lst), 0, -1):
        if lst[upto - 1] != item:
            return lst[:upto]
    # Empty input, or every element equals ``item``.
    return lst[:0]
Output :
# Left-align each list's text in a 22-character column, then show the
# stripped result after the "=> " separator.
for l in (l1, l2, l3, l4):
    padded = format(str(l), "<22")
    print(padded, rstrip(l), sep="=> ")
['A', 'D', 'D'] => ['A']
['A', 'D'] => ['A']
['D', 'A', 'D', 'D'] => ['D', 'A']
['A', 'D', 'B', 'D'] => ['A', 'D', 'B']
Use itertools.groupby
to "hide" all the index/counting technicality.
from itertools import groupby
def list_rstrip(l, char='D'):
    """Strip the trailing run of ``char`` from ``l``.

    Returns a new list when items were removed, and ``l`` itself when there
    is nothing to strip (including when ``l`` is empty).
    """
    # groupby yields nothing for an empty input, so next() would raise
    # StopIteration; handle that case up front.
    if not l:
        return l
    # The first group of the reversed list is the run of equal items at
    # the end of ``l``; its key is the last element.
    grp_id, grp = next(groupby(reversed(l)))
    if grp_id == char:
        return l[:-len(list(grp))]
    return l
def remove_end_elements(mylist, myelement):
    """Pop trailing ``myelement`` items off ``mylist`` in place.

    The list passed in is mutated and the same object is returned.
    """
    while True:
        # A one-item (or empty) tail slice keeps the check safe on an
        # empty list, where indexing with [-1] would raise.
        tail = mylist[-1:]
        if myelement not in tail:
            return mylist
        mylist.pop()
As requested by @KellyBundy: this is my benchmarking script for the various solutions offered.
(All solutions were adjusted to more closely resemble my actual use case.)
import timeit
# Benchmark harness. Each candidate snippet is timed together with the shared
# preamble, so every run rebuilds `lines` from scratch. That is required
# because the in-place variant (imp5) mutates the list it is given.
statement = """
lines = ['ABCDEFGHIJ'] * 100
lines += [' ', ',, ,']
def line_is_empty_row(line: str) -> bool:
    if line.replace(",", "").strip() == "":
        return True
    return False
"""

# Asker's original approach: count trailing empty rows, then slice them off.
imp1 = """
def remove_empty_rows_from_end(lines: list[str]) -> list[str]:
    counter = 0
    for line in lines[::-1]:
        if line_is_empty_row(line):
            counter -= 1
        else:
            break
    # `counter or None` keeps the whole list when nothing was counted;
    # a plain lines[:0] would return an empty list.
    return lines[:counter or None]
result = remove_empty_rows_from_end(lines)
"""

# Guy's approach: early exit, then enumerate the reversed copy.
imp2 = """
def remove_end_elements(my_list):
    if not my_list or not line_is_empty_row(my_list[-1]):
        return my_list
    for i, line in enumerate(my_list[::-1]):
        if not line_is_empty_row(line):
            return my_list[:-i]
    return []
result = remove_end_elements(lines)
"""

# Timeless' approach: reversed 1-based enumeration plus next().
imp3 = """
def rstrip(lst):
    enum = reversed(list(enumerate(lst, start=1)))
    upto = next((i for i, line in enum if not line_is_empty_row(line)), 0)
    return lst[:upto]
result = rstrip(lines)
"""

# cards' approach: groupby on the reversed list. Grouping must be keyed on
# emptiness: the trailing rows here are different strings, so grouping by
# equality would strip only the last one. The next() default covers an
# empty input list.
imp4 = """
from itertools import groupby
def list_rstrip(my_list):
    is_empty, rows = next(
        groupby(reversed(my_list), key=line_is_empty_row), (False, ())
    )
    return my_list[:-len(list(rows))] if is_empty else my_list
result = list_rstrip(lines)
"""

# Kelly Bundy's approach: pop trailing empty rows in place.
imp5 = """
def remove_end_elements(mylist):
    while mylist and line_is_empty_row(mylist[-1]):
        mylist.pop()
    return mylist
result = remove_end_elements(lines)
"""

num = 10000
print("Kelly Bundy", timeit.timeit(statement + imp5, number=num))
print("Guy", timeit.timeit(statement + imp2, number=num))
print("mine", timeit.timeit(statement + imp1, number=num))
print("cards", timeit.timeit(statement + imp4, number=num))
print("Timeless", timeit.timeit(statement + imp3, number=num))
Is there an elegant Pythonic way to perform something like rstrip()
on a list?
Imagine, I have different lists:
# Sample inputs: every list ends with one or more 'D' items. l3 and l4 also
# contain a 'D' that is not part of the trailing run and must be kept.
l1 = ['A', 'D', 'D']
l2 = ['A', 'D']
l3 = ['D', 'A', 'D', 'D']
l4 = ['A', 'D', 'B', 'D']
I need a function that will remove all end-standing 'D'
elements from a given list (but not those that come before or in between other elements!).
# Print every sample list next to the result of stripping its trailing 'D's.
for mylist in (l1, l2, l3, l4):
    stripped = remove_end_elements(mylist, 'D')
    print(mylist, ' => ', stripped)
So the desired output would be:
['A', 'D', 'D'] => ['A']
['A', 'D'] => ['A']
['D', 'A', 'D', 'D'] => ['D', 'A']
['A', 'D', 'B', 'D'] => ['A','D','B']
One implementation that does the job is this:
def remove_end_elements(mylist, myelement):
    """Return a copy of ``mylist`` without its trailing ``myelement`` items.

    Only the unbroken run of ``myelement`` at the very end is dropped;
    occurrences before or between other elements are kept. The input list
    is not modified.
    """
    end = len(mylist)
    # Walk back from the end while the items still match. Tracking a
    # non-negative stop index (rather than a negative offset) avoids the
    # ``mylist[:0]`` pitfall, which would return an empty list whenever
    # there is nothing to strip.
    while end and mylist[end - 1] == myelement:
        end -= 1
    return mylist[:end]
Is there a more elegant / efficient way to do it?
To answer comment-questions:
- Either returning a new list or modifying the original list is fine (although the above implementation was written with creating a new list in mind).
- The real lists contain multi-character-strings (lines from a text file).
- What I’m actually trying to strip away are lines that fulfill certain criteria for "empty" (no characters OR only whitespace OR only whitespace and commas). I have that check implemented elsewhere.
- There can be an arbitrary number of these empty lines at the end of the list, but in most cases there will be just one.
I timed the different solutions offered so far, with simulated data close to my actual use case, and the actual is_empty_line() function that I’m using:
- Kelly Bundy’s solution: 0.029670200019609183
- Guy’s solution: 0.038380099984351546
- my original solution: 0.03837349999230355
- cards’ solution: 0.0408437000005506
- Timeless’ solution: 0.08083210000768304
Which one performs better does seem to depend on the complexity of the is_empty_line()
function (except for Timeless’ solution, which is consistently slower than everything else, and Kelly Bundy’s solution, which is consistently faster).
You can iterate over the list in reverse to find the last element that is not equal to param,
and return a slice of the list up to and including that element
def remove_end_elements(my_list, param):
    """Strip the trailing run of ``param`` from ``my_list``.

    When the list is empty or does not end with ``param`` the very same
    list object is handed back; otherwise a new list is returned.
    """
    if not my_list or my_list[-1] != param:
        return my_list
    # The guard above guarantees at least one trailing match, so the
    # negative slice bound below is never zero.
    trailing = 0
    for item in reversed(my_list):
        if item != param:
            return my_list[:-trailing]
        trailing += 1
    # Every element matched.
    return []
A possible option with slicing :
def rstrip(lst, item="D"):
    """Return a slice of ``lst`` with the trailing run of ``item`` removed.

    A new sequence is always returned; ``lst`` itself is left untouched.
    """
    # Scan stop positions from the back and cut right after the first
    # element that differs from ``item``. This avoids materialising a full
    # list of (index, value) pairs just to look at the last few items.
    for upto in range(len(lst), 0, -1):
        if lst[upto - 1] != item:
            return lst[:upto]
    # Empty input, or every element equals ``item``.
    return lst[:0]
Output :
# Left-align each list's text in a 22-character column, then show the
# stripped result after the "=> " separator.
for l in (l1, l2, l3, l4):
    padded = format(str(l), "<22")
    print(padded, rstrip(l), sep="=> ")
['A', 'D', 'D'] => ['A']
['A', 'D'] => ['A']
['D', 'A', 'D', 'D'] => ['D', 'A']
['A', 'D', 'B', 'D'] => ['A', 'D', 'B']
Use itertools.groupby
to "hide" all the index/counting technicality.
from itertools import groupby
def list_rstrip(l, char='D'):
    """Strip the trailing run of ``char`` from ``l``.

    Returns a new list when items were removed, and ``l`` itself when there
    is nothing to strip (including when ``l`` is empty).
    """
    # groupby yields nothing for an empty input, so next() would raise
    # StopIteration; handle that case up front.
    if not l:
        return l
    # The first group of the reversed list is the run of equal items at
    # the end of ``l``; its key is the last element.
    grp_id, grp = next(groupby(reversed(l)))
    if grp_id == char:
        return l[:-len(list(grp))]
    return l
def remove_end_elements(mylist, myelement):
    """Pop trailing ``myelement`` items off ``mylist`` in place.

    The list passed in is mutated and the same object is returned.
    """
    while True:
        # A one-item (or empty) tail slice keeps the check safe on an
        # empty list, where indexing with [-1] would raise.
        tail = mylist[-1:]
        if myelement not in tail:
            return mylist
        mylist.pop()
As requested by @KellyBundy: this is my benchmarking script for the various solutions offered.
(All solutions were adjusted to more closely resemble my actual use case.)
import timeit
# Benchmark harness. Each candidate snippet is timed together with the shared
# preamble, so every run rebuilds `lines` from scratch. That is required
# because the in-place variant (imp5) mutates the list it is given.
statement = """
lines = ['ABCDEFGHIJ'] * 100
lines += [' ', ',, ,']
def line_is_empty_row(line: str) -> bool:
    if line.replace(",", "").strip() == "":
        return True
    return False
"""

# Asker's original approach: count trailing empty rows, then slice them off.
imp1 = """
def remove_empty_rows_from_end(lines: list[str]) -> list[str]:
    counter = 0
    for line in lines[::-1]:
        if line_is_empty_row(line):
            counter -= 1
        else:
            break
    # `counter or None` keeps the whole list when nothing was counted;
    # a plain lines[:0] would return an empty list.
    return lines[:counter or None]
result = remove_empty_rows_from_end(lines)
"""

# Guy's approach: early exit, then enumerate the reversed copy.
imp2 = """
def remove_end_elements(my_list):
    if not my_list or not line_is_empty_row(my_list[-1]):
        return my_list
    for i, line in enumerate(my_list[::-1]):
        if not line_is_empty_row(line):
            return my_list[:-i]
    return []
result = remove_end_elements(lines)
"""

# Timeless' approach: reversed 1-based enumeration plus next().
imp3 = """
def rstrip(lst):
    enum = reversed(list(enumerate(lst, start=1)))
    upto = next((i for i, line in enum if not line_is_empty_row(line)), 0)
    return lst[:upto]
result = rstrip(lines)
"""

# cards' approach: groupby on the reversed list. Grouping must be keyed on
# emptiness: the trailing rows here are different strings, so grouping by
# equality would strip only the last one. The next() default covers an
# empty input list.
imp4 = """
from itertools import groupby
def list_rstrip(my_list):
    is_empty, rows = next(
        groupby(reversed(my_list), key=line_is_empty_row), (False, ())
    )
    return my_list[:-len(list(rows))] if is_empty else my_list
result = list_rstrip(lines)
"""

# Kelly Bundy's approach: pop trailing empty rows in place.
imp5 = """
def remove_end_elements(mylist):
    while mylist and line_is_empty_row(mylist[-1]):
        mylist.pop()
    return mylist
result = remove_end_elements(lines)
"""

num = 10000
print("Kelly Bundy", timeit.timeit(statement + imp5, number=num))
print("Guy", timeit.timeit(statement + imp2, number=num))
print("mine", timeit.timeit(statement + imp1, number=num))
print("cards", timeit.timeit(statement + imp4, number=num))
print("Timeless", timeit.timeit(statement + imp3, number=num))