How to check if occurrences of identical consecutive numbers is below a threshold in pandas series
Question:
I need to check whether the number of identical consecutive values stays within a certain threshold, e.g. at most two identical consecutive numbers.
pd.Series(data=[-1, -1, 2, -2, 2, -2, 1, 1]) # True
pd.Series(data=[-1, -1, -1, 2, 2, -2, 1, 1]) # False
Further checks:
Only the numbers +1 and -1 are allowed to occur as consecutive numbers, with a maximum of two occurrences; any other number must not repeat consecutively.
pd.Series(data=[-1, 1, -2, 2, -2, 2, -1, 1]) # True
pd.Series(data=[1, 1, -2, 2, -2, 2, -1, 1]) # True
pd.Series(data=[-1, -1, 2, 2, -2, 1, 1, -2]) # False
pd.Series(data=[-1, 1, -2, -2, 1, -1, 2, -2]) # False
Answers:
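One option with groupby.size and any (note that this returns True when a run exceeds the threshold, i.e. the inverse of the flags shown in the question):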
def check(s, N=2):
    """Report whether any run of identical consecutive values is longer than N.

    Args:
        s: pandas Series to inspect.
        N: longest run length that is still acceptable.

    Returns:
        True if at least one run contains more than ``N`` elements,
        otherwise False.
    """
    # A run begins wherever a value differs from the one before it; the
    # running total of those change points gives every run its own label.
    starts_new_run = s != s.shift()
    run_id = starts_new_run.cumsum()
    run_lengths = s.groupby(run_id).size()
    return (run_lengths > N).any()
Or using a rolling
sum:
def check(s, N=2):
    """Report whether any window of N + 1 consecutive values is constant.

    Args:
        s: pandas Series to inspect.
        N: longest run length that is still acceptable.

    Returns:
        True if some window of ``N + 1`` consecutive elements consists of a
        single repeated value, otherwise False.
    """

    def window_is_constant(window):
        # The window holds N + 1 elements, so more than N matches with its
        # first element means every element of the window is identical.
        matches = window.eq(window.iloc[0]).sum()
        return matches > N

    flags = s.rolling(N + 1).apply(window_is_constant)
    return flags.any()
check(pd.Series(data=[-1, -1, 2, -2, 2, -2, 1, 1]))
# False
check(pd.Series(data=[-1, -1, -1, 2, 2, -2, 1, 1]))
# True
filtering from a whitelist:
def check(s, N=2):
    """Return True when every run of identical consecutive values is allowed.

    A run of a whitelisted value (1 or -1) may contain at most ``N``
    elements; any other value must not repeat consecutively at all.

    Args:
        s: pandas Series of numbers to validate.
        N: maximum permitted run length for the whitelisted values.

    Returns:
        bool: True if no run breaks the rules, False otherwise.
    """
    # A new run starts wherever a value differs from its predecessor, so the
    # cumulative sum of those change points labels each run uniquely.
    runs = s.groupby(s.ne(s.shift()).cumsum())
    lengths = runs.size()
    whitelisted = runs.first().isin([1, -1])
    # Counting only the whitelisted values would let repeats such as
    # ``2, 2`` through, so non-whitelisted runs get a limit of one.
    too_long = (whitelisted & lengths.gt(N)) | (~whitelisted & lengths.gt(1))
    return not too_long.any()
check(pd.Series(data=[-1, 1, -2, 2, -2, 2, -1, 1]))
# True
check(pd.Series(data=[1, 1, -2, 2, -2, 2, -1, 1]))
# True
check(pd.Series(data=[-1, -1, 2, 2, -2, 1, 1, -2]))
# False
check(pd.Series(data=[-1, 1, -2, -2, 1, -1, 2, -2]))
# False
check(pd.Series(data=[-1, -1, -1, 2, -2, 1, 1, -2]))
# False
You can use the shift
method along with Boolean indexing to achieve this. The idea is to compare each element with the previous one, and if they are equal and not equal to +1 or -1, return False.
Here’s an example implementation:
def check_consecutive(series, max_run=2, allowed=(1, -1)):
    """Return True when every run of identical consecutive values is allowed.

    Values listed in ``allowed`` may repeat consecutively up to ``max_run``
    times; every other value must not be followed by itself.

    Args:
        series: pandas Series of numbers to validate.
        max_run: maximum run length permitted for the ``allowed`` values.
        allowed: values that are permitted to repeat consecutively.

    Returns:
        bool: True if no run breaks the rules, False otherwise.
    """
    # Label each run: a new run starts wherever a value differs from the
    # previous one, and the cumulative sum of those starts is the run id.
    runs = series.groupby(series.ne(series.shift()).cumsum())
    lengths = runs.size()
    is_allowed = runs.first().isin(list(allowed))
    # The rule is per run, not a global tally of repeated positions:
    # allowed values are capped at max_run, everything else at one.
    violations = (is_allowed & lengths.gt(max_run)) | (
        ~is_allowed & lengths.gt(1)
    )
    return not violations.any()
print(check_consecutive(pd.Series(data=[-1, -1, 2, -2, 2, -2, 1, 1]))) # True
print(check_consecutive(pd.Series(data=[-1, -1, -1, 2, 2, -2, 1, 1]))) # False
print(check_consecutive(pd.Series(data=[-1, 1, -2, 2, -2, 2, -1, 1]))) # True
print(check_consecutive(pd.Series(data=[1, 1, -2, 2, -2, 2, -1, 1]))) # True
print(check_consecutive(pd.Series(data=[-1, -1, 2, 2, -2, 1, 1, -2]))) # False
print(check_consecutive(pd.Series(data=[-1, 1, -2, -2, 1, -1, 2, -2]))) # False
I need to check whether the number of identical consecutive values stays within a certain threshold, e.g. at most two identical consecutive numbers.
pd.Series(data=[-1, -1, 2, -2, 2, -2, 1, 1]) # True
pd.Series(data=[-1, -1, -1, 2, 2, -2, 1, 1]) # False
Further checks:
Only the numbers +1 and -1 are allowed to occur as consecutive numbers, with a maximum of two occurrences; any other number must not repeat consecutively.
pd.Series(data=[-1, 1, -2, 2, -2, 2, -1, 1]) # True
pd.Series(data=[1, 1, -2, 2, -2, 2, -1, 1]) # True
pd.Series(data=[-1, -1, 2, 2, -2, 1, 1, -2]) # False
pd.Series(data=[-1, 1, -2, -2, 1, -1, 2, -2]) # False
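One option with groupby.size and any (note that this returns True when a run exceeds the threshold, i.e. the inverse of the flags shown in the question):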
def check(s, N=2):
    """Report whether any run of identical consecutive values is longer than N.

    Args:
        s: pandas Series to inspect.
        N: longest run length that is still acceptable.

    Returns:
        True if at least one run contains more than ``N`` elements,
        otherwise False.
    """
    # A run begins wherever a value differs from the one before it; the
    # running total of those change points gives every run its own label.
    starts_new_run = s != s.shift()
    run_id = starts_new_run.cumsum()
    run_lengths = s.groupby(run_id).size()
    return (run_lengths > N).any()
Or using a rolling
sum:
def check(s, N=2):
    """Report whether any window of N + 1 consecutive values is constant.

    Args:
        s: pandas Series to inspect.
        N: longest run length that is still acceptable.

    Returns:
        True if some window of ``N + 1`` consecutive elements consists of a
        single repeated value, otherwise False.
    """

    def window_is_constant(window):
        # The window holds N + 1 elements, so more than N matches with its
        # first element means every element of the window is identical.
        matches = window.eq(window.iloc[0]).sum()
        return matches > N

    flags = s.rolling(N + 1).apply(window_is_constant)
    return flags.any()
check(pd.Series(data=[-1, -1, 2, -2, 2, -2, 1, 1]))
# False
check(pd.Series(data=[-1, -1, -1, 2, 2, -2, 1, 1]))
# True
filtering from a whitelist:
def check(s, N=2):
    """Return True when every run of identical consecutive values is allowed.

    A run of a whitelisted value (1 or -1) may contain at most ``N``
    elements; any other value must not repeat consecutively at all.

    Args:
        s: pandas Series of numbers to validate.
        N: maximum permitted run length for the whitelisted values.

    Returns:
        bool: True if no run breaks the rules, False otherwise.
    """
    # A new run starts wherever a value differs from its predecessor, so the
    # cumulative sum of those change points labels each run uniquely.
    runs = s.groupby(s.ne(s.shift()).cumsum())
    lengths = runs.size()
    whitelisted = runs.first().isin([1, -1])
    # Counting only the whitelisted values would let repeats such as
    # ``2, 2`` through, so non-whitelisted runs get a limit of one.
    too_long = (whitelisted & lengths.gt(N)) | (~whitelisted & lengths.gt(1))
    return not too_long.any()
check(pd.Series(data=[-1, 1, -2, 2, -2, 2, -1, 1]))
# True
check(pd.Series(data=[1, 1, -2, 2, -2, 2, -1, 1]))
# True
check(pd.Series(data=[-1, -1, 2, 2, -2, 1, 1, -2]))
# False
check(pd.Series(data=[-1, 1, -2, -2, 1, -1, 2, -2]))
# False
check(pd.Series(data=[-1, -1, -1, 2, -2, 1, 1, -2]))
# False
You can use the shift
method along with Boolean indexing to achieve this. The idea is to compare each element with the previous one, and if they are equal and not equal to +1 or -1, return False.
Here’s an example implementation:
def check_consecutive(series, max_run=2, allowed=(1, -1)):
    """Return True when every run of identical consecutive values is allowed.

    Values listed in ``allowed`` may repeat consecutively up to ``max_run``
    times; every other value must not be followed by itself.

    Args:
        series: pandas Series of numbers to validate.
        max_run: maximum run length permitted for the ``allowed`` values.
        allowed: values that are permitted to repeat consecutively.

    Returns:
        bool: True if no run breaks the rules, False otherwise.
    """
    # Label each run: a new run starts wherever a value differs from the
    # previous one, and the cumulative sum of those starts is the run id.
    runs = series.groupby(series.ne(series.shift()).cumsum())
    lengths = runs.size()
    is_allowed = runs.first().isin(list(allowed))
    # The rule is per run, not a global tally of repeated positions:
    # allowed values are capped at max_run, everything else at one.
    violations = (is_allowed & lengths.gt(max_run)) | (
        ~is_allowed & lengths.gt(1)
    )
    return not violations.any()
print(check_consecutive(pd.Series(data=[-1, -1, 2, -2, 2, -2, 1, 1]))) # True
print(check_consecutive(pd.Series(data=[-1, -1, -1, 2, 2, -2, 1, 1]))) # False
print(check_consecutive(pd.Series(data=[-1, 1, -2, 2, -2, 2, -1, 1]))) # True
print(check_consecutive(pd.Series(data=[1, 1, -2, 2, -2, 2, -1, 1]))) # True
print(check_consecutive(pd.Series(data=[-1, -1, 2, 2, -2, 1, 1, -2]))) # False
print(check_consecutive(pd.Series(data=[-1, 1, -2, -2, 1, -1, 2, -2]))) # False