Counter for consecutive negative values in a DataFrame
Question:
I need to implement a counter that produces the "output" column shown below. For each row, it counts how many consecutive negative values of the "data" column immediately precede that row.
data output
0 -1 Nan // since there are no past values for data: count=NaN
1 -2 1 //-1, so count= 1
2 4 2 //-2,-1 count=2
3 12 0 // count=0
4 -22 0 // count=0
5 -12 1 //-22 count=1
6 -7 2 // -22,-12 count=2
7 -5 3 // -7,-22,-12 count=3
8 -33 4 // -5,-7,-22,-12 count=4
9 2 5 // -33,-5,-7,-22,-12 count=5
10 2 0 // previous value is 2 (not negative), so count=0
MY CODE
import pandas as pd
import talib
import numpy as np
df = pd.DataFrame()
df["data"] = [-1, -2, 4, 12, -22, -12, -7, -5, -33, 2, 2]
print(df)

# For every row, walk backwards through the rows before it and count how many
# consecutive values are negative, stopping at the first non-negative one.
values = df["data"].tolist()
streaks = []
for y in range(len(values)):
    if y == 0:
        # The first row has no past values, so its count is undefined.
        streaks.append(np.nan)
        continue
    c = 0
    for z in range(1, y + 1):
        if values[y - z] >= 0:
            break
        c = c + 1
    streaks.append(c)

# Store the counts in their own frame, aligned with df's index.
count = pd.DataFrame(index=df.index)
count["dd"] = streaks
OUTPUT needed:
Answers:
I am not sure how to produce the "NaN" for the first row (I am not very experienced myself), but here is some code that seems to do what you asked for:
# Sample input: a single "data" column on the default integer index.
df = pd.DataFrame({"data": [-1, -2, 4, 12, -22, -12, -7, -5, -22, 2, 2]})
def generateOutput(df):
    """Add an ``output`` column counting the negatives just before each row.

    ``output[i]`` is the number of consecutive negative ``data`` values that
    immediately precede row ``i``; the first row always gets 0 because it has
    no predecessors.

    The values are read positionally, so the frame may carry any index, and
    an empty frame simply receives an empty ``output`` column.

    Args:
        df: DataFrame with a numeric ``data`` column. It is modified in place.

    Returns:
        The same DataFrame object, now carrying the ``output`` column.
    """
    streak = 0
    counts = []
    for value in df["data"].tolist():
        # Record the streak seen so far *before* looking at the current row.
        counts.append(streak)
        streak = streak + 1 if value < 0 else 0
    df["output"] = counts
    return df
# Show the frame before the counter column is added.
print(df)
# generateOutput writes the "output" column into df and returns the frame.
df = generateOutput(df)
# Show the frame again, now including the "output" column.
print(df)
And here is the output I got when I ran the program:
data
0 -1
1 -2
2 4
3 12
4 -22
5 -12
6 -7
7 -5
8 -22
9 2
10 2
data output
0 -1 0
1 -2 1
2 4 2
3 12 0
4 -22 0
5 -12 1
6 -7 2
7 -5 3
8 -22 4
9 2 5
10 2 0
One-liner:
# Flag the negatives (`lt(0)`), number each run of equal consecutive flags
# (`diff().ne(0).cumsum()`), take a running count of the flags inside each run
# (`groupby(...).cumsum()`), then `shift()` one row down so that every row
# reports the streak that ended just before it (the first row becomes NaN).
df.data.lt(0).groupby(df.data.lt(0).diff().ne(0).cumsum()).cumsum().shift()
Expanded version:
import pandas as pd
df = pd.DataFrame()
df["data"] = [-1, -2, 4, 12, -22, -12, -7, -5, -33, 2, 2]

# Step 1: flag the negative rows.
subzero = df.data < 0  # == df.data.lt(0)
# 0      True
# 1      True
# 2     False
# 3     False
# 4      True
# 5      True
# 6      True
# 7      True
# 8      True
# 9     False
# 10    False
# Name: data, dtype: bool

# Step 2: we need the `cumsum` of subzero,
# but it should be calculated for each True group separately.
# The following key gives every run of equal consecutive flags its own id.
# `diff()` is NaN on the first row and True wherever the flag changes;
# `.ne(0)` turns that leading NaN into True, so the first row opens run 1
# without any special-casing (this also works for a one-row frame and for
# any index, since no element is looked up by label).
by = subzero.diff().ne(0).cumsum()
# 0     1
# 1     1
# 2     2
# 3     2
# 4     3
# 5     3
# 6     3
# 7     3
# 8     3
# 9     4
# 10    4
# Name: data, dtype: int64

# Step 3: running count of negatives inside each run, moved one row down so
# that each row reports the streak that ended just before it.
result = subzero.groupby(by).cumsum().shift(1)
# 0     NaN
# 1     1.0
# 2     2.0
# 3     0.0
# 4     0.0
# 5     1.0
# 6     2.0
# 7     3.0
# 8     4.0
# 9     5.0
# 10    0.0
# Name: data, dtype: float64
I need to implement a counter that produces the "output" column shown below. For each row, it counts how many consecutive negative values of the "data" column immediately precede that row.
data output
0 -1 Nan // since there are no past values for data: count=NaN
1 -2 1 //-1, so count= 1
2 4 2 //-2,-1 count=2
3 12 0 // count=0
4 -22 0 // count=0
5 -12 1 //-22 count=1
6 -7 2 // -22,-12 count=2
7 -5 3 // -7,-22,-12 count=3
8 -33 4 // -5,-7,-22,-12 count=4
9 2 5 // -33,-5,-7,-22,-12 count=5
10 2 0 // previous value is 2 (not negative), so count=0
MY CODE
import pandas as pd
import talib
import numpy as np
df = pd.DataFrame()
df["data"] = [-1, -2, 4, 12, -22, -12, -7, -5, -33, 2, 2]
print(df)

# For every row, walk backwards through the rows before it and count how many
# consecutive values are negative, stopping at the first non-negative one.
values = df["data"].tolist()
streaks = []
for y in range(len(values)):
    if y == 0:
        # The first row has no past values, so its count is undefined.
        streaks.append(np.nan)
        continue
    c = 0
    for z in range(1, y + 1):
        if values[y - z] >= 0:
            break
        c = c + 1
    streaks.append(c)

# Store the counts in their own frame, aligned with df's index.
count = pd.DataFrame(index=df.index)
count["dd"] = streaks
OUTPUT needed:
I am not sure how to produce the "NaN" for the first row (I am not very experienced myself), but here is some code that seems to do what you asked for:
# Sample input: a single "data" column on the default integer index.
df = pd.DataFrame({"data": [-1, -2, 4, 12, -22, -12, -7, -5, -22, 2, 2]})
def generateOutput(df):
    """Add an ``output`` column counting the negatives just before each row.

    ``output[i]`` is the number of consecutive negative ``data`` values that
    immediately precede row ``i``; the first row always gets 0 because it has
    no predecessors.

    The values are read positionally, so the frame may carry any index, and
    an empty frame simply receives an empty ``output`` column.

    Args:
        df: DataFrame with a numeric ``data`` column. It is modified in place.

    Returns:
        The same DataFrame object, now carrying the ``output`` column.
    """
    streak = 0
    counts = []
    for value in df["data"].tolist():
        # Record the streak seen so far *before* looking at the current row.
        counts.append(streak)
        streak = streak + 1 if value < 0 else 0
    df["output"] = counts
    return df
# Show the frame before the counter column is added.
print(df)
# generateOutput writes the "output" column into df and returns the frame.
df = generateOutput(df)
# Show the frame again, now including the "output" column.
print(df)
And here is the output I got when I ran the program:
data
0 -1
1 -2
2 4
3 12
4 -22
5 -12
6 -7
7 -5
8 -22
9 2
10 2
data output
0 -1 0
1 -2 1
2 4 2
3 12 0
4 -22 0
5 -12 1
6 -7 2
7 -5 3
8 -22 4
9 2 5
10 2 0
One-liner:
# Flag the negatives (`lt(0)`), number each run of equal consecutive flags
# (`diff().ne(0).cumsum()`), take a running count of the flags inside each run
# (`groupby(...).cumsum()`), then `shift()` one row down so that every row
# reports the streak that ended just before it (the first row becomes NaN).
df.data.lt(0).groupby(df.data.lt(0).diff().ne(0).cumsum()).cumsum().shift()
Expanded version:
import pandas as pd
df = pd.DataFrame()
df["data"] = [-1, -2, 4, 12, -22, -12, -7, -5, -33, 2, 2]

# Step 1: flag the negative rows.
subzero = df.data < 0  # == df.data.lt(0)
# 0      True
# 1      True
# 2     False
# 3     False
# 4      True
# 5      True
# 6      True
# 7      True
# 8      True
# 9     False
# 10    False
# Name: data, dtype: bool

# Step 2: we need the `cumsum` of subzero,
# but it should be calculated for each True group separately.
# The following key gives every run of equal consecutive flags its own id.
# `diff()` is NaN on the first row and True wherever the flag changes;
# `.ne(0)` turns that leading NaN into True, so the first row opens run 1
# without any special-casing (this also works for a one-row frame and for
# any index, since no element is looked up by label).
by = subzero.diff().ne(0).cumsum()
# 0     1
# 1     1
# 2     2
# 3     2
# 4     3
# 5     3
# 6     3
# 7     3
# 8     3
# 9     4
# 10    4
# Name: data, dtype: int64

# Step 3: running count of negatives inside each run, moved one row down so
# that each row reports the streak that ended just before it.
result = subzero.groupby(by).cumsum().shift(1)
# 0     NaN
# 1     1.0
# 2     2.0
# 3     0.0
# 4     0.0
# 5     1.0
# 6     2.0
# 7     3.0
# 8     4.0
# 9     5.0
# 10    0.0
# Name: data, dtype: float64