Calculate source value from deviation of Bollinger Bands with Python and Pandas

Question:

I am calculating Bollinger Bands (the rolling mean plus/minus a multiple of the rolling standard deviation; the example here is very simplified) in a pandas DataFrame like this:

import pandas as pd
import numpy as np

# Band parameters: half-width in standard deviations and look-back length.
no_of_std = 3
window = 20

df = pd.DataFrame({
    'A': [34, 34, 34, 33, 32, 34, 35.0, 21, 22, 25, 23, 21, 39, 26, 31, 34, 38, 26, 21, 39, 31],
})

# Build the rolling window once and derive both statistics from it.
# ddof=0 selects the population standard deviation (divide by N).
window_view = df['A'].rolling(window)
rolling_mean = window_view.mean()
rolling_std = window_view.std(ddof=0)
band_offset = rolling_std * no_of_std

# Middle line, lower band and upper band, stored next to the input column.
df['M'] = rolling_mean
df['BBL'] = rolling_mean - band_offset
df['BBH'] = rolling_mean + band_offset

print(df)

The result looks like this:

       A      M        BBL        BBH
0   34.0    NaN        NaN        NaN
1   34.0    NaN        NaN        NaN
2   34.0    NaN        NaN        NaN
3   33.0    NaN        NaN        NaN
4   32.0    NaN        NaN        NaN
5   34.0    NaN        NaN        NaN
6   35.0    NaN        NaN        NaN
7   21.0    NaN        NaN        NaN
8   22.0    NaN        NaN        NaN
9   25.0    NaN        NaN        NaN
10  23.0    NaN        NaN        NaN
11  21.0    NaN        NaN        NaN
12  39.0    NaN        NaN        NaN
13  26.0    NaN        NaN        NaN
14  31.0    NaN        NaN        NaN
15  34.0    NaN        NaN        NaN
16  38.0    NaN        NaN        NaN
17  26.0    NaN        NaN        NaN
18  21.0    NaN        NaN        NaN
19  39.0  30.10  11.633544  48.566456
20  31.0  29.95  11.665375  48.234625

Now I want to calculate in the other direction: which value does the next entry in column ‘A’ need to have in order to hit exactly the 3rd standard deviation of the rolling mean?
In other words, I want to calculate which value A needs to have in a new next row (index 21 in this example) so that it is exactly the same as the value of BBH or BBL in that row.
I can do this by recursive approximation (bisection), but this costs a lot of performance and I think there must be a better way. Here is an example of such a solution, which I think is too slow; there must be a better, faster way:

import pandas as pd


# Sample input: 21 observations in column 'A'.
odf = pd.DataFrame({'A': [34, 34, 34, 33, 32, 34, 35.0, 21, 22, 25, 23, 21, 39, 26, 31, 34, 38, 26, 21, 39, 31]})

def get_last_bbh_bbl(idf):
    """Return the upper and lower Bollinger Band for the last row of *idf*.

    The bands are the 20-row rolling mean of column ``'A'`` plus/minus three
    rolling sample standard deviations (pandas' default ``ddof=1``).

    Args:
        idf: DataFrame with a numeric column ``'A'``. It is not modified.

    Returns:
        Tuple ``(bbh, bbl)`` for the final row. Both are NaN when the frame
        has fewer than 20 rows.

    Raises:
        IndexError: If *idf* has no rows.
    """
    no_of_std = 3
    window = 20
    rolling = idf['A'].rolling(window)
    rolling_mean = rolling.mean()
    band_offset = rolling.std() * no_of_std
    # Positional access keeps this working for any index, not only 0..n-1,
    # and avoids copying the frame just to read two scalars.
    bbh = (rolling_mean + band_offset).iloc[-1]
    bbl = (rolling_mean - band_offset).iloc[-1]
    return bbh, bbl

def search_matching_value(idf, low, high, search_for):
    """Bisect for the next 'A' value that lands exactly on a Bollinger Band.

    Each step appends the midpoint of ``[low, high]`` as a trial row to a
    copy of *idf*, recomputes the bands for that row via
    ``get_last_bbh_bbl`` and keeps the half of the interval in which the
    trial value and the band can still meet.

    Args:
        idf: DataFrame with column ``'A'``; it is never modified.
        low: Lower end of the search interval.
        high: Upper end of the search interval.
        search_for: ``'bbh'`` to hit the upper band, ``'bbl'`` for the lower.

    Returns:
        The midpoint that equals the band exactly, or *high* once the
        interval is narrower than 1e-6.

    Raises:
        ValueError: If *search_for* is neither ``'bbh'`` nor ``'bbl'`` and
            the interval still has to be bisected.
    """
    while abs(high - low) >= 0.000001:
        middle = low + ((high - low) / 2)
        # DataFrame.append was removed in pandas 2.0; concat builds the same
        # trial frame and leaves idf untouched.
        trial = pd.concat([idf, pd.DataFrame({'A': [middle]})], ignore_index=True)
        bbh, bbl = get_last_bbh_bbl(trial)
        if search_for == 'bbh':
            if bbh < middle:
                high = middle
            elif bbh > middle:
                low = middle
            else:
                return middle
        elif search_for == 'bbl':
            if bbl > middle:
                low = middle
            elif bbl < middle:
                high = middle
            else:
                return middle
        else:
            raise ValueError(
                "search_for must be 'bbh' or 'bbl', got {!r}".format(search_for)
            )
    return high

# Bands and last observation of the unmodified series.
actual_bbh, actual_bbl = get_last_bbh_bbl(odf)
last_value = odf['A'].iloc[-1]
print('last_value: {}, actual bbh: {}, actual bbl: {}'.format(last_value, actual_bbh, actual_bbl))

# Upper band: search between the last value and ten times the current band.
low, high = last_value, actual_bbh * 10
next_value_that_hits_bbh = search_matching_value(odf, low, high, 'bbh')
print('next_value_that_hits_bbh: {}'.format(next_value_that_hits_bbh))

# Lower band: search between zero and the last value.
low, high = 0, last_value
next_value_that_hits_bbl = search_matching_value(odf, low, high, 'bbl')
print('next_value_that_hits_bbl: {}'.format(next_value_that_hits_bbl))

the result looks like this:

 last_value: 31.0, actual bbh: 48.709629106422284, actual bbl: 11.190370893577711
 next_value_that_hits_bbh: 57.298733206475276
 next_value_that_hits_bbl: 2.174952656030655
Asked By: Egirus Ornila

||

Answers:

Here is one solution that calculates the next value with a fast algorithm: both the classic Newton method and SciPy's optimized Newton routine are faster than bisection (dichotomy). This solution does not use a DataFrame to recalculate the bands for each candidate value; it calls the functions of the standard-library statistics module directly.

some info for scipy.optimize.newton

from scipy import misc
import pandas as pd
import statistics
from scipy.optimize import newton
#scipy.optimize if you want to test the newton optimized function

def get_last_bbh_bbl(idf):
    """Return the last 'A' value and the Bollinger Bands of the last row.

    The bands are the rolling mean of column ``'A'`` plus/minus
    ``no_of_std`` rolling sample standard deviations (pandas' default
    ``ddof=1``). The window length and ``no_of_std`` are read from the
    module-level names ``window`` and ``no_of_std``.

    Args:
        idf: DataFrame with a numeric column ``'A'``. It is not modified.

    Returns:
        Tuple ``(lastvalue, bbh, bbl)`` for the final row.

    Raises:
        IndexError: If *idf* has no rows.
    """
    column = idf['A']
    rolling = column.rolling(window)
    rolling_mean = rolling.mean()
    band_offset = rolling.std() * no_of_std
    # Positional access keeps this working for any index, not only 0..n-1,
    # and avoids copying the frame just to read three scalars.
    bbh = (rolling_mean + band_offset).iloc[-1]
    bbl = (rolling_mean - band_offset).iloc[-1]
    lastvalue = column.iloc[-1]
    return lastvalue, bbh, bbl

#classic newton
def NewtonsMethod(f, x, tolerance=0.00000001, maxiter=100, dx=1.0):
    """Find a root of *f* with Newton's method and a numerical derivative.

    The slope is the central difference ``(f(x + dx) - f(x - dx)) / (2 * dx)``.
    With the default ``dx=1.0`` this matches what ``scipy.misc.derivative``
    computed with its default arguments; that helper was deprecated and
    later removed from SciPy, so the difference is computed here directly.

    Args:
        f: Function of one float whose root is wanted.
        x: Starting guess.
        tolerance: Stop once two successive iterates differ by less than this.
        maxiter: Maximum number of Newton steps before giving up.
        dx: Half-width of the central-difference stencil.

    Returns:
        The converged iterate, i.e. the newest estimate of the root.

    Raises:
        ZeroDivisionError: If the numerical slope is zero at an iterate.
        RuntimeError: If no convergence is reached within *maxiter* steps.
    """
    for _ in range(maxiter):
        slope = (f(x + dx) - f(x - dx)) / (2 * dx)
        if slope == 0:
            raise ZeroDivisionError(
                'numerical derivative is zero at x={!r}'.format(x)
            )
        x1 = x - f(x) / slope
        if abs(x1 - x) < tolerance:
            # Return the newest estimate rather than the previous one.
            return x1
        x = x1
    raise RuntimeError(
        'Newton iteration did not converge within {} steps'.format(maxiter)
    )

#to calculate the result of function bbl(x) - x (we want 0!)
def low(x):
    """Return ``bbl(x) - x``, which is zero when *x* lies on the lower band.

    The band is computed from the module-level ``lastlistofvalue`` with its
    final entry replaced by *x*, using the sample standard deviation scaled
    by the module-level ``no_of_std``.
    """
    # Replace the final stored entry with the candidate value
    # (presumably the oldest one, if the list is kept newest-first).
    candidate_window = lastlistofvalue[:-1] + [x]
    center = statistics.mean(candidate_window)
    spread = statistics.stdev(candidate_window, center)
    return center - spread * no_of_std - x

#to calculate the result of function bbh(x) - x (we want 0!)
def high(x):
    """Return ``bbh(x) - x``, which is zero when *x* lies on the upper band.

    The band is computed from the module-level ``lastlistofvalue`` with its
    final entry replaced by *x*, using the sample standard deviation scaled
    by the module-level ``no_of_std``.
    """
    # Replace the final stored entry with the candidate value
    # (presumably the oldest one, if the list is kept newest-first).
    candidate_window = lastlistofvalue[:-1] + [x]
    center = statistics.mean(candidate_window)
    spread = statistics.stdev(candidate_window, center)
    return center + spread * no_of_std - x

# Observations shared by the original frame and the verification frames
# below, so the data is written down only once.
series_values = [34, 34, 34, 33, 32, 34, 35.0, 21, 22, 25, 23, 21, 39, 26, 31, 34, 38, 26, 21, 39, 31]
odf = pd.DataFrame({'A': series_values})
no_of_std = 3
window = 20
# The last `window` observations, newest first. low() and high() drop the
# final (oldest) entry and append the candidate value.
lastlistofvalue = odf['A'].to_list()[::-1][:window]

# Newton classic method
x = odf['A'].iloc[-1]  # starting guess: the most recent observation
x0 = NewtonsMethod(high, x)
print(f'value to hit bbh: {x0}')
# Append the solution and recompute the bands with pandas as a cross-check.
odf = pd.DataFrame({'A': series_values + [x0]})
lastvalue, new_bbh, new_bbl = get_last_bbh_bbl(odf)
print(f'value to hit bbh: {lastvalue} -> check new bbh: {new_bbh}')

x0 = NewtonsMethod(low, x)
print(f'value to hit bbl: {x0}')
odf = pd.DataFrame({'A': series_values + [x0]})
lastvalue, new_bbh, new_bbl = get_last_bbh_bbl(odf)
print(f'value to hit bbl: {lastvalue} -> check new bbl: {new_bbl}')

output:

value to hit bbh: 57.298732375228624
value to hit bbh: 57.298732375228624 -> check new bbh: 57.29873237527272
value to hit bbl: 2.1749518354059636
value to hit bbl: 2.1749518354059636 -> check new bbl: 2.1749518353102992

You can compare this with SciPy's optimized Newton routine like this:

# Newton optimized method: scipy.optimize.newton, which uses the secant
# method when no fprime is supplied.
# NOTE(review): if this runs right after the classic-Newton script, odf's
# last row is the appended solution rather than the original last
# observation - confirm that is the intended starting guess.
x = odf['A'].iloc[-1]
x0 = newton(high, x, tol=1.00e-08, maxiter=50)
print(f'Newton opt value to hit bbh: {x0}')

x0 = newton(low, x, tol=1.48e-08, maxiter=50)
print(f'Newton value to hit bbl: {x0}')

output:

Newton opt value to hit bbh: 57.29873237532118
Newton value to hit bbl: 2.1749518352051225

With the optimized Newton routine you can also tune the maximum number of iterations,

and the optimized routine is faster than the classic one.

Timings for each calculation:

0.002 sec for optimized

0.005 sec for classic

*Remarks: *

If you use rolling(window).std(), you are using the sample standard deviation, so you have to use

std = statistics.stdev(l, avg), which divides by N-1 items.

If you use rolling(window).std(ddof=0), you are using the population standard deviation, so you have to use

std = statistics.pstdev(l, avg), which divides by N items.

Answered By: Frenchy
Categories: questions Tags: ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.