Is it possible to improve my zigzag indicator loop?
Question:
I have read on SO and replicated an indicator for stock prices that works as intended. It’s called ZigZag and projects peaks and valleys on historical prices. I pass a pandas dataframe with OHLC prices to my ZigZag class.
My problem is that it runs really slowly: I have about 70 stocks with 10k rows each, and a full run takes about a minute and a half. The dataset and the number of stocks are both growing rapidly.
Can you see anything that would improve, first of all, the execution speed?
Other comments also welcome.
class Zigzag:
    """Reduce a price series to an alternating list of swing highs and lows.

    ``timeseries`` must be a pandas DataFrame with ``'high'`` and ``'low'``
    columns; its index labels are stored as the swing timestamps.
    ``swingthresh`` is the fractional move (0.03 == 3 %) away from the
    current extreme that starts a new swing in the opposite direction.

    The result is kept in ``self.zigzag`` as a list of
    ``[timestamp, price, 'high' | 'low']`` rows.
    """

    def __init__(self, timeseries, swingthresh=0.03):
        self.timeseries = timeseries
        # 0.03 by default; pass another value to change the indicator's sensitivity.
        self.swingthresh = swingthresh
        # True while the latest swing row is a 'high', False while it is a 'low',
        # None until a direction has been determined.
        self.upswing = None
        self.zigzag = []

    def add_row(self, list):
        """Append a ``[timestamp, price, kind]`` row and set the swing direction.

        The parameter name is kept as-is so keyword callers keep working.
        """
        self.zigzag.append(list)
        self.upswing = list[-1] == 'high'

    def update_row(self, timestamp, price):
        """Overwrite the timestamp and price of the most recent swing row."""
        last = self.zigzag[-1]
        last[0], last[1] = timestamp, price

    def _bars(self):
        """Return an iterator of ``(index_label, high, low)`` for every row.

        Zipping the column arrays avoids building one Series per row, which
        is what makes ``DataFrame.iterrows()`` slow on long series.
        """
        frame = self.timeseries
        return zip(frame.index, frame['high'].to_numpy(), frame['low'].to_numpy())

    def first_swing(self):
        """Set ``self.upswing`` from the first row that is not a double breakout.

        Rows whose high AND low both exceed the threshold relative to the
        reference bar only move the reference forward. The first row for
        which that is not the case decides the direction: ``upswing``
        becomes True if its high broke out, otherwise False. If no such row
        exists (or the frame is empty) ``upswing`` is left unchanged.
        """
        frame = self.timeseries
        if len(frame) == 0:
            return
        # Read by column name so the column order of the frame does not matter.
        lasthigh = frame['high'].iloc[0]
        lastlow = frame['low'].iloc[0]
        thresh = self.swingthresh
        for _, row_high, row_low in self._bars():
            high = row_high > lastlow + (lastlow * thresh)
            low = row_low < lasthigh - (lasthigh * thresh)
            if high and low:
                lasthigh = row_high
                lastlow = row_low
            else:
                self.upswing = bool(high)
                return

    def get_swings(self):
        """Build and return the list of swing rows for the whole series.

        Returns ``self.zigzag``; an empty frame yields the list unchanged.
        """
        frame = self.timeseries
        if len(frame) == 0:
            return self.zigzag

        self.first_swing()

        # Seed the first two swing rows from the first bar of the series.
        first_label = frame.index[0]
        lasthigh = frame['high'].iloc[0]
        lastlow = frame['low'].iloc[0]
        # The seed order depends on the initial direction; add_row() leaves
        # self.upswing matching the row appended last.
        if self.upswing:
            self.add_row([first_label, lastlow, 'low'])
            self.add_row([first_label, lasthigh, 'high'])
        else:
            self.add_row([first_label, lasthigh, 'high'])
            self.add_row([first_label, lastlow, 'low'])

        thresh = self.swingthresh
        for row_timestamp, row_high, row_low in self._bars():
            if self.upswing:
                # A higher high extends the current swing high in place.
                if row_high > lasthigh:
                    self.update_row(row_timestamp, row_high)
                    lasthigh = row_high
                # A drop of at least the threshold starts a new swing low.
                if row_low <= lasthigh - (lasthigh * thresh):
                    self.add_row([row_timestamp, row_low, 'low'])
                    lastlow = row_low
            else:
                # In a downswing: a lower low extends the current swing low.
                if row_low < lastlow:
                    self.update_row(row_timestamp, row_low)
                    lastlow = row_low
                # A rise of at least the threshold starts a new swing high.
                if row_high >= lastlow + (lastlow * thresh):
                    self.add_row([row_timestamp, row_high, 'high'])
                    lasthigh = row_high
        return self.zigzag
Answers:
So I found that itertuples() makes this go WAY faster.
After another day of data was recorded, iterrows() took 1 minute 39 seconds. I changed to itertuples() and got it down to around 6 seconds!
I found this code on Stack Overflow and decided to run it using EURUSD OHLC data from yfinance, but I don’t get any output when I run it in Google Colab.
I have read on SO and replicated an indicator for stock prices that works as intended. It’s called ZigZag and projects peaks and valleys on historical prices. I pass a pandas dataframe with OHLC prices to my ZigZag class.
My problem is that it runs really slowly: I have about 70 stocks with 10k rows each, and a full run takes about a minute and a half. The dataset and the number of stocks are both growing rapidly.
Can you see anything that would improve, first of all, the execution speed?
Other comments also welcome.
class Zigzag:
    """Reduce a price series to an alternating list of swing highs and lows.

    ``timeseries`` must be a pandas DataFrame with ``'high'`` and ``'low'``
    columns; its index labels are stored as the swing timestamps.
    ``swingthresh`` is the fractional move (0.03 == 3 %) away from the
    current extreme that starts a new swing in the opposite direction.

    The result is kept in ``self.zigzag`` as a list of
    ``[timestamp, price, 'high' | 'low']`` rows.
    """

    def __init__(self, timeseries, swingthresh=0.03):
        self.timeseries = timeseries
        # 0.03 by default; pass another value to change the indicator's sensitivity.
        self.swingthresh = swingthresh
        # True while the latest swing row is a 'high', False while it is a 'low',
        # None until a direction has been determined.
        self.upswing = None
        self.zigzag = []

    def add_row(self, list):
        """Append a ``[timestamp, price, kind]`` row and set the swing direction.

        The parameter name is kept as-is so keyword callers keep working.
        """
        self.zigzag.append(list)
        self.upswing = list[-1] == 'high'

    def update_row(self, timestamp, price):
        """Overwrite the timestamp and price of the most recent swing row."""
        last = self.zigzag[-1]
        last[0], last[1] = timestamp, price

    def _bars(self):
        """Return an iterator of ``(index_label, high, low)`` for every row.

        Zipping the column arrays avoids building one Series per row, which
        is what makes ``DataFrame.iterrows()`` slow on long series.
        """
        frame = self.timeseries
        return zip(frame.index, frame['high'].to_numpy(), frame['low'].to_numpy())

    def first_swing(self):
        """Set ``self.upswing`` from the first row that is not a double breakout.

        Rows whose high AND low both exceed the threshold relative to the
        reference bar only move the reference forward. The first row for
        which that is not the case decides the direction: ``upswing``
        becomes True if its high broke out, otherwise False. If no such row
        exists (or the frame is empty) ``upswing`` is left unchanged.
        """
        frame = self.timeseries
        if len(frame) == 0:
            return
        # Read by column name so the column order of the frame does not matter.
        lasthigh = frame['high'].iloc[0]
        lastlow = frame['low'].iloc[0]
        thresh = self.swingthresh
        for _, row_high, row_low in self._bars():
            high = row_high > lastlow + (lastlow * thresh)
            low = row_low < lasthigh - (lasthigh * thresh)
            if high and low:
                lasthigh = row_high
                lastlow = row_low
            else:
                self.upswing = bool(high)
                return

    def get_swings(self):
        """Build and return the list of swing rows for the whole series.

        Returns ``self.zigzag``; an empty frame yields the list unchanged.
        """
        frame = self.timeseries
        if len(frame) == 0:
            return self.zigzag

        self.first_swing()

        # Seed the first two swing rows from the first bar of the series.
        first_label = frame.index[0]
        lasthigh = frame['high'].iloc[0]
        lastlow = frame['low'].iloc[0]
        # The seed order depends on the initial direction; add_row() leaves
        # self.upswing matching the row appended last.
        if self.upswing:
            self.add_row([first_label, lastlow, 'low'])
            self.add_row([first_label, lasthigh, 'high'])
        else:
            self.add_row([first_label, lasthigh, 'high'])
            self.add_row([first_label, lastlow, 'low'])

        thresh = self.swingthresh
        for row_timestamp, row_high, row_low in self._bars():
            if self.upswing:
                # A higher high extends the current swing high in place.
                if row_high > lasthigh:
                    self.update_row(row_timestamp, row_high)
                    lasthigh = row_high
                # A drop of at least the threshold starts a new swing low.
                if row_low <= lasthigh - (lasthigh * thresh):
                    self.add_row([row_timestamp, row_low, 'low'])
                    lastlow = row_low
            else:
                # In a downswing: a lower low extends the current swing low.
                if row_low < lastlow:
                    self.update_row(row_timestamp, row_low)
                    lastlow = row_low
                # A rise of at least the threshold starts a new swing high.
                if row_high >= lastlow + (lastlow * thresh):
                    self.add_row([row_timestamp, row_high, 'high'])
                    lasthigh = row_high
        return self.zigzag
So I found that itertuples() makes this go WAY faster.
After another day of data was recorded, iterrows() took 1 minute 39 seconds. I changed to itertuples() and got it down to around 6 seconds!
I found this code on Stack Overflow and decided to run it using EURUSD OHLC data from yfinance, but I don’t get any output when I run it in Google Colab.