How can I prevent messy Plotly `go` plots when overlaying multiple time series that were measured on different dates?
Question:
I have a dataset with date_time, date, time, and a VALUE1 column that holds the measurement value at each time point. For the same ID, there are multiple measurements over a day. In addition, each ID has 6 different 24-hour measurement sessions, which are identified by the INSPECTION column. When I try to overlay all of these sessions in a single plot (only the time of day on the x-axis, without the date, and VALUE1 on the y-axis), the plot looks quite messy.
Below you can find the sample data and my attempt; the options I tried are in the commented-out lines just before the `fig.show()` call:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from plotly.subplots import make_subplots
from dash import Dash, html, dcc, Input, Output, dash_table, no_update
import plotly.graph_objects as go
# Build a reproducible sample dataset: one row every 20 minutes from
# 2022-11-01 00:00 to 2022-11-06 23:00.
random.seed(0)
timestamps = pd.date_range('2022-11-01', '2022-11-06 23:00:00', freq='20min')
# Derive the row count from the date range instead of hard-coding 430, so the
# column lengths always agree with the index.
n_rows = len(timestamps)

# The random draws are made in the same order as before (ID, VALUE1, VALUE2)
# so that the seeded values are unchanged.
df = pd.DataFrame({
    'DATE_TIME': timestamps,
    'ID': [random.randrange(1, 3) for _ in range(n_rows)],
})
df['VALUE1'] = [random.uniform(110, 160) for _ in range(n_rows)]
df['VALUE2'] = [random.uniform(50, 80) for _ in range(n_rows)]

# Each calendar day is treated as one inspection.
df['INSPECTION'] = df['DATE_TIME'].dt.day
# Inspection 1 -> mode 'A', inspections 2 and 3 -> 'B', everything else -> 'C'.
df['MODE'] = np.select(
    [df['INSPECTION'] == 1, df['INSPECTION'].isin([2, 3])],
    ['A', 'B'],
    'C',
)

# Time of day as an 'HH:MM:SS' string (no date part).
df['TIME'] = df['DATE_TIME'].dt.strftime('%H:%M:%S')

# Minutes elapsed since the previous row. `astype('timedelta64[m]')` is not
# accepted by pandas 2.x (only s/ms/us/ns resolutions are supported), so the
# gap is converted to float minutes explicitly. The first row has no
# predecessor and is set to 0.
df['TIMEINTERVAL'] = df['DATE_TIME'].diff().dt.total_seconds() / 60
df['TIMEINTERVAL'] = df['TIMEINTERVAL'].fillna(0)
def to_day_period(s):
    """Classify time-of-day values as 'Daytime' or 'Nighttime'.

    `s` is converted with `pd.to_timedelta` and binned on left-closed
    intervals: [06:00, 18:00) is labelled 'Daytime'; [00:00, 06:00) and
    [18:00, 24:00) are labelled 'Nighttime'. The result of `pd.cut` is
    returned with unordered categories.
    """
    edges = ['0', '06:00:00', '13:00:00', '18:00:00', '23:00:00', '24:00:00']
    # One label per interval between consecutive edges; labels repeat, which
    # is why the categories must be unordered.
    period_names = ['Nighttime', 'Daytime', 'Daytime', 'Nighttime', 'Nighttime']

    offsets = pd.to_timedelta(s)
    cut_points = [pd.Timedelta(edge) for edge in edges]
    return pd.cut(
        offsets,
        bins=cut_points,
        labels=period_names,
        right=False,
        ordered=False,
    )
# Tag every row with its day period.
df['TIME_OF_DAY'] = to_day_period(df['TIME'])
df_monthly = df

# ------------------------------ Interactive plot ------------------------------
# Only the rows belonging to ID 1 are plotted.
df_id = df[df.ID == 1]
list_scode = list(set(df_id['ID']))  # ID = STATION CODE ?
# Overrides the line above; only its length (7) is used, to size the palette.
list_scode = list(range(0, 7))
lst_VISIT = list(df_id.INSPECTION)  # district = visit ?
# print(lst_VISIT)
list_district = list(set(lst_VISIT))

# Extract the colour palette; the palette can be changed.
pal = list(sns.color_palette(palette='viridis', n_colors=len(list_scode)).as_hex())
df_monthly = df_id  # df_id_29 # df_id_29_visit_1

# One line per inspection, plotted against the raw TIME strings.
fig = go.Figure()
for visit, colour in zip(list_district, pal):
    visit_rows = df_monthly[df_monthly['INSPECTION'] == visit]
    fig.add_trace(
        go.Scatter(
            x=visit_rows['TIME'],
            y=visit_rows['VALUE1'],
            name=visit,
            line_color=colour,
            fill=None,  # tozeroy
        )
    )
# fig.update_xaxes(type='multicategory')
# fig.update_xaxes(nticks=5)
fig.show()
I tried limiting the number of ticks on the x-axis, but it did not work. I also tried changing the x-axis type (`fig.update_xaxes(type='multicategory')`), but this did not work either. How can I fix this issue? My expectation is to see all 6 inspections shown together cleanly, with the TIME column on the x-axis and the VALUE1 column on the y-axis.
Answers:
This may be because Plotly automatically interpreted the time-formatted strings in the TIME column as a time series. As a workaround, add a new column that encodes the TIME column as categorical integer codes, sort each inspection by that code, and draw the chart against the new column, using the original time strings as the tick labels. Other approaches may also work.
# Work on a copy so that adding a column does not touch the frame this one
# was filtered from.
df_monthly = df_monthly.copy()

# Encode every distinct time-of-day string as an integer position. The
# categories of a string column are sorted, so code k corresponds to the k-th
# time in chronological order for zero-padded 'HH:MM:SS' strings.
time_categories = df_monthly['TIME'].astype('category')
df_monthly['TIME_code'] = time_categories.cat.codes
time_labels = list(time_categories.cat.categories)

fig = go.Figure()
for d, p in zip(list_district, pal):
    # Sort each inspection by time code so its line runs left to right.
    dff = df_monthly.query('INSPECTION == @d').sort_values('TIME_code', ascending=True)
    fig.add_trace(
        go.Scatter(
            x=dff['TIME_code'],
            y=dff['VALUE1'],
            name=d,
            line_color=p,
            fill=None,  # tozeroy
        )
    )

# One tick per distinct time, labelled with the original string. The tick
# positions are derived from the data: a fixed np.arange(0, 71) only covers
# codes 0-70, while a full day at 20-minute steps has 72 slots (codes 0-71),
# so positions and labels could end up with different lengths.
fig.update_xaxes(tickvals=np.arange(len(time_labels)), ticktext=time_labels)
fig.show()
I have a dataset with date_time, date, time, and a VALUE1 column that holds the measurement value at each time point. For the same ID, there are multiple measurements over a day. In addition, each ID has 6 different 24-hour measurement sessions, which are identified by the INSPECTION column. When I try to overlay all of these sessions in a single plot (only the time of day on the x-axis, without the date, and VALUE1 on the y-axis), the plot looks quite messy.
Below you can find the sample data and my attempt; the options I tried are in the commented-out lines just before the `fig.show()` call:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from plotly.subplots import make_subplots
from dash import Dash, html, dcc, Input, Output, dash_table, no_update
import plotly.graph_objects as go
# Build a reproducible sample dataset: one row every 20 minutes from
# 2022-11-01 00:00 to 2022-11-06 23:00.
random.seed(0)
timestamps = pd.date_range('2022-11-01', '2022-11-06 23:00:00', freq='20min')
# Derive the row count from the date range instead of hard-coding 430, so the
# column lengths always agree with the index.
n_rows = len(timestamps)

# The random draws are made in the same order as before (ID, VALUE1, VALUE2)
# so that the seeded values are unchanged.
df = pd.DataFrame({
    'DATE_TIME': timestamps,
    'ID': [random.randrange(1, 3) for _ in range(n_rows)],
})
df['VALUE1'] = [random.uniform(110, 160) for _ in range(n_rows)]
df['VALUE2'] = [random.uniform(50, 80) for _ in range(n_rows)]

# Each calendar day is treated as one inspection.
df['INSPECTION'] = df['DATE_TIME'].dt.day
# Inspection 1 -> mode 'A', inspections 2 and 3 -> 'B', everything else -> 'C'.
df['MODE'] = np.select(
    [df['INSPECTION'] == 1, df['INSPECTION'].isin([2, 3])],
    ['A', 'B'],
    'C',
)

# Time of day as an 'HH:MM:SS' string (no date part).
df['TIME'] = df['DATE_TIME'].dt.strftime('%H:%M:%S')

# Minutes elapsed since the previous row. `astype('timedelta64[m]')` is not
# accepted by pandas 2.x (only s/ms/us/ns resolutions are supported), so the
# gap is converted to float minutes explicitly. The first row has no
# predecessor and is set to 0.
df['TIMEINTERVAL'] = df['DATE_TIME'].diff().dt.total_seconds() / 60
df['TIMEINTERVAL'] = df['TIMEINTERVAL'].fillna(0)
def to_day_period(s):
    """Classify time-of-day values as 'Daytime' or 'Nighttime'.

    `s` is converted with `pd.to_timedelta` and binned on left-closed
    intervals: [06:00, 18:00) is labelled 'Daytime'; [00:00, 06:00) and
    [18:00, 24:00) are labelled 'Nighttime'. The result of `pd.cut` is
    returned with unordered categories.
    """
    edges = ['0', '06:00:00', '13:00:00', '18:00:00', '23:00:00', '24:00:00']
    # One label per interval between consecutive edges; labels repeat, which
    # is why the categories must be unordered.
    period_names = ['Nighttime', 'Daytime', 'Daytime', 'Nighttime', 'Nighttime']

    offsets = pd.to_timedelta(s)
    cut_points = [pd.Timedelta(edge) for edge in edges]
    return pd.cut(
        offsets,
        bins=cut_points,
        labels=period_names,
        right=False,
        ordered=False,
    )
# Tag every row with its day period.
df['TIME_OF_DAY'] = to_day_period(df['TIME'])
df_monthly = df

# ------------------------------ Interactive plot ------------------------------
# Only the rows belonging to ID 1 are plotted.
df_id = df[df.ID == 1]
list_scode = list(set(df_id['ID']))  # ID = STATION CODE ?
# Overrides the line above; only its length (7) is used, to size the palette.
list_scode = list(range(0, 7))
lst_VISIT = list(df_id.INSPECTION)  # district = visit ?
# print(lst_VISIT)
list_district = list(set(lst_VISIT))

# Extract the colour palette; the palette can be changed.
pal = list(sns.color_palette(palette='viridis', n_colors=len(list_scode)).as_hex())
df_monthly = df_id  # df_id_29 # df_id_29_visit_1

# One line per inspection, plotted against the raw TIME strings.
fig = go.Figure()
for visit, colour in zip(list_district, pal):
    visit_rows = df_monthly[df_monthly['INSPECTION'] == visit]
    fig.add_trace(
        go.Scatter(
            x=visit_rows['TIME'],
            y=visit_rows['VALUE1'],
            name=visit,
            line_color=colour,
            fill=None,  # tozeroy
        )
    )
# fig.update_xaxes(type='multicategory')
# fig.update_xaxes(nticks=5)
fig.show()
I tried limiting the number of ticks on the x-axis, but it did not work. I also tried changing the x-axis type (`fig.update_xaxes(type='multicategory')`), but this did not work either. How can I fix this issue? My expectation is to see all 6 inspections shown together cleanly, with the TIME column on the x-axis and the VALUE1 column on the y-axis.
This may be because Plotly automatically interpreted the time-formatted strings in the TIME column as a time series. As a workaround, add a new column that encodes the TIME column as categorical integer codes, sort each inspection by that code, and draw the chart against the new column, using the original time strings as the tick labels. Other approaches may also work.
# Work on a copy so that adding a column does not touch the frame this one
# was filtered from.
df_monthly = df_monthly.copy()

# Encode every distinct time-of-day string as an integer position. The
# categories of a string column are sorted, so code k corresponds to the k-th
# time in chronological order for zero-padded 'HH:MM:SS' strings.
time_categories = df_monthly['TIME'].astype('category')
df_monthly['TIME_code'] = time_categories.cat.codes
time_labels = list(time_categories.cat.categories)

fig = go.Figure()
for d, p in zip(list_district, pal):
    # Sort each inspection by time code so its line runs left to right.
    dff = df_monthly.query('INSPECTION == @d').sort_values('TIME_code', ascending=True)
    fig.add_trace(
        go.Scatter(
            x=dff['TIME_code'],
            y=dff['VALUE1'],
            name=d,
            line_color=p,
            fill=None,  # tozeroy
        )
    )

# One tick per distinct time, labelled with the original string. The tick
# positions are derived from the data: a fixed np.arange(0, 71) only covers
# codes 0-70, while a full day at 20-minute steps has 72 slots (codes 0-71),
# so positions and labels could end up with different lengths.
fig.update_xaxes(tickvals=np.arange(len(time_labels)), ticktext=time_labels)
fig.show()