Month column in dataframe, plus number of months, to compare with current calendar month
Question:
A column in my dataframe holds year-month values. I want to add a number of months to it to get a ‘future’ month, and then compare this ‘future’ month with the current (calendar) month.
import pandas as pd
from io import StringIO
import numpy as np
from datetime import datetime
# Reproduction script: add a per-name number of months to the first row's
# month and compare the result with the current calendar month.
csvfile = StringIO(
"""Name Year - Month Score
Mike 2022-11 31
Mike 2022-09 136
""")
# NOTE(review): sep = 't' splits on the letter "t"; a tab separator ('\t')
# was presumably intended -- confirm against the original post.
df = pd.read_csv(csvfile, sep = 't', engine='python')
# Number of months to add, keyed by name.
d_name_plus_month = {"Mike":2}
# df.iloc[[0]] keeps the first row as a one-element selection, so .values
# gives a length-1 array (cast here to month precision), not a scalar.
month_of_first_row = pd.to_datetime(df.iloc[[0]]['Year - Month']).values.astype("datetime64[M]")
plus_months = d_name_plus_month['Mike']
# Adding an int to a datetime64[M] array shifts it by that many months.
scheduled_month = month_of_first_row + int(plus_months)
# scheduled_month_in_string = scheduled_month.astype('str')
current_month = datetime.now().strftime("%Y") +'-' +datetime.now().strftime("%m") # it's string
# This wraps the string in a NumPy string array, not a datetime64 array ...
current_month = np.array(current_month)
# ... so this comparison mixes a datetime64 array with a string array and raises TypeError.
print (scheduled_month <= current_month)
# month_of_first_row: 2022-11
# scheduled_month: 2023-01
# current_month: 2023-02
# so "scheduled_month" is earlier than "current_month".
But it has error:
TypeError: '<=' not supported between instances of 'numpy.ndarray' and 'numpy.ndarray'
I’ve tried to alter the lines to convert both values into strings for the comparison, but without success.
How can I correct the lines?
Answers:
I suggest using month periods (see Series.dt.to_period),
which make it easy to add or subtract months with plain integers:
d_name_plus_month = {"Mike":2}
month_of_first_row = pd.to_datetime(df['Year - Month'].iat[0]).to_period('m')
print (month_of_first_row)
2022-11
plus_months = d_name_plus_month['Mike']
scheduled_month = month_of_first_row + int(plus_months)
current_month = pd.Timestamp.now().to_period('m')
print (current_month)
2023-02
print (scheduled_month <= current_month)
True
You can convert both scheduled_month
and current_month
to strings before comparison
# Render both sides as 'YYYY-MM' text; zero-padded year-month strings sort
# in chronological order, so a plain string comparison is enough.
month_precision = scheduled_month.astype('datetime64[M]')
scheduled_month_str = month_precision.astype(str)
current_month_str = f"{datetime.now():%Y-%m}"
print(scheduled_month_str <= current_month_str)
month_of_first_row is an array with one value; you can append [0] to extract that value as a single datetime object:
# The trailing [0] pulls the single element out of the length-1 array, giving a scalar.
month_of_first_row = pd.to_datetime(df.iloc[[0]]['Year - Month']).values.astype("datetime64[M]")[0]
Then convert the current month to a datetime object as well, instead of a numpy array:
# no (wraps the string in an array): current_month = np.array(current_month)
current_month = pd.to_datetime(current_month) # yes: parse the 'YYYY-MM' string into a datetime value
seems like right now
current month is np.array('2023-02', dtype='<U7')
and scheduled month is np.array(['2023-01'], dtype='datetime64[M]')
using this row will bring them to the same type:
# Build a length-1 datetime64[M] array from the 'YYYY-MM' string so its dtype matches scheduled_month.
current_month =np.array([current_month], dtype='datetime64[M]')
which should solve the error.
full code:
import pandas as pd
from io import StringIO
import numpy as np
from datetime import datetime
# Inline CSV standing in for the real data file.
csvfile = StringIO(
"""Name,Year - Month,Score
Mike,2022-11,31
Mike,2022-09,136
""")
df = pd.read_csv(csvfile, sep=',')

# Number of months to add, keyed by name.
d_name_plus_month = {"Mike": 2}

# df.iloc[[0]] keeps the first row as a one-element selection, so this is a
# length-1 datetime64[M] array rather than a scalar.
month_of_first_row = pd.to_datetime(df.iloc[[0]]['Year - Month']).values.astype("datetime64[M]")
plus_months = d_name_plus_month['Mike']
# Adding an int to a datetime64[M] array shifts it by that many months.
scheduled_month = month_of_first_row + int(plus_months)

# Read the clock once so year and month come from the same instant, and give
# the current month the same dtype as scheduled_month so the two compare.
current_month = np.array([datetime.now().strftime("%Y-%m")], dtype='datetime64[M]')
print(scheduled_month <= current_month)
A column in my dataframe holds year-month values. I want to add a number of months to it to get a ‘future’ month, and then compare this ‘future’ month with the current (calendar) month.
import pandas as pd
from io import StringIO
import numpy as np
from datetime import datetime
# Reproduction script: add a per-name number of months to the first row's
# month and compare the result with the current calendar month.
csvfile = StringIO(
"""Name Year - Month Score
Mike 2022-11 31
Mike 2022-09 136
""")
# NOTE(review): sep = 't' splits on the letter "t"; a tab separator ('\t')
# was presumably intended -- confirm against the original post.
df = pd.read_csv(csvfile, sep = 't', engine='python')
# Number of months to add, keyed by name.
d_name_plus_month = {"Mike":2}
# df.iloc[[0]] keeps the first row as a one-element selection, so .values
# gives a length-1 array (cast here to month precision), not a scalar.
month_of_first_row = pd.to_datetime(df.iloc[[0]]['Year - Month']).values.astype("datetime64[M]")
plus_months = d_name_plus_month['Mike']
# Adding an int to a datetime64[M] array shifts it by that many months.
scheduled_month = month_of_first_row + int(plus_months)
# scheduled_month_in_string = scheduled_month.astype('str')
current_month = datetime.now().strftime("%Y") +'-' +datetime.now().strftime("%m") # it's string
# This wraps the string in a NumPy string array, not a datetime64 array ...
current_month = np.array(current_month)
# ... so this comparison mixes a datetime64 array with a string array and raises TypeError.
print (scheduled_month <= current_month)
# month_of_first_row: 2022-11
# scheduled_month: 2023-01
# current_month: 2023-02
# so "scheduled_month" is earlier than "current_month".
But it has error:
TypeError: '<=' not supported between instances of 'numpy.ndarray' and 'numpy.ndarray'
I’ve tried to alter the lines to convert both values into strings for the comparison, but without success.
How can I correct the lines?
I suggest using month periods (see Series.dt.to_period),
which make it easy to add or subtract months with plain integers:
d_name_plus_month = {"Mike":2}
month_of_first_row = pd.to_datetime(df['Year - Month'].iat[0]).to_period('m')
print (month_of_first_row)
2022-11
plus_months = d_name_plus_month['Mike']
scheduled_month = month_of_first_row + int(plus_months)
current_month = pd.Timestamp.now().to_period('m')
print (current_month)
2023-02
print (scheduled_month <= current_month)
True
You can convert both scheduled_month
and current_month
to strings before comparison
# Render both sides as 'YYYY-MM' text; zero-padded year-month strings sort
# in chronological order, so a plain string comparison is enough.
month_precision = scheduled_month.astype('datetime64[M]')
scheduled_month_str = month_precision.astype(str)
current_month_str = f"{datetime.now():%Y-%m}"
print(scheduled_month_str <= current_month_str)
month_of_first_row is an array with one value; you can append [0] to extract that value as a single datetime object:
# The trailing [0] pulls the single element out of the length-1 array, giving a scalar.
month_of_first_row = pd.to_datetime(df.iloc[[0]]['Year - Month']).values.astype("datetime64[M]")[0]
Then convert the current month to a datetime object as well, instead of a numpy array:
# no (wraps the string in an array): current_month = np.array(current_month)
current_month = pd.to_datetime(current_month) # yes: parse the 'YYYY-MM' string into a datetime value
seems like right now
current month is np.array('2023-02', dtype='<U7')
and scheduled month is np.array(['2023-01'], dtype='datetime64[M]')
using this row will bring them to the same type:
# Build a length-1 datetime64[M] array from the 'YYYY-MM' string so its dtype matches scheduled_month.
current_month =np.array([current_month], dtype='datetime64[M]')
which should solve the error.
full code:
import pandas as pd
from io import StringIO
import numpy as np
from datetime import datetime
# Inline CSV standing in for the real data file.
csvfile = StringIO(
"""Name,Year - Month,Score
Mike,2022-11,31
Mike,2022-09,136
""")
df = pd.read_csv(csvfile, sep=',')

# Number of months to add, keyed by name.
d_name_plus_month = {"Mike": 2}

# df.iloc[[0]] keeps the first row as a one-element selection, so this is a
# length-1 datetime64[M] array rather than a scalar.
month_of_first_row = pd.to_datetime(df.iloc[[0]]['Year - Month']).values.astype("datetime64[M]")
plus_months = d_name_plus_month['Mike']
# Adding an int to a datetime64[M] array shifts it by that many months.
scheduled_month = month_of_first_row + int(plus_months)

# Read the clock once so year and month come from the same instant, and give
# the current month the same dtype as scheduled_month so the two compare.
current_month = np.array([datetime.now().strftime("%Y-%m")], dtype='datetime64[M]')
print(scheduled_month <= current_month)