How to parse dates with -0400 timezone string in Python?
Question:
I have a date string of the form ‘2009/05/13 19:19:30 -0400’. It seems that previous versions of Python may have supported a %z format tag in strptime for the trailing timezone specification, but 2.6.x seems to have removed that.
What’s the right way to parse this string into a datetime object?
Answers:
You can use the parse function from dateutil:
>>> from dateutil.parser import parse
>>> d = parse('2009/05/13 19:19:30 -0400')
>>> d
datetime.datetime(2009, 5, 13, 19, 19, 30, tzinfo=tzoffset(None, -14400))
This way you obtain a datetime object you can then use.
As answered, dateutil2.0 is written for Python 3.0 and does not work with Python 2.x. For Python 2.x dateutil1.5 needs to be used.
If you are on Linux, then you can use the external date
command to dwim:
import commands, datetime
def parsedate(text):
output=commands.getoutput('date -d "%s" +%%s' % text )
try:
stamp=eval(output)
except:
print output
raise
return datetime.datetime.frometimestamp(stamp)
This is of course less portable than dateutil, but slightly more flexible, because date
will also accept inputs like “yesterday” or “last year” 🙂
The problem with using dateutil is that you can’t have the same format string for both serialization and deserialization, as dateutil has limited formatting options (only dayfirst
and yearfirst
).
In my application, I store the format string in .INI file, and each deployment can have its own format. Thus, I really don’t like the dateutil approach.
Here’s an alternative method that uses pytz instead:
from datetime import datetime, timedelta
from pytz import timezone, utc
from pytz.tzinfo import StaticTzInfo
class OffsetTime(StaticTzInfo):
def __init__(self, offset):
"""A dumb timezone based on offset such as +0530, -0600, etc.
"""
hours = int(offset[:3])
minutes = int(offset[0] + offset[3:])
self._utcoffset = timedelta(hours=hours, minutes=minutes)
def load_datetime(value, format):
if format.endswith('%z'):
format = format[:-2]
offset = value[-5:]
value = value[:-5]
return OffsetTime(offset).localize(datetime.strptime(value, format))
return datetime.strptime(value, format)
def dump_datetime(value, format):
return value.strftime(format)
value = '2009/05/13 19:19:30 -0400'
format = '%Y/%m/%d %H:%M:%S %z'
assert dump_datetime(load_datetime(value, format), format) == value
assert datetime(2009, 5, 13, 23, 19, 30, tzinfo=utc)
.astimezone(timezone('US/Eastern')) == load_datetime(value, format)
%z
is supported in Python 3.2+:
>>> from datetime import datetime
>>> datetime.strptime('2009/05/13 19:19:30 -0400', '%Y/%m/%d %H:%M:%S %z')
datetime.datetime(2009, 5, 13, 19, 19, 30,
tzinfo=datetime.timezone(datetime.timedelta(-1, 72000)))
On earlier versions:
from datetime import datetime
date_str = '2009/05/13 19:19:30 -0400'
naive_date_str, _, offset_str = date_str.rpartition(' ')
naive_dt = datetime.strptime(naive_date_str, '%Y/%m/%d %H:%M:%S')
offset = int(offset_str[-4:-2])*60 + int(offset_str[-2:])
if offset_str[0] == "-":
offset = -offset
dt = naive_dt.replace(tzinfo=FixedOffset(offset))
print(repr(dt))
# -> datetime.datetime(2009, 5, 13, 19, 19, 30, tzinfo=FixedOffset(-240))
print(dt)
# -> 2009-05-13 19:19:30-04:00
where FixedOffset
is a class based on the code example from the docs:
from datetime import timedelta, tzinfo
class FixedOffset(tzinfo):
"""Fixed offset in minutes: `time = utc_time + utc_offset`."""
def __init__(self, offset):
self.__offset = timedelta(minutes=offset)
hours, minutes = divmod(offset, 60)
#NOTE: the last part is to remind about deprecated POSIX GMT+h timezones
# that have the opposite sign in the name;
# the corresponding numeric value is not used e.g., no minutes
self.__name = '<%+03d%02d>%+d' % (hours, minutes, -hours)
def utcoffset(self, dt=None):
return self.__offset
def tzname(self, dt=None):
return self.__name
def dst(self, dt=None):
return timedelta(0)
def __repr__(self):
return 'FixedOffset(%d)' % (self.utcoffset().total_seconds() / 60)
Here is a fix of the "%z"
issue for Python 2.7 and earlier
Instead of using:
datetime.strptime(t,'%Y-%m-%dT%H:%M %z')
Use the timedelta
to account for the timezone, like this:
from datetime import datetime,timedelta
def dt_parse(t):
ret = datetime.strptime(t[0:16],'%Y-%m-%dT%H:%M')
if t[18]=='+':
ret-=timedelta(hours=int(t[19:22]),minutes=int(t[23:]))
elif t[18]=='-':
ret+=timedelta(hours=int(t[19:22]),minutes=int(t[23:]))
return ret
Note that the dates would be converted to GMT
, which would allow doing date arithmetic without worrying about time zones.
One liner for old Pythons out there. You can multiply a timedelta by 1/-1 depending on +/- sign, as in:
datetime.strptime(s[:19], '%Y-%m-%dT%H:%M:%S') + timedelta(hours=int(s[20:22]), minutes=int(s[23:])) * (-1 if s[19] == '+' else 1)
I have a date string of the form ‘2009/05/13 19:19:30 -0400’. It seems that previous versions of Python may have supported a %z format tag in strptime for the trailing timezone specification, but 2.6.x seems to have removed that.
What’s the right way to parse this string into a datetime object?
You can use the parse function from dateutil:
>>> from dateutil.parser import parse
>>> d = parse('2009/05/13 19:19:30 -0400')
>>> d
datetime.datetime(2009, 5, 13, 19, 19, 30, tzinfo=tzoffset(None, -14400))
This way you obtain a datetime object you can then use.
As answered, dateutil2.0 is written for Python 3.0 and does not work with Python 2.x. For Python 2.x dateutil1.5 needs to be used.
If you are on Linux, then you can use the external date
command to dwim:
import commands, datetime
def parsedate(text):
output=commands.getoutput('date -d "%s" +%%s' % text )
try:
stamp=eval(output)
except:
print output
raise
return datetime.datetime.frometimestamp(stamp)
This is of course less portable than dateutil, but slightly more flexible, because date
will also accept inputs like “yesterday” or “last year” 🙂
The problem with using dateutil is that you can’t have the same format string for both serialization and deserialization, as dateutil has limited formatting options (only dayfirst
and yearfirst
).
In my application, I store the format string in .INI file, and each deployment can have its own format. Thus, I really don’t like the dateutil approach.
Here’s an alternative method that uses pytz instead:
from datetime import datetime, timedelta
from pytz import timezone, utc
from pytz.tzinfo import StaticTzInfo
class OffsetTime(StaticTzInfo):
def __init__(self, offset):
"""A dumb timezone based on offset such as +0530, -0600, etc.
"""
hours = int(offset[:3])
minutes = int(offset[0] + offset[3:])
self._utcoffset = timedelta(hours=hours, minutes=minutes)
def load_datetime(value, format):
if format.endswith('%z'):
format = format[:-2]
offset = value[-5:]
value = value[:-5]
return OffsetTime(offset).localize(datetime.strptime(value, format))
return datetime.strptime(value, format)
def dump_datetime(value, format):
return value.strftime(format)
value = '2009/05/13 19:19:30 -0400'
format = '%Y/%m/%d %H:%M:%S %z'
assert dump_datetime(load_datetime(value, format), format) == value
assert datetime(2009, 5, 13, 23, 19, 30, tzinfo=utc)
.astimezone(timezone('US/Eastern')) == load_datetime(value, format)
%z
is supported in Python 3.2+:
>>> from datetime import datetime
>>> datetime.strptime('2009/05/13 19:19:30 -0400', '%Y/%m/%d %H:%M:%S %z')
datetime.datetime(2009, 5, 13, 19, 19, 30,
tzinfo=datetime.timezone(datetime.timedelta(-1, 72000)))
On earlier versions:
from datetime import datetime
date_str = '2009/05/13 19:19:30 -0400'
naive_date_str, _, offset_str = date_str.rpartition(' ')
naive_dt = datetime.strptime(naive_date_str, '%Y/%m/%d %H:%M:%S')
offset = int(offset_str[-4:-2])*60 + int(offset_str[-2:])
if offset_str[0] == "-":
offset = -offset
dt = naive_dt.replace(tzinfo=FixedOffset(offset))
print(repr(dt))
# -> datetime.datetime(2009, 5, 13, 19, 19, 30, tzinfo=FixedOffset(-240))
print(dt)
# -> 2009-05-13 19:19:30-04:00
where FixedOffset
is a class based on the code example from the docs:
from datetime import timedelta, tzinfo
class FixedOffset(tzinfo):
"""Fixed offset in minutes: `time = utc_time + utc_offset`."""
def __init__(self, offset):
self.__offset = timedelta(minutes=offset)
hours, minutes = divmod(offset, 60)
#NOTE: the last part is to remind about deprecated POSIX GMT+h timezones
# that have the opposite sign in the name;
# the corresponding numeric value is not used e.g., no minutes
self.__name = '<%+03d%02d>%+d' % (hours, minutes, -hours)
def utcoffset(self, dt=None):
return self.__offset
def tzname(self, dt=None):
return self.__name
def dst(self, dt=None):
return timedelta(0)
def __repr__(self):
return 'FixedOffset(%d)' % (self.utcoffset().total_seconds() / 60)
Here is a fix of the "%z"
issue for Python 2.7 and earlier
Instead of using:
datetime.strptime(t,'%Y-%m-%dT%H:%M %z')
Use the timedelta
to account for the timezone, like this:
from datetime import datetime,timedelta
def dt_parse(t):
ret = datetime.strptime(t[0:16],'%Y-%m-%dT%H:%M')
if t[18]=='+':
ret-=timedelta(hours=int(t[19:22]),minutes=int(t[23:]))
elif t[18]=='-':
ret+=timedelta(hours=int(t[19:22]),minutes=int(t[23:]))
return ret
Note that the dates would be converted to GMT
, which would allow doing date arithmetic without worrying about time zones.
One liner for old Pythons out there. You can multiply a timedelta by 1/-1 depending on +/- sign, as in:
datetime.strptime(s[:19], '%Y-%m-%dT%H:%M:%S') + timedelta(hours=int(s[20:22]), minutes=int(s[23:])) * (-1 if s[19] == '+' else 1)