Python gzip: is there a way to decompress from a string?
Question:
I’ve read this SO post around the problem to no avail.
I am trying to decompress a .gz file coming from a URL.
url_file_handle=StringIO( gz_data )
gzip_file_handle=gzip.open(url_file_handle,"r")
decompressed_data = gzip_file_handle.read()
gzip_file_handle.close()
… but I get TypeError: coercing to Unicode: need string or buffer, cStringIO.StringI found
What’s going on?
Traceback (most recent call last):
File "/opt/google/google_appengine-1.2.5/google/appengine/tools/dev_appserver.py", line 2974, in _HandleRequest
base_env_dict=env_dict)
File "/opt/google/google_appengine-1.2.5/google/appengine/tools/dev_appserver.py", line 411, in Dispatch
base_env_dict=base_env_dict)
File "/opt/google/google_appengine-1.2.5/google/appengine/tools/dev_appserver.py", line 2243, in Dispatch
self._module_dict)
File "/opt/google/google_appengine-1.2.5/google/appengine/tools/dev_appserver.py", line 2161, in ExecuteCGI
reset_modules = exec_script(handler_path, cgi_path, hook)
File "/opt/google/google_appengine-1.2.5/google/appengine/tools/dev_appserver.py", line 2057, in ExecuteOrImportScript
exec module_code in script_module.__dict__
File "/home/jldupont/workspace/jldupont/trunk/site/app/server/tasks/debian/repo_fetcher.py", line 36, in <module>
main()
File "/home/jldupont/workspace/jldupont/trunk/site/app/server/tasks/debian/repo_fetcher.py", line 30, in main
gziph=gzip.open(fh,'r')
File "/usr/lib/python2.5/gzip.py", line 49, in open
return GzipFile(filename, mode, compresslevel)
File "/usr/lib/python2.5/gzip.py", line 95, in __init__
fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
TypeError: coercing to Unicode: need string or buffer, cStringIO.StringI found
Answers:
gzip.open
is a shorthand for opening a file by name; what you want is gzip.GzipFile,
to which you can pass a fileobj:
open(filename, mode='rb', compresslevel=9)
#Shorthand for GzipFile(filename, mode, compresslevel).
vs
class GzipFile
__init__(self, filename=None, mode=None, compresslevel=9, fileobj=None)
# At least one of fileobj and filename must be given a non-trivial value.
so this should work for you
gzip_file_handle = gzip.GzipFile(fileobj=url_file_handle)
If your data is already in a string, try zlib, which claims to be fully gzip compatible (passing 16+zlib.MAX_WBITS as the wbits argument tells it to expect a gzip header and trailer):
import zlib
decompressed_data = zlib.decompress(gz_data, 16+zlib.MAX_WBITS)
Read more: http://docs.python.org/library/zlib.html
Consider using gzip.GzipFile
if you don’t like passing obscure arguments to zlib.decompress.
When you deal with a urllib2.urlopen
response that can be either gzip-compressed or uncompressed:
import gzip
from StringIO import StringIO
# response = urllib2.urlopen(...
content_raw = response.read()
if 'gzip' in response.info().getheader('Content-Encoding'):
content = gzip.GzipFile(fileobj=StringIO(content_raw)).read()
When you deal with a file that can store either gzip-compressed or uncompressed data:
import gzip
# some_file = open(...
try:
content = gzip.GzipFile(fileobj=some_file).read()
except IOError:
some_file.seek(0)
content = some_file.read()
The examples above are for Python 2.7.
You can use gzip.decompress
from the gzip module in Python's standard library (available in Python 3.2+).
Example on how to decompress bytes:
import gzip
gzip.decompress(gzip_data)
Documentation
https://docs.python.org/3.5/library/gzip.html#gzip.decompress
I’ve read this SO post around the problem to no avail.
I am trying to decompress a .gz file coming from a URL.
url_file_handle=StringIO( gz_data )
gzip_file_handle=gzip.open(url_file_handle,"r")
decompressed_data = gzip_file_handle.read()
gzip_file_handle.close()
… but I get TypeError: coercing to Unicode: need string or buffer, cStringIO.StringI found
What’s going on?
Traceback (most recent call last):
File "/opt/google/google_appengine-1.2.5/google/appengine/tools/dev_appserver.py", line 2974, in _HandleRequest
base_env_dict=env_dict)
File "/opt/google/google_appengine-1.2.5/google/appengine/tools/dev_appserver.py", line 411, in Dispatch
base_env_dict=base_env_dict)
File "/opt/google/google_appengine-1.2.5/google/appengine/tools/dev_appserver.py", line 2243, in Dispatch
self._module_dict)
File "/opt/google/google_appengine-1.2.5/google/appengine/tools/dev_appserver.py", line 2161, in ExecuteCGI
reset_modules = exec_script(handler_path, cgi_path, hook)
File "/opt/google/google_appengine-1.2.5/google/appengine/tools/dev_appserver.py", line 2057, in ExecuteOrImportScript
exec module_code in script_module.__dict__
File "/home/jldupont/workspace/jldupont/trunk/site/app/server/tasks/debian/repo_fetcher.py", line 36, in <module>
main()
File "/home/jldupont/workspace/jldupont/trunk/site/app/server/tasks/debian/repo_fetcher.py", line 30, in main
gziph=gzip.open(fh,'r')
File "/usr/lib/python2.5/gzip.py", line 49, in open
return GzipFile(filename, mode, compresslevel)
File "/usr/lib/python2.5/gzip.py", line 95, in __init__
fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
TypeError: coercing to Unicode: need string or buffer, cStringIO.StringI found
gzip.open
is a shorthand for opening a file by name; what you want is gzip.GzipFile,
to which you can pass a fileobj:
open(filename, mode='rb', compresslevel=9)
#Shorthand for GzipFile(filename, mode, compresslevel).
vs
class GzipFile
__init__(self, filename=None, mode=None, compresslevel=9, fileobj=None)
# At least one of fileobj and filename must be given a non-trivial value.
so this should work for you
gzip_file_handle = gzip.GzipFile(fileobj=url_file_handle)
If your data is already in a string, try zlib, which claims to be fully gzip compatible (passing 16+zlib.MAX_WBITS as the wbits argument tells it to expect a gzip header and trailer):
import zlib
decompressed_data = zlib.decompress(gz_data, 16+zlib.MAX_WBITS)
Read more: http://docs.python.org/library/zlib.html
Consider using gzip.GzipFile
if you don’t like passing obscure arguments to zlib.decompress.
When you deal with a urllib2.urlopen
response that can be either gzip-compressed or uncompressed:
import gzip
from StringIO import StringIO
# response = urllib2.urlopen(...
content_raw = response.read()
if 'gzip' in response.info().getheader('Content-Encoding'):
content = gzip.GzipFile(fileobj=StringIO(content_raw)).read()
When you deal with a file that can store either gzip-compressed or uncompressed data:
import gzip
# some_file = open(...
try:
content = gzip.GzipFile(fileobj=some_file).read()
except IOError:
some_file.seek(0)
content = some_file.read()
The examples above are for Python 2.7.
You can use gzip.decompress
from the gzip module in Python's standard library (available in Python 3.2+).
Example on how to decompress bytes:
import gzip
gzip.decompress(gzip_data)
Documentation
https://docs.python.org/3.5/library/gzip.html#gzip.decompress