How do I read an image from a path with Unicode characters?
Question:
I have the following code and it fails, because it cannot read the file from disk. The image is always None
.
# -*- coding: utf-8 -*-
import cv2
import numpy
bgrImage = cv2.imread(u'D:\ö\handschuh.jpg')
Note: my file is already saved as UTF-8 with BOM. I verified with Notepad++.
In Process Monitor, I see that Python is acccessing the file from a wrong path:
I have read about:
- Open file with unicode filename, which is about the
open()
function and not related to OpenCV.
- How do I read an image file using Python, but that’s unrelated to Unicode issues.
Answers:
It can be done by
- opening the file using
open()
, which supports Unicode as in the linked answer,
- read the contents as a byte array,
- convert the byte array to a NumPy array,
- decode the image
# -*- coding: utf-8 -*-
import cv2
import numpy
stream = open(u'D:\ö\handschuh.jpg', "rb")
bytes = bytearray(stream.read())
numpyarray = numpy.asarray(bytes, dtype=numpy.uint8)
bgrImage = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED)
bgrImage = cv2.imread(filename.encode('utf-8'))
encode file full path to utf-8
Inspired by Thomas Weller’s answer, you can also use np.fromfile()
to read the image and convert it to ndarray and then use cv2.imdecode()
to decode the array into a three-dimensional numpy ndarray (suppose this is a color image without alpha channel):
import numpy as np
# img is in BGR format if the underlying image is a color image
img = cv2.imdecode(np.fromfile('测试目录/test.jpg', dtype=np.uint8), cv2.IMREAD_UNCHANGED)
np.fromfile()
will convert the image on disk to numpy 1-dimensional ndarray representation. cv2.imdecode
can decode this format and convert to the normal 3-dimensional image representation. cv2.IMREAD_UNCHANGED
is a flag for decoding. Complete list of flags can be found here.
PS. For how to write image to a path with unicode characters, see here.
My problem is similar to you, however, my program will terminate at the
image = cv2.imread(filename)
statement.
I solved this problem by first encode the file name into utf-8 and then decode it as
image = cv2.imread(filename.encode('utf-8', 'surrogateescape').decode('utf-8', 'surrogateescape'))
I copied them to a temporary directory. It works fine for me.
import os
import shutil
import tempfile
import cv2
def cv_read(path, *args):
"""
Read from a path with Unicode characters.
:param path: path of a single image or a directory which contains images
:param args: other args passed to cv2.imread
:return: a single image or a list of images
"""
with tempfile.TemporaryDirectory() as tmp_dir:
if os.path.isdir(path):
shutil.copytree(path, tmp_dir, dirs_exist_ok=True)
elif os.path.isfile(path):
shutil.copy(path, tmp_dir)
else:
raise FileNotFoundError
img_arr = [
cv2.imread(os.path.join(tmp_dir, img), *args)
for img in os.listdir(tmp_dir)
]
return img_arr if os.path.isdir(path) else img_arr[0]
It can be done by
- Save current directory
- change current directory to the one the image must be saved
- save the image
- change current directory to the one saved in step 1
import os
from pathlib import Path
import cv2
im_path = Path(u'D:\ö\handschuh.jpg')
# Save current directory
curr_dir = os.getcwd()
# change current directory to the one the image must be saved
os.chdir(im_path.parent)
# read the image
bgrImage = cv2.imread(im_path.name)
# change current directory to the one saved in step 1
os.chdir(curr_dir)
I have the following code and it fails, because it cannot read the file from disk. The image is always None
.
# -*- coding: utf-8 -*-
import cv2
import numpy
bgrImage = cv2.imread(u'D:\ö\handschuh.jpg')
Note: my file is already saved as UTF-8 with BOM. I verified with Notepad++.
In Process Monitor, I see that Python is acccessing the file from a wrong path:
I have read about:
- Open file with unicode filename, which is about the
open()
function and not related to OpenCV. - How do I read an image file using Python, but that’s unrelated to Unicode issues.
It can be done by
- opening the file using
open()
, which supports Unicode as in the linked answer, - read the contents as a byte array,
- convert the byte array to a NumPy array,
- decode the image
# -*- coding: utf-8 -*-
import cv2
import numpy
stream = open(u'D:\ö\handschuh.jpg', "rb")
bytes = bytearray(stream.read())
numpyarray = numpy.asarray(bytes, dtype=numpy.uint8)
bgrImage = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED)
bgrImage = cv2.imread(filename.encode('utf-8'))
encode file full path to utf-8
Inspired by Thomas Weller’s answer, you can also use np.fromfile()
to read the image and convert it to ndarray and then use cv2.imdecode()
to decode the array into a three-dimensional numpy ndarray (suppose this is a color image without alpha channel):
import numpy as np
# img is in BGR format if the underlying image is a color image
img = cv2.imdecode(np.fromfile('测试目录/test.jpg', dtype=np.uint8), cv2.IMREAD_UNCHANGED)
np.fromfile()
will convert the image on disk to numpy 1-dimensional ndarray representation. cv2.imdecode
can decode this format and convert to the normal 3-dimensional image representation. cv2.IMREAD_UNCHANGED
is a flag for decoding. Complete list of flags can be found here.
PS. For how to write image to a path with unicode characters, see here.
My problem is similar to you, however, my program will terminate at the
image = cv2.imread(filename)
statement.
I solved this problem by first encode the file name into utf-8 and then decode it as
image = cv2.imread(filename.encode('utf-8', 'surrogateescape').decode('utf-8', 'surrogateescape'))
I copied them to a temporary directory. It works fine for me.
import os
import shutil
import tempfile
import cv2
def cv_read(path, *args):
"""
Read from a path with Unicode characters.
:param path: path of a single image or a directory which contains images
:param args: other args passed to cv2.imread
:return: a single image or a list of images
"""
with tempfile.TemporaryDirectory() as tmp_dir:
if os.path.isdir(path):
shutil.copytree(path, tmp_dir, dirs_exist_ok=True)
elif os.path.isfile(path):
shutil.copy(path, tmp_dir)
else:
raise FileNotFoundError
img_arr = [
cv2.imread(os.path.join(tmp_dir, img), *args)
for img in os.listdir(tmp_dir)
]
return img_arr if os.path.isdir(path) else img_arr[0]
It can be done by
- Save current directory
- change current directory to the one the image must be saved
- save the image
- change current directory to the one saved in step 1
import os
from pathlib import Path
import cv2
im_path = Path(u'D:\ö\handschuh.jpg')
# Save current directory
curr_dir = os.getcwd()
# change current directory to the one the image must be saved
os.chdir(im_path.parent)
# read the image
bgrImage = cv2.imread(im_path.name)
# change current directory to the one saved in step 1
os.chdir(curr_dir)