How to make binary mask of large .tif file fast using .shp file with polygons (GPS coordinates)?

Question:

I have a large .tif file (~24 Gb, more, than RAM could store)
(Similar question: https://rasterio.readthedocs.io/en/latest/topics/masking-by-shapefile.html)
(Method from library: https://rasterio.readthedocs.io/en/latest/topics/masking-by-shapefile.html)

But I’m searching another faster solution. Maybe I should create numpy array with zeros and add polygon areas into that array (it shouldn’t be better, than with rasterio)

Here is my code to process polygons with longitude and latitude.

import rasterio
import geopandas as gpd
from tqdm import tqdm
import os

def generate_mask(raster_path, shape_path, output_path):
    gdf = gpd.read_file(shape_path)
    with rasterio.open(raster_path) as src:
        for num in tqdm(sorted(gdf['class'].unique())):
            if os.path.isfile(f'{path}/masks/{num}.tif'):
                continue
            shapes = gdf.loc[(gdf['class'] == num)]['geometry']
            file_name = f'{num}.tif'

            for shp in shapes:
                bound = rasterio.features.bounds(shp)
                bbox = rasterio.windows.from_bounds(*bound, src.transform)
                window_transform = rasterio.windows.transform(window=bbox, transform=src.transform)

                img_crop = src.read([1,2,3], window=bbox)
                print(f'Image read')
                mask = rasterio.features.geometry_mask([shp],out_shape=(img_crop.shape[1], img_crop.shape[2]), 
                                       transform=window_transform, invert=True)

                out_image, out_transform = rasterio.mask.mask(src, shp, crop=True, filled=True)
                out_meta = src.meta
                print(f'Mask {fn} is done')

                with rasterio.open(f'{output_path}{file_name}', "w") as dest:
                    dest.write(out_image)
                print(f'Mask is written')`

The problem here, that it could process one polygon at a time, moreover if I process all shapes at a time it time more than 3 hours to make mask. (It’s too long, server usually kill process.)

Asked By: Nikita Artemenko

||

Answers:

I solved problem using osgeo.gdal.Rasterize()
(see documentation: https://gdal.org/api/python/osgeo.gdal.html)
It handles large files way better than analogs (~3-5 min per one mask)

Here is my code:

from osgeo import gdal
import os

def gdal_generate_mask(raster_path, shape_path, output_path):
# open the raster layer and get its relevant properties
raster_ds = gdal.Open(raster_path, gdal.GA_ReadOnly)
xSize = raster_ds.RasterXSize
ySize = raster_ds.RasterYSize
geotransform = raster_ds.GetGeoTransform()
projection = raster_ds.GetProjection()

# create the target layer (1 band binary)
gdf = gpd.read_file(shape_path)
for num in tqdm(sorted(gdf['class'].unique())):
    driver = gdal.GetDriverByName('GTiff')
    target_ds = driver.Create(f'{output_path}{num}.tif', xSize, ySize, bands=1, eType = gdal.GDT_Byte, options = ["COMPRESS=DEFLATE"])
    target_ds.SetGeoTransform(geotransform)
    target_ds.SetProjection(projection)

    # create temp file for vector layer gdal Rasterize
    vector_layer = f'/home/username/GeoProj/tmp_shp/{num}.shp'
    if not os.path.isfile(vector_layer):
        gdf[gdf['class'] == num].to_file(vector_layer)

    # rasterize the vector layer into the target one
    ds = gdal.Rasterize(target_ds, vector_layer, burnValues=[1])

P.S. I should seek an answer here: https://gis.stackexchange.com/

Answered By: Nikita Artemenko
Categories: questions Tags: , , , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.