Unable to successfully patch functions of Azure ContainerClient

Question:

I have been trying to patch the list_blobs() function of ContainerClient, but have not been able to do so successfully. The code below outputs a MagicMock object – the function isn’t patched as I would expect it to be (I am trying to patch it so that it returns the list [‘Blob1’, ‘Blob2’]).

#################Script File
import sys
from datetime import datetime, timedelta
import pyspark
import pytz
import yaml

# from azure.storage.blob import BlobServiceClient, ContainerClient
from pyspark.dbutils import DBUtils as dbutils
import azure.storage.blob

def main():
    """Delete blobs older than the configured retention window.

    Reads the YAML config file named by ``sys.argv[1]``, fetches the storage
    account key from the secret scope/key named in that config, connects to
    the configured container and deletes every blob (snapshots included)
    whose ``creation_time`` is earlier than "now minus
    ``Days History To Keep`` days".
    """
    config_path = sys.argv[1]

    spark_context = pyspark.SparkContext.getOrCreate()
    spark_context.addFile(config_path)

    # Open Config. The context manager closes the file even when parsing fails.
    # NOTE(review): yaml.safe_load would be enough if the config only holds
    # plain mappings and scalars - confirm before switching loaders.
    with open(config_path, "r") as stream:
        config = yaml.load(stream, Loader=yaml.FullLoader)

    secrets_config = config["Secrets"]
    account_key = dbutils.secrets.get(
        scope=secrets_config["Scope"], key=secrets_config["Key Name"]
    )

    storage_config = config["Storage Configuration"]
    target_container = storage_config["Container"]
    target_account = storage_config["Account"]
    days_history_to_keep = storage_config["Days History To Keep"]

    connection_string = (
        "DefaultEndpointsProtocol=https;"
        f"AccountName={target_account};"
        f"AccountKey={account_key};"
        "EndpointSuffix=core.windows.net"
    )
    blob_service_client: azure.storage.blob.BlobServiceClient = (
        azure.storage.blob.BlobServiceClient.from_connection_string(connection_string)
    )
    container_client: azure.storage.blob.ContainerClient = (
        blob_service_client.get_container_client(target_container)
    )
    blobs = container_client.list_blobs()
    # Printed twice in the original post; kept so the output stays comparable.
    print(blobs)
    print(blobs)

    # Take the current instant directly in UTC. Localizing datetime.today()
    # would label local wall-clock time as UTC and shift the cutoff by the
    # machine's UTC offset.
    delete_before_date = datetime.now(tz=pytz.UTC) - timedelta(
        days=days_history_to_keep
    )

    for blob in blobs:
        if blob.creation_time < delete_before_date:
            print(f"Deleting Blob: {blob.name}")
            container_client.delete_blob(blob, delete_snapshots="include")


# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

#################Test File
import unittest
from unittest import mock
import DeleteOldBlobs


class DeleteBlobsTest(unittest.TestCase):
    """Unit test for DeleteOldBlobs.main() with its external dependencies patched."""

    def setUp(self):
        """No shared fixtures are needed."""
        pass

    # Decorators are applied bottom-up, so the mock arguments arrive in the
    # reverse of the order listed here (pyspark first, ContainerClient last).
    @mock.patch("DeleteOldBlobs.azure.storage.blob.ContainerClient")
    @mock.patch("DeleteOldBlobs.azure.storage.blob.BlobServiceClient")
    @mock.patch("DeleteOldBlobs.dbutils")
    @mock.patch("DeleteOldBlobs.sys")
    @mock.patch('DeleteOldBlobs.pyspark')
    def test_main(self, mock_pyspark, mock_sys, mock_dbutils, mock_blobserviceclient, mock_containerclient):
        """Run main() against patched sys, dbutils, pyspark and Azure clients."""

        # mock setup
        config_file = "Delete_Old_Blobs_UnitTest.yml"
        mock_sys.argv = ["unused_arg", config_file]
        mock_dbutils.secrets.get.return_value = "A Secret"
        # NOTE(review): this stubs list_blobs on the patched ContainerClient
        # class itself. main() never touches that class; it obtains its
        # container client from
        # BlobServiceClient.from_connection_string(...).get_container_client(...),
        # so this return value is not the one main() receives (compare the
        # MagicMock name in the posted output).
        mock_containerclient.list_blobs.return_value = ["ablob1", "ablob2"]

        # execute test
        DeleteOldBlobs.main()

        # TODO assert actions taken
        # mock_sys.argv.__get__.assert_called_with()
        # dbutils.secrets.get(scope=config['Secrets']['Scope'], key=config['Secrets']['Key Name'])


if __name__ == "__main__":
    # Run the test case when this file is executed directly.
    unittest.main()

Output:

<MagicMock name='BlobServiceClient.from_connection_string().get_container_client().list_blobs()' id='1143355577232'>

What am I doing incorrectly here?

Asked By: Alan Blyth

||

Answers:

I’m not able to execute your code at the moment, but I have tried to simulate it. To do this I created the following 3 files in the path /<path-to>/pkg/sub_pkg1 (where pkg and sub_pkg1 are packages).

File ContainerClient.py

def list_blobs(self=None):
    """Return a placeholder blob name, standing in for a real listing call.

    ``self`` is unused. It defaults to ``None`` so that this module-level
    function can be called with no arguments (as ``main()`` in
    DeleteOldBlobs.py does) without raising ``TypeError`` when it is not
    patched.
    """
    return "blob1"

File DeleteOldBlobs.py

from pkg.sub_pkg1 import ContainerClient

def main():
    """Fetch the blob listing once and print it twice."""
    listing = ContainerClient.list_blobs()
    for _ in range(2):
        print(listing)

File DeleteBlobsTest.py

import unittest
from unittest import mock
from pkg.sub_pkg1 import DeleteOldBlobs

class DeleteBlobsTest(unittest.TestCase):
    """Check that DeleteOldBlobs.main() sees a patched list_blobs()."""

    def test_main(self):
        """Replace list_blobs() and confirm main() calls the replacement."""
        fake_blobs = ["ablob1", "ablob2"]

        # patch.object targets the ContainerClient module that DeleteOldBlobs
        # has already imported, so the patch does not depend on a top-level
        # "DeleteOldBlobs" module being resolvable from sys.path.
        with mock.patch.object(
            DeleteOldBlobs.ContainerClient, "list_blobs", return_value=fake_blobs
        ) as mock_list_blobs:
            DeleteOldBlobs.main()

        # main() takes the listing with a single argument-less call.
        mock_list_blobs.assert_called_once_with()

# Run the test case when this file is executed directly.
if __name__ == '__main__':
    unittest.main()

If you execute the test code you obtain the output:

['ablob1', 'ablob2']
['ablob1', 'ablob2']

This output means that the function list_blobs() is mocked by mock_containerclient.list_blobs.

I don’t know whether this post will be useful to you, as I’m not able to simulate your code any more closely at the moment.
I hope my code can serve as inspiration for finding your real solution.

Answered By: frankfalse

The structure of that answer didn’t match my solution. Perhaps both will work, but it was important for me to patch pyspark even though I never call it; otherwise exceptions would be thrown when my code tried to interact with Spark.

Perhaps this will be useful to someone:

    @mock.patch("DeleteOldBlobs.azure.storage.blob.BlobServiceClient")
    @mock.patch("DeleteOldBlobs.dbutils")
    @mock.patch("DeleteOldBlobs.sys")
    @mock.patch("DeleteOldBlobs.pyspark")
    def test_list_blobs_called_once(self, mock_pyspark, mock_sys, mock_dbutils, mock_blobserviceclient):
        """main() lists the container's blobs exactly once.

        The container client is reached through the same chain main() uses:
        BlobServiceClient.from_connection_string() -> get_container_client().
        pyspark is patched even though the test never inspects it, so that
        main() does not try to talk to a real Spark context.
        """
        # mock setup
        config_file = "Delete_Old_Blobs_UnitTest.yml"
        mock_sys.argv = ["unused_arg", config_file]
        account_key = "Secret Key"
        mock_dbutils.secrets.get.return_value = account_key

        # NOTE(review): Blob is a helper that is not shown in this snippet;
        # presumably Blob(name, creation_time) - confirm. datetime and
        # timedelta must also be imported by the test module.
        blob1 = Blob("newblob", datetime.today())
        blob2 = Blob("oldfile", datetime.today() - timedelta(days=20))

        # Wire the mocks in the order main() walks them.
        container_client_mock = mock.Mock()
        container_client_mock.list_blobs.return_value = [blob1, blob2]
        bsc_mock = mock.Mock()
        bsc_mock.get_container_client.return_value = container_client_mock
        mock_blobserviceclient.from_connection_string.return_value = bsc_mock

        # execute test
        DeleteOldBlobs.main()

        # Assert Results
        container_client_mock.list_blobs.assert_called_once()
Answered By: Alan Blyth