How to decrypt OpenSSL AES-encrypted files in Python?

Question:

OpenSSL provides a popular (but insecure – see below!) command line interface for AES encryption:

openssl aes-256-cbc -salt -in filename -out filename.enc

Python has support for AES in the shape of the PyCrypto package, but it only provides the tools. How to use Python/PyCrypto to decrypt files that have been encrypted using OpenSSL?

Notice

This question used to also concern encryption in Python using the same scheme. I have since removed that part to discourage anyone from using it. Do NOT encrypt any more data in this way, because it is NOT secure by today’s standards. You should ONLY use decryption, for no other reasons than BACKWARD COMPATIBILITY, i.e. when you have no other choice. Want to encrypt? Use NaCl/libsodium if you possibly can.

Asked By: Thijs van Dien

||

Answers:

Given the popularity of Python, at first I was disappointed that there was no complete answer to this question to be found. It took me a fair amount of reading different answers on this board, as well as other resources, to get it right. I thought I might share the result for future reference and perhaps review; I’m by no means a cryptography expert! However, the code below appears to work seamlessly:

from hashlib import md5
from Crypto.Cipher import AES
from Crypto import Random

def derive_key_and_iv(password, salt, key_length, iv_length):
    """Derive the AES key and IV from a password and salt using chained MD5.

    Each round hashes ``previous_digest + password + salt``; the digests are
    concatenated until at least ``key_length + iv_length`` bytes exist. The
    first ``key_length`` bytes become the key, the next ``iv_length`` bytes
    the IV.

    password   -- passphrase as a byte string; text is encoded as UTF-8.
    salt       -- salt as a byte string.
    key_length -- number of key bytes to return.
    iv_length  -- number of IV bytes to return.

    Returns a ``(key, iv)`` tuple of byte strings.
    """
    # Operate on bytes throughout so the same code runs on Python 2 and 3
    # (on Python 2 ``bytes`` is ``str``, so existing callers are unaffected).
    if not isinstance(password, bytes):
        password = password.encode('utf-8')
    d = d_i = b''
    while len(d) < key_length + iv_length:
        d_i = md5(d_i + password + salt).digest()
        d += d_i
    return d[:key_length], d[key_length:key_length+iv_length]

def decrypt(in_file, out_file, password, key_length=32):
    """Decrypt a salted AES-CBC stream from in_file and write it to out_file.

    in_file    -- binary file object. Its first AES block is consumed as the
                  header: the first len('Salted__') bytes are skipped and the
                  remainder of the block is used as the salt.
    out_file   -- binary file object that receives the plaintext.
    password   -- passphrase passed on to derive_key_and_iv().
    key_length -- AES key size in bytes (default 32).

    Raises ValueError when there is no encrypted data after the header or
    when the trailing padding is malformed (typically a wrong password or a
    truncated file).
    """
    bs = AES.block_size
    salt = in_file.read(bs)[len('Salted__'):]
    key, iv = derive_key_and_iv(password, salt, key_length, bs)
    cipher = AES.new(key, AES.MODE_CBC, iv)
    # b'' (not '') so the look-behind buffer is a byte string on Python 3 too.
    next_chunk = b''
    finished = False
    while not finished:
        # Stay one chunk behind: the padding can only be stripped once we
        # know the chunk in hand is the last one.
        chunk, next_chunk = next_chunk, cipher.decrypt(in_file.read(1024 * bs))
        if len(next_chunk) == 0:
            if len(chunk) == 0:
                raise ValueError("bad decrypt: no encrypted data after header")
            # ord() on a one-byte slice works on both Python 2 and 3.
            padding_length = ord(chunk[-1:])
            if padding_length < 1 or padding_length > bs:
                raise ValueError("bad decrypt pad (%d)" % padding_length)
            # Every padding byte must carry the padding length itself.
            if chunk[-padding_length:] != padding_length * chunk[-1:]:
                raise ValueError("bad decrypt")
            chunk = chunk[:-padding_length]
            finished = True
        out_file.write(chunk)

Usage:

# Open the encrypted input and the plaintext output in binary mode, then
# stream the decryption from one to the other.
with open(in_filename, 'rb') as in_file:
    with open(out_filename, 'wb') as out_file:
        decrypt(in_file, out_file, password)

If you see a chance to improve on this or extend it to be more flexible (e.g. make it work without salt, or provide Python 3 compatibility), please feel free to do so.

Notice

This answer used to also concern encryption in Python using the same scheme. I have since removed that part to discourage anyone from using it. Do NOT encrypt any more data in this way, because it is NOT secure by today’s standards. You should ONLY use decryption, for no other reasons than BACKWARD COMPATIBILITY, i.e. when you have no other choice. Want to encrypt? Use NaCl/libsodium if you possibly can.

Answered By: Thijs van Dien

I am re-posting your code with a couple of corrections (I didn’t want to obscure your version). While your code works, it does not detect some errors around padding. In particular, if the decryption key provided is incorrect, your padding logic may do something odd. If you agree with my change, you may update your solution.

from hashlib import md5
from Crypto.Cipher import AES
from Crypto import Random

def derive_key_and_iv(password, salt, key_length, iv_length):
    """Return ``(key, iv)`` derived from password and salt by chained MD5.

    The digest of ``previous_digest + password + salt`` is appended to a
    buffer until it holds ``key_length + iv_length`` bytes; the buffer is
    then split into the key (first ``key_length`` bytes) and the IV (the
    following ``iv_length`` bytes).

    password   -- passphrase as a byte string; text is encoded as UTF-8.
    salt       -- salt as a byte string.
    key_length -- number of key bytes wanted.
    iv_length  -- number of IV bytes wanted.
    """
    # Bytes everywhere: '' + bytes raises TypeError on Python 3, while on
    # Python 2 b'' is the same object type as ''.
    if not isinstance(password, bytes):
        password = password.encode('utf-8')
    d = d_i = b''
    while len(d) < key_length + iv_length:
        d_i = md5(d_i + password + salt).digest()
        d += d_i
    return d[:key_length], d[key_length:key_length+iv_length]

# This encryption mode is no longer secure by today's standards.
# See note in original question above.
def obsolete_encrypt(in_file, out_file, password, key_length=32):
    """Encrypt in_file to out_file with AES-CBC and a 'Salted__' header.

    Kept for reference only; see the warnings on this page before using it.

    in_file    -- binary file object supplying the plaintext.
    out_file   -- binary file object receiving 'Salted__' + salt followed by
                  the ciphertext.
    password   -- passphrase passed on to derive_key_and_iv().
    key_length -- AES key size in bytes (default 32).
    """
    bs = AES.block_size
    # Byte-string marker so the header concatenation works on Python 3 too.
    magic = b'Salted__'
    salt = Random.new().read(bs - len(magic))
    key, iv = derive_key_and_iv(password, salt, key_length, bs)
    cipher = AES.new(key, AES.MODE_CBC, iv)
    out_file.write(magic + salt)
    finished = False
    while not finished:
        chunk = in_file.read(1024 * bs)
        # A short (or empty) read marks the final chunk; it is always padded,
        # with a whole extra block when the data already ends on a boundary.
        if len(chunk) == 0 or len(chunk) % bs != 0:
            padding_length = bs - (len(chunk) % bs)
            # bytearray -> bytes builds the padding identically on Py2/Py3.
            chunk += bytes(bytearray([padding_length]) * padding_length)
            finished = True
        out_file.write(cipher.encrypt(chunk))

def decrypt(in_file, out_file, password, key_length=32):
    """Decrypt a salted AES-CBC stream, validating the trailing padding.

    in_file    -- binary file object; the first AES block is the header, of
                  which the first len('Salted__') bytes are skipped and the
                  rest is the salt.
    out_file   -- binary file object that receives the plaintext.
    password   -- passphrase passed on to derive_key_and_iv().
    key_length -- AES key size in bytes (default 32).

    Raises ValueError ("bad decrypt ...") when the payload is empty or the
    padding is inconsistent, e.g. because the password is wrong.
    """
    bs = AES.block_size
    salt = in_file.read(bs)[len('Salted__'):]
    key, iv = derive_key_and_iv(password, salt, key_length, bs)
    cipher = AES.new(key, AES.MODE_CBC, iv)
    next_chunk = b''
    finished = False
    while not finished:
        # One-chunk look-behind: padding is stripped only from the last chunk.
        chunk, next_chunk = next_chunk, cipher.decrypt(in_file.read(1024 * bs))
        if len(next_chunk) == 0:
            if len(chunk) == 0:
                # Nothing followed the header; chunk[-1] would otherwise
                # fail with an unhelpful IndexError.
                raise ValueError("bad decrypt")
            # ord() of a one-byte slice is valid on Python 2 and Python 3.
            padding_length = ord(chunk[-1:])
            if padding_length < 1 or padding_length > bs:
                raise ValueError("bad decrypt pad (%d)" % padding_length)
            # all the pad-bytes must be the same
            if chunk[-padding_length:] != (padding_length * chunk[-1:]):
                # this is similar to the bad decrypt:evp_enc.c from openssl program
                raise ValueError("bad decrypt")
            chunk = chunk[:-padding_length]
            finished = True
        out_file.write(chunk)
Answered By: Gregor

The code below should be Python 3 compatible, with the small changes documented in the code. I also wanted to use os.urandom instead of Crypto.Random. ‘Salted__’ is replaced with a salt_header parameter that can be tailored or left empty if needed.

from os import urandom
from hashlib import md5

from Crypto.Cipher import AES

def derive_key_and_iv(password, salt, key_length, iv_length):
    """Return ``(key, iv)`` derived from password and salt by chained MD5.

    password   -- passphrase, either ``str`` (encoded with the default
                  UTF-8 codec) or ``bytes`` (used as-is).
    salt       -- salt bytes.
    key_length -- number of key bytes to return.
    iv_length  -- number of IV bytes to return.
    """
    # Encode once, outside the loop; bytes passwords are accepted unchanged.
    password_bytes = password if isinstance(password, bytes) else str.encode(password)
    d = d_i = b''  # changed '' to b''
    while len(d) < key_length + iv_length:
        d_i = md5(d_i + password_bytes + salt).digest()
        d += d_i
    return d[:key_length], d[key_length:key_length+iv_length]

def encrypt(in_file, out_file, password, salt_header='', key_length=32):
    """Encrypt in_file to out_file with AES-CBC.

    The output starts with one AES block made of the encoded salt_header
    followed by random salt bytes, then the ciphertext of the padded input.

    in_file     -- binary file object supplying the plaintext.
    out_file    -- binary file object receiving header and ciphertext.
    password    -- passphrase passed on to derive_key_and_iv().
    salt_header -- text written in front of the salt (default: empty).
    key_length  -- AES key size in bytes (default 32).
    """
    block_size = AES.block_size
    # os.urandom supplies the salt; header and salt together fill one block.
    salt = urandom(block_size - len(salt_header))
    key, iv = derive_key_and_iv(password, salt, key_length, block_size)
    cipher = AES.new(key, AES.MODE_CBC, iv)
    out_file.write(str.encode(salt_header) + salt)

    read_size = 1024 * block_size
    while True:
        chunk = in_file.read(read_size)
        remainder = len(chunk) % block_size
        # An empty or partial read is the last chunk and always gets padded.
        is_last = len(chunk) == 0 or remainder != 0
        if is_last:
            pad = block_size - remainder
            chunk += str.encode(chr(pad) * pad)
        out_file.write(cipher.encrypt(chunk))
        if is_last:
            break

def decrypt(in_file, out_file, password, salt_header='', key_length=32):
    """Decrypt an AES-CBC stream from in_file and write it to out_file.

    in_file     -- binary file object; its first AES block is the header:
                   len(salt_header) bytes are skipped, the rest is the salt.
    out_file    -- binary file object that receives the plaintext.
    password    -- passphrase passed on to derive_key_and_iv().
    salt_header -- marker text preceding the salt (e.g. 'Salted__');
                   only its length is used here.
    key_length  -- AES key size in bytes (default 32).

    Raises ValueError when no data follows the header or the trailing
    padding is malformed (typically a wrong password or truncated input).
    """
    # added salt_header=''
    bs = AES.block_size
    # changed 'Salted__' to salt_header
    salt = in_file.read(bs)[len(salt_header):]
    key, iv = derive_key_and_iv(password, salt, key_length, bs)
    cipher = AES.new(key, AES.MODE_CBC, iv)
    # bytes, not str, so every chunk written below has the same type
    next_chunk = b''
    finished = False
    while not finished:
        # Stay one chunk behind so padding is stripped from the last one only.
        chunk, next_chunk = next_chunk, cipher.decrypt(
            in_file.read(1024 * bs))
        if len(next_chunk) == 0:
            if not chunk:
                raise ValueError("bad decrypt: no encrypted data after header")
            padding_length = chunk[-1]  # indexing bytes yields an int
            if not 1 <= padding_length <= bs:
                raise ValueError("bad decrypt pad (%d)" % padding_length)
            # Each padding byte must equal the padding length.
            if chunk[-padding_length:] != bytes([padding_length]) * padding_length:
                raise ValueError("bad decrypt")
            chunk = chunk[:-padding_length]
            finished = True
        out_file.write(chunk)
Answered By: Johnny Booy

I know this is a bit late, but here is a solution that I blogged about in 2013, showing how to use the Python pycrypto package to encrypt/decrypt in an OpenSSL-compatible way. It has been tested on Python 2.7 and Python 3.x. The source code and a test script can be found here.

One of the key differences between this solution and the excellent solutions presented above is that it differentiates between pipe and file I/O which can cause problems in some applications.

The key functions from that blog are shown below.

# ================================================================
# get_key_and_iv
# ================================================================
def get_key_and_iv(password, salt, klen=32, ilen=16, msgdgst='md5'):
    '''
    Derive the key and the IV from the given password and salt.

    The digest of (previous digest + password + salt) is appended to a
    buffer until it holds at least klen + ilen bytes; the first klen
    bytes are the key and the following ilen bytes are the IV.

    This is a niftier implementation than my direct transliteration of
    the C++ code although I modified to support different digests.

    CITATION: http://stackoverflow.com/questions/13907841/implement-openssl-aes-encryption-in-python

    @param password  The password to use as the seed. Text is converted
                     to ASCII with non-ASCII characters dropped; bytes
                     are used unchanged.
    @param salt      The salt (bytes).
    @param klen      The key length.
    @param ilen      The initialization vector length.
    @param msgdgst   The message digest algorithm to use (the name of a
                     hashlib constructor such as 'md5' or 'sha512').
    @returns a (key, iv) tuple, or (None, None) if a UnicodeDecodeError
             occurs while combining password and salt.
    '''
    import hashlib  # local import keeps this function self-contained

    mdf = getattr(hashlib, msgdgst)
    if not isinstance(password, bytes):
        # NOTE: 'ignore' silently drops non-ASCII characters; kept as-is so
        # keys derived for existing data do not change.
        password = password.encode('ascii', 'ignore')  # convert to ASCII

    try:
        maxlen = klen + ilen
        last = mdf(password + salt).digest()
        keyiv = last
        # Compare the number of derived *bytes* with the target so the loop
        # stops as soon as enough material exists.
        while len(keyiv) < maxlen:
            last = mdf(last + password + salt).digest()
            keyiv += last  # append the latest digest
        key = keyiv[:klen]
        iv = keyiv[klen:klen+ilen]
        return key, iv
    except UnicodeDecodeError:
        return None, None


# ================================================================
# encrypt
# ================================================================
def encrypt(password, plaintext, chunkit=True, msgdgst='md5'):
    r'''
    Encrypt the plaintext using the password using an openssl
    compatible encryption algorithm. It is the same as creating a file
    with plaintext contents and running openssl like this:

    $ cat plaintext
    <plaintext>
    $ openssl enc -e -aes-256-cbc -base64 -salt \
        -pass pass:<password> -in plaintext

    @param password  The password.
    @param plaintext The plaintext to encrypt. Text is encoded as UTF-8
                     before padding; bytes are used unchanged.
    @param chunkit   Flag that tells encrypt to split the ciphertext
                     into 64 character (MIME encoded) lines.
                     This does not affect the decrypt operation.
    @param msgdgst   The message digest algorithm.
    @returns the base64 encoded ciphertext as bytes, or None if the key
             could not be derived.
    '''
    salt = os.urandom(8)
    key, iv = get_key_and_iv(password, salt, msgdgst=msgdgst)
    if key is None:
        return None

    # Convert text to bytes *before* padding: the pad length must be
    # computed from the byte length, which differs from the character
    # count as soon as the text contains non-ASCII characters.
    if not isinstance(plaintext, (bytes, bytearray)):
        plaintext = plaintext.encode('utf-8')

    # PKCS#7 padding
    padding_len = 16 - (len(plaintext) % 16)
    padded_plaintext = bytes(plaintext) + bytes(bytearray([padding_len] * padding_len))

    # Encrypt
    cipher = AES.new(key, AES.MODE_CBC, iv)
    ciphertext = cipher.encrypt(padded_plaintext)

    # Make openssl compatible.
    # I first discovered this when I wrote the C++ Cipher class.
    # CITATION: http://projects.joelinoff.com/cipher-1.1/doxydocs/html/
    openssl_ciphertext = b'Salted__' + salt + ciphertext
    b64 = base64.b64encode(openssl_ciphertext)
    if not chunkit:
        return b64

    # Split into 64-character lines joined by a real newline.
    LINELEN = 64
    lines = [b64[i:i + LINELEN] for i in range(0, len(b64), LINELEN)]
    return b'\n'.join(lines)


# ================================================================
# decrypt
# ================================================================
def decrypt(password, ciphertext, msgdgst='md5'):
    r'''
    Decrypt the ciphertext using the password using an openssl
    compatible decryption algorithm. It is the same as creating a file
    with ciphertext contents and running openssl like this:

    $ cat ciphertext
    # ENCRYPTED
    <ciphertext>
    $ egrep -v '^#|^$' | \
        openssl enc -d -aes-256-cbc -base64 -salt -pass pass:<password> -in ciphertext
    @param password   The password.
    @param ciphertext The ciphertext to decrypt (base64 text as str or
                      bytes; blank lines and lines starting with '#'
                      are ignored).
    @param msgdgst    The message digest algorithm.
    @returns the decrypted data, or None if the key could not be derived.
    @raises AssertionError if the decoded data does not start with
            b'Salted__'.
    '''

    # unfilter -- ignore blank lines and comments
    if isinstance(ciphertext, str):
        nl = '\n'
        comment = '#'
    else:
        nl = b'\n'
        comment = b'#'

    kept = []
    for line in ciphertext.split(nl):
        line = line.strip()
        # After strip() a blank line is empty and a comment starts with '#',
        # so no regular expression is needed.
        if not line or line.startswith(comment):
            continue
        kept.append(line)
    filtered = nl.join(kept)

    # Base64 decode
    raw = base64.b64decode(filtered)
    # Explicit raise (rather than an assert statement) so the check is not
    # stripped under ``python -O``; the exception type is unchanged.
    if raw[:8] != b'Salted__':
        raise AssertionError("ciphertext does not start with b'Salted__'")
    salt = raw[8:16]  # get the salt

    # Now create the key and iv.
    key, iv = get_key_and_iv(password, salt, msgdgst=msgdgst)
    if key is None:
        return None

    # The original ciphertext
    ciphertext = raw[16:]

    # Decrypt
    cipher = AES.new(key, AES.MODE_CBC, iv)
    padded_plaintext = cipher.decrypt(ciphertext)

    # Strip the PKCS#7 padding: the last byte holds the padding length.
    # NOTE(review): the padding is not validated, so a wrong password yields
    # garbage (or empty) output rather than an error.
    if isinstance(padded_plaintext, str):
        padding_len = ord(padded_plaintext[-1])
    else:
        padding_len = padded_plaintext[-1]
    plaintext = padded_plaintext[:-padding_len]
    return plaintext
Answered By: Joe Linoff

Note: this method is not OpenSSL compatible

But it is suitable if all you want to do is encrypt and decrypt files.

A self-answer I copied from here. I think this is, perhaps, a simpler and more secure option. Although I would be interested in some expert opinion on how secure it is.

I used Python 3.6 and SimpleCrypt to encrypt the file and then uploaded it.

I think this is the code I used to encrypt the file:

from simplecrypt import encrypt, decrypt
# Read the whole input file as text; the with-block closes it afterwards.
with open('file.csv', 'r') as src:
    f = src.read()
ciphertext = encrypt('USERPASSWORD', f.encode('utf8'))  # I am not certain of whether I used the .encode('utf8')
# file.enc doesn't need to exist, python will create it.
# The with-block guarantees the file is flushed and closed (the original
# "e.close" without parentheses never actually called close()).
with open('file.enc', 'wb') as e:
    e.write(ciphertext)

This is the code I use to decrypt at runtime. I pass getpass("password: ") directly as an argument so that I don’t have to store the password in a variable in memory:

from simplecrypt import encrypt, decrypt
from getpass import getpass

# opens the file and reads the encrypted bytes; the with-block closes it
with open('file.enc', 'rb') as enc_file:
    f = enc_file.read()

print('Please enter the password and press the enter key \n Decryption may take some time')

# Decrypts the data, requires a user-input password
plaintext = decrypt(getpass("password: "), f).decode('utf8')
print('Data have been Decrypted')

Note, the UTF-8 encoding behaviour is different in python 2.7 so the code will be slightly different.

Answered By: Harvs

This answer is based on openssl v1.1.1, which supports a stronger key derivation process for AES encryption than previous versions of openssl did.

This answer is based on the following command:

echo -n 'Hello World!' | openssl aes-256-cbc -e -a -salt -pbkdf2 -iter 10000 

This command encrypts the plaintext ‘Hello World!’ using aes-256-cbc. The key is derived using pbkdf2 from the password and a random salt, with 10,000 iterations of sha256 hashing. When prompted for the password, I entered the password, ‘p4$$w0rd’. The ciphertext output produced by the command was:

U2FsdGVkX1/Kf8Yo6JjBh+qELWhirAXr78+bbPQjlxE=

The process for decrypting the ciphertext above (as produced by openssl) is as follows:

  1. base64-decode the output from openssl, and utf-8 encode the
    password, so that we have the underlying bytes for both of these.
  2. The salt is bytes 8-15 of the base64-decoded openssl output.
  3. Derive a 48-byte key using pbkdf2 given the password bytes and salt with
    10,000 iterations of sha256 hashing.
  4. The key is bytes 0-31 of the derived key, the iv is bytes 32-47 of the derived key.
  5. The ciphertext is bytes 16 through the end of the base64-decoded openssl
    output.
  6. Decrypt the ciphertext using aes-256-cbc, given the key, iv, and
    ciphertext.
  7. Remove PKCS#7 padding from plaintext. The last byte of
    plaintext indicates the number of padding bytes appended to the end
    of the plaintext. This is the number of bytes to be removed.

Below is a python3 implementation of the above process:

import binascii
import base64
import hashlib
from Crypto.Cipher import AES       #requires pycrypto

# --- inputs ---
openssloutputb64 = 'U2FsdGVkX1/Kf8Yo6JjBh+qELWhirAXr78+bbPQjlxE='
password = 'p4$$w0rd'
pbkdf2iterations = 10000

# Turn both inputs into raw bytes.
passwordbytes = password.encode('utf-8')
openssloutputbytes = base64.b64decode(openssloutputb64)

# Slice the decoded output: bytes 8 through 15 are the salt and everything
# from byte 16 onwards is the ciphertext (the first 8 bytes are not used).
salt = openssloutputbytes[8:16]
ciphertext = openssloutputbytes[16:]

# PBKDF2-HMAC-SHA256 over password and salt yields 48 bytes:
# bytes 0-31 are the AES key, bytes 32-47 the IV.
derivedkey = hashlib.pbkdf2_hmac(
    'sha256', passwordbytes, salt, pbkdf2iterations, dklen=48)
key = derivedkey[:32]
iv = derivedkey[32:48]

# AES-CBC decryption with the derived key and IV.
decryptor = AES.new(key, AES.MODE_CBC, iv)
plaintext = decryptor.decrypt(ciphertext)

# Remove PKCS#7 padding: the value of the last byte is the number of
# padding bytes to drop from the end.
plaintext = plaintext[:-plaintext[-1]]

# --- results ---
print('openssloutputb64:', openssloutputb64)
print('password:', password)
print('salt:', salt.hex())
print('key: ', key.hex())
print('iv: ', iv.hex())
print('ciphertext: ', ciphertext.hex())
print('plaintext: ', plaintext.decode('utf-8'))

As expected, the above python3 script produces the following:

openssloutputb64: U2FsdGVkX1/Kf8Yo6JjBh+qELWhirAXr78+bbPQjlxE=
password: p4$$w0rd
salt: ca7fc628e898c187
key:  444ab886d5721fc87e58f86f3e7734659007bea7fbe790541d9e73c481d9d983
iv:  7f4597a18096715d7f9830f0125be8fd
ciphertext:  ea842d6862ac05ebefcf9b6cf4239711
plaintext:  Hello World!

Note: An equivalent/compatible implementation in javascript (using the web crypto api) can be found at https://github.com/meixler/web-browser-based-file-encryption-decryption.

Answered By: mti2935

I tried everything above and some more from other threads;
this is what worked for me. It is the equivalent of the following openssl commands:

Not the best encryption, but those were the requirements.

Decryption: openssl enc -d -aes256 -md md5 -in {->path_in} -out {->path_out} -pass pass:{->pass}

Encryption: openssl enc -e -aes256 -md md5 -in {->path_in} -out {->path_out} -pass pass:{->pass}

Python:

from os import urandom
from hashlib import md5
from Crypto.Cipher import AES
import typer

def filecrypto(in_file, out_file, password, decrypt: bool = True):
    """Encrypt or decrypt a file with AES-CBC and an MD5-derived key.

    The encrypted file consists of the 'Salted__' marker, a random salt
    filling the rest of the first AES block, and the ciphertext of the
    PKCS#7-padded plaintext.

    Args:
        in_file: Path of the file to read.
        out_file: Path of the file to write.
        password: Passphrase (text) from which key and IV are derived.
        decrypt: Decrypt when True (the default), encrypt when False.

    Raises:
        ValueError: On decryption, if the payload is empty or its padding
            is malformed (wrong password, truncated file, or a file written
            by an earlier version of this function that did not pad).
    """
    salt_header = 'Salted__'

    def derive_key_and_iv(password, salt, key_length, iv_length):
        # Chained MD5 over (previous digest + password + salt).
        password_bytes = str.encode(password)  # encode once, not per round
        d = d_i = b''  # changed '' to b''
        while len(d) < key_length + iv_length:
            d_i = md5(d_i + password_bytes + salt).digest()
            d += d_i

        return d[:key_length], d[key_length:key_length+iv_length]

    def encrypt_f(in_file, out_file, password, salt_header=salt_header, key_length=32):
        bs = AES.block_size
        salt = urandom(bs - len(salt_header))
        key, iv = derive_key_and_iv(password, salt, key_length, bs)
        cipher = AES.new(key, AES.MODE_CBC, iv)
        # Read the input first so a missing input file does not leave a
        # header-only output file behind.
        with open(in_file, 'rb') as f_in:
            plaintext = f_in.read()
        # PKCS#7: always pad, with a full block when already aligned.
        # CBC needs whole blocks and the reader strips this padding again.
        padding_length = bs - len(plaintext) % bs
        plaintext += bytes([padding_length]) * padding_length
        with open(out_file, 'wb') as f_out:
            # write the first line or the salted header
            f_out.write(str.encode(salt_header) + salt)
            f_out.write(cipher.encrypt(plaintext))

    def decrypt_f(in_file, out_file, password, salt_header=salt_header, key_length=32):
        bs = AES.block_size
        with open(in_file, 'rb') as f_in:
            # retrieve the salted header
            salt = f_in.read(bs)[len(salt_header):]
            ciphertext = f_in.read()
        key, iv = derive_key_and_iv(password, salt, key_length, bs)
        cipher = AES.new(key, AES.MODE_CBC, iv)
        padded = cipher.decrypt(ciphertext)
        if not padded:
            raise ValueError("bad decrypt: no encrypted data after header")
        # The last byte states how many padding bytes to remove; all of
        # them must carry that same value.
        padding_length = padded[-1]
        if not 1 <= padding_length <= bs:
            raise ValueError("bad decrypt pad (%d)" % padding_length)
        if padded[-padding_length:] != bytes([padding_length]) * padding_length:
            raise ValueError("bad decrypt")
        with open(out_file, 'wb') as f_out:
            f_out.write(padded[:-padding_length])

    return decrypt_f(in_file, out_file, password) if decrypt else encrypt_f(in_file, out_file, password)

# Run the command-line interface only when executed as a script; __name__ is
# "__main__" in that case (it is never "__filecrypto__").
if __name__ == "__main__":
    typer.run(filecrypto)
Answered By: Anton Smirnov Mies