CRC computation port from C to Python
Question:
I need to convert the following CRC computation algorithm to Python:
#include <stdio.h>
unsigned int Crc32Table[256];
/* Compute the JAMCRC of uSize bytes at Block: reflected polynomial
 * 0xEDB88320, initial value all-ones, and — unlike standard CRC-32 —
 * NO final bit inversion (the register is returned as-is).
 * Requires the global Crc32Table to be filled by crc32tab() first. */
unsigned int crc32jam(const unsigned char *Block, unsigned int uSize)
{
    unsigned int x = -1; /* initial value: wraps to 0xFFFFFFFF (all ones) */
    unsigned int c = 0;
    while (c < uSize)
    {
        /* Fold one byte in: low 8 bits of (x ^ byte) index the table. */
        x = ((x >> 8) ^ Crc32Table[((x ^ Block[c]) & 255)]);
        c++;
    }
    return x; /* no final ~x: this is JAMCRC, not standard CRC-32 */
}
/* Fill the global Crc32Table with the 256 per-byte remainders of the
 * reflected CRC-32 polynomial 0xEDB88320. Must be called once before
 * crc32jam() is used. */
void crc32tab()
{
    unsigned int x, c, b;
    c = 0;
    while (c <= 255)
    {
        x = c;
        b = 0;
        while (b <= 7) /* 8 shift/conditional-xor steps per entry */
        {
            if ((x & 1) != 0)
                x = ((x >> 1) ^ 0xEDB88320); /* reflected polynomial */
            else
                x = (x >> 1);
            b++;
        }
        Crc32Table[c] = x;
        c++;
    }
}
/* Driver: build the table, hash a sample buffer, print the result.
 * Expected output for this buffer: 2022541416. */
int main() {
    unsigned char buff[] = "whatever buffer content";
    /* sizeof includes the string's NUL terminator; -1 excludes it. */
    unsigned int l = sizeof(buff) - 1;
    unsigned int hash;

    crc32tab();              /* populate Crc32Table before hashing */
    hash = crc32jam(buff, l);
    /* FIX: original "%dn" had lost the backslash of "\n" (extraction
     * artifact), and %d is the wrong specifier for unsigned int. */
    printf("%u\n", hash);
    return 0;
}
two (failed) attempts to rewrite this in python follow:
def crc32_1(buf):
    """Failed attempt #1 at porting crc32jam (tableless, bit-at-a-time).

    BUG (explained in the answer below): the trailing ``^ 0xffffffff``
    inverts the register before returning, but the C reference returns
    the register uninverted (JAMCRC). That inversion is why this prints
    2272425879 instead of 2022541416.
    """
    crc = 0xffffffff  # C's `unsigned int x = -1`
    for b in buf:
        crc ^= b
        for _ in range(8):  # one shift/xor step per bit
            crc = (crc >> 1) ^ 0xedb88320 if crc & 1 else crc >> 1
    return crc ^ 0xffffffff  # <-- not present in the C code
def crc32_2(block):
    """Failed attempt #2: table-driven port of crc32tab + crc32jam.

    BUG (explained in the answer below): ``x = -1`` works in C because x
    is a 32-bit unsigned int, but a Python int has an infinite supply of
    sign bits, so ``x >> 8`` never sheds them and the result is wrong
    even after the final mask.
    """
    table = [0] * 256
    for c in range(256):
        x = c
        b = 0  # vestigial counter carried over from the C while-loop
        for _ in range(8):
            if x & 1:
                x = ((x >> 1) ^ 0xEDB88320)  # reflected polynomial
            else:
                x >>= 1
        table[c] = x
    x = -1  # <-- should be 0xffffffff in Python
    for c in block:
        x = ((x >> 8) ^ table[((x ^ c) & 255)])
    return x & 0xffffffff
# Same payload as the C program; prints the two (still wrong) Python results.
data = b'whatever buffer content'
print(crc32_1(data), crc32_2(data))
Using the three routines on the exact same data yields three different results:
mcon@cinderella:~/Desktop/3xDAsav/DDDAedit$ ./test5
2022541416
mcon@cinderella:~/Desktop/3xDAsav/DDDAedit$ python3 test5.py
2272425879 2096952735
As said: C
code is "Golden Standard", how do I fix this in Python?
Note: I know I can call C
routines from Python, but I consider that as "last resort".
Answers:
Instead of porting your own CRC32 implementation, you can use one from the Python standard library. For historic reasons, the standard library includes two identical[1] CRC32 implementations:
Both implementations match the behavior of your crc32_1
function:
import binascii
import zlib
>>> print(binascii.crc32(b'whatever buffer content'))
2272425879
>>> print(zlib.crc32(b'whatever buffer content'))
2272425879
To get a result matching the C implementation from the question, you just need to apply a constant offset:
>>> 0xffff_ffff - zlib.crc32(b'whatever buffer content')
2022541416
As a bonus, these CRC32 functions are implemented in efficient C code, and will be much faster than any equivalent pure-Python port.
[1] Note that the zlib
module is only available when CPython is compiled with zlib support (which is almost always true). On the off chance that you’re using a CPython build without zlib, you won’t be able to use the zlib
module. Instead, you can use the binascii
implementation, which uses zlib
when available and defaults to an "in-house" implementation when it’s not.
Two small changes to your code produce the desired results:
def crc32_1(buf):
    """Return the JAMCRC of *buf*: CRC-32 (reflected polynomial
    0xEDB88320, all-ones init) with NO final inversion, matching the C
    reference. Bitwise, tableless implementation.
    """
    register = 0xFFFFFFFF
    for byte in buf:
        register ^= byte
        for _ in range(8):
            low_bit_set = register & 1
            register >>= 1
            if low_bit_set:
                register ^= 0xEDB88320
    return register
def crc32_2(block):
    """Return the JAMCRC of *block* using a precomputed 256-entry
    lookup table (reflected polynomial 0xEDB88320, all-ones init, no
    final inversion) — a faithful port of the C crc32tab/crc32jam pair.
    """
    # Build the per-byte remainder table.
    table = []
    for seed in range(256):
        entry = seed
        for _ in range(8):
            entry = (entry >> 1) ^ (0xEDB88320 if entry & 1 else 0)
        table.append(entry)
    # Feed the data through, one byte per table lookup.
    crc = 0xFFFFFFFF
    for byte in block:
        crc = (crc >> 8) ^ table[(crc ^ byte) & 0xFF]
    return crc & 0xFFFFFFFF
# Both corrected routines now agree with the C program's 2022541416.
data = b'whatever buffer content'
print(crc32_1(data), crc32_2(data))
prints:
2022541416 2022541416
In the first function, the final ^ 0xffffffff
was removed. That is not there at all in the C code.
In the second function, the initialization x = -1
was replaced with x = 0xffffffff
. The x = -1;
works in C, since x
is 32 bits, at least for the compiler being used by whoever wrote that. (An int
in C is almost always 32 bits nowadays, even though the standard permits it to have as few as 16 bits. It would be more portable to use uint32_t
instead.) In Python, x = -1
has an infinite supply of one bits to shift down.
By the way, you don’t need that final & 0xffffffff
.
I need to convert the following CRC computation algorithm to Python:
#include <stdio.h>
unsigned int Crc32Table[256];
/* Compute the JAMCRC of uSize bytes at Block: reflected polynomial
 * 0xEDB88320, initial value all-ones, and — unlike standard CRC-32 —
 * NO final bit inversion (the register is returned as-is).
 * Requires the global Crc32Table to be filled by crc32tab() first. */
unsigned int crc32jam(const unsigned char *Block, unsigned int uSize)
{
    unsigned int x = -1; /* initial value: wraps to 0xFFFFFFFF (all ones) */
    unsigned int c = 0;
    while (c < uSize)
    {
        /* Fold one byte in: low 8 bits of (x ^ byte) index the table. */
        x = ((x >> 8) ^ Crc32Table[((x ^ Block[c]) & 255)]);
        c++;
    }
    return x; /* no final ~x: this is JAMCRC, not standard CRC-32 */
}
/* Fill the global Crc32Table with the 256 per-byte remainders of the
 * reflected CRC-32 polynomial 0xEDB88320. Must be called once before
 * crc32jam() is used. */
void crc32tab()
{
    unsigned int x, c, b;
    c = 0;
    while (c <= 255)
    {
        x = c;
        b = 0;
        while (b <= 7) /* 8 shift/conditional-xor steps per entry */
        {
            if ((x & 1) != 0)
                x = ((x >> 1) ^ 0xEDB88320); /* reflected polynomial */
            else
                x = (x >> 1);
            b++;
        }
        Crc32Table[c] = x;
        c++;
    }
}
/* Driver: build the table, hash a sample buffer, print the result.
 * Expected output for this buffer: 2022541416. */
int main() {
    unsigned char buff[] = "whatever buffer content";
    /* sizeof includes the string's NUL terminator; -1 excludes it. */
    unsigned int l = sizeof(buff) - 1;
    unsigned int hash;

    crc32tab();              /* populate Crc32Table before hashing */
    hash = crc32jam(buff, l);
    /* FIX: original "%dn" had lost the backslash of "\n" (extraction
     * artifact), and %d is the wrong specifier for unsigned int. */
    printf("%u\n", hash);
    return 0;
}
two (failed) attempts to rewrite this in python follow:
def crc32_1(buf):
    """Failed attempt #1 at porting crc32jam (tableless, bit-at-a-time).

    BUG (explained in the answer below): the trailing ``^ 0xffffffff``
    inverts the register before returning, but the C reference returns
    the register uninverted (JAMCRC). That inversion is why this prints
    2272425879 instead of 2022541416.
    """
    crc = 0xffffffff  # C's `unsigned int x = -1`
    for b in buf:
        crc ^= b
        for _ in range(8):  # one shift/xor step per bit
            crc = (crc >> 1) ^ 0xedb88320 if crc & 1 else crc >> 1
    return crc ^ 0xffffffff  # <-- not present in the C code
def crc32_2(block):
    """Failed attempt #2: table-driven port of crc32tab + crc32jam.

    BUG (explained in the answer below): ``x = -1`` works in C because x
    is a 32-bit unsigned int, but a Python int has an infinite supply of
    sign bits, so ``x >> 8`` never sheds them and the result is wrong
    even after the final mask.
    """
    table = [0] * 256
    for c in range(256):
        x = c
        b = 0  # vestigial counter carried over from the C while-loop
        for _ in range(8):
            if x & 1:
                x = ((x >> 1) ^ 0xEDB88320)  # reflected polynomial
            else:
                x >>= 1
        table[c] = x
    x = -1  # <-- should be 0xffffffff in Python
    for c in block:
        x = ((x >> 8) ^ table[((x ^ c) & 255)])
    return x & 0xffffffff
# Same payload as the C program; prints the two (still wrong) Python results.
data = b'whatever buffer content'
print(crc32_1(data), crc32_2(data))
Using the three routines on the exact same data yields three different results:
mcon@cinderella:~/Desktop/3xDAsav/DDDAedit$ ./test5
2022541416
mcon@cinderella:~/Desktop/3xDAsav/DDDAedit$ python3 test5.py
2272425879 2096952735
As said: C
code is "Golden Standard", how do I fix this in Python?
Note: I know I can call C
routines from Python, but I consider that as "last resort".
Instead of porting your own CRC32 implementation, you can use one from the Python standard library. For historic reasons, the standard library includes two identical[1] CRC32 implementations:
Both implementations match the behavior of your crc32_1
function:
import binascii
import zlib
>>> print(binascii.crc32(b'whatever buffer content'))
2272425879
>>> print(zlib.crc32(b'whatever buffer content'))
2272425879
To get a result matching the C implementation from the question, you just need to apply a constant offset:
>>> 0xffff_ffff - zlib.crc32(b'whatever buffer content')
2022541416
As a bonus, these CRC32 functions are implemented in efficient C code, and will be much faster than any equivalent pure-Python port.
[1] Note that the zlib
module is only available when CPython is compiled with zlib support (which is almost always true). On the off chance that you’re using a CPython build without zlib, you won’t be able to use the zlib
module. Instead, you can use the binascii
implementation, which uses zlib
when available and defaults to an "in-house" implementation when it’s not.
Two small changes to your code produce the desired results:
def crc32_1(buf):
    """Return the JAMCRC of *buf*: CRC-32 (reflected polynomial
    0xEDB88320, all-ones init) with NO final inversion, matching the C
    reference. Bitwise, tableless implementation.
    """
    register = 0xFFFFFFFF
    for byte in buf:
        register ^= byte
        for _ in range(8):
            low_bit_set = register & 1
            register >>= 1
            if low_bit_set:
                register ^= 0xEDB88320
    return register
def crc32_2(block):
    """Return the JAMCRC of *block* using a precomputed 256-entry
    lookup table (reflected polynomial 0xEDB88320, all-ones init, no
    final inversion) — a faithful port of the C crc32tab/crc32jam pair.
    """
    # Build the per-byte remainder table.
    table = []
    for seed in range(256):
        entry = seed
        for _ in range(8):
            entry = (entry >> 1) ^ (0xEDB88320 if entry & 1 else 0)
        table.append(entry)
    # Feed the data through, one byte per table lookup.
    crc = 0xFFFFFFFF
    for byte in block:
        crc = (crc >> 8) ^ table[(crc ^ byte) & 0xFF]
    return crc & 0xFFFFFFFF
# Both corrected routines now agree with the C program's 2022541416.
data = b'whatever buffer content'
print(crc32_1(data), crc32_2(data))
prints:
2022541416 2022541416
In the first function, the final ^ 0xffffffff
was removed. That is not there at all in the C code.
In the second function, the initialization x = -1
was replaced with x = 0xffffffff
. The x = -1;
works in C, since x
is 32 bits, at least for the compiler being used by whoever wrote that. (An int
in C is almost always 32 bits nowadays, even though the standard permits it to have as few as 16 bits. It would be more portable to use uint32_t
instead.) In Python, x = -1
has an infinite supply of one bits to shift down.
By the way, you don’t need that final & 0xffffffff
.