How to show diff of two string sequences in colors?
Question:
I’m trying to find a Python way to diff strings. I know about difflib
but I haven’t been able to find an inline mode that does something similar to what this JS library does (insertions in green, deletions in red):
one_string = "beep boop"
other_string = "beep boob blah"
Is there a way to achieve this?
Answers:
You can use ndiff.
Example…
import difflib
# Pairs of (source, target) strings that are compared character by character.
cases = [
    ('afrykanerskojęzyczny', 'afrykanerskojęzycznym'),
    ('afrykanerskojęzyczni', 'nieafrykanerskojęzyczni'),
    ('afrykanerskojęzycznym', 'afrykanerskojęzyczny'),
    ('nieafrykanerskojęzyczni', 'afrykanerskojęzyczni'),
    ('nieafrynerskojęzyczni', 'afrykanerskojzyczni'),
    ('abcdefg', 'xac'),
]

for a, b in cases:
    print('{} => {}'.format(a, b))
    # Passing plain strings makes ndiff compare them one character at a time.
    # Each entry starts with a marker (' ', '-' or '+') and ends with the
    # character it refers to.  Note that i counts entries of the diff
    # sequence; it is not an index into a or b.
    for i, s in enumerate(difflib.ndiff(a, b)):
        if s[0] == ' ':
            # Character present in both strings: nothing to report.
            continue
        elif s[0] == '-':
            print(u'Delete "{}" from position {}'.format(s[-1], i))
        elif s[0] == '+':
            print(u'Add "{}" to position {}'.format(s[-1], i))
    # Blank line between cases.
    print()
Returns…..
afrykanerskojęzyczny => afrykanerskojęzycznym
Add "m" to position 20
afrykanerskojęzyczni => nieafrykanerskojęzyczni
Add "n" to position 0
Add "i" to position 1
Add "e" to position 2
afrykanerskojęzycznym => afrykanerskojęzyczny
Delete "m" from position 20
nieafrykanerskojęzyczni => afrykanerskojęzyczni
Delete "n" from position 0
Delete "i" from position 1
Delete "e" from position 2
nieafrynerskojęzyczni => afrykanerskojzyczni
Delete "n" from position 0
Delete "i" from position 1
Delete "e" from position 2
Add "k" to position 7
Add "a" to position 8
Delete "ę" from position 16
abcdefg => xac
Add "x" to position 0
Delete "b" from position 2
Delete "d" from position 4
Delete "e" from position 5
Delete "f" from position 6
Delete "g" from position 7
See this post for more information..
Try this solution based on minimum edit distance. I use that algorithm to compute the distance matrix, and then walk the matrix from its last cell back toward the first to identify which characters were added or removed. Because this walk visits the characters in reverse order, the result has to be reversed at the end.
To color a terminal I use the colorama module.
#!/bin/python
import sys
from colorama import *
from numpy import zeros
# colorama setup call (init comes from the star import above).
init()
# edDistDp assembles its output back to front and reverses the whole string
# once at the end (res[::-1]).  The color codes are therefore stored reversed
# here so that they come out in the right order after that final reversal.
inv_WHITE = Fore.WHITE[::-1]
inv_RED = Fore.RED[::-1]
inv_GREEN = Fore.GREEN[::-1]
def edDistDp(y, x):
    """Return an inline, color-coded diff of the strings ``y`` and ``x``.

    An edit-distance matrix is filled for the two strings and then walked
    from its last cell back towards the first one.  Every visited character
    is appended to the result followed by a reversed color code; the whole
    string is reversed once at the end, so each code ends up in front of
    its character:

    * characters common to both strings follow the white code,
    * characters taken from ``x`` only follow the green code,
    * characters taken from ``y`` only follow the red code.

    The returned string always ends with the white code.

    Args:
        y: The first string; its unmatched characters are marked red.
        x: The second string; its unmatched characters are marked green.

    Returns:
        The merged string with color codes embedded.
    """
    res = inv_WHITE

    # D[i, j] is the edit distance between x[:i] and y[:j].
    D = zeros((len(x)+1, len(y)+1), dtype=int)
    D[0, 1:] = range(1, len(y)+1)
    D[1:, 0] = range(1, len(x)+1)
    for i in range(1, len(x)+1):
        for j in range(1, len(y)+1):
            delt = 1 if x[i-1] != y[j-1] else 0
            D[i, j] = min(D[i-1, j-1]+delt, D[i-1, j]+1, D[i, j-1]+1)

    # Walk the matrix from the last cell back towards the first one.
    i = len(x)
    j = len(y)
    while i > 0 and j > 0:
        diagonal = D[i-1, j-1]
        upper = D[i, j-1]
        left = D[i-1, j]

        # Pick the cheapest predecessor, preferring the diagonal on ties.
        if diagonal <= upper and diagonal <= left:
            direction = "\\"
        elif left < diagonal and left <= upper:
            direction = "<-"
        else:
            direction = "^"

        if direction == "<-" or direction == "\\":
            i -= 1
        if direction == "^" or direction == "\\":
            j -= 1

        # Colorize the characters consumed by this step.
        if direction == "\\":
            if D[i+1, j+1] == diagonal:
                # Same cost as the diagonal cell: the characters match.
                res += x[i] + inv_WHITE
            else:
                # A cell is never cheaper than its diagonal predecessor, so
                # the only other possibility is a substitution.
                res += y[j] + inv_RED
                res += x[i] + inv_GREEN
        elif direction == "<-":
            res += x[i] + inv_GREEN
        else:
            res += y[j] + inv_RED

    # One string can run out before the other.  Whatever is left at the
    # front of the remaining string is a pure insertion (x) or deletion (y)
    # and must still be emitted, otherwise it would be dropped.
    while i > 0:
        i -= 1
        res += x[i] + inv_GREEN
    while j > 0:
        j -= 1
        res += y[j] + inv_RED

    return res[::-1]
# Demo: show each pair of strings diffed in both directions.
for one_string, other_string in (
    ("beep boop", "beep boob blah"),
    ("hola naco", "hola nacho"),
):
    forward = edDistDp(one_string, other_string)
    print("'%s'-'%s'='%s'" % (one_string, other_string, forward))
    backward = edDistDp(other_string, one_string)
    print("'%s'-'%s'='%s'" % (other_string, one_string, backward))
One possible way (see also @interjay’s comment to the OP) is
import difflib
red = lambda text: f"
I’m trying to find a Python way to diff strings. I know about difflib
but I haven’t been able to find an inline mode that does something similar to what this JS library does (insertions in green, deletions in red):
one_string = "beep boop"
other_string = "beep boob blah"
Is there a way to achieve this?
You can use ndiff.
Example…
import difflib
# Pairs of (source, target) strings that are compared character by character.
cases = [
    ('afrykanerskojęzyczny', 'afrykanerskojęzycznym'),
    ('afrykanerskojęzyczni', 'nieafrykanerskojęzyczni'),
    ('afrykanerskojęzycznym', 'afrykanerskojęzyczny'),
    ('nieafrykanerskojęzyczni', 'afrykanerskojęzyczni'),
    ('nieafrynerskojęzyczni', 'afrykanerskojzyczni'),
    ('abcdefg', 'xac'),
]

for a, b in cases:
    print('{} => {}'.format(a, b))
    # Passing plain strings makes ndiff compare them one character at a time.
    # Each entry starts with a marker (' ', '-' or '+') and ends with the
    # character it refers to.  Note that i counts entries of the diff
    # sequence; it is not an index into a or b.
    for i, s in enumerate(difflib.ndiff(a, b)):
        if s[0] == ' ':
            # Character present in both strings: nothing to report.
            continue
        elif s[0] == '-':
            print(u'Delete "{}" from position {}'.format(s[-1], i))
        elif s[0] == '+':
            print(u'Add "{}" to position {}'.format(s[-1], i))
    # Blank line between cases.
    print()
Returns…..
afrykanerskojęzyczny => afrykanerskojęzycznym
Add "m" to position 20
afrykanerskojęzyczni => nieafrykanerskojęzyczni
Add "n" to position 0
Add "i" to position 1
Add "e" to position 2
afrykanerskojęzycznym => afrykanerskojęzyczny
Delete "m" from position 20
nieafrykanerskojęzyczni => afrykanerskojęzyczni
Delete "n" from position 0
Delete "i" from position 1
Delete "e" from position 2
nieafrynerskojęzyczni => afrykanerskojzyczni
Delete "n" from position 0
Delete "i" from position 1
Delete "e" from position 2
Add "k" to position 7
Add "a" to position 8
Delete "ę" from position 16
abcdefg => xac
Add "x" to position 0
Delete "b" from position 2
Delete "d" from position 4
Delete "e" from position 5
Delete "f" from position 6
Delete "g" from position 7
See this post for more information..
Try this solution based on minimum edit distance. I use that algorithm to compute the distance matrix, and then walk the matrix from its last cell back toward the first to identify which characters were added or removed. Because this walk visits the characters in reverse order, the result has to be reversed at the end.
To color a terminal I use the colorama module.
#!/bin/python
import sys
from colorama import *
from numpy import zeros
# colorama setup call (init comes from the star import above).
init()
# edDistDp assembles its output back to front and reverses the whole string
# once at the end (res[::-1]).  The color codes are therefore stored reversed
# here so that they come out in the right order after that final reversal.
inv_WHITE = Fore.WHITE[::-1]
inv_RED = Fore.RED[::-1]
inv_GREEN = Fore.GREEN[::-1]
def edDistDp(y, x):
    """Return an inline, color-coded diff of the strings ``y`` and ``x``.

    An edit-distance matrix is filled for the two strings and then walked
    from its last cell back towards the first one.  Every visited character
    is appended to the result followed by a reversed color code; the whole
    string is reversed once at the end, so each code ends up in front of
    its character:

    * characters common to both strings follow the white code,
    * characters taken from ``x`` only follow the green code,
    * characters taken from ``y`` only follow the red code.

    The returned string always ends with the white code.

    Args:
        y: The first string; its unmatched characters are marked red.
        x: The second string; its unmatched characters are marked green.

    Returns:
        The merged string with color codes embedded.
    """
    res = inv_WHITE

    # D[i, j] is the edit distance between x[:i] and y[:j].
    D = zeros((len(x)+1, len(y)+1), dtype=int)
    D[0, 1:] = range(1, len(y)+1)
    D[1:, 0] = range(1, len(x)+1)
    for i in range(1, len(x)+1):
        for j in range(1, len(y)+1):
            delt = 1 if x[i-1] != y[j-1] else 0
            D[i, j] = min(D[i-1, j-1]+delt, D[i-1, j]+1, D[i, j-1]+1)

    # Walk the matrix from the last cell back towards the first one.
    i = len(x)
    j = len(y)
    while i > 0 and j > 0:
        diagonal = D[i-1, j-1]
        upper = D[i, j-1]
        left = D[i-1, j]

        # Pick the cheapest predecessor, preferring the diagonal on ties.
        if diagonal <= upper and diagonal <= left:
            direction = "\\"
        elif left < diagonal and left <= upper:
            direction = "<-"
        else:
            direction = "^"

        if direction == "<-" or direction == "\\":
            i -= 1
        if direction == "^" or direction == "\\":
            j -= 1

        # Colorize the characters consumed by this step.
        if direction == "\\":
            if D[i+1, j+1] == diagonal:
                # Same cost as the diagonal cell: the characters match.
                res += x[i] + inv_WHITE
            else:
                # A cell is never cheaper than its diagonal predecessor, so
                # the only other possibility is a substitution.
                res += y[j] + inv_RED
                res += x[i] + inv_GREEN
        elif direction == "<-":
            res += x[i] + inv_GREEN
        else:
            res += y[j] + inv_RED

    # One string can run out before the other.  Whatever is left at the
    # front of the remaining string is a pure insertion (x) or deletion (y)
    # and must still be emitted, otherwise it would be dropped.
    while i > 0:
        i -= 1
        res += x[i] + inv_GREEN
    while j > 0:
        j -= 1
        res += y[j] + inv_RED

    return res[::-1]
# Demo: show each pair of strings diffed in both directions.
for one_string, other_string in (
    ("beep boop", "beep boob blah"),
    ("hola naco", "hola nacho"),
):
    forward = edDistDp(one_string, other_string)
    print("'%s'-'%s'='%s'" % (one_string, other_string, forward))
    backward = edDistDp(other_string, one_string)
    print("'%s'-'%s'='%s'" % (other_string, one_string, backward))
One possible way (see also @interjay’s comment to the OP) is
import difflib
red = lambda text: f"