difflib
The difflib module provides classes and functions for comparing sequences. You can use it for comparing files, generating diffs in different formats (unified, context, or HTML), and finding close matches to strings. Its output formats mirror those of the Unix diff tool, including the unified diffs you see in version control.
Syntax
import difflib
import keyword
# Compare sequences
matcher = difflib.SequenceMatcher(None, "hello", "hallo")
print(matcher.ratio()) # 0.8
# Generate unified diff
diff = difflib.unified_diff(old_lines, new_lines, fromfile='old.py', tofile='new.py')
# Find close matches
matches = difflib.get_close_matches('apple', ['ape', 'appel', 'apricot'])
Key Functions
SequenceMatcher
The SequenceMatcher class compares pairs of sequences and reports their similarity. It’s flexible enough to work with any sequence type, as long as the sequence elements are hashable.
difflib.SequenceMatcher(isjunk=None, a='', b='', autojunk=True)
| Parameter | Type | Default | Description |
|---|---|---|---|
| isjunk | callable | None | Function that returns True for elements to ignore |
| a | sequence | '' | First sequence to compare |
| b | sequence | '' | Second sequence to compare |
| autojunk | bool | True | Enable automatic junk heuristic |
Example: Basic string comparison
import difflib
import keyword
s = difflib.SequenceMatcher(None, "hello world", "hello there")
print(s.ratio())
# 0.6363636363636364
# Get detailed matching information
for block in s.get_matching_blocks():
print(f"a[{block.a}:{block.a + block.size}] == b[{block.b}:{block.b + block.size}] for {block.size} chars")
# a[0:6] == b[0:6] for 6 chars
# a[8:9] == b[9:10] for 1 chars
# a[11:11] == b[11:11] for 0 chars
Example: Ignoring whitespace
import difflib
import keyword
# Without junk filter
s1 = difflib.SequenceMatcher(None, "a b c", "abc")
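print(s1.ratio()) # 0.75
# With junk filter to ignore spaces. Junk is identified among the elements of
# the second sequence, and "abc" contains no spaces, so the ratio is unchanged here
s2 = difflib.SequenceMatcher(lambda x: x == " ", "a b c", "abc")
print(s2.ratio()) # 0.75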
unified_diff()
Produces diff output in the unified format, which is what you see in Git diffs.
difflib.unified_diff(a, b, fromfile='', tofile='', fromfiledate='', tofiledate='', n=3, lineterm='\n')
| Parameter | Type | Default | Description |
|---|---|---|---|
| a | list[str] | required | Original lines |
| b | list[str] | required | Modified lines |
| fromfile | str | '' | Name for original file |
| tofile | str | '' | Name for modified file |
| fromfiledate | str | '' | Timestamp for original |
| tofiledate | str | '' | Timestamp for modified |
| n | int | 3 | Number of context lines |
| lineterm | str | '\n' | Line terminator |
Example: Unified diff between two code snippets
import difflib
import keyword
from io import StringIO
old = """def greet(name):
return "Hello " + name
def add(a, b):
return a + b""".splitlines(keepends=True)
new = """def greet(name):
return "Hello, " + name + "!"
def add(a, b):
return a + b
def multiply(a, b):
return a * b""".splitlines(keepends=True)
output = StringIO()
output.writelines(difflib.unified_diff(old, new, fromfile='original.py', tofile='modified.py'))
print(output.getvalue())
Output:
--- original.py
+++ modified.py
@@ -1,5 +1,8 @@
def greet(name):
- return "Hello " + name
+ return "Hello, " + name + "!"
def add(a, b):
return a + b
+
+def multiply(a, b):
+ return a * b
context_diff()
Similar to unified_diff but shows changes in before/after blocks.
difflib.context_diff(a, b, fromfile='', tofile='', fromfiledate='', tofiledate='', n=3, lineterm='\n')
| Parameter | Type | Default | Description |
|---|---|---|---|
| a | list[str] | required | Original lines |
| b | list[str] | required | Modified lines |
| fromfile | str | '' | Name for original file |
| tofile | str | '' | Name for modified file |
| fromfiledate | str | '' | Timestamp for original |
| tofiledate | str | '' | Timestamp for modified |
| n | int | 3 | Context lines before/after |
| lineterm | str | '\n' | Line terminator |
Example: Context diff showing changes
import difflib
import keyword
old = ['line 1\n', 'line 2\n', 'line 3\n']
new = ['line 1\n', 'modified line 2\n', 'line 3\n', 'line 4\n']
for line in difflib.context_diff(old, new, fromfile='old.txt', tofile='new.txt'):
print(line, end='')
Output:
*** old.txt
--- new.txt
***************
*** 1,3 ****
line 1
! line 2
line 3
--- 1,4 ----
line 1
! modified line 2
line 3
+ line 4
get_close_matches()
Finds strings in a list that closely match a target string. Useful for typo correction or command suggestions.
difflib.get_close_matches(word, possibilities, n=3, cutoff=0.6)
| Parameter | Type | Default | Description |
|---|---|---|---|
| word | str | required | Target string to match |
| possibilities | list | required | List of strings to search |
| n | int | 3 | Maximum matches to return |
| cutoff | float | 0.6 | Minimum similarity (0-1) |
Example: Finding close matches in a word list
import difflib
import keyword
words = ['apple', 'apply', 'ape', 'banana', 'peach', 'application']
# Find matches similar to 'aple'
matches = difflib.get_close_matches('aple', words)
print(matches) # ['apple', 'ape', 'apply']
# Higher cutoff means stricter matching
matches = difflib.get_close_matches('aple', words, cutoff=0.8)
print(matches) # ['apple', 'ape']
print(difflib.get_close_matches('whil', keyword.kwlist))
# ['while']
ndiff()
Generates a Differ-style delta, marking each line with a prefix indicating whether it was added, removed, or is unchanged.
difflib.ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK)
| Parameter | Type | Default | Description |
|---|---|---|---|
| a | list[str] | required | First sequence of lines |
| b | list[str] | required | Second sequence of lines |
| linejunk | callable | None | Filter for ignoring lines |
| charjunk | callable | IS_CHARACTER_JUNK | Filter for ignoring characters |
Example: Using ndiff for line-by-line comparison
import difflib
import keyword
a = ['one\n', 'two\n', 'three\n']
b = ['one\n', 'two\n', 'four\n']
diff = list(difflib.ndiff(a, b))
print(''.join(diff))
Output:
one
two
- three
+ four
HtmlDiff
Generates HTML tables showing side-by-side comparisons with highlighting.
difflib.HtmlDiff(tabsize=8, wrapcolumn=None, linejunk=None, charjunk=IS_CHARACTER_JUNK)
Example: Creating an HTML diff
import difflib
import keyword
old = ['line 1', 'line 2', 'line 3']
new = ['line 1', 'modified 2', 'line 3', 'line 4']
html = difflib.HtmlDiff()
print(html.make_table(old, new, fromdesc='Original', todesc='Modified'))
Common Patterns
Comparing files line-by-line
import difflib
import keyword
def compare_files(file1_path, file2_path):
    """Return a unified diff of two text files as a single string.

    Args:
        file1_path: Path of the original file; also used as the "from" label.
        file2_path: Path of the modified file; also used as the "to" label.

    Returns:
        The unified diff text, or an empty string when the files have
        identical lines.
    """
    with open(file1_path) as f1, open(file2_path) as f2:
        old_lines = f1.readlines()
        new_lines = f2.readlines()
    # readlines() keeps each line's trailing newline, so the default
    # lineterm must be used. With lineterm='' the '---', '+++' and '@@'
    # header lines carry no newline and run together once joined.
    diff = difflib.unified_diff(old_lines, new_lines,
                                fromfile=file1_path,
                                tofile=file2_path)
    return ''.join(diff)
# Usage
print(compare_files('version1.py', 'version2.py'))
Implementing a simple “did you mean” feature
import difflib
import keyword
COMMANDS = ['status', 'commit', 'push', 'pull', 'branch', 'checkout', 'merge']


def suggest_command(user_input):
    """Build a "did you mean" hint for a possibly mistyped command.

    Looks up the single entry of COMMANDS most similar to *user_input*
    (similarity cutoff 0.5) and returns a suggestion string for it, or
    "Command not found" when no entry is similar enough.
    """
    closest = difflib.get_close_matches(user_input, COMMANDS, n=1, cutoff=0.5)
    if not closest:
        return "Command not found"
    # n=1 caps the result at one candidate, so unpacking a single item is safe.
    (best,) = closest
    return f"Did you mean '{best}'?"
print(suggest_command('pus')) # Did you mean 'push'?
print(suggest_command('brnch')) # Did you mean 'branch'?
print(suggest_command('xyz')) # Command not found
Getting edit operations (the detailed way to transform one sequence into another)
import difflib
import keyword
a = "qabxcd"
b = "abycdf"
s = difflib.SequenceMatcher(None, a, b)
for tag, i1, i2, j1, j2 in s.get_opcodes():
print(f"{tag:7} a[{i1}:{i2}] --> b[{j1}:{j2}] {a[i1:i2]!r} --> {b[j1:j2]!r}")
Output:
delete a[0:1] --> b[0:0] 'q' --> ''
equal a[1:3] --> b[0:2] 'ab' --> 'ab'
replace a[3:4] --> b[2:3] 'x' --> 'y'
equal a[4:6] --> b[3:5] 'cd' --> 'cd'
insert a[6:6] --> b[5:6] '' --> 'f'
Restoring original lines from ndiff output
import difflib
import keyword
a = ['one\n', 'two\n', 'three\n']
b = ['one\n', 'two\n', 'four\n']
diff = list(difflib.ndiff(a, b))
# Restore original (which=1) and modified (which=2)
original = ''.join(difflib.restore(diff, 1))
modified = ''.join(difflib.restore(diff, 2))
print("Original:", repr(original))
print("Modified:", repr(modified))