difflib

Updated March 17, 2026 · Modules
comparison diff sequences text-processing

The difflib module provides classes and functions for comparing sequences. You can use it for comparing files, generating diffs in different formats (unified, context, or HTML), and finding close matches to strings. Its unified and context output uses the same formats produced by the diff command-line tool and shown in version control diffs.

Syntax

import difflib
import keyword

# Compare sequences
matcher = difflib.SequenceMatcher(None, "hello", "hallo")
print(matcher.ratio())  # 0.8  (4 matching characters: 2 * 4 / 10)

# Generate unified diff
# (old_lines and new_lines are placeholders for your own lists of line strings)
diff = difflib.unified_diff(old_lines, new_lines, fromfile='old.py', tofile='new.py')

# Find close matches
matches = difflib.get_close_matches('apple', ['ape', 'appel', 'apricot'])
# ['appel', 'ape']

Key Functions

SequenceMatcher

The SequenceMatcher class compares pairs of sequences and reports their similarity. It’s flexible enough to work with any sequence type, as long as the sequence elements are hashable.

difflib.SequenceMatcher(isjunk=None, a='', b='', autojunk=True)
| Parameter | Type | Default | Description |
|---|---|---|---|
| isjunk | callable | None | Function that returns True for elements to ignore |
| a | sequence | '' | First sequence to compare |
| b | sequence | '' | Second sequence to compare |
| autojunk | bool | True | Enable automatic junk heuristic |

Example: Basic string comparison

import difflib
import keyword

s = difflib.SequenceMatcher(None, "hello world", "hello there")
print(s.ratio())
# 0.6363636363636364  (7 matching characters: 2 * 7 / 22)

# Get detailed matching information
for block in s.get_matching_blocks():
    print(f"a[{block.a}:{block.a + block.size}] == b[{block.b}:{block.b + block.size}] for {block.size} chars")
# a[0:6] == b[0:6] for 6 chars
# a[8:9] == b[9:10] for 1 chars
# a[11:11] == b[11:11] for 0 chars
# (the last, zero-length block is a sentinel marking the end of both sequences)

Example: Ignoring whitespace

import difflib
import keyword

# Without junk filter
s1 = difflib.SequenceMatcher(None, "a b c", "abc")
print(s1.ratio())  # 0.75  (3 matching characters: 2 * 3 / 8)

# With junk filter to ignore spaces.
# isjunk is only applied to the second sequence; "abc" contains no spaces,
# so the filter has nothing to discard and the ratio is unchanged here.
s2 = difflib.SequenceMatcher(lambda x: x == " ", "a b c", "abc")
print(s2.ratio())  # 0.75

unified_diff()

Produces diff output in the unified format, which is what you see in Git diffs.

difflib.unified_diff(a, b, fromfile='', tofile='', fromfiledate='', tofiledate='', n=3, lineterm='\n')
| Parameter | Type | Default | Description |
|---|---|---|---|
| a | list[str] | required | Original lines |
| b | list[str] | required | Modified lines |
| fromfile | str | '' | Name for original file |
| tofile | str | '' | Name for modified file |
| fromfiledate | str | '' | Timestamp for original |
| tofiledate | str | '' | Timestamp for modified |
| n | int | 3 | Number of context lines |
| lineterm | str | '\n' | Line terminator |

Example: Unified diff between two code snippets

import difflib
import keyword
from io import StringIO

# Both snippets end with a newline so that every line handed to unified_diff
# carries its own terminator. Without it, the last old line ('...a + b') would
# not equal the corresponding new line ('...a + b\n'): the diff would report a
# spurious change and the printed lines would run together.
old = """def greet(name):
    return "Hello " + name

def add(a, b):
    return a + b
""".splitlines(keepends=True)

new = """def greet(name):
    return "Hello, " + name + "!"

def add(a, b):
    return a + b

def multiply(a, b):
    return a * b
""".splitlines(keepends=True)

# unified_diff is a generator of already-terminated lines (lineterm='\n' by
# default), so they can be written out as-is.
output = StringIO()
output.writelines(difflib.unified_diff(old, new, fromfile='original.py', tofile='modified.py'))
# The text already ends with a newline; end='' avoids printing a blank line.
print(output.getvalue(), end='')

Output:

--- original.py
+++ modified.py
@@ -1,5 +1,8 @@
 def greet(name):
-    return "Hello " + name
+    return "Hello, " + name + "!"
 
 def add(a, b):
     return a + b
+
+def multiply(a, b):
+    return a * b

context_diff()

Similar to unified_diff but shows changes in before/after blocks.

difflib.context_diff(a, b, fromfile='', tofile='', fromfiledate='', tofiledate='', n=3, lineterm='\n')
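| Parameter | Type | Default | Description |
|---|---|---|---|
| a | list[str] | required | Original lines |
| b | list[str] | required | Modified lines |
| fromfile | str | '' | Name for original file |
| tofile | str | '' | Name for modified file |
| fromfiledate | str | '' | Timestamp for original |
| tofiledate | str | '' | Timestamp for modified |
| n | int | 3 | Context lines before/after |
| lineterm | str | '\n' | Line terminator |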

Example: Context diff showing changes

import difflib
import keyword

old = ['line 1\n', 'line 2\n', 'line 3\n']
new = ['line 1\n', 'modified line 2\n', 'line 3\n', 'line 4\n']

# Every input line already ends with '\n' and the generated header lines use
# the default lineterm='\n', so print with end='' to avoid blank lines.
for line in difflib.context_diff(old, new, fromfile='old.txt', tofile='new.txt'):
    print(line, end='')

Output:

*** old.txt
--- new.txt
***************
*** 1,3 ****
  line 1
! line 2
  line 3
--- 1,4 ----
  line 1
! modified line 2
  line 3
+ line 4

get_close_matches()

Finds strings in a list that closely match a target string. Useful for typo correction or command suggestions.

difflib.get_close_matches(word, possibilities, n=3, cutoff=0.6)
| Parameter | Type | Default | Description |
|---|---|---|---|
| word | str | required | Target string to match |
| possibilities | list | required | List of strings to search |
| n | int | 3 | Maximum matches to return |
| cutoff | float | 0.6 | Minimum similarity (0-1) |

Example: Finding close matches in a word list

import difflib
import keyword

words = ['apple', 'apply', 'ape', 'banana', 'peach', 'application']

# Find matches similar to 'aple' (results are sorted best match first)
matches = difflib.get_close_matches('aple', words)
print(matches)  # ['apple', 'ape', 'apply']

# Higher cutoff means stricter matching
matches = difflib.get_close_matches('aple', words, cutoff=0.8)
print(matches)  # ['apple', 'ape']

print(difflib.get_close_matches('whil', keyword.kwlist))
# ['while']

ndiff()

Generates a Differ-style delta, marking each line with a prefix indicating whether it was added, removed, or is unchanged.

difflib.ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK)
| Parameter | Type | Default | Description |
|---|---|---|---|
| a | list[str] | required | First sequence of lines |
| b | list[str] | required | Second sequence of lines |
| linejunk | callable | None | Filter for ignoring lines |
| charjunk | callable | IS_CHARACTER_JUNK | Filter for ignoring characters |

Example: Using ndiff for line-by-line comparison

import difflib
import keyword

a = ['one\n', 'two\n', 'three\n']
b = ['one\n', 'two\n', 'four\n']

# ndiff returns a generator; each delta line is the original line (including
# its trailing '\n') with a two-character prefix, so ''.join() rebuilds the text.
diff = list(difflib.ndiff(a, b))
print(''.join(diff))

Output:

  one
  two
- three
+ four

HtmlDiff

Generates HTML tables showing side-by-side comparisons with highlighting.

difflib.HtmlDiff(tabsize=8, wrapcolumn=None, linejunk=None, charjunk=IS_CHARACTER_JUNK)

Example: Creating an HTML diff

import difflib
import keyword

old = ['line 1', 'line 2', 'line 3']
new = ['line 1', 'modified 2', 'line 3', 'line 4']

html = difflib.HtmlDiff()
# make_table() returns only the HTML <table> markup as a string;
# use make_file() instead to get a complete standalone HTML document.
print(html.make_table(old, new, fromdesc='Original', todesc='Modified'))

Common Patterns

Comparing files line-by-line

import difflib
import keyword

def compare_files(file1_path, file2_path):
    """Return a unified diff of two text files as a single string.

    Args:
        file1_path: Path of the original file; also used as the ``---`` label.
        file2_path: Path of the modified file; also used as the ``+++`` label.

    Returns:
        The diff with lines separated by ``'\\n'`` (no trailing newline), or an
        empty string when the files have identical lines.

    Raises:
        OSError: If either file cannot be opened or read.
    """
    with open(file1_path) as f1, open(file2_path) as f2:
        # Strip line endings so content lines and the generated control lines
        # ('---', '+++', '@@') are all newline-free, matching lineterm=''.
        # Mixing readlines() output (which keeps '\n') with lineterm='' would
        # glue the header lines together when joined.
        old_lines = [line.rstrip('\n') for line in f1]
        new_lines = [line.rstrip('\n') for line in f2]

    diff = difflib.unified_diff(old_lines, new_lines,
                                fromfile=file1_path,
                                tofile=file2_path,
                                lineterm='')
    return '\n'.join(diff)

# Usage
print(compare_files('version1.py', 'version2.py'))

Implementing a simple “did you mean” feature

import difflib
import keyword

COMMANDS = ['status', 'commit', 'push', 'pull', 'branch', 'checkout', 'merge']

def suggest_command(user_input):
    """Return a "did you mean" hint for a mistyped command.

    Looks up the single closest entry in COMMANDS (similarity of at least
    0.5) and returns a suggestion string, or "Command not found" when no
    entry is close enough.
    """
    candidates = difflib.get_close_matches(user_input, COMMANDS, n=1, cutoff=0.5)
    # Guard clause: nothing similar enough to suggest.
    if not candidates:
        return "Command not found"
    best = candidates[0]
    return f"Did you mean '{best}'?"

print(suggest_command('pus'))    # Did you mean 'push'?
print(suggest_command('brnch'))  # Did you mean 'branch'?
print(suggest_command('xyz'))   # Command not found

Getting edit operations (the detailed way to transform one sequence into another)

import difflib
import keyword

a = "qabxcd"
b = "abycdf"

s = difflib.SequenceMatcher(None, a, b)

# Each opcode is a 5-tuple (tag, i1, i2, j1, j2): apply `tag` to the slice
# a[i1:i2], using b[j1:j2] as the replacement/inserted content.
for tag, i1, i2, j1, j2 in s.get_opcodes():
    print(f"{tag:7} a[{i1}:{i2}] --> b[{j1}:{j2}] {a[i1:i2]!r} --> {b[j1:j2]!r}")

Output:

delete  a[0:1] --> b[0:0] 'q' --> ''
equal   a[1:3] --> b[0:2] 'ab' --> 'ab'
replace a[3:4] --> b[2:3] 'x' --> 'y'
equal   a[4:6] --> b[3:5] 'cd' --> 'cd'
insert  a[6:6] --> b[5:6] '' --> 'f'

Restoring original lines from ndiff output

import difflib
import keyword

a = ['one\n', 'two\n', 'three\n']
b = ['one\n', 'two\n', 'four\n']

diff = list(difflib.ndiff(a, b))

# Restore original (which=1) and modified (which=2)
original = ''.join(difflib.restore(diff, 1))
modified = ''.join(difflib.restore(diff, 2))

print("Original:", repr(original))
# Original: 'one\ntwo\nthree\n'
print("Modified:", repr(modified))
# Modified: 'one\ntwo\nfour\n'

See Also