Skip to content

GPTDiff API Reference

Core Functions

generate_diff

def generate_diff(
    environment: str,
    goal: str,
    model: str = os.getenv('GPTDIFF_MODEL', 'deepseek-reasoner'),
    temperature: float = 0.7,
    max_tokens: int = 32000,
    prepend: Optional[str] = None,  # Path to file or URL whose content will be prepended to the system prompt
    api_key: Optional[str] = None,  # Can also set via GPTDIFF_LLM_API_KEY env
    base_url: Optional[str] = None
) -> str

Note: Built with the AI Agent Toolbox for reliable tool parsing across models and frameworks

Transforms environments through natural language instructions

  • environment: Multi-file codebase representation using File: [path] headers
  • goal: Natural language instruction for desired code changes
  • Returns a unified diff string compatible with smartapply

Example:

from gptdiff import generate_diff, build_environment  # Uses AI Agent Toolbox for reliable parsing

# Multi-file environment example
files = {
    "main.py": "def greet():\n    print('Hello World')",
    "tests/test_main.py": "def test_greet():\n    pass" 
}
env = build_environment(files)

# Coordinated change across implementation and tests
diff = generate_diff(env, 
    "Add return type hints and update tests to match"
)

files = {"main.py": "def greet():\n    print('Hello World')"}
env = build_environment(files)
diff = generate_diff(env, "Translate greeting to Spanish")

Pro Tip: Use build_environment() with file dictionaries for safer environment creation

smartapply

def smartapply(  # AI-powered patch resolver
    diff_text: str,
    files: Dict[str, str],
    model: str = 'deepseek-reasoner',
    api_key: Optional[str] = None,
    base_url: Optional[str] = None
) -> Dict[str, str]

Applies diffs with AI-powered conflict resolution:

  • Safe: Preserves file encoding and line endings
  • Robust: Handles ambiguous hunks through context-aware reconciliation
  • Uses LLM to preserve context around changes
  • Handles new file creation and complex patches

  • diff_text: Unified diff generated by generate_diff
  • files: Dictionary of {file_path: content} to modify
  • Returns updated file dictionary with changes applied

Implementation Notes:

  • Uses per-file processing with concurrent execution
  • Maintains original file encodings and line endings
  • Handles ambiguous hunks through LLM-powered reconciliation

Basic Example:

original_files = {"main.py": "def greet():\n    print('Hello World')"}
updated_files = smartapply(diff, original_files)  # Returns new dict

Advanced Example (Multi-file Modification):

original_files = {
    "file1.py": "def func1():\n    print('Old func1')",
    "file2.py": "def func2():\n    print('Old func2')",
    "unrelated.py": "def unrelated():\n    pass"
}

diff = '''diff --git a/file1.py b/file1.py
--- a/file1.py
+++ b/file1.py
@@ -1,2 +1,2 @@
 def func1():
-    print("Old func1")
+    print("New func1")
diff --git a/file2.py b/file2.py
--- a/file2.py
+++ b/file2.py
@@ -1,2 +1,2 @@
 def func2():
-    print("Old func2")
+    print("New func2")'''

updated_files = smartapply(diff, original_files)

print(updated_files["file1.py"])  # Contains 'print("New func1")'
print(updated_files["file2.py"])  # Contains 'print("New func2")'
print(updated_files["unrelated.py"])  # Remains unchanged

Real-World Refactor Example:

# Coordinated database schema and ORM update
original_files = {
    "models.py": "class User:\n    name = CharField()",
    "migrations/0001_initial.py": "# Existing migration",
    "tests/test_models.py": "def test_user_creation():\n    User(name='Test').save()"
}

diff = generate_diff(
    build_environment(original_files),
    "Rename 'name' field to 'username' across all layers"
)

updated_files = smartapply(diff, original_files)

Conflict Resolution & Error Handling:

from openai import APIError
from gptdiff import parse_environment

diff = '''diff --git a/core.py b/core.py
@@ -10,3 +10,3 @@
-def old_name():  # Existing line being removed
+def new_name():'''

try:
    smartapply(diff, {"core.py": "def old_name():\n    pass"}) 
except APIError as e:  # Built-in error handling
    updated = smartapply(diff, files, model='gemini-2.0-flash')  # Retry

Authentication & Configuration

# Option 1: Environment variables
import os  # Recommended for CLI usage
os.environ['GPTDIFF_LLM_API_KEY'] = 'key_123'
os.environ['GPTDIFF_LLM_BASE_URL'] = 'https://api.example.com/v1'

# Option 2: Direct parameters  # Preferred for library use
# Direct parameters override environment variables

generate_diff(
    environment=env,
    goal=instruction,
    api_key='key_123',
    base_url='https://api.example.com/v1'
)

Error Handling & Recovery

from openai import APIError, AuthenticationError

try:
    diff = generate_diff(environment, "Modernize error handling")
    updated = smartapply(diff, files)
except AuthenticationError as e:
    print(f"Invalid credentials: {e.user_message}")
except APIError as e:
    print(f"API failure: {e.message}")
    if e.status_code == 429:
        print("Rate limit exceeded - add delay")
except Exception as e:
    print(f"Unexpected error: {str(e)}")

Best Practices

  1. Environment Formatting:

         from gptdiff import build_environment
         env = build_environment(files_dict)  # Safely handles edge cases

  2. Model Selection:

       • deepseek-reasoner: Structural code changes (default)
       • gemini-2.0-flash: Faster responses for simple text modifications

Edge Case Handling

New File Creation:

# smartapply automatically creates missing files
diff = '''diff --git a/new.py b/new.py
@@ -0,0 +1,2 @@
+def initialize():
+    print("New module")'''  # Correct diff syntax

assert 'new.py' in smartapply(diff, {})

Modified File Validation:

# Maintains surrounding context during patching
original = {"utils.py": "def calc():\n    return 42"}
diff = '''diff --git a/utils.py b/utils.py
@@ -1,2 +1,2 @@
-def calc():
+def calculate():
     return 42'''

updated = smartapply(diff, original)
assert "calculate()" in updated["utils.py"]

File Deletion Handling:

diff = '''diff --git a/old.py b/old.py
deleted file mode 100644
--- a/old.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def deprecated():
-    print("Remove me")'''

# smartapply will remove file from returned dictionary
original = {"old.py": "def deprecated():\n    print('Remove me')"}
updated = smartapply(diff, original)
assert "old.py" not in updated

# Verify that no unrelated files were added to the result
assert all(f not in updated for f in ["utils.py", "config.ini"])

Note: Deleted files are omitted from the returned dictionary

Edge Case Handling:

  • New files are created with proper directory structure
  • Binary files are automatically skipped

Advanced Use Cases

Coordinated Multi-File Refactor:

original_files = {
    "api.py": "def old_name(): pass",
    "test_api.py": "def test_old_name(): pass"
}

diff = generate_diff(build_environment(original_files), "Rename old_name to new_name everywhere")
updated_files = smartapply(diff, original_files)

Batch Processing Pipeline:

from gptdiff import parse_environment, build_environment

# Process multiple transformations sequentially
files = load_project_files()  # Your custom loader
env = build_environment(files)

for task in ["Add type hints", "Convert to f-strings"]:
    diff = generate_diff(env, task)
    files = smartapply(diff, files)
    env = build_environment(files)  # Refresh environment

save_files(files)  # Your custom saver