angr::symbolic

tool angr symbolic execution
ConceptsSetupBasic solve Hooksstdin/argvMemory SimProceduresPerformancePatterns
01Concepts
How angr works
# Symbolic execution: run program with symbolic values
# Instead of concrete input (e.g. "AAAA"),
# use symbolic variables (e.g. "sym_0 sym_1 sym_2...")
# Track all constraints on symbols through execution
# At target path: ask SMT solver (z3) to find satisfying values

# Core objects:
# Project    — loaded binary
# SimState   — execution state (regs, mem, constraints)
# SimManager — explore states (find/avoid paths)
# BVS        — BitVectorSymbolic (symbolic variable)
# BVV        — BitVectorValue (concrete value)
# Solver     — z3 constraint solver

# When to use angr:
# - Obfuscated check functions
# - Complex multi-path validation
# - CTF "find password to reach 'Correct'" challenges
# - When static analysis is too time-consuming
Install
# Install (use virtualenv to avoid conflicts)
python3 -m venv angr-env
source angr-env/bin/activate
pip install angr

# Verify
python3 -c "import angr; print(angr.__version__)"

# Common imports
import angr
import claripy          # symbolic variables / solver
import logging
# Optional: suppress verbose output
logging.getLogger('angr').setLevel(logging.WARNING)
logging.getLogger('cle').setLevel(logging.WARNING)
02Basic Solve Pattern
Classic CTF template: find "Correct" avoid "Wrong"
import angr, claripy, logging
logging.getLogger('angr').setLevel(logging.WARNING)

proj = angr.Project('./challenge', auto_load_libs=False)

# --- Find addresses (use Ghidra/GDB to get these) ---
find_addr  = 0x401234   # address of "puts('Correct!')" or similar
avoid_addr = 0x401256   # address of "puts('Wrong!')"

# --- Create symbolic stdin input ---
flag_len = 32                              # guess or known length
flag_sym = claripy.BVS('flag', 8 * flag_len)  # symbolic bitvector

# --- Set up initial state ---
state = proj.factory.entry_state(
    stdin=claripy.Concat(flag_sym, claripy.BVV(b'\n', 8)),
    add_options={angr.options.LAZY_SOLVES}
)

# Constrain to printable ASCII (speeds up solver)
for i in range(flag_len):
    byte = flag_sym.get_byte(i)
    state.solver.add(byte >= 0x20)
    state.solver.add(byte <= 0x7e)

# --- Explore ---
simgr = proj.factory.simulation_manager(state)
simgr.explore(find=find_addr, avoid=avoid_addr)

# --- Extract solution ---
if simgr.found:
    sol = simgr.found[0]
    flag = sol.solver.eval(flag_sym, cast_to=bytes)
    print('Flag:', flag)
else:
    print('No solution found')
03Hooks
Hook functions
# Hook a function to skip or replace it

# Simple: skip a call (NOP it)
@proj.hook(0x401234, length=5)   # length = bytes to skip
def skip_check(state):
    pass                          # do nothing

# Hook with logic: force return value
@proj.hook(0x401234, length=5)
def force_success(state):
    state.regs.rax = 1            # return 1 = success

# Hook by symbol name
proj.hook_symbol('strcmp', angr.SIM_PROCEDURES['libc']['strcmp']())

# Hook to print what strcmp sees
@proj.hook(proj.loader.find_symbol('strcmp').rebased_addr)
def hook_strcmp(state):
    s1 = state.mem[state.regs.rdi].string.concrete
    s2 = state.mem[state.regs.rsi].string.concrete
    print(f'strcmp({s1}, {s2})')
    state.regs.rax = 0
SimProcedures
# Replace functions with angr-aware implementations

# Built-in SimProcedures
angr.SIM_PROCEDURES['libc'].keys()   # all libc hooks
# includes: strcmp, printf, scanf, malloc, puts, gets...

# Apply all libc hooks automatically:
proj = angr.Project('./challenge',
    use_sim_procedures=True)

# Custom SimProcedure class
class MyCheck(angr.SimProcedure):
    def run(self, arg1, arg2):
        # arg1, arg2 are symbolic
        # return symbolic or concrete value
        self.state.solver.add(arg1 == arg2)
        return claripy.BVV(1, 32)    # return 1

proj.hook_symbol('custom_check', MyCheck())
04Input Methods
stdin variants
# Method 1: symbolic stdin via entry_state
flag = claripy.BVS('flag', 8 * 32)
state = proj.factory.entry_state(
    stdin=claripy.Concat(flag, claripy.BVV(b'\n', 8))
)

# Method 2: angr stdin stream (interactive programs)
flag = claripy.BVS('flag', 8 * 32)
state = proj.factory.entry_state()
state.globals['flag'] = flag
state.posix.stdin.write(flag, len_expr=32)

# Method 3: SimFile
from angr.storage import SimFileStream
flag = claripy.BVS('flag', 8*32)
stdin = SimFileStream(name='stdin', content=flag, has_end=False)
state = proj.factory.entry_state(stdin=stdin)
argv / environment
# Symbolic argv[1]
flag = claripy.BVS('flag', 8 * 20)
state = proj.factory.entry_state(
    args=['./challenge', flag]
)

# Add null terminator constraint
state.solver.add(flag.get_byte(19) == 0)

# Symbolic environment variable
flag = claripy.BVS('flag', 8 * 32)
state = proj.factory.entry_state(
    env={'FLAG': flag}
)

# Start at specific address instead of entry
state = proj.factory.blank_state(addr=0x401234)
state.regs.rdi = 0x404060   # set up manually
05Memory & Registers
Read state
# Registers (concrete or symbolic)
state.regs.rax           # symbolic BitVector
state.regs.rip           # instruction pointer
state.regs.rsp

# Concretize register value
state.solver.eval(state.regs.rax)             # one solution
state.solver.eval_upto(state.regs.rax, 10)   # up to 10

# Memory read
state.mem[0x404060].uint64_t.resolved   # 64-bit int
state.mem[0x404060].uint32_t.resolved
state.mem[0x404060].string.concrete     # null-term string
state.mem[0x404060].byte.resolved

# Memory store (write)
state.memory.store(0x404060, claripy.BVV(b'\x41' * 8))
Constraints
# Add constraints to symbolic variables
flag = claripy.BVS('flag', 8 * 20)

# Printable ASCII
for i in range(20):
    b = flag.get_byte(i)
    state.solver.add(b >= 0x20, b <= 0x7e)

# Known prefix
state.solver.add(flag.get_byte(0) == ord('p'))
state.solver.add(flag.get_byte(1) == ord('i'))
state.solver.add(flag.get_byte(2) == ord('c'))

# Alphanumeric only
for i in range(20):
    b = flag.get_byte(i)
    state.solver.add(claripy.Or(
        claripy.And(b >= 0x41, b <= 0x5a),  # A-Z
        claripy.And(b >= 0x61, b <= 0x7a),  # a-z
        claripy.And(b >= 0x30, b <= 0x39),  # 0-9
    ))
06Performance & Exploration
Exploration strategies
# find / avoid by address
simgr.explore(find=0x401234, avoid=0x401256)

# find / avoid by condition (lambda on state)
simgr.explore(
    find=lambda s: b'Correct' in s.posix.dumps(1),
    avoid=lambda s: b'Wrong' in s.posix.dumps(1)
)

# Multiple find addresses
simgr.explore(find=[0x401234, 0x401290], avoid=0x401256)

# Step N instructions manually
simgr.step()              # one basic block
simgr.step(n=100)         # 100 steps
simgr.run()               # run until no active states

# Check state stashes
simgr.active             # currently running states
simgr.found              # states reaching find target
simgr.avoided            # states that hit avoid
simgr.deadended          # states with no successors
simgr.errored            # states that crashed
Speed-up options
# Most important performance options
state = proj.factory.entry_state(add_options={
    angr.options.LAZY_SOLVES,           # defer sat checks
    angr.options.SYMBOLIC_INITIAL_VALUES,
    angr.options.ZERO_FILL_UNCONSTRAINED_MEMORY,
    angr.options.ZERO_FILL_UNCONSTRAINED_REGISTERS,
})

# Remove options that slow things down
state = proj.factory.entry_state(remove_options={
    angr.options.SUPPORT_FLOATING_POINT,
})

# Disable libc loading (huge speedup)
proj = angr.Project('./challenge', auto_load_libs=False)

# Use blank_state to start mid-function (skip init)
state = proj.factory.blank_state(addr=check_func)

# Limit exploration (avoid explosion)
simgr.explore(find=0x401234, avoid=0x401256,
              num_find=1)    # stop after 1 found
07CTF Patterns
Pattern: strcmp-based check
import angr, claripy, logging
logging.getLogger('angr').setLevel(logging.ERROR)

proj = angr.Project('./crackme', auto_load_libs=False)

# Hook strcmp to leak what it compares
answers = []
class HookStrcmp(angr.SimProcedure):
    def run(self, s1_ptr, s2_ptr):
        try:
            s1 = self.state.mem[s1_ptr].string.concrete
            s2 = self.state.mem[s2_ptr].string.concrete
            answers.append((s1, s2))
        except: pass
        return claripy.BVV(0, 32)   # always equal

proj.hook_symbol('strcmp', HookStrcmp())

flag = claripy.BVS('flag', 8*40)
state = proj.factory.entry_state(args=['./crackme', flag])
simgr = proj.factory.simulation_manager(state)
simgr.explore(find=0x401234)

print(answers)   # flag will be in here
Pattern: file-based flag check
import angr, claripy
from angr.storage import SimFile

proj = angr.Project('./challenge', auto_load_libs=False)

# Binary reads from 'flag.txt'
flag_content = claripy.BVS('flag_content', 8 * 50)
flag_file = SimFile('flag.txt', content=flag_content)

state = proj.factory.entry_state(
    fs={'flag.txt': flag_file}
)
for i in range(50):
    b = flag_content.get_byte(i)
    state.solver.add(b >= 0x20, b <= 0x7e)

simgr = proj.factory.simulation_manager(state)
simgr.explore(
    find=lambda s: b'Correct' in s.posix.dumps(1),
    avoid=lambda s: b'Wrong' in s.posix.dumps(1)
)
if simgr.found:
    sol = simgr.found[0]
    print(sol.solver.eval(flag_content, cast_to=bytes))
Extract solution
if simgr.found:
    sol = simgr.found[0]

    # Get concrete bytes for symbolic variable
    flag = sol.solver.eval(flag_sym, cast_to=bytes)
    print('Flag:', flag)

    # Strip null bytes
    flag = flag.rstrip(b'\x00')
    print('Flag:', flag.decode(errors='replace'))

    # All solutions (if multiple)
    solutions = sol.solver.eval_upto(flag_sym, 5, cast_to=bytes)
    for s in solutions:
        print(s.decode(errors='replace'))

    # Check stdout output
    print('stdout:', sol.posix.dumps(1))  # fd=1
    print('stderr:', sol.posix.dumps(2))  # fd=2
ANGR CHECKLIST →  ① auto_load_libs=False always (speed)  ② Find success/failure addresses in Ghidra first  ③ Constrain to printable ASCII immediately (huge solver speedup)  ④ Hook libc functions (strcmp, strncmp) to avoid path explosion  ⑤ blank_state(addr=check_func) to skip initialization  ⑥ find=lambda s: b'Correct' in s.posix.dumps(1) for string output  ⑦ If slow: reduce symbolic input size, add more constraints early