file::carving

tool foremost · scalpel · binwalk · photorec
foremost · scalpel · binwalk · photorec · Manual carving · Magic bytes
01foremost
Usage
# Install
sudo apt install foremost

# Basic carve
foremost -i disk.img -o ./carved/

# Specific file types only
foremost -t jpg,png,pdf,zip,doc -i disk.img -o ./out/

# All supported types
foremost -t all -i disk.img -o ./out/

# Verbose
foremost -v -i disk.img -o ./out/

# From unallocated space only (TSK pipeline)
blkls disk.img | foremost -i - -o ./out/

# Results in: out/audit.txt + out/jpg/ out/pdf/ etc.
cat ./out/audit.txt    # summary of found files
ls ./out/              # one dir per file type
Supported types
Type flag | File types
jpg | JPEG images
gif | GIF images
png | PNG images
bmp | BMP images
avi | AVI video
mpg | MPEG video
mp4 | MP4 video
wav | WAV audio
mp3 | MP3 audio
pdf | PDF documents
doc | MS Word .doc
zip | ZIP archives (also docx, xlsx, jar)
exe | Windows executables
elf | ELF binaries
02scalpel
Usage & config
# Install
sudo apt install scalpel

# Edit config: /etc/scalpel/scalpel.conf
# Uncomment lines for types you want
# Format: extension  case  maxsize  header  footer
# jpg   y   200000000  \xff\xd8\xff\xe0  \xff\xd9

# Run with default config
scalpel -o ./out/ disk.img

# Custom config
scalpel -c my.conf -o ./out/ disk.img

# Generate/update the carve coverage blockmap file (-m does NOT control overlap handling)
scalpel -m -o ./out/ disk.img

# Preview — don't write, just report
scalpel -p -o ./out/ disk.img
Custom scalpel.conf entries
# Add custom file type to scalpel.conf:
# format: ext  case  size  header  [footer]

# PNG
png   y   5000000  \x89\x50\x4e\x47\x0d\x0a\x1a\x0a  \x49\x45\x4e\x44\xae\x42\x60\x82

# ZIP / docx / jar
zip   y   50000000  \x50\x4b\x03\x04

# PDF
pdf   y   5000000  \x25\x50\x44\x46  \x25\x25\x45\x4f\x46

# ELF
elf   y   5000000  \x7f\x45\x4c\x46

# SQLite database (fields are whitespace-separated, so spaces inside a header must be written as \s)
sqlite  y  100000000  SQLite\sformat\s3

# Flag file (text)
txt   y   10000   picoCTF{
03binwalk
Usage
# Scan for embedded files
binwalk file.bin

# Extract everything
binwalk -e file.bin          # extract to _file.bin.extracted/
binwalk -Me file.bin         # matryoshka — recursive extract
binwalk --dd='.*' file.bin   # extract ALL signatures

# Entropy analysis (find compressed/encrypted regions)
binwalk -E file.bin          # entropy graph
binwalk -E -J file.bin       # save the entropy plot as a PNG (-J / --save)

# Search for a specific byte sequence
binwalk --raw="\x89PNG" file.bin   # search raw bytes

# Extract specific file type
binwalk -D "png:png" file.bin
binwalk -D "zip archive:zip" file.bin

# Quiet
binwalk -q -e file.bin
binwalk output
# Example output:
# DECIMAL   HEX     DESCRIPTION
# 0         0x0     PNG image
# 1234      0x4D2   Zip archive, compressed
# 5678      0x162E  JPEG image data

# Extract specific offset manually
dd if=file.bin of=extracted.zip bs=1 skip=1234

# After -e: check _file.bin.extracted/
ls _file.bin.extracted/
file _file.bin.extracted/*

# Entropy: high flat = encrypted/compressed
#          varies = interesting boundaries
#          low    = plaintext / code
04photorec
Usage (interactive)
# photorec: comprehensive carving, interactive
photorec disk.img

# Steps in TUI:
# 1. Select disk/image
# 2. Select partition (or whole disk)
# 3. File system type (Other = raw)
# 4. Choose output directory
# → carves to recup_dir.1/ recup_dir.2/ etc.

# Limit to specific file types
# In TUI: File Opt → select only what you need

# Check results
find recup_dir*/ -type f -exec file {} + | grep -i "image\|pdf\|zip"
strings recup_dir*/*.txt 2>/dev/null | grep "picoCTF"
05Manual Carving
Find header → extract to footer
# 1. Find offset of magic bytes (LC_ALL=C forces byte-wise matching, independent of a UTF-8 locale)
LC_ALL=C grep -boa $'\x89PNG' disk.img         # PNG header offset
LC_ALL=C grep -boa $'\xff\xd8\xff' disk.img     # JPEG header
LC_ALL=C grep -boa $'\x50\x4b\x03\x04' disk.img # ZIP header
LC_ALL=C grep -boa $'\x25\x50\x44\x46' disk.img # PDF (%PDF)

python3 -c "
data = open('disk.img','rb').read()
# Find PNG
import re
for m in re.finditer(b'\x89PNG', data):
    print('PNG at offset:', m.start(), hex(m.start()))
"

# 2. Extract from offset to size (or footer)
dd if=disk.img of=carved.png bs=1 skip=<offset> count=<max_size>

# 3. Trim to actual footer (Python)
python3 -c "
data = open('disk.img','rb').read()
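start = data.find(b'\x89PNG')
iend  = data.find(b'\x49\x45\x4e\x44\xae\x42\x60\x82', start)  # IEND type + CRC
assert start >= 0 and iend >= 0, 'PNG header or IEND footer not found'
end   = iend + 8  # include the 8 bytes of the IEND type + CRC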
open('carved.png','wb').write(data[start:end])
print('Saved', end-start, 'bytes')
"

file carved.png    # verify type
06Magic Bytes Quick Reference
Headers & footers for carving
Type | Header (hex) | Footer (hex) | grep pattern
PNG | 89 50 4E 47 0D 0A 1A 0A | 49 45 4E 44 AE 42 60 82 | $'\x89PNG'
JPEG | FF D8 FF | FF D9 | $'\xff\xd8\xff'
GIF | 47 49 46 38 (GIF8) | 00 3B | GIF8
ZIP | 50 4B 03 04 | 50 4B 05 06 (end) | $'PK\x03\x04'
PDF | 25 50 44 46 (%PDF) | 25 25 45 4F 46 (%%EOF) | %PDF
ELF | 7F 45 4C 46 | — | $'\x7fELF'
gzip | 1F 8B | — | $'\x1f\x8b'
bzip2 | 42 5A 68 (BZh) | — | BZh
7-zip | 37 7A BC AF 27 1C | — | $'7z\xbc\xaf'
RAR | 52 61 72 21 (Rar!) | — | Rar!
SQLite | 53 51 4C 69 (SQLi) | — | SQLite format 3
MP3 | 49 44 33 (ID3) or FF FB | — | ID3
WAV | 52 49 46 46 (RIFF) | — | RIFF
PE/EXE | 4D 5A (MZ) | — | MZ
CARVING QUICK REF →  binwalk -Me file recursive extract embedded  · foremost -t jpg,png,pdf,zip -i disk -o out/  · blkls disk.img | foremost -i - -o out/ unallocated only  · grep -boa $'\x89PNG' disk.img find header offset  · dd if=disk bs=1 skip=OFFSET count=SIZE of=out.png  · binwalk entropy spike = encrypted/compressed region of interest