Files
memefast/bash/webm_auto_crop.sh
2025-06-13 09:44:18 +08:00

353 lines
13 KiB
Bash
Executable File

#!/bin/bash
# Script to auto-crop and scale WebM files based on content analysis of all frames
# Usage: ./webm_auto_crop.sh [directory_path]
# Print the help text on stdout, then terminate the script with status 1.
usage() {
  # Unquoted here-document so that $0 (the script name) is expanded.
  cat <<EOF
Usage: $0 [directory_path]
 directory_path: Path to directory containing WebM files (default: current directory)

This script will:
 - Analyze frames in each WebM file to detect content boundaries
 - Calculate unified crop region that covers all frames
 - Crop and scale the entire video to fill original dimensions
 - Preserve audio when available
 - Save results in a 'crop' subdirectory
EOF
  exit 1
}
# Print every argument on its own line (stdout), then stop the script with status 1.
fail_missing_tool() {
  printf '%s\n' "$@"
  exit 1
}

# Required tools: the script cannot run without ffmpeg, ffprobe and bc.
command -v ffmpeg > /dev/null 2>&1 || fail_missing_tool \
  "Error: ffmpeg is not installed or not in PATH" \
  "Please install ffmpeg to use this script"

command -v ffprobe > /dev/null 2>&1 || fail_missing_tool \
  "Error: ffprobe is not installed or not in PATH" \
  "Please install ffprobe (part of ffmpeg) to use this script"

# bc is needed for calculations
command -v bc > /dev/null 2>&1 || fail_missing_tool \
  "Error: bc (basic calculator) is not installed or not in PATH" \
  "Please install bc to use this script" \
  " macOS: brew install bc" \
  " Ubuntu/Debian: sudo apt install bc"

# Optional tools: report which content detection method will be used.
# ImageMagick is only selected when both `identify` and `convert` are present.
echo "Content detection method:"
if ! command -v identify > /dev/null 2>&1 || ! command -v convert > /dev/null 2>&1; then
  echo " ⚠ Using ffmpeg cropdetect (fallback method)"
  echo " For better results with transparent content, install ImageMagick:"
  echo " macOS: brew install imagemagick"
  echo " Ubuntu/Debian: sudo apt install imagemagick"
else
  echo " ✓ Using ImageMagick (optimal for transparency)"
fi
echo ""
# Set directory path (use current directory if not specified)
if [ $# -eq 0 ]; then
  DIRECTORY="."
elif [ $# -eq 1 ]; then
  DIRECTORY="$1"
else
  echo "Error: Too many arguments"
  usage
fi

# Check if directory exists
if [ ! -d "$DIRECTORY" ]; then
  echo "Error: Directory '$DIRECTORY' does not exist"
  exit 1
fi

# Convert to an absolute, symlink-resolved path.
# `cd` + `pwd -P` is used instead of `realpath`: realpath is not among the tools
# this script checks for, and when it is missing DIRECTORY would silently become
# empty, turning the output directory into "/crop". CDPATH is cleared so that
# `cd` neither jumps elsewhere nor prints the directory it entered.
if ! DIRECTORY=$(CDPATH= cd -- "$DIRECTORY" && pwd -P) || [ -z "$DIRECTORY" ]; then
  echo "Error: Could not resolve directory path"
  exit 1
fi
echo "Processing WebM files in: $DIRECTORY"

# Create crop output directory (abort if it cannot be created, since every
# later step writes into it)
CROP_DIR="$DIRECTORY/crop"
if [ ! -d "$CROP_DIR" ]; then
  if ! mkdir -p -- "$CROP_DIR"; then
    echo "Error: Could not create output directory '$CROP_DIR'"
    exit 1
  fi
  echo "Created output directory: $CROP_DIR"
else
  echo "Using existing output directory: $CROP_DIR"
fi

# Find all WebM files directly inside the directory (no recursion).
# Paths are read NUL-delimited so that names containing spaces, newlines or
# glob characters stay intact; an unquoted $(find ...) would split them.
WEBM_FILES=()
while IFS= read -r -d '' webm_path; do
  WEBM_FILES+=("$webm_path")
done < <(find "$DIRECTORY" -maxdepth 1 -name "*.webm" -type f -print0)

# Check if any WebM files were found
if [ "${#WEBM_FILES[@]}" -eq 0 ]; then
  echo "No WebM files found in '$DIRECTORY'"
  exit 0
fi
echo "Found ${#WEBM_FILES[@]} WebM file(s)"
echo ""

# Counters for the final summary
PROCESSED=0
FAILED=0
# Detect the bounding box of the visible content of a single frame image.
# Arguments:
#   $1 - path of the frame image to inspect
#   $2 - path of the file that receives the result
# Result file:
#   "W:H:X:Y" when a box was found; otherwise an empty line (ImageMagick path)
#   or an empty file (ffmpeg path).
detect_content_bounds() {
  local frame_file="$1"
  local bounds_file="$2"
  local trim_info

  # Use ImageMagick to find the bounding box of non-transparent content
  if command -v identify &> /dev/null && command -v convert &> /dev/null; then
    # "%@" reports the trim bounding box as WxH+X+Y - works with WebP
    trim_info=$(convert "$frame_file" -format "%@" info: 2>/dev/null)
    # Only a geometry with non-negative offsets is accepted; anything else
    # (including empty output) is reported as "no bounds".
    if [[ "$trim_info" =~ ^([0-9]+)x([0-9]+)\+([0-9]+)\+([0-9]+)$ ]]; then
      echo "${BASH_REMATCH[1]}:${BASH_REMATCH[2]}:${BASH_REMATCH[3]}:${BASH_REMATCH[4]}" > "$bounds_file"
    else
      echo "" > "$bounds_file"
    fi
  else
    # Fallback: use ffmpeg cropdetect with adjusted threshold for transparency.
    # cropdetect does not evaluate the first two frames it receives by default,
    # so a single still image never produced a "crop=" line. The frame is
    # repeated with the loop filter so that cropdetect has frames to evaluate.
    ffmpeg -i "$frame_file" \
      -vf "loop=loop=3:size=1:start=0,cropdetect=limit=0.1:round=2" \
      -f null \
      - 2>&1 | grep "crop=" | tail -1 | sed 's/.*crop=\([0-9:]*\).*/\1/' > "$bounds_file" 2>/dev/null
  fi
}
# Split a "W:H:X:Y" crop string into four space-separated numbers on stdout.
# Arguments:
#   $1 - crop string to parse
# Output:
#   "W H X Y" for a well-formed string, "0 0 0 0" for anything else.
parse_crop() {
  local spec=$1
  local pattern='^([0-9]+):([0-9]+):([0-9]+):([0-9]+)$'

  # Guard clause: malformed input yields the all-zero placeholder.
  [[ $spec =~ $pattern ]] || { echo "0 0 0 0"; return; }

  # Capture groups 1..4 hold width, height, x offset and y offset.
  printf '%s %s %s %s\n' "${BASH_REMATCH[@]:1:4}"
}
# Print "W H": the largest size with the crop region's aspect ratio that still
# fits inside the original frame.
# Arguments:
#   $1 - original width    $2 - original height
#   $3 - crop width        $4 - crop height
# Integer math is used on purpose: multiplying by a scale factor truncated to
# six decimals can land one pixel short of the frame (e.g. 1919 instead of 1920).
compute_fit_size() {
  local orig_w=$1 orig_h=$2 crop_w=$3 crop_h=$4
  if (( orig_w * crop_h <= orig_h * crop_w )); then
    # Width is the limiting dimension: fill it exactly, derive the height.
    echo "$orig_w $(( crop_h * orig_w / crop_w ))"
  else
    # Height is the limiting dimension: fill it exactly, derive the width.
    echo "$(( crop_w * orig_h / crop_h )) $orig_h"
  fi
}

# Remove a leftover frame-analysis directory if the script stops mid-file.
TEMP_DIR=""
trap 'if [ -n "$TEMP_DIR" ]; then rm -rf -- "$TEMP_DIR"; fi' EXIT

# Process each WebM file:
#   1. probe codec, dimensions, frame count and audio presence
#   2. extract sample frames and take the union of their content boxes
#   3. crop to that union, scale to fit, pad back to the original size
for webm_file in "${WEBM_FILES[@]}"; do
  # Get the base filename without path and extension
  base_name=$(basename "$webm_file" .webm)
  # Output WebM file path
  webm_output="$CROP_DIR/${base_name}.webm"
  echo "Processing: $(basename "$webm_file")"

  # Detect codec type (VP8 or VP9) to use correct decoder
  CODEC_INFO=$(ffprobe -v quiet -select_streams v:0 -show_entries stream=codec_name -of csv=p=0 "$webm_file" 2>/dev/null)
  # Anything that is not reported as vp9 is handled with the VP8 codec
  if [ "$CODEC_INFO" = "vp9" ]; then
    DECODER="libvpx-vp9"
    ENCODER="libvpx-vp9"
  else
    DECODER="libvpx"
    ENCODER="libvpx"
  fi

  # Get video properties; both values feed shell arithmetic below, so they
  # must be plain non-negative integers.
  ORIG_WIDTH=$(ffprobe -v quiet -select_streams v:0 -show_entries stream=width -of csv=p=0 "$webm_file" 2>/dev/null)
  ORIG_HEIGHT=$(ffprobe -v quiet -select_streams v:0 -show_entries stream=height -of csv=p=0 "$webm_file" 2>/dev/null)
  if ! [[ "$ORIG_WIDTH" =~ ^[0-9]+$ && "$ORIG_HEIGHT" =~ ^[0-9]+$ ]]; then
    echo " ✗ Could not get video dimensions"
    ((FAILED++))
    continue
  fi
  echo " → Original dimensions: ${ORIG_WIDTH}x${ORIG_HEIGHT}"

  # Get total frame count (ffprobe decodes the stream to count frames)
  FRAME_COUNT=$(ffprobe -v quiet -select_streams v:0 -count_frames -show_entries stream=nb_read_frames -of csv=p=0 "$webm_file" 2>/dev/null)
  if ! [[ "$FRAME_COUNT" =~ ^[0-9]+$ ]] || [ "$FRAME_COUNT" -eq 0 ]; then
    echo " ✗ Could not determine frame count"
    ((FAILED++))
    continue
  fi
  echo " → Total frames: $FRAME_COUNT"

  # Check if video has audio (number of audio stream entries reported)
  AUDIO_STREAMS=$(ffprobe -v quiet -select_streams a -show_entries stream=index -of csv=p=0 "$webm_file" 2>/dev/null | wc -l)

  # Create temporary directory for frame analysis
  if ! TEMP_DIR=$(mktemp -d); then
    TEMP_DIR=""
    echo " ✗ Could not create temporary directory"
    ((FAILED++))
    continue
  fi
  echo " → Analyzing frames to find optimal crop region..."

  # Sample roughly every 3rd frame, capped at 300 samples; when that would
  # give fewer than 50 samples, every frame is analysed instead.
  SAMPLE_COUNT=$((FRAME_COUNT / 3))
  if [ "$SAMPLE_COUNT" -gt 300 ]; then
    SAMPLE_COUNT=300
  elif [ "$SAMPLE_COUNT" -lt 50 ]; then
    SAMPLE_COUNT=$FRAME_COUNT
  fi

  # Initialize crop bounds (will find union of all bounds): start with an
  # "inverted" box so the first valid frame replaces every edge.
  MIN_X=$ORIG_WIDTH
  MIN_Y=$ORIG_HEIGHT
  MAX_X=0
  MAX_Y=0
  VALID_FRAMES=0
  echo " → Analyzing $SAMPLE_COUNT sample frames from $FRAME_COUNT total frames..."

  # Sample frames evenly throughout the video
  for ((i = 0; i < SAMPLE_COUNT; i++)); do
    # Calculate frame position (evenly distributed)
    FRAME_POS=$((i * FRAME_COUNT / SAMPLE_COUNT))
    TEMP_FRAME="$TEMP_DIR/frame_${FRAME_POS}.webp"
    BOUNDS_FILE="$TEMP_DIR/bounds_${FRAME_POS}.txt"
    echo " Extracting frame #${FRAME_POS}..."

    # Extract exactly the frame with index FRAME_POS as a lossless WebP image
    ffmpeg -c:v "$DECODER" \
      -i "$webm_file" \
      -vf "select=eq(n\\,$FRAME_POS)" \
      -vframes 1 \
      -c:v libwebp \
      -lossless 1 \
      -y \
      "$TEMP_FRAME" \
      -v quiet 2>/dev/null

    if [ ! -f "$TEMP_FRAME" ]; then
      echo " ✗ Frame #${FRAME_POS}: Extraction failed"
      continue
    fi

    # Report the size of the extracted frame (BSD stat first, then GNU stat)
    FRAME_SIZE=$(stat -f%z "$TEMP_FRAME" 2>/dev/null || stat -c%s "$TEMP_FRAME" 2>/dev/null)
    echo " Frame file size: $FRAME_SIZE bytes"

    # Detect content bounds
    detect_content_bounds "$TEMP_FRAME" "$BOUNDS_FILE"
    if [ ! -f "$BOUNDS_FILE" ]; then
      echo " ✗ Frame #${FRAME_POS}: Bounds file not created"
      continue
    fi

    CROP_PARAMS=$(cat "$BOUNDS_FILE")
    echo " Crop params: '$CROP_PARAMS'"
    if ! [[ "$CROP_PARAMS" =~ ^([0-9]+):([0-9]+):([0-9]+):([0-9]+)$ ]]; then
      echo " ✗ Frame #${FRAME_POS}: No valid crop params detected"
      continue
    fi

    read -r CROP_W CROP_H CROP_X CROP_Y <<< "$(parse_crop "$CROP_PARAMS")"
    if [ "$CROP_W" -le 0 ] || [ "$CROP_H" -le 0 ]; then
      echo " ✗ Frame #${FRAME_POS}: Invalid crop dimensions"
      continue
    fi

    # Calculate bounds of this frame's content
    FRAME_MIN_X=$CROP_X
    FRAME_MIN_Y=$CROP_Y
    FRAME_MAX_X=$((CROP_X + CROP_W))
    FRAME_MAX_Y=$((CROP_Y + CROP_H))

    # Update union bounds
    if [ "$FRAME_MIN_X" -lt "$MIN_X" ]; then MIN_X=$FRAME_MIN_X; fi
    if [ "$FRAME_MIN_Y" -lt "$MIN_Y" ]; then MIN_Y=$FRAME_MIN_Y; fi
    if [ "$FRAME_MAX_X" -gt "$MAX_X" ]; then MAX_X=$FRAME_MAX_X; fi
    if [ "$FRAME_MAX_Y" -gt "$MAX_Y" ]; then MAX_Y=$FRAME_MAX_Y; fi
    ((VALID_FRAMES++))
    echo " ✓ Frame #${FRAME_POS}: ${CROP_W}x${CROP_H} at (${CROP_X},${CROP_Y})"
  done

  # Clean up temp directory (and disarm the exit trap for it)
  rm -rf -- "$TEMP_DIR"
  TEMP_DIR=""
  echo " → Valid frames found: $VALID_FRAMES out of $SAMPLE_COUNT"

  # Calculate final crop dimensions
  FINAL_CROP_W=$((MAX_X - MIN_X))
  FINAL_CROP_H=$((MAX_Y - MIN_Y))

  # Validate crop dimensions
  if [ "$VALID_FRAMES" -eq 0 ]; then
    echo " ✗ No content detected in any frame - video may be fully transparent"
    ((FAILED++))
    continue
  elif [ "$FINAL_CROP_W" -le 0 ] || [ "$FINAL_CROP_H" -le 0 ] || [ "$MIN_X" -lt 0 ] || [ "$MIN_Y" -lt 0 ]; then
    echo " ✗ Could not determine valid crop region (bounds: ${MIN_X},${MIN_Y} to ${MAX_X},${MAX_Y})"
    ((FAILED++))
    continue
  elif [ "$FINAL_CROP_W" -ge "$ORIG_WIDTH" ] && [ "$FINAL_CROP_H" -ge "$ORIG_HEIGHT" ]; then
    # Note: a skipped file is counted in FAILED, so it shows up in the summary
    echo " ⚠ Content already fills entire frame - skipping crop"
    ((FAILED++))
    continue
  fi
  echo " → Crop region: ${FINAL_CROP_W}x${FINAL_CROP_H} at offset (${MIN_X},${MIN_Y})"

  # Scale to fit inside original dimensions while maintaining aspect ratio
  read -r SCALED_W SCALED_H <<< "$(compute_fit_size "$ORIG_WIDTH" "$ORIG_HEIGHT" "$FINAL_CROP_W" "$FINAL_CROP_H")"
  # The factor is only shown to the user; the sizes above are already exact
  SCALE_FACTOR=$(echo "scale=6; $SCALED_W / $FINAL_CROP_W" | bc)
  echo " → Scale factor: ${SCALE_FACTOR}x (content will be ${SCALED_W}x${SCALED_H})"

  # Build ffmpeg filter chain - crop, scale to fit, then pad to original size
  # (centered, with a fully transparent pad color)
  FILTER_CHAIN="crop=${FINAL_CROP_W}:${FINAL_CROP_H}:${MIN_X}:${MIN_Y},scale=${SCALED_W}:${SCALED_H}:flags=lanczos,pad=${ORIG_WIDTH}:${ORIG_HEIGHT}:(ow-iw)/2:(oh-ih)/2:color=0x00000000"

  # Build the ffmpeg command as an argument array. Each path stays a single
  # argument, so file names containing spaces, quotes, '$' or backticks are
  # passed through verbatim instead of being re-parsed by eval.
  FFMPEG_CMD=(ffmpeg -c:v "$DECODER" -i "$webm_file" -vf "$FILTER_CHAIN" -c:v "$ENCODER" -b:v 1M)
  if [ "$AUDIO_STREAMS" -gt 0 ]; then
    echo " → Processing with audio..."
    FFMPEG_CMD+=(-c:a libopus -b:a 128k)
  else
    echo " → Processing without audio..."
    FFMPEG_CMD+=(-an)
  fi
  FFMPEG_CMD+=(-y "$webm_output")

  # Show the command in shell-quoted form, then execute it
  printf ' → Running command:'
  printf ' %q' "${FFMPEG_CMD[@]}"
  printf '\n'
  if "${FFMPEG_CMD[@]}" -v error -stats; then
    echo " ✓ Successfully created: ${base_name}.webm"
    ((PROCESSED++))
  else
    echo " ✗ Failed to process: $(basename "$webm_file")"
    ((FAILED++))
  fi
  echo ""
done
# Final report: totals first, then where the output went.
printf '%s\n' "=== Processing Complete ===" "Successfully processed: $PROCESSED files"
if (( FAILED > 0 )); then
  echo "Failed to process: $FAILED files"
fi
printf '%s\n' "Output directory: $CROP_DIR" ""

# List generated files (long listing of every .webm in the output directory,
# each row indented by one space; listing errors are suppressed).
if (( PROCESSED > 0 )); then
  echo "Generated cropped WebM files:"
  while read -r listing_row; do
    printf ' %s\n' "$listing_row"
  done < <(ls -la "$CROP_DIR"/*.webm 2>/dev/null)
fi