Initial commit: auto-video-cut project

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Christoph K.
2026-04-06 21:51:01 +02:00
commit 267070ad52
15 changed files with 2635 additions and 0 deletions

3
auto_video_cut/__init__.py Executable file
View File

@@ -0,0 +1,3 @@
"""auto-video-cut — Automatisches Video-Schnitt-Tool."""
__version__ = "0.1.0"

116
auto_video_cut/audio.py Executable file
View File

@@ -0,0 +1,116 @@
"""Hintergrundmusik-Mixing und Audio-Logik."""
from __future__ import annotations
import random
import subprocess
from pathlib import Path
from .config import MUSIC_EXTENSIONS, get_music_files
def _run(cmd: list[str]) -> subprocess.CompletedProcess:
return subprocess.run(cmd, capture_output=True, text=True, check=False)
def pick_music_file(
    music_files: list[Path],
    mode: str = "random",
) -> Path:
    """Select one music file according to *mode*.

    Modes:
        "random"               — uniform random choice
        "alphabetical"/"loop"  — first file in sorted order

    Raises:
        FileNotFoundError: if *music_files* is empty.
        ValueError: for any other mode string.
    """
    if not music_files:
        raise FileNotFoundError("Keine Musikdateien gefunden.")
    if mode in ("alphabetical", "loop"):
        return min(music_files)
    if mode == "random":
        return random.choice(music_files)
    raise ValueError(f"Unbekannter Musik-Modus: {mode}")
def mix_music(
    video_path: Path,
    output_path: Path,
    music_file: Path,
    volume_original: float = 1.0,
    volume_music: float = 0.3,
) -> Path:
    """Mix background music into a video.

    The music is looped (-stream_loop -1) and the output ends with the
    video (-shortest). If the video already has an audio track the two are
    amixed; otherwise the music becomes the only audio track.

    Args:
        video_path: input video.
        output_path: destination file (parent dirs are created).
        music_file: audio file to loop underneath.
        volume_original: gain (0.0–1.0) applied to the video's own audio.
        volume_music: gain (0.0–1.0) applied to the music.

    Raises:
        RuntimeError: if ffmpeg exits non-zero.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Does the input video carry an audio stream at all?
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "a",
        "-show_entries", "stream=codec_type",
        "-of", "default=noprint_wrappers=1:nokey=1",
        str(video_path),
    ]
    probe = _run(probe_cmd)
    has_audio = bool(probe.stdout.strip())
    # Previously the whole ffmpeg command was duplicated per branch; only the
    # filter graph actually differs, so build just that conditionally.
    if has_audio:
        filter_complex = (
            f"[0:a]volume={volume_original}[v1];"
            f"[1:a]volume={volume_music}[v2];"
            f"[v1][v2]amix=inputs=2:duration=first[a]"
        )
    else:
        # No original audio → the (volume-adjusted) music is the only track.
        filter_complex = f"[1:a]volume={volume_music}[a]"
    cmd = [
        "ffmpeg", "-y",
        "-i", str(video_path),
        "-stream_loop", "-1",
        "-i", str(music_file),
        "-filter_complex", filter_complex,
        "-c:v", "copy",
        "-c:a", "aac",
        "-map", "0:v:0",
        "-map", "[a]",
        "-shortest",
        str(output_path),
    ]
    result = _run(cmd)
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg Fehler beim Musik-Mixing: {result.stderr}")
    return output_path
def add_music_from_config(
    video_path: Path,
    output_path: Path,
    config: dict,
) -> Path:
    """Choose a music file from *config* and mix it into the video."""
    candidates = get_music_files(config)
    if not candidates:
        raise FileNotFoundError(
            f"Keine Musikdateien in: {config['resources']['folder']}/music/"
        )
    music_cfg = config["music"]
    chosen = pick_music_file(candidates, music_cfg["mode"])
    return mix_music(
        video_path=video_path,
        output_path=output_path,
        music_file=chosen,
        volume_original=music_cfg["volume_original"],
        volume_music=music_cfg["volume_music"],
    )

337
auto_video_cut/cli.py Executable file
View File

@@ -0,0 +1,337 @@
"""CLI-Einstiegspunkt für auto-video-cut."""
from __future__ import annotations
import sys
import tempfile
from pathlib import Path
from typing import Optional
import typer
# Single Typer application object; sub-commands register via @app.command().
app = typer.Typer(
    name="video-cut",
    help="Automatisches Video-Schnitt-Tool — Stille, Szenen, Musik, Sequenzen.",
    add_completion=False,
)
def _load_config(config_path: Optional[Path]) -> dict:
    """Load the configuration and echo any validation warnings to stderr."""
    from .config import load_config, validate_config

    cfg = load_config(config_path)
    for warning in validate_config(cfg):
        typer.echo(f"[Warnung] {warning}", err=True)
    return cfg
def _ensure_output(output: Optional[Path], input_path: Path, suffix: str) -> Path:
if output:
return output
return input_path.parent / f"{input_path.stem}{suffix}{input_path.suffix}"
# ---------------------------------------------------------------------------
# video-cut cut
# ---------------------------------------------------------------------------
@app.command()
def cut(
    input: Path = typer.Option(..., "--input", "-i", help="Eingabe-Videodatei"),
    output: Optional[Path] = typer.Option(None, "--output", "-o", help="Ausgabedatei"),
    config: Optional[Path] = typer.Option(None, "--config", "-c", help="Konfigurationsdatei"),
    remove_silence: bool = typer.Option(False, "--remove-silence", help="Stille entfernen"),
    scene_detect: bool = typer.Option(False, "--scene-detect", help="Szenen erkennen und aufteilen"),
) -> None:
    """Video schneiden: Stille entfernen und/oder Szenen erkennen."""
    # Dropped the unused `from .config import load_config, validate_config`;
    # configuration is loaded through the shared _load_config helper below.
    from .cutter import remove_silence as do_remove_silence, split_scenes

    if not input.exists():
        typer.echo(f"Fehler: Datei nicht gefunden: {input}", err=True)
        raise typer.Exit(1)
    cfg = _load_config(config)
    if not remove_silence and not scene_detect:
        typer.echo("Hinweis: Keine Aktion angegeben. Nutze --remove-silence oder --scene-detect.")
        raise typer.Exit(0)
    current = input
    if remove_silence:
        # When scene detection also runs, --output is reserved for the scene folder.
        out = _ensure_output(output if not scene_detect else None, input, "_no_silence")
        typer.echo(f"Stille entfernen: {current} → {out}")
        do_remove_silence(
            current,
            out,
            threshold_db=cfg["silence"]["threshold_db"],
            min_duration=cfg["silence"]["min_duration"],
        )
        current = out
        typer.echo("Fertig.")
    if scene_detect:
        out_folder = output or input.parent / f"{input.stem}_scenes"
        typer.echo(f"Szenen erkennen: {current} → {out_folder}/")
        clips = split_scenes(current, out_folder, threshold=cfg["scenes"]["threshold"])
        typer.echo(f"Fertig. {len(clips)} Szenen gespeichert.")
# ---------------------------------------------------------------------------
# video-cut merge
# ---------------------------------------------------------------------------
@app.command()
def merge(
    inputs: list[Path] = typer.Option(..., "--inputs", help="Eingabe-Videodateien"),
    output: Path = typer.Option(..., "--output", "-o", help="Ausgabedatei"),
    intro: Optional[Path] = typer.Option(None, "--intro", help="Intro-Clip"),
    outro: Optional[Path] = typer.Option(None, "--outro", help="Outro-Clip"),
    no_normalize: bool = typer.Option(False, "--no-normalize", help="Kein Re-Encoding"),
) -> None:
    """Mehrere Video-Clips zusammenführen."""
    from .merger import merge_clips

    # Abort on the first missing input file.
    for clip in inputs:
        if not clip.exists():
            typer.echo(f"Fehler: Datei nicht gefunden: {clip}", err=True)
            raise typer.Exit(1)
    typer.echo(f"Zusammenführen von {len(inputs)} Clip(s) → {output}")
    merge_clips(
        clips=list(inputs),
        output_path=output,
        intro=intro,
        outro=outro,
        normalize=not no_normalize,
    )
    typer.echo("Fertig.")
# ---------------------------------------------------------------------------
# video-cut music
# ---------------------------------------------------------------------------
@app.command()
def music(
    input: Path = typer.Option(..., "--input", "-i", help="Eingabe-Videodatei"),
    output: Optional[Path] = typer.Option(None, "--output", "-o", help="Ausgabedatei"),
    config: Optional[Path] = typer.Option(None, "--config", "-c", help="Konfigurationsdatei"),
    music_file: Optional[Path] = typer.Option(None, "--music-file", help="Direkte Musikdatei (überschreibt config)"),
    volume_original: float = typer.Option(1.0, "--vol-orig", help="Lautstärke Original (0.0–1.0)"),
    volume_music: float = typer.Option(0.3, "--vol-music", help="Lautstärke Musik (0.0–1.0)"),
) -> None:
    """Hintergrundmusik zu einem Video hinzufügen."""
    # Restored the range separator in the --vol-* help texts ("0.0–1.0") and
    # the arrow in the progress message, matching the other commands.
    from .audio import mix_music, add_music_from_config

    if not input.exists():
        typer.echo(f"Fehler: Datei nicht gefunden: {input}", err=True)
        raise typer.Exit(1)
    out = _ensure_output(output, input, "_music")
    if music_file:
        if not music_file.exists():
            typer.echo(f"Fehler: Musikdatei nicht gefunden: {music_file}", err=True)
            raise typer.Exit(1)
        typer.echo(f"Musik hinzufügen: {music_file} → {out}")
        mix_music(input, out, music_file, volume_original, volume_music)
    else:
        cfg = _load_config(config)
        typer.echo(f"Musik aus Konfiguration → {out}")
        add_music_from_config(input, out, cfg)
    typer.echo("Fertig.")
# ---------------------------------------------------------------------------
# video-cut batch
# ---------------------------------------------------------------------------
@app.command()
def batch(
    input: Path = typer.Option(..., "--input", "-i", help="Ordner mit Videos"),
    config: Optional[Path] = typer.Option(None, "--config", "-c", help="Konfigurationsdatei"),
    remove_silence: bool = typer.Option(False, "--remove-silence"),
    scene_detect: bool = typer.Option(False, "--scene-detect"),
    add_music: bool = typer.Option(False, "--music", help="Musik hinzufügen"),
) -> None:
    """Alle Videos in einem Ordner verarbeiten."""
    from .config import VIDEO_EXTENSIONS
    from .cutter import remove_silence as do_remove_silence, split_scenes
    from .audio import add_music_from_config

    if not input.is_dir():
        typer.echo(f"Fehler: Kein Ordner: {input}", err=True)
        raise typer.Exit(1)
    cfg = _load_config(config)
    output_folder = Path(cfg["output"]["folder"])
    output_folder.mkdir(parents=True, exist_ok=True)
    videos = sorted(
        entry for entry in input.iterdir() if entry.suffix.lower() in VIDEO_EXTENSIONS
    )
    if not videos:
        typer.echo("Keine Videos gefunden.")
        raise typer.Exit(0)
    typer.echo(f"{len(videos)} Video(s) gefunden.")
    for video in videos:
        typer.echo(f"\nVerarbeite: {video.name}")
        current = video
        if remove_silence:
            out = output_folder / f"{video.stem}_no_silence.mp4"
            typer.echo(f" Stille entfernen → {out.name}")
            do_remove_silence(
                current, out,
                threshold_db=cfg["silence"]["threshold_db"],
                min_duration=cfg["silence"]["min_duration"],
            )
            current = out
        if scene_detect:
            scene_folder = output_folder / f"{video.stem}_scenes"
            typer.echo(f" Szenen → {scene_folder.name}/")
            split_scenes(current, scene_folder, threshold=cfg["scenes"]["threshold"])
        # Music only makes sense for a single output file, not a scene folder.
        if add_music and not scene_detect:
            out = output_folder / f"{video.stem}_music.mp4"
            typer.echo(f" Musik → {out.name}")
            add_music_from_config(current, out, cfg)
    typer.echo("\nBatch abgeschlossen.")
# ---------------------------------------------------------------------------
# video-cut sequence
# ---------------------------------------------------------------------------
@app.command()
def sequence(
    seq_file: Path = typer.Option(..., "--seq", "-s", help="Sequenz-Datei (sequence.yaml)"),
    config: Optional[Path] = typer.Option(None, "--config", "-c", help="Konfigurationsdatei"),
    output: Optional[Path] = typer.Option(None, "--output", "-o", help="Ausgabedatei"),
    add_music: bool = typer.Option(True, "--music/--no-music", help="Musik hinzufügen"),
) -> None:
    """Video aus sequence.yaml zusammenstellen."""
    # Fixes: get_music_files is defined in .config (importing it via .audio only
    # worked through an incidental re-export); dropped the unused ClipEntry and
    # add_music_from_config imports; hoisted the duplicated inline `import shutil`.
    import shutil

    from .audio import mix_music, pick_music_file
    from .config import get_music_files, get_resources_folder
    from .cutter import remove_silence as do_remove_silence
    from .merger import merge_clips, image_to_clip
    from .sequencer import parse_sequence
    from .text import create_text_clip, add_text_overlay

    if not seq_file.exists():
        typer.echo(f"Fehler: Sequenz-Datei nicht gefunden: {seq_file}", err=True)
        raise typer.Exit(1)
    cfg = _load_config(config)
    resources = get_resources_folder(cfg)
    typer.echo(f"Sequenz laden: {seq_file}")
    clips_raw, seq_music = parse_sequence(
        seq_file,
        resources_folder=resources,
        default_image_duration=cfg["images"]["duration"],
    )
    typer.echo(f"{len(clips_raw)} Einträge in der Sequenz.")
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp = Path(tmp_dir)
        ready_clips: list[Path] = []
        for i, entry in enumerate(clips_raw):
            typer.echo(f" [{i+1}/{len(clips_raw)}] {entry.media_type}: {entry.path.name}")
            if entry.media_type == "image":
                clip = tmp / f"clip_{i:04d}.mp4"
                image_to_clip(entry.path, clip, duration=entry.image_duration)
                ready_clips.append(clip)
            elif entry.media_type == "text":
                style = getattr(entry, "text_style", {}) or {}
                clip = tmp / f"clip_{i:04d}.mp4"
                create_text_clip(
                    output_path=clip,
                    content=entry.overlay_text or "",
                    duration=entry.image_duration,
                    font_size=style.get("font_size", 72),
                    font_color=style.get("font_color", "white"),
                    background_color=style.get("background_color", "black"),
                    position=style.get("position", "center"),
                )
                ready_clips.append(clip)
            elif entry.media_type == "video":
                current = entry.path
                if entry.remove_silence or entry.trim_silence:
                    silenced = tmp / f"clip_{i:04d}_ns.mp4"
                    do_remove_silence(
                        current, silenced,
                        threshold_db=cfg["silence"]["threshold_db"],
                        min_duration=cfg["silence"]["min_duration"],
                    )
                    current = silenced
                if entry.overlay_text:
                    overlaid = tmp / f"clip_{i:04d}_overlay.mp4"
                    add_text_overlay(
                        current, overlaid,
                        text=entry.overlay_text,
                        position=entry.overlay_position,
                        duration=entry.overlay_duration,
                    )
                    current = overlaid
                ready_clips.append(current)
        if not ready_clips:
            typer.echo("Fehler: Keine Clips zum Zusammenführen.", err=True)
            raise typer.Exit(1)
        # Merge everything into an intermediate file without music first.
        out_cfg = Path(cfg["output"]["folder"])
        out_cfg.mkdir(parents=True, exist_ok=True)
        final_name = output or out_cfg / "output.mp4"
        no_music_path = tmp / "merged.mp4"
        typer.echo(f"Clips zusammenführen ({len(ready_clips)} Stück)…")
        merge_clips(ready_clips, no_music_path, normalize=True)
        # Music: a concrete file named in the sequence overrides the config pool.
        if add_music:
            music_files = get_music_files(cfg)
            seq_music_file = seq_music.get("file")
            if seq_music_file and seq_music_file != "random":
                m_path = resources / "music" / seq_music_file
                if m_path.exists():
                    music_files = [m_path]
            if music_files:
                mode = cfg["music"]["mode"]
                chosen = pick_music_file(music_files, mode)
                vol_orig = seq_music.get("volume_original", cfg["music"]["volume_original"])
                vol_music = seq_music.get("volume_music", cfg["music"]["volume_music"])
                typer.echo(f"Musik hinzufügen: {chosen.name}")
                mix_music(no_music_path, Path(final_name), chosen, vol_orig, vol_music)
            else:
                shutil.copy2(no_music_path, final_name)
                typer.echo("Keine Musikdateien gefunden — ohne Musik gespeichert.")
        else:
            shutil.copy2(no_music_path, final_name)
    typer.echo(f"\nFertig: {final_name}")
def main() -> None:
    """Console-script entry point: dispatch to the Typer app."""
    app()


if __name__ == "__main__":
    main()

115
auto_video_cut/config.py Executable file
View File

@@ -0,0 +1,115 @@
"""Konfiguration laden und validieren."""
from __future__ import annotations
import os
from pathlib import Path
from typing import Any
import yaml
# Recognised media file extensions (lower-case, leading dot included).
MUSIC_EXTENSIONS = {".mp3", ".wav", ".flac", ".aac", ".ogg"}
VIDEO_EXTENSIONS = {".mp4", ".mov", ".avi", ".mkv"}
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg"}

# Built-in defaults; a user YAML file is deep-merged over this structure,
# so every key below is optional in the user's config.
DEFAULTS: dict[str, Any] = {
    "resources": {
        "folder": "./resources",  # base folder containing music/ (and videos/, images/)
    },
    "music": {
        "mode": "random",  # selection mode: "random" | "alphabetical" | "loop"
        "volume_original": 1.0,  # gain for the video's own audio (0.0-1.0)
        "volume_music": 0.3,  # gain for the background music (0.0-1.0)
    },
    "videos": {
        "intro": None,  # optional intro clip
        "outro": None,  # optional outro clip
        "transitions": False,  # NOTE(review): not referenced in the visible code
    },
    "images": {
        "title_card": None,  # NOTE(review): not referenced in the visible code
        "duration": 3,  # seconds a still image is shown
    },
    "silence": {
        "threshold_db": -40,  # silencedetect noise threshold in dB
        "min_duration": 0.5,  # minimum silence length in seconds
    },
    "scenes": {
        "threshold": 27.0,  # PySceneDetect ContentDetector threshold
    },
    "output": {
        "format": "mp4",  # NOTE(review): not referenced in the visible code
        "folder": "./output",  # default output directory (batch/sequence)
    },
}
def _deep_merge(base: dict, override: dict) -> dict:
"""Rekursiv Dictionaries zusammenführen."""
result = dict(base)
for key, value in override.items():
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
result[key] = _deep_merge(result[key], value)
else:
result[key] = value
return result
def load_config(config_path: str | Path | None = None) -> dict[str, Any]:
    """Load a YAML config file and merge it over the built-in defaults.

    Args:
        config_path: path to a YAML file, or None for pure defaults.

    Returns:
        A fresh configuration dict; mutating it never affects DEFAULTS.

    Raises:
        FileNotFoundError: if *config_path* is given but does not exist.
    """
    import copy

    # Deep-copy: the previous shallow dict(DEFAULTS) shared the nested
    # section dicts, so a caller mutating e.g. config["music"]["mode"]
    # silently corrupted DEFAULTS for the rest of the process.
    config = copy.deepcopy(DEFAULTS)
    if config_path is None:
        return config
    path = Path(config_path)
    if not path.exists():
        raise FileNotFoundError(f"Konfigurationsdatei nicht gefunden: {path}")
    with open(path, encoding="utf-8") as fh:
        user_config = yaml.safe_load(fh) or {}
    return _deep_merge(config, user_config)
def validate_config(config: dict[str, Any]) -> list[str]:
    """Check the configuration and return a list of warning strings."""
    warnings: list[str] = []
    resources_folder = Path(config["resources"]["folder"])
    if not resources_folder.exists():
        warnings.append(f"Ressourcen-Ordner existiert nicht: {resources_folder}")
    else:
        music_folder = resources_folder / "music"
        if not music_folder.exists():
            warnings.append(f"Musik-Ordner existiert nicht: {music_folder}")
        elif not any(
            f.suffix.lower() in MUSIC_EXTENSIONS for f in music_folder.iterdir()
        ):
            warnings.append(f"Keine Musikdateien in: {music_folder}")
    # Both volume settings share the same valid range; check them in order.
    for key in ("volume_original", "volume_music"):
        value = config["music"][key]
        if not 0.0 <= value <= 1.0:
            warnings.append(f"{key} muss zwischen 0.0 und 1.0 liegen (ist: {value})")
    return warnings
def get_resources_folder(config: dict[str, Any]) -> Path:
    """Return the configured resources directory as a Path."""
    folder = config["resources"]["folder"]
    return Path(folder)
def get_music_files(config: dict[str, Any]) -> list[Path]:
    """Return all music files in the configured music folder, sorted."""
    music_folder = get_resources_folder(config) / "music"
    if not music_folder.exists():
        return []
    tracks = [
        entry for entry in music_folder.iterdir()
        if entry.suffix.lower() in MUSIC_EXTENSIONS
    ]
    return sorted(tracks)

198
auto_video_cut/cutter.py Executable file
View File

@@ -0,0 +1,198 @@
"""Stille- und Szenen-Erkennung sowie Schnitt-Logik."""
from __future__ import annotations
import re
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path
@dataclass
class TimeRange:
    """A time interval [start, end] in seconds within a media file."""

    # Start of the range in seconds from the beginning of the media.
    start: float
    # End of the range in seconds.
    end: float

    @property
    def duration(self) -> float:
        """Length of the range in seconds."""
        return self.end - self.start
def _run(cmd: list[str]) -> subprocess.CompletedProcess:
return subprocess.run(cmd, capture_output=True, text=True, check=False)
# ---------------------------------------------------------------------------
# Stille-Erkennung
# ---------------------------------------------------------------------------
def detect_silence(
    input_path: Path,
    threshold_db: float = -40,
    min_duration: float = 0.5,
) -> list[TimeRange]:
    """Detect silent sections using ffmpeg's silencedetect filter."""
    result = _run([
        "ffmpeg", "-i", str(input_path),
        "-af", f"silencedetect=n={threshold_db}dB:d={min_duration}",
        "-f", "null", "-",
    ])
    # silencedetect reports on stderr, one start/end line per silent gap.
    log = result.stderr
    starts = re.findall(r"silence_start: ([\d.]+)", log)
    ends = re.findall(r"silence_end: ([\d.]+)", log)
    return [TimeRange(float(s), float(e)) for s, e in zip(starts, ends)]
def _get_duration(input_path: Path) -> float:
    """Return the media duration in seconds via ffprobe.

    Raises:
        RuntimeError: if ffprobe fails or reports no duration. (Previously a
        failed probe fell through to float(""), raising an opaque ValueError.)
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        str(input_path),
    ]
    result = _run(cmd)
    raw = result.stdout.strip()
    if result.returncode != 0 or not raw:
        raise RuntimeError(f"ffprobe Fehler bei {input_path}: {result.stderr}")
    return float(raw)
def invert_ranges(silence_ranges: list[TimeRange], total_duration: float) -> list[TimeRange]:
    """Invert silence ranges into the complementary sound/speech ranges.

    Args:
        silence_ranges: detected silent intervals (any order).
        total_duration: total media length in seconds.

    Returns:
        The gaps between the silences, clipped to [0, total_duration].
    """
    speech: list[TimeRange] = []
    cursor = 0.0
    for silence in sorted(silence_ranges, key=lambda r: r.start):
        if silence.start > cursor:
            speech.append(TimeRange(cursor, silence.start))
        # Guard against overlapping silence ranges: previously the cursor
        # could move backwards, producing inverted (negative) segments.
        cursor = max(cursor, silence.end)
    if cursor < total_duration:
        speech.append(TimeRange(cursor, total_duration))
    return speech
def remove_silence(
    input_path: Path,
    output_path: Path,
    threshold_db: float = -40,
    min_duration: float = 0.5,
) -> Path:
    """Remove silent sections from a video and write the result.

    Detects silence, inverts the ranges into sound segments, cuts each
    segment out with stream copy, then concatenates them into *output_path*.

    Raises:
        RuntimeError: if no sound segments remain or ffmpeg fails.
    """
    silence = detect_silence(input_path, threshold_db, min_duration)
    total = _get_duration(input_path)
    segments = invert_ranges(silence, total)
    if not segments:
        raise RuntimeError("Keine Ton-Abschnitte gefunden — Stille-Schwelle zu hoch?")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp = Path(tmp_dir)
        clip_files: list[Path] = []
        for i, seg in enumerate(segments):
            clip = tmp / f"seg_{i:04d}.mp4"
            # NOTE(review): -ss/-to are placed before -i (input seeking) and
            # combined with -c copy, so cuts snap to keyframes; segment borders
            # may be slightly imprecise — confirm this is acceptable.
            cmd = [
                "ffmpeg", "-y",
                "-ss", str(seg.start),
                "-to", str(seg.end),
                "-i", str(input_path),
                "-c", "copy",
                str(clip),
            ]
            result = _run(cmd)
            if result.returncode != 0:
                raise RuntimeError(f"ffmpeg Fehler beim Ausschneiden: {result.stderr}")
            clip_files.append(clip)
        _concat_clips(clip_files, output_path)
    return output_path
# ---------------------------------------------------------------------------
# Szenen-Erkennung
# ---------------------------------------------------------------------------
def detect_scenes(input_path: Path, threshold: float = 27.0) -> list[TimeRange]:
    """Detect scene boundaries via PySceneDetect's ContentDetector.

    Args:
        input_path: video file to analyse.
        threshold: ContentDetector sensitivity (higher = fewer cuts).

    Raises:
        ImportError: if PySceneDetect is not installed.
    """
    try:
        from scenedetect import VideoManager, SceneManager
        from scenedetect.detectors import ContentDetector
    except ImportError as err:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "PySceneDetect nicht installiert: pip install scenedetect[opencv]"
        ) from err
    video_manager = VideoManager([str(input_path)])
    scene_manager = SceneManager()
    scene_manager.add_detector(ContentDetector(threshold=threshold))
    video_manager.set_downscale_factor()
    video_manager.start()
    try:
        scene_manager.detect_scenes(frame_source=video_manager)
        scene_list = scene_manager.get_scene_list()
    finally:
        # Release the decoder even when detection raises mid-way.
        video_manager.release()
    return [
        TimeRange(start.get_seconds(), end.get_seconds())
        for start, end in scene_list
    ]
def split_scenes(
    input_path: Path,
    output_folder: Path,
    threshold: float = 27.0,
) -> list[Path]:
    """Split the video into one file per detected scene."""
    scenes = detect_scenes(input_path, threshold)
    output_folder.mkdir(parents=True, exist_ok=True)
    clips: list[Path] = []
    for index, scene in enumerate(scenes):
        target = output_folder / f"scene_{index:04d}.mp4"
        result = _run([
            "ffmpeg", "-y",
            "-ss", str(scene.start),
            "-to", str(scene.end),
            "-i", str(input_path),
            "-c", "copy",
            str(target),
        ])
        if result.returncode != 0:
            raise RuntimeError(f"ffmpeg Fehler: {result.stderr}")
        clips.append(target)
    return clips
# ---------------------------------------------------------------------------
# Hilfsfunktion concat
# ---------------------------------------------------------------------------
def _concat_clips(clips: list[Path], output: Path) -> None:
    """Join clips losslessly with the ffmpeg concat demuxer.

    Writes a temporary list file, runs ffmpeg with -c copy, and always
    removes the list file again (previously it leaked if the subprocess
    invocation itself raised).

    Raises:
        RuntimeError: if ffmpeg exits non-zero.
    """
    output.parent.mkdir(parents=True, exist_ok=True)
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as fh:
        list_file = Path(fh.name)
        for clip in clips:
            # NOTE(review): paths containing single quotes would need concat
            # list escaping; assumed absent for our generated seg_*.mp4 names.
            fh.write(f"file '{clip.resolve()}'\n")
    try:
        cmd = [
            "ffmpeg", "-y",
            "-f", "concat", "-safe", "0",
            "-i", str(list_file),
            "-c", "copy",
            str(output),
        ]
        result = _run(cmd)
    finally:
        list_file.unlink(missing_ok=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg concat Fehler: {result.stderr}")

117
auto_video_cut/merger.py Executable file
View File

@@ -0,0 +1,117 @@
"""Clips zusammenführen via ffmpeg concat."""
from __future__ import annotations
import subprocess
import tempfile
from pathlib import Path
from .config import VIDEO_EXTENSIONS, IMAGE_EXTENSIONS
def _run(cmd: list[str]) -> subprocess.CompletedProcess:
return subprocess.run(cmd, capture_output=True, text=True, check=False)
def image_to_clip(
    image_path: Path,
    output_path: Path,
    duration: float = 3.0,
    width: int = 1920,
    height: int = 1080,
) -> Path:
    """Render a still image as a video clip of the given duration."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Scale to fit inside the target frame, then pad to the exact size
    # (letter-/pillar-boxing), centred.
    scale_pad = (
        f"scale={width}:{height}:force_original_aspect_ratio=decrease,"
        f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2"
    )
    result = _run([
        "ffmpeg", "-y",
        "-loop", "1",
        "-i", str(image_path),
        "-t", str(duration),
        "-vf", scale_pad,
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        "-r", "25",
        str(output_path),
    ])
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg image-to-clip Fehler: {result.stderr}")
    return output_path
def normalize_clip(input_path: Path, output_path: Path) -> Path:
    """Re-encode a clip to a uniform format so concat with -c copy works."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # NOTE(review): clips without an audio stream (e.g. image clips) keep
    # having none after this re-encode — confirm concat handles the mix.
    result = _run([
        "ffmpeg", "-y",
        "-i", str(input_path),
        "-c:v", "libx264",
        "-c:a", "aac",
        "-pix_fmt", "yuv420p",
        "-r", "25",
        str(output_path),
    ])
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg normalize Fehler: {result.stderr}")
    return output_path
def merge_clips(
    clips: list[Path],
    output_path: Path,
    intro: Path | None = None,
    outro: Path | None = None,
    normalize: bool = True,
) -> Path:
    """Concatenate clips into one video, optionally wrapped with intro/outro."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    ordered: list[Path] = []
    if intro:
        ordered.append(intro)
    ordered.extend(clips)
    if outro:
        ordered.append(outro)
    if not ordered:
        raise ValueError("Keine Clips zum Zusammenführen.")
    if len(ordered) == 1:
        # Single clip: re-encode or plain stream copy, no concat needed.
        only = ordered[0]
        if normalize:
            return normalize_clip(only, output_path)
        result = _run(["ffmpeg", "-y", "-i", str(only), "-c", "copy", str(output_path)])
        if result.returncode != 0:
            raise RuntimeError(f"ffmpeg copy Fehler: {result.stderr}")
        return output_path
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp = Path(tmp_dir)
        if normalize:
            # Bring every clip to one uniform format first.
            ready = [
                normalize_clip(clip, tmp / f"norm_{i:04d}.mp4")
                for i, clip in enumerate(ordered)
            ]
        else:
            ready = ordered
        list_file = tmp / "clips.txt"
        entries = [f"file '{clip.resolve()}'\n" for clip in ready]
        with open(list_file, "w", encoding="utf-8") as fh:
            fh.writelines(entries)
        result = _run([
            "ffmpeg", "-y",
            "-f", "concat", "-safe", "0",
            "-i", str(list_file),
            "-c", "copy",
            str(output_path),
        ])
        if result.returncode != 0:
            raise RuntimeError(f"ffmpeg concat Fehler: {result.stderr}")
    return output_path

149
auto_video_cut/sequencer.py Executable file
View File

@@ -0,0 +1,149 @@
"""sequence.yaml parsen und geordnete Clip-Liste aufbauen."""
from __future__ import annotations
import tempfile
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import yaml
from .config import VIDEO_EXTENSIONS, IMAGE_EXTENSIONS
@dataclass
class ClipEntry:
    """A resolved clip with its processing metadata.

    One entry per item in the final sequence; produced by parse_sequence /
    _expand_folder and consumed by the CLI `sequence` command.
    """

    # Media file location ("__text__" placeholder for text clips).
    path: Path
    # One of "video" | "image" | "text" (see parse_sequence).
    media_type: str
    # Strip silent sections from a video clip before merging.
    remove_silence: bool = False
    # Treated identically to remove_silence by the CLI consumer.
    trim_silence: bool = False
    # Text to burn into a video clip, or the content of a text clip.
    overlay_text: str | None = None
    # Named drawtext position: "top" | "center" | "bottom".
    overlay_position: str = "bottom"
    # Seconds the overlay is shown; None = for the whole clip.
    overlay_duration: float | None = None
    # Display duration for image/text clips, in seconds.
    image_duration: float = 3.0
    # Styling options for "text" entries (font_size, font_color, …).
    # Declared as a proper field instead of the previous ad-hoc
    # clips[-1].__dict__["text_style"] bolt-on in parse_sequence.
    text_style: dict = field(default_factory=dict)
def _resolve_media_type(path: Path) -> str:
    """Classify *path* as "video" or "image" by its file extension."""
    suffix = path.suffix.lower()
    if suffix in VIDEO_EXTENSIONS:
        return "video"
    if suffix in IMAGE_EXTENSIONS:
        return "image"
    raise ValueError(f"Unbekannter Dateityp: {path}")
def _sort_paths(paths: list[Path], sort: str) -> list[Path]:
if sort == "alphabetical":
return sorted(paths, key=lambda p: p.name.lower())
elif sort == "date":
return sorted(paths, key=lambda p: p.stat().st_mtime)
else:
return paths
def _expand_folder(entry: dict[str, Any], default_image_duration: float) -> list[ClipEntry]:
    """Expand a folder entry into one ClipEntry per contained media file."""
    folder = Path(entry["path"])
    if not folder.exists():
        raise FileNotFoundError(f"Ordner nicht gefunden: {folder}")
    media_extensions = VIDEO_EXTENSIONS | IMAGE_EXTENSIONS
    candidates = [f for f in folder.iterdir() if f.suffix.lower() in media_extensions]
    ordered = _sort_paths(candidates, entry.get("sort", "alphabetical"))
    clips: list[ClipEntry] = []
    for media in ordered:
        clips.append(ClipEntry(
            path=media,
            media_type=_resolve_media_type(media),
            remove_silence=entry.get("remove_silence", False),
            trim_silence=entry.get("trim_silence", False),
            image_duration=entry.get("image_duration", default_image_duration),
        ))
    return clips
def parse_sequence(
    sequence_path: Path,
    resources_folder: Path | None = None,
    default_image_duration: float = 3.0,
) -> tuple[list[ClipEntry], dict[str, Any]]:
    """Read sequence.yaml and build the ordered clip list.

    Returns:
        clips     — ordered list of ClipEntry objects
        music_cfg — music configuration from the sequence file (or empty)

    Raises:
        FileNotFoundError: if the sequence file is missing.
        ValueError: for an unknown entry type.
    """
    if not sequence_path.exists():
        raise FileNotFoundError(f"Sequenz-Datei nicht gefunden: {sequence_path}")
    with open(sequence_path, encoding="utf-8") as fh:
        data = yaml.safe_load(fh) or {}
    clips: list[ClipEntry] = []
    for entry in data.get("sequence", []):
        entry_type = entry.get("type")
        if entry_type == "video":
            path = _resolve_path(entry.get("file", ""), resources_folder, "videos")
            clips.append(ClipEntry(
                path=path,
                media_type="video",
                remove_silence=entry.get("remove_silence", False),
                trim_silence=entry.get("trim_silence", False),
                overlay_text=entry.get("overlay_text"),
                overlay_position=entry.get("overlay_position", "bottom"),
                overlay_duration=entry.get("overlay_duration"),
            ))
        elif entry_type == "image":
            path = _resolve_path(entry.get("file", ""), resources_folder, "images")
            clips.append(ClipEntry(
                path=path,
                media_type="image",
                image_duration=float(entry.get("duration", default_image_duration)),
            ))
        elif entry_type == "text":
            # Text clips are placeholders; merger/text render the real clip.
            # `entry.get("style") or {}` also covers an explicit `style: null`
            # in the YAML, which the previous .get("style", {}) chain crashed on.
            style = entry.get("style") or {}
            text_clip = ClipEntry(
                path=Path("__text__"),
                media_type="text",
                image_duration=float(entry.get("duration", default_image_duration)),
                overlay_text=entry.get("content", ""),
                overlay_position=style.get("position", "center"),
            )
            # Attach styling for the renderer; setattr works whether or not
            # ClipEntry declares the attribute (replaces the __dict__ hack).
            setattr(text_clip, "text_style", style)
            clips.append(text_clip)
        elif entry_type == "folder":
            clips.extend(_expand_folder(entry, default_image_duration))
        else:
            raise ValueError(f"Unbekannter Sequenz-Typ: {entry_type!r}")
    music_cfg = data.get("music", {})
    return clips, music_cfg
def _resolve_path(
raw: str,
resources_folder: Path | None,
subfolder: str,
) -> Path:
"""Datei-Pfad auflösen: absolut, relativ oder aus resources/."""
path = Path(raw)
if path.is_absolute() or path.exists():
return path
if resources_folder:
candidate = resources_folder / subfolder / raw
if candidate.exists():
return candidate
return path # Existenz-Prüfung erfolgt später

105
auto_video_cut/text.py Executable file
View File

@@ -0,0 +1,105 @@
"""Text-Einblendungen und Overlays via ffmpeg drawtext."""
from __future__ import annotations
import subprocess
from pathlib import Path
def _run(cmd: list[str]) -> subprocess.CompletedProcess:
return subprocess.run(cmd, capture_output=True, text=True, check=False)
# drawtext (x, y) expressions per named position; text_w/text_h are the
# rendered text dimensions, w/h the frame dimensions (ffmpeg variables).
_POSITION_MAP = {
    "center": ("(w-text_w)/2", "(h-text_h)/2"),
    "top": ("(w-text_w)/2", "50"),
    "bottom": ("(w-text_w)/2", "h-text_h-50"),
}
def create_text_clip(
    output_path: Path,
    content: str,
    duration: float = 3.0,
    font_size: int = 72,
    font_color: str = "white",
    background_color: str = "black",
    position: str = "center",
    width: int = 1920,
    height: int = 1080,
) -> Path:
    """Render a text-on-solid-background still clip via lavfi color + drawtext."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    x_expr, y_expr = _POSITION_MAP.get(position, _POSITION_MAP["center"])
    # "transparent" is not representable in this pipeline — fall back to black.
    bg = background_color if background_color != "transparent" else "black"
    source = f"color=c={bg}:size={width}x{height}:rate=25:duration={duration}"
    drawtext = (
        f"drawtext=text='{_escape_text(content)}':"
        f"fontsize={font_size}:"
        f"fontcolor={font_color}:"
        f"x={x_expr}:y={y_expr}"
    )
    result = _run([
        "ffmpeg", "-y",
        "-f", "lavfi",
        "-i", source,
        "-vf", drawtext,
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        str(output_path),
    ])
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg drawtext Fehler: {result.stderr}")
    return output_path
def add_text_overlay(
    input_path: Path,
    output_path: Path,
    text: str,
    position: str = "bottom",
    duration: float | None = None,
    font_size: int = 48,
    font_color: str = "white",
) -> Path:
    """Burn a text overlay into an existing video (audio is stream-copied)."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    x_expr, y_expr = _POSITION_MAP.get(position, _POSITION_MAP["bottom"])
    # Show the overlay only for the first *duration* seconds when requested.
    enable = "enable=1" if duration is None else f"enable='between(t,0,{duration})'"
    drawtext = (
        f"drawtext=text='{_escape_text(text)}':"
        f"fontsize={font_size}:"
        f"fontcolor={font_color}:"
        f"x={x_expr}:y={y_expr}:"
        f"{enable}"
    )
    result = _run([
        "ffmpeg", "-y",
        "-i", str(input_path),
        "-vf", drawtext,
        "-c:a", "copy",
        str(output_path),
    ])
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg overlay Fehler: {result.stderr}")
    return output_path
def _escape_text(text: str) -> str:
"""Sonderzeichen für ffmpeg drawtext escapen."""
return text.replace("'", "\\'").replace(":", "\\:").replace("\\", "\\\\")