commit a3bcac55fb32219e494a7bdf906c3e91b06bb3e2 Author: Christoph K. Date: Tue Mar 10 21:07:23 2026 +0100 Initial commit: my-brain-importer RAG knowledge management agent Co-Authored-By: Claude Sonnet 4.6 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..263cdb9 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,50 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +**my-brain-importer** is a personal RAG (Retrieval-Augmented Generation) system written in Go. It ingests Markdown notes and image descriptions into a Qdrant vector database and answers questions using a local LLM via LocalAI. + +## Commands + +```bash +# Build all binaries (Linux + Windows cross-compile) +bash build.sh + +# Run directly without building +go run ./cmd/ingest/ +go run ./cmd/ask/ "your question here" + +# Build individual binaries +go build ./cmd/ingest/ +go build ./cmd/ask/ + +# Run tests +go test ./... + +# Tidy dependencies +go mod tidy +``` + +Binaries are output to `./bin/`. The `config.yml` file must exist in the working directory at runtime. + +## Architecture + +Two CLI tools share a common internal library: + +**`cmd/ingest/`** → `internal/brain/ingest.go` + `internal/brain/ingest_json.go` +- Markdown mode: recursively finds `.md` files, splits by `# `/`## ` headings, chunks long sections (max 800 chars) by paragraphs, embeds in batches of 10, upserts to Qdrant +- JSON mode (when arg ends in `.json`): imports image description records with `file_path`, `file_name`, `description` fields + +**`cmd/ask/`** → `internal/brain/ask.go` +- Embeds the question, searches Qdrant (top-k, score threshold 0.5), deduplicates by text content, streams LLM response constrained to retrieved context + +**`internal/config/config.go`** initializes all clients: gRPC connection to Qdrant and OpenAI-compatible HTTP clients for embeddings and chat (both point to LocalAI). 
+ +## Key Patterns + +- **Deterministic IDs**: SHA256 of `source:text` — upserting the same content is always idempotent +- **Excluded directories**: `05_Agents` and `.git` are skipped during markdown ingest +- **config.yml** must be present in the working directory; defines Qdrant host/port/api_key, embedding model + dimensions, chat model, `brain_root` path, and `top_k` +- External services: Qdrant (gRPC port 6334) and LocalAI (HTTP, OpenAI-compatible API) diff --git a/README.md b/README.md new file mode 100755 index 0000000..1e2173d --- /dev/null +++ b/README.md @@ -0,0 +1,102 @@ +# my-brain-importer + +Persönlicher Wissens-Agent für den AI_Brain. Importiert Markdown-Notizen und Bildbeschreibungen in eine Qdrant-Vektordatenbank und beantwortet Fragen darüber mit einem lokalen LLM. + +## Architektur + +``` +AI_Brain/ + *.md Dateien + │ + ▼ + bin/ingest Embeddings via LocalAI + │ + ▼ + Qdrant (NAS) ◄──── bin/ask ──► LocalAI (Chat) +``` + +- **Embeddings**: LocalAI unter `embedding.url` (Modell konfigurierbar) +- **Vektordatenbank**: Qdrant auf dem NAS +- **Chat-Completion**: LocalAI unter `chat.url` (Modell konfigurierbar) + +## Projektstruktur + +``` +AI-Agent/ + cmd/ + ingest/main.go Entry Point für ingest-Binary + ask/main.go Entry Point für ask-Binary + internal/ + config/config.go Config-Struct, Clients, Verbindungen + brain/ + ingest.go Markdown-Import, Chunking + ingest_json.go JSON-Import (Bildbeschreibungen) + ask.go Suche + LLM-Antwort + bin/ Kompilierte Binaries (von build.sh erzeugt) + config.yml Alle Einstellungen + build.sh Baut beide Binaries +``` + +## Konfiguration + +Alle Einstellungen in `config.yml` (muss im Arbeitsverzeichnis liegen): + +```yaml +qdrant: + host: "192.168.1.4" + port: "6334" + api_key: "..." 
+ collection: "jacek-brain" + +embedding: + url: "http://192.168.1.118:8080/v1" + model: "qwen3-embedding-4b" + dimensions: 2560 # muss zum Modell passen + +chat: + url: "http://192.168.1.118:8080/v1" + model: "qwen3.5-4b-claude-4.6-opus-reasoning-distilled" + +brain_root: "/mnt/c/Users/jacek/AI_Brain" +top_k: 3 +``` + +> **Wichtig:** Wenn du `embedding.model` oder `dimensions` änderst, muss die Qdrant-Collection neu erstellt werden (im Dashboard löschen, dann `ingest` erneut ausführen). + +## Build + +```bash +bash build.sh +``` + +Erzeugt `bin/ingest`, `bin/ingest.exe`, `bin/ask`, `bin/ask.exe`. + +## Nutzung + +```bash +# Markdown-Dateien aus brain_root importieren +./bin/ingest + +# Alternatives Verzeichnis angeben +./bin/ingest /pfad/zum/verzeichnis + +# Bildbeschreibungen aus JSON importieren +./bin/ingest image_descriptions.json + +# Frage stellen +./bin/ask "Was sind meine Reisepläne für Norwegen?" +./bin/ask "Erzähl mir über Veronica Bellmore" +``` + +## Brain aktualisieren + +Kein Löschen der Datenbank nötig — einfach `./bin/ingest` erneut ausführen: +- Bestehende Chunks → gleiche SHA256-ID → Qdrant überschreibt +- Neue Dateien → neue IDs → werden hinzugefügt + +## Voraussetzungen + +- Go 1.22+ +- LocalAI läuft auf `embedding.url` mit dem konfigurierten Embedding-Modell geladen +- LocalAI läuft auf `chat.url` mit dem konfigurierten Chat-Modell geladen +- Qdrant läuft auf dem NAS (Port 6334 gRPC, Port 6333 Dashboard) diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..6528e7e --- /dev/null +++ b/build.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +OUT_DIR="./bin" +mkdir -p "$OUT_DIR" + +echo "Baue ingest ..." +GOOS=linux GOARCH=amd64 go build -o "$OUT_DIR/ingest" ./cmd/ingest/ +GOOS=windows GOARCH=amd64 go build -o "$OUT_DIR/ingest.exe" ./cmd/ingest/ +echo " Linux: $OUT_DIR/ingest" +echo " Windows: $OUT_DIR/ingest.exe" + +echo "Baue ask ..." 
+GOOS=linux GOARCH=amd64 go build -o "$OUT_DIR/ask" ./cmd/ask/ +GOOS=windows GOARCH=amd64 go build -o "$OUT_DIR/ask.exe" ./cmd/ask/ +echo " Linux: $OUT_DIR/ask" +echo " Windows: $OUT_DIR/ask.exe" + +echo "" +echo "Fertig. Nutzung:" +echo " $OUT_DIR/ingest # Markdown importieren" +echo " $OUT_DIR/ingest bild.json # JSON importieren" +echo " $OUT_DIR/ask \"Was sind meine Pläne?\"" diff --git a/cmd/ask/main.go b/cmd/ask/main.go new file mode 100755 index 0000000..37fc994 --- /dev/null +++ b/cmd/ask/main.go @@ -0,0 +1,30 @@ +// ask – stellt Fragen an die Qdrant-Wissensdatenbank und antwortet mit einem LLM +package main + +import ( + "fmt" + "os" + "strings" + + "my-brain-importer/internal/brain" + "my-brain-importer/internal/config" +) + +func main() { + config.LoadConfig() + + bin := os.Args[0] + + if len(os.Args) < 2 { + fmt.Printf("ask – stellt Fragen an deinen AI Brain\n\n") + fmt.Printf("Usage:\n") + fmt.Printf(" %s \"Deine Frage\"\n\n", bin) + fmt.Printf("Beispiele:\n") + fmt.Printf(" %s \"Was sind meine Reisepläne?\"\n", bin) + fmt.Printf(" %s \"Erzähl mir über Veronica Bellmore\"\n", bin) + os.Exit(1) + } + + question := strings.Join(os.Args[1:], " ") + brain.Ask(question) +} diff --git a/cmd/ingest/main.go b/cmd/ingest/main.go new file mode 100755 index 0000000..809b59e --- /dev/null +++ b/cmd/ingest/main.go @@ -0,0 +1,46 @@ +// ingest – importiert Markdown-Dateien und Bildbeschreibungen in Qdrant +package main + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "my-brain-importer/internal/brain" + "my-brain-importer/internal/config" +) + +func main() { + config.LoadConfig() + + bin := os.Args[0] + + if len(os.Args) < 2 { + // Standard: Markdown aus brain_root importieren + brain.RunIngest(config.Cfg.BrainRoot) + return + } + + arg := os.Args[1] + + switch { + case arg == "-h" || arg == "--help": + printUsage(bin) + case strings.ToLower(filepath.Ext(arg)) == ".json": + // Argument ist eine JSON-Datei → Bildbeschreibungen importieren + 
// printUsage writes the help text of the ingest binary to stdout and then
// exits the process with status 0. bin is the program name that is inserted
// at the start of every example invocation.
func printUsage(bin string) {
	fmt.Printf("ingest – importiert Daten in die Qdrant-Wissensdatenbank\n\n")
	fmt.Printf("Usage:\n")

	// One format string per supported invocation; each takes the binary name.
	invocations := []string{
		" %s – Markdown aus brain_root (config.yml) importieren\n",
		" %s /pfad/zum/ordner – Markdown aus benutzerdefiniertem Verzeichnis\n",
		" %s datei.json – Bildbeschreibungen aus JSON importieren\n",
	}
	for _, format := range invocations {
		fmt.Printf(format, bin)
	}

	os.Exit(0)
}
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/qdrant/go-client v1.12.0 h1:KqsIKDAw5iQmxDzRjbzRjhvQ+Igyr7Y84vDCinf1T4M= +github.com/qdrant/go-client v1.12.0/go.mod h1:zFa6t5Y3Oqecoa0aSsGWhMqQWq3x3kTPvm0sMf5qplw= +github.com/sashabaranov/go-openai v1.37.0 h1:hQQowgYm4OXJ1Z/wTrE+XZaO20BYsL0R3uRPSpfNZkY= +github.com/sashabaranov/go-openai v1.37.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY= +go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI= +go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ= +go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE= +go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A= +go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU= +go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk= +go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w= +go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= +go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= +golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= +golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= +golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= +golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod 
import (
	"context"
	"errors"
	"fmt"
	"io"
	"log"
	"strings"

	pb "github.com/qdrant/go-client/qdrant"
	openai "github.com/sashabaranov/go-openai"
	"google.golang.org/grpc/metadata"

	"my-brain-importer/internal/config"
)
+func Ask(question string) { + ctx := context.Background() + ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey) + + fmt.Printf("🤔 Frage: \"%s\"\n\n", question) + + embClient := config.NewEmbeddingClient() + chatClient := config.NewChatClient() + + fmt.Println("🔍 Durchsuche lokale Wissensdatenbank...") + chunks := searchKnowledge(ctx, embClient, question) + + if len(chunks) == 0 { + fmt.Println("\n❌ Keine relevanten Informationen in der Datenbank gefunden.") + fmt.Println(" Füge mehr Daten mit './bin/ingest' hinzu.") + return + } + + contextText := buildContext(chunks) + fmt.Printf("✅ %d relevante Informationen gefunden\n\n", len(chunks)) + + systemPrompt := `Du bist ein hilfreicher persönlicher Assistent. +Deine Aufgabe ist es, Fragen basierend auf den bereitgestellten Informationen zu beantworten. + +WICHTIGE REGELN: +- Antworte nur basierend auf den bereitgestellten Informationen +- Wenn die Informationen die Frage nicht beantworten, sage das ehrlich +- Antworte auf Deutsch +- Sei präzise und direkt +- Erfinde keine Informationen hinzu` + + userPrompt := fmt.Sprintf(`Hier sind die relevanten Informationen aus meiner Wissensdatenbank: + +%s + +Basierend auf diesen Informationen, beantworte bitte folgende Frage: +%s`, contextText, question) + + fmt.Println("🧠 Generiere Antwort mit lokalem Modell...") + fmt.Println(strings.Repeat("═", 80)) + + stream, err := chatClient.CreateChatCompletionStream(ctx, openai.ChatCompletionRequest{ + Model: config.Cfg.Chat.Model, + Messages: []openai.ChatCompletionMessage{ + {Role: openai.ChatMessageRoleSystem, Content: systemPrompt}, + {Role: openai.ChatMessageRoleUser, Content: userPrompt}, + }, + Temperature: 0.7, + MaxTokens: 500, + }) + if err != nil { + log.Fatalf("❌ LLM Fehler: %v", err) + } + defer stream.Close() + + fmt.Println("\n💬 Antwort:\n") + for { + response, err := stream.Recv() + if err != nil { + break + } + if len(response.Choices) > 0 { + fmt.Print(response.Choices[0].Delta.Content) + 
} + } + + fmt.Println("\n") + fmt.Println(strings.Repeat("═", 80)) + fmt.Println("\n📚 Verwendete Quellen:") + for i, chunk := range chunks { + preview := chunk.Text + if len(preview) > 80 { + preview = preview[:80] + "..." + } + fmt.Printf(" [%d] %.1f%% - %s\n", i+1, chunk.Score*100, preview) + } +} + +func searchKnowledge(ctx context.Context, embClient *openai.Client, query string) []KnowledgeChunk { + embResp, err := embClient.CreateEmbeddings(ctx, openai.EmbeddingRequest{ + Input: []string{query}, + Model: openai.EmbeddingModel(config.Cfg.Embedding.Model), + }) + if err != nil { + log.Printf("❌ Embedding Fehler: %v", err) + return nil + } + + conn := config.NewQdrantConn() + defer conn.Close() + + searchResult, err := pb.NewPointsClient(conn).Search(ctx, &pb.SearchPoints{ + CollectionName: config.Cfg.Qdrant.Collection, + Vector: embResp.Data[0].Embedding, + Limit: config.Cfg.TopK, + WithPayload: &pb.WithPayloadSelector{ + SelectorOptions: &pb.WithPayloadSelector_Enable{Enable: true}, + }, + ScoreThreshold: floatPtr(0.5), + }) + if err != nil { + log.Printf("❌ Suche fehlgeschlagen: %v", err) + return nil + } + + var chunks []KnowledgeChunk + seen := make(map[string]bool) + for _, hit := range searchResult.Result { + text := hit.Payload["text"].GetStringValue() + if seen[text] { + continue + } + seen[text] = true + chunks = append(chunks, KnowledgeChunk{ + Text: text, + Score: hit.Score, + Source: hit.Payload["source"].GetStringValue(), + }) + } + return chunks +} + +func buildContext(chunks []KnowledgeChunk) string { + var b strings.Builder + for i, chunk := range chunks { + fmt.Fprintf(&b, "--- Information %d (Relevanz: %.1f%%) ---\n", i+1, chunk.Score*100) + b.WriteString(chunk.Text) + b.WriteString("\n\n") + } + return b.String() +} + +func floatPtr(f float32) *float32 { return &f } diff --git a/internal/brain/ingest.go b/internal/brain/ingest.go new file mode 100755 index 0000000..15e7afa --- /dev/null +++ b/internal/brain/ingest.go @@ -0,0 +1,237 @@ +// 
// generateID derives a deterministic point ID for a chunk: the first 16
// bytes of the SHA-256 digest of source + ":" + text, hex-encoded (32
// characters). The same chunk from the same source always maps to the same
// ID, so importing it again does not create a duplicate.
func generateID(text, source string) string {
	hasher := sha256.New()
	hasher.Write([]byte(source))
	hasher.Write([]byte(":"))
	hasher.Write([]byte(text))

	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest[:16])
}
// collectMarkdownFiles walks root recursively and returns the paths of all
// files whose extension is ".md" (case-insensitive), in the lexical order
// produced by filepath.Walk.
//
// Directories named exactly "05_Agents" or ".git" are skipped together with
// everything below them. The comparison uses the directory's own name rather
// than the full path, so a root that merely lies inside such a path, or a
// directory like "notes.git" or "05_Agents_old", is still imported. The root
// itself is never skipped. Entries that cannot be read are logged and
// skipped; they do not abort the walk.
func collectMarkdownFiles(root string) []string {
	var files []string
	// The callback only ever returns nil or filepath.SkipDir, so Walk itself
	// has no error to report.
	_ = filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			log.Printf("⚠️ Überspringe %s: %v", path, err)
			return nil
		}
		if info.IsDir() {
			excluded := info.Name() == "05_Agents" || info.Name() == ".git"
			if excluded && path != root {
				return filepath.SkipDir
			}
			return nil
		}
		if strings.EqualFold(filepath.Ext(path), ".md") {
			files = append(files, path)
		}
		return nil
	})
	return files
}
sections []string + var current strings.Builder + for _, line := range lines { + if strings.HasPrefix(line, "# ") || strings.HasPrefix(line, "## ") { + if current.Len() > 0 { + sections = append(sections, current.String()) + current.Reset() + } + } + current.WriteString(line) + current.WriteString("\n") + } + if current.Len() > 0 { + sections = append(sections, current.String()) + } + return sections +} + +func splitLongSection(section string) []string { + if len(section) <= maxChunkSize { + return []string{section} + } + paragraphs := strings.Split(section, "\n\n") + var chunks []string + var current strings.Builder + for _, para := range paragraphs { + para = strings.TrimSpace(para) + if para == "" { + continue + } + if current.Len()+len(para) > maxChunkSize && current.Len() > 0 { + chunks = append(chunks, current.String()) + current.Reset() + } + if current.Len() > 0 { + current.WriteString("\n\n") + } + current.WriteString(para) + } + if current.Len() > 0 { + chunks = append(chunks, current.String()) + } + return chunks +} + +func ingestChunks(ctx context.Context, embClient *openai.Client, pointsClient pb.PointsClient, chunks []chunk) error { + texts := make([]string, len(chunks)) + for i, c := range chunks { + texts[i] = c.Text + } + + batchSize := 10 + var points []*pb.PointStruct + + for i := 0; i < len(texts); i += batchSize { + end := i + batchSize + if end > len(texts) { + end = len(texts) + } + embResp, err := embClient.CreateEmbeddings(ctx, openai.EmbeddingRequest{ + Input: texts[i:end], + Model: openai.EmbeddingModel(config.Cfg.Embedding.Model), + }) + if err != nil { + return fmt.Errorf("Embedding fehlgeschlagen: %w", err) + } + for j, emb := range embResp.Data { + c := chunks[i+j] + points = append(points, &pb.PointStruct{ + Id: &pb.PointId{ + PointIdOptions: &pb.PointId_Uuid{Uuid: generateID(c.Text, c.Source)}, + }, + Vectors: &pb.Vectors{ + VectorsOptions: &pb.Vectors_Vector{ + Vector: &pb.Vector{Data: emb.Embedding}, + }, + }, + Payload: 
// ImageEntry mirrors the JSON output of analyze-images.go; the input file
// for RunIngestJSON is a JSON array of these records.
type ImageEntry struct {
	// FilePath is read from the "file_path" key and stored as the "path" payload.
	FilePath string `json:"file_path"`
	// FileName is read from "file_name"; it is used as the point's source.
	FileName string `json:"file_name"`
	// Description is read from "description"; this is the text that is embedded.
	Description string `json:"description"`
}
+func RunIngestJSON(inputFile string) { + fmt.Printf("📂 Lade \"%s\"...\n", inputFile) + raw, err := os.ReadFile(inputFile) + if err != nil { + log.Fatalf("❌ Datei nicht gefunden: %v", err) + } + + var entries []ImageEntry + if err := json.Unmarshal(raw, &entries); err != nil { + log.Fatalf("❌ JSON Fehler: %v", err) + } + if len(entries) == 0 { + log.Fatal("❌ Keine Einträge in JSON") + } + fmt.Printf("✅ %d Einträge geladen\n\n", len(entries)) + + ctx := context.Background() + ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey) + + conn := config.NewQdrantConn() + defer conn.Close() + + ensureCollection(ctx, pb.NewCollectionsClient(conn)) + pointsClient := pb.NewPointsClient(conn) + embClient := config.NewEmbeddingClient() + + fmt.Printf("🤖 Embedding: %s (%s)\n\n", config.Cfg.Embedding.Model, config.Cfg.Embedding.URL) + + success := 0 + for i, entry := range entries { + fmt.Printf("[%d/%d] 🔄 %s\n", i+1, len(entries), entry.FileName) + + embResp, err := embClient.CreateEmbeddings(ctx, openai.EmbeddingRequest{ + Input: []string{entry.Description}, + Model: openai.EmbeddingModel(config.Cfg.Embedding.Model), + }) + if err != nil { + log.Printf(" ❌ Embedding Fehler: %v\n", err) + continue + } + + _, err = pointsClient.Upsert(ctx, &pb.UpsertPoints{ + CollectionName: config.Cfg.Qdrant.Collection, + Points: []*pb.PointStruct{ + { + Id: &pb.PointId{ + PointIdOptions: &pb.PointId_Uuid{ + Uuid: generateID(entry.Description, entry.FileName), + }, + }, + Vectors: &pb.Vectors{ + VectorsOptions: &pb.Vectors_Vector{ + Vector: &pb.Vector{Data: embResp.Data[0].Embedding}, + }, + }, + Payload: map[string]*pb.Value{ + "text": {Kind: &pb.Value_StringValue{StringValue: entry.Description}}, + "source": {Kind: &pb.Value_StringValue{StringValue: entry.FileName}}, + "path": {Kind: &pb.Value_StringValue{StringValue: entry.FilePath}}, + "type": {Kind: &pb.Value_StringValue{StringValue: "image"}}, + }, + }, + }, + }) + if err != nil { + log.Printf(" ❌ Speichern 
Fehler: %v\n", err) + } else { + success++ + } + } + + fmt.Printf("\n✅ Fertig: %d von %d Bildern importiert\n", success, len(entries)) + fmt.Printf("🌐 Dashboard: http://%s:6333/dashboard\n", config.Cfg.Qdrant.Host) +} diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100755 index 0000000..54f73e6 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,76 @@ +// config.go – Konfiguration, Clients und gemeinsame Verbindungen +package config + +import ( + "fmt" + "log" + "os" + + openai "github.com/sashabaranov/go-openai" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "gopkg.in/yaml.v3" +) + +type Config struct { + Qdrant struct { + Host string `yaml:"host"` + Port string `yaml:"port"` + APIKey string `yaml:"api_key"` + Collection string `yaml:"collection"` + } `yaml:"qdrant"` + + Embedding struct { + URL string `yaml:"url"` + Model string `yaml:"model"` + Dimensions uint64 `yaml:"dimensions"` + } `yaml:"embedding"` + + Chat struct { + URL string `yaml:"url"` + Model string `yaml:"model"` + } `yaml:"chat"` + + BrainRoot string `yaml:"brain_root"` + TopK uint64 `yaml:"top_k"` +} + +var Cfg Config + +// NewQdrantConn öffnet eine gRPC-Verbindung zur Qdrant-Instanz. +// Der Aufrufer ist verantwortlich für conn.Close(). +func NewQdrantConn() *grpc.ClientConn { + conn, err := grpc.Dial( + fmt.Sprintf("%s:%s", Cfg.Qdrant.Host, Cfg.Qdrant.Port), + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + log.Fatalf("❌ Qdrant Verbindung fehlgeschlagen: %v", err) + } + return conn +} + +// NewEmbeddingClient erstellt einen Client für LocalAI (Embeddings). +func NewEmbeddingClient() *openai.Client { + c := openai.DefaultConfig("localai") + c.BaseURL = Cfg.Embedding.URL + return openai.NewClientWithConfig(c) +} + +// NewChatClient erstellt einen Client für Chat-Completion (LocalAI). 
+func NewChatClient() *openai.Client { + c := openai.DefaultConfig("localai") + c.BaseURL = Cfg.Chat.URL + return openai.NewClientWithConfig(c) +} + +// LoadConfig liest config.yml aus dem aktuellen Verzeichnis. +func LoadConfig() { + data, err := os.ReadFile("config.yml") + if err != nil { + log.Fatalf("❌ config.yml nicht gefunden: %v\n Lege config.yml im selben Verzeichnis an.", err) + } + if err := yaml.Unmarshal(data, &Cfg); err != nil { + log.Fatalf("❌ config.yml ungültig: %v", err) + } +} diff --git a/my-brain-importer b/my-brain-importer new file mode 100755 index 0000000..95d166e Binary files /dev/null and b/my-brain-importer differ