Initial commit: my-brain-importer RAG knowledge management agent

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Christoph K.
2026-03-10 21:07:23 +01:00
commit a3bcac55fb
12 changed files with 880 additions and 0 deletions

157
internal/brain/ask.go Executable file
View File

@@ -0,0 +1,157 @@
// ask.go Sucht relevante Chunks in Qdrant und beantwortet Fragen mit einem LLM
package brain
import (
	"context"
	"errors"
	"fmt"
	"io"
	"log"
	"strings"

	pb "github.com/qdrant/go-client/qdrant"
	openai "github.com/sashabaranov/go-openai"
	"google.golang.org/grpc/metadata"

	"my-brain-importer/internal/config"
)
// KnowledgeChunk represents a single search hit returned from Qdrant.
type KnowledgeChunk struct {
	Text   string  // chunk text taken from the "text" payload field
	Score  float32 // similarity score reported by Qdrant for this hit
	Source string  // originating file name from the "source" payload field
}
// Ask searches the local knowledge base for chunks relevant to question and
// streams an LLM-generated answer to stdout, followed by a source listing.
// It exits the process on an unrecoverable LLM error (CLI-style handling).
func Ask(question string) {
	ctx := context.Background()
	// Qdrant authenticates via the "api-key" gRPC metadata header.
	ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
	fmt.Printf("🤔 Frage: \"%s\"\n\n", question)
	embClient := config.NewEmbeddingClient()
	chatClient := config.NewChatClient()
	fmt.Println("🔍 Durchsuche lokale Wissensdatenbank...")
	chunks := searchKnowledge(ctx, embClient, question)
	if len(chunks) == 0 {
		fmt.Println("\n❌ Keine relevanten Informationen in der Datenbank gefunden.")
		fmt.Println(" Füge mehr Daten mit './bin/ingest' hinzu.")
		return
	}
	contextText := buildContext(chunks)
	fmt.Printf("✅ %d relevante Informationen gefunden\n\n", len(chunks))
	systemPrompt := `Du bist ein hilfreicher persönlicher Assistent.
Deine Aufgabe ist es, Fragen basierend auf den bereitgestellten Informationen zu beantworten.
WICHTIGE REGELN:
- Antworte nur basierend auf den bereitgestellten Informationen
- Wenn die Informationen die Frage nicht beantworten, sage das ehrlich
- Antworte auf Deutsch
- Sei präzise und direkt
- Erfinde keine Informationen hinzu`
	userPrompt := fmt.Sprintf(`Hier sind die relevanten Informationen aus meiner Wissensdatenbank:
%s
Basierend auf diesen Informationen, beantworte bitte folgende Frage:
%s`, contextText, question)
	fmt.Println("🧠 Generiere Antwort mit lokalem Modell...")
	fmt.Println(strings.Repeat("═", 80))
	stream, err := chatClient.CreateChatCompletionStream(ctx, openai.ChatCompletionRequest{
		Model: config.Cfg.Chat.Model,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleSystem, Content: systemPrompt},
			{Role: openai.ChatMessageRoleUser, Content: userPrompt},
		},
		Temperature: 0.7,
		MaxTokens:   500,
	})
	if err != nil {
		log.Fatalf("❌ LLM Fehler: %v", err)
	}
	defer stream.Close()
	fmt.Println("\n💬 Antwort:\n")
	for {
		response, err := stream.Recv()
		if err != nil {
			// io.EOF is the normal end-of-stream signal from the streaming
			// API; anything else means the answer was cut short and should
			// be surfaced instead of silently ignored.
			if !errors.Is(err, io.EOF) {
				log.Printf("⚠️ Antwort-Stream abgebrochen: %v", err)
			}
			break
		}
		if len(response.Choices) > 0 {
			fmt.Print(response.Choices[0].Delta.Content)
		}
	}
	fmt.Println("\n")
	fmt.Println(strings.Repeat("═", 80))
	fmt.Println("\n📚 Verwendete Quellen:")
	for i, c := range chunks { // "c" avoids shadowing the package-level chunk type
		// Truncate on rune boundaries so multi-byte characters (umlauts,
		// emoji) are never cut in half by a byte-index slice.
		preview := c.Text
		if r := []rune(preview); len(r) > 80 {
			preview = string(r[:80]) + "..."
		}
		fmt.Printf(" [%d] %.1f%% - %s\n", i+1, c.Score*100, preview)
	}
}
// searchKnowledge embeds the query and runs a similarity search against the
// configured Qdrant collection. Hits below a 0.5 score are filtered out by
// the server; duplicate texts are de-duplicated client-side. Returns nil on
// any embedding or search error (errors are logged, not propagated).
func searchKnowledge(ctx context.Context, embClient *openai.Client, query string) []KnowledgeChunk {
	embResp, err := embClient.CreateEmbeddings(ctx, openai.EmbeddingRequest{
		Input: []string{query},
		Model: openai.EmbeddingModel(config.Cfg.Embedding.Model),
	})
	if err != nil {
		log.Printf("❌ Embedding Fehler: %v", err)
		return nil
	}
	// Defensive: an empty Data slice would panic on Data[0] below.
	if len(embResp.Data) == 0 {
		log.Printf("❌ Embedding Fehler: leere Antwort vom Embedding-Server")
		return nil
	}
	conn := config.NewQdrantConn()
	defer conn.Close()
	searchResult, err := pb.NewPointsClient(conn).Search(ctx, &pb.SearchPoints{
		CollectionName: config.Cfg.Qdrant.Collection,
		Vector:         embResp.Data[0].Embedding,
		Limit:          config.Cfg.TopK,
		WithPayload: &pb.WithPayloadSelector{
			SelectorOptions: &pb.WithPayloadSelector_Enable{Enable: true},
		},
		// Server-side relevance cutoff: drop weak matches entirely.
		ScoreThreshold: floatPtr(0.5),
	})
	if err != nil {
		log.Printf("❌ Suche fehlgeschlagen: %v", err)
		return nil
	}
	var chunks []KnowledgeChunk
	seen := make(map[string]bool) // identical texts can exist under several sources
	for _, hit := range searchResult.Result {
		text := hit.Payload["text"].GetStringValue()
		if seen[text] {
			continue
		}
		seen[text] = true
		chunks = append(chunks, KnowledgeChunk{
			Text:   text,
			Score:  hit.Score,
			Source: hit.Payload["source"].GetStringValue(),
		})
	}
	return chunks
}
// buildContext renders the retrieved chunks as a numbered, relevance-tagged
// plain-text block that is injected into the LLM prompt.
func buildContext(chunks []KnowledgeChunk) string {
	var sb strings.Builder
	for idx, c := range chunks {
		sb.WriteString(fmt.Sprintf("--- Information %d (Relevanz: %.1f%%) ---\n", idx+1, c.Score*100))
		sb.WriteString(c.Text)
		sb.WriteString("\n\n")
	}
	return sb.String()
}
func floatPtr(f float32) *float32 { return &f }

237
internal/brain/ingest.go Executable file
View File

@@ -0,0 +1,237 @@
// ingest.go Importiert Markdown-Dateien in Qdrant
package brain
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"time"
pb "github.com/qdrant/go-client/qdrant"
openai "github.com/sashabaranov/go-openai"
"google.golang.org/grpc/metadata"
"my-brain-importer/internal/config"
)
// maxChunkSize is the soft upper bound, in bytes, for a single text chunk.
const maxChunkSize = 800

// generateID derives a deterministic point ID from a chunk's source and text
// via SHA-256 (first 16 bytes, hex-encoded). The same chunk always maps to
// the same ID, so re-importing never creates duplicates in Qdrant.
func generateID(text, source string) string {
	sum := sha256.Sum256([]byte(source + ":" + text))
	return hex.EncodeToString(sum[:16])
}
// RunIngest imports every Markdown file under brainRoot into Qdrant:
// files are chunked, embedded, and upserted with deterministic IDs.
// Per-file errors are logged and skipped; fatal setup errors exit the process.
func RunIngest(brainRoot string) {
	ctx := context.Background()
	// Qdrant authenticates via the "api-key" gRPC metadata header.
	ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
	fmt.Printf("📂 Verzeichnis: %s\n", brainRoot)
	fmt.Printf("🗄️ Qdrant: %s:%s, Collection: %s\n", config.Cfg.Qdrant.Host, config.Cfg.Qdrant.Port, config.Cfg.Qdrant.Collection)
	fmt.Printf("🤖 Embedding: %s (%s)\n\n", config.Cfg.Embedding.Model, config.Cfg.Embedding.URL)
	embClient := config.NewEmbeddingClient()
	conn := config.NewQdrantConn()
	defer conn.Close()
	ensureCollection(ctx, pb.NewCollectionsClient(conn))
	pointsClient := pb.NewPointsClient(conn)
	files := collectMarkdownFiles(brainRoot)
	fmt.Printf("📄 %d Markdown-Dateien gefunden\n\n", len(files))
	totalChunks := 0
	for _, filePath := range files {
		// Use the path relative to brainRoot as the source label; fall back
		// to the full path instead of silently discarding the error.
		relPath, err := filepath.Rel(brainRoot, filePath)
		if err != nil {
			relPath = filePath
		}
		chunks := readAndChunk(filePath, relPath)
		if len(chunks) == 0 {
			continue
		}
		fmt.Printf(" %-50s %d Chunks\n", relPath, len(chunks))
		if err := ingestChunks(ctx, embClient, pointsClient, chunks); err != nil {
			log.Printf(" ⚠️ Fehler bei %s: %v", relPath, err)
			continue
		}
		totalChunks += len(chunks)
		// Brief pause between files to avoid hammering the embedding server.
		time.Sleep(100 * time.Millisecond)
	}
	fmt.Printf("\n✅ Import abgeschlossen: %d Chunks aus %d Dateien\n", totalChunks, len(files))
	fmt.Printf("🌐 Dashboard: http://%s:6333/dashboard\n", config.Cfg.Qdrant.Host)
}
// ensureCollection creates the configured Qdrant collection with the cosine
// distance metric. An already-existing collection is treated as success; any
// other creation failure terminates the process.
func ensureCollection(ctx context.Context, client pb.CollectionsClient) {
	req := &pb.CreateCollection{
		CollectionName: config.Cfg.Qdrant.Collection,
		VectorsConfig: &pb.VectorsConfig{
			Config: &pb.VectorsConfig_Params{
				Params: &pb.VectorParams{
					Size:     config.Cfg.Embedding.Dimensions,
					Distance: pb.Distance_Cosine,
				},
			},
		},
	}
	_, err := client.Create(ctx, req)
	switch {
	case err == nil:
		fmt.Printf("✅ Collection \"%s\" erstellt\n", config.Cfg.Qdrant.Collection)
	case strings.Contains(err.Error(), "already exists"):
		// The gRPC API has no dedicated "exists" error; match on the message.
		fmt.Printf("✅ Collection \"%s\" existiert bereits\n", config.Cfg.Qdrant.Collection)
	default:
		log.Fatalf("❌ Collection konnte nicht erstellt werden: %v", err)
	}
}
func collectMarkdownFiles(root string) []string {
var files []string
filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
if err != nil {
return nil
}
if info.IsDir() && (strings.Contains(path, "05_Agents") || strings.HasSuffix(path, ".git")) {
return filepath.SkipDir
}
if !info.IsDir() && strings.ToLower(filepath.Ext(path)) == ".md" {
files = append(files, path)
}
return nil
})
return files
}
// chunk is one indexable unit of text extracted from a source file.
type chunk struct {
	Text   string // the chunk body sent to the embedding model
	Source string // relative path of the originating file
	Type   string // payload type tag (e.g. "text")
}
// readAndChunk loads a Markdown file and splits it into indexable chunks:
// first by headings, then by size. Returns nil for unreadable or effectively
// empty files; unreadable files are logged.
func readAndChunk(filePath, relPath string) []chunk {
	data, err := os.ReadFile(filePath)
	if err != nil {
		log.Printf("⚠️ Datei nicht lesbar: %s", filePath)
		return nil
	}
	content := strings.TrimSpace(string(data))
	if content == "" {
		return nil
	}
	var out []chunk
	for _, sec := range splitByHeadings(content) {
		sec = strings.TrimSpace(sec)
		// Skip near-empty sections (stray headings, separators).
		if len(sec) < 20 {
			continue
		}
		for _, piece := range splitLongSection(sec) {
			out = append(out, chunk{Text: piece, Source: relPath, Type: "text"})
		}
	}
	return out
}
// splitByHeadings partitions Markdown text into sections, starting a new
// section at every H1 ("# ") or H2 ("## ") heading line. Each returned
// section keeps its trailing newline; text before the first heading forms
// its own section.
func splitByHeadings(text string) []string {
	var (
		sections []string
		buf      strings.Builder
	)
	flush := func() {
		if buf.Len() > 0 {
			sections = append(sections, buf.String())
			buf.Reset()
		}
	}
	for _, line := range strings.Split(text, "\n") {
		if strings.HasPrefix(line, "# ") || strings.HasPrefix(line, "## ") {
			flush()
		}
		buf.WriteString(line)
		buf.WriteString("\n")
	}
	flush()
	return sections
}
// splitLongSection splits a section that exceeds maxChunkSize into smaller
// chunks along paragraph boundaries ("\n\n"). Paragraphs are greedily packed
// into chunks; a single paragraph longer than maxChunkSize is kept intact
// rather than split mid-paragraph.
func splitLongSection(section string) []string {
	if len(section) <= maxChunkSize {
		return []string{section}
	}
	var (
		chunks  []string
		current strings.Builder
	)
	for _, para := range strings.Split(section, "\n\n") {
		para = strings.TrimSpace(para)
		if para == "" {
			continue
		}
		// The +2 accounts for the "\n\n" joiner written below; without it a
		// packed chunk could end up slightly larger than maxChunkSize.
		if current.Len() > 0 && current.Len()+2+len(para) > maxChunkSize {
			chunks = append(chunks, current.String())
			current.Reset()
		}
		if current.Len() > 0 {
			current.WriteString("\n\n")
		}
		current.WriteString(para)
	}
	if current.Len() > 0 {
		chunks = append(chunks, current.String())
	}
	return chunks
}
// ingestChunks embeds all chunks in batches of 10 and upserts the resulting
// points into Qdrant in a single blocking request. Point IDs are deterministic
// (see generateID), so re-ingesting identical content overwrites instead of
// duplicating. Returns the first embedding or upsert error encountered.
func ingestChunks(ctx context.Context, embClient *openai.Client, pointsClient pb.PointsClient, chunks []chunk) error {
	texts := make([]string, len(chunks))
	for i, c := range chunks {
		texts[i] = c.Text
	}
	const batchSize = 10
	var points []*pb.PointStruct
	for i := 0; i < len(texts); i += batchSize {
		end := i + batchSize
		if end > len(texts) {
			end = len(texts)
		}
		embResp, err := embClient.CreateEmbeddings(ctx, openai.EmbeddingRequest{
			Input: texts[i:end],
			Model: openai.EmbeddingModel(config.Cfg.Embedding.Model),
		})
		if err != nil {
			return fmt.Errorf("Embedding fehlgeschlagen: %w", err)
		}
		// Defensive: the server must return exactly one embedding per input,
		// otherwise chunks[i+j] below would index out of range.
		if len(embResp.Data) != end-i {
			return fmt.Errorf("Embedding-Antwort unvollständig: %d statt %d Vektoren", len(embResp.Data), end-i)
		}
		for j, emb := range embResp.Data {
			c := chunks[i+j]
			points = append(points, &pb.PointStruct{
				Id: &pb.PointId{
					PointIdOptions: &pb.PointId_Uuid{Uuid: generateID(c.Text, c.Source)},
				},
				Vectors: &pb.Vectors{
					VectorsOptions: &pb.Vectors_Vector{
						Vector: &pb.Vector{Data: emb.Embedding},
					},
				},
				Payload: map[string]*pb.Value{
					"text":   {Kind: &pb.Value_StringValue{StringValue: c.Text}},
					"source": {Kind: &pb.Value_StringValue{StringValue: c.Source}},
					"type":   {Kind: &pb.Value_StringValue{StringValue: c.Type}},
				},
			})
		}
	}
	// Wait=true blocks until the points are durably applied server-side.
	_, err := pointsClient.Upsert(ctx, &pb.UpsertPoints{
		CollectionName: config.Cfg.Qdrant.Collection,
		Points:         points,
		Wait:           boolPtr(true),
	})
	return err
}
func boolPtr(b bool) *bool { return &b }

99
internal/brain/ingest_json.go Executable file
View File

@@ -0,0 +1,99 @@
// ingest_json.go Importiert KI-Bildbeschreibungen aus einer JSON-Datei in Qdrant
package brain
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
pb "github.com/qdrant/go-client/qdrant"
openai "github.com/sashabaranov/go-openai"
"google.golang.org/grpc/metadata"
"my-brain-importer/internal/config"
)
// ImageEntry mirrors one record of the JSON output produced by
// analyze-images.go.
type ImageEntry struct {
	FilePath    string `json:"file_path"`   // absolute or repo-relative image path
	FileName    string `json:"file_name"`   // base name, used as the source label
	Description string `json:"description"` // AI-generated image description to embed
}
// RunIngestJSON imports AI image descriptions from a JSON file into Qdrant.
// Each entry is embedded individually and upserted with a deterministic ID;
// per-entry failures are logged and skipped. Fatal input errors (missing
// file, invalid JSON, empty list) terminate the process.
func RunIngestJSON(inputFile string) {
	fmt.Printf("📂 Lade \"%s\"...\n", inputFile)
	raw, err := os.ReadFile(inputFile)
	if err != nil {
		log.Fatalf("❌ Datei nicht gefunden: %v", err)
	}
	var entries []ImageEntry
	if err := json.Unmarshal(raw, &entries); err != nil {
		log.Fatalf("❌ JSON Fehler: %v", err)
	}
	if len(entries) == 0 {
		log.Fatal("❌ Keine Einträge in JSON")
	}
	fmt.Printf("✅ %d Einträge geladen\n\n", len(entries))
	ctx := context.Background()
	// Qdrant authenticates via the "api-key" gRPC metadata header.
	ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
	conn := config.NewQdrantConn()
	defer conn.Close()
	ensureCollection(ctx, pb.NewCollectionsClient(conn))
	pointsClient := pb.NewPointsClient(conn)
	embClient := config.NewEmbeddingClient()
	fmt.Printf("🤖 Embedding: %s (%s)\n\n", config.Cfg.Embedding.Model, config.Cfg.Embedding.URL)
	success := 0
	for i, entry := range entries {
		fmt.Printf("[%d/%d] 🔄 %s\n", i+1, len(entries), entry.FileName)
		embResp, err := embClient.CreateEmbeddings(ctx, openai.EmbeddingRequest{
			Input: []string{entry.Description},
			Model: openai.EmbeddingModel(config.Cfg.Embedding.Model),
		})
		if err != nil {
			log.Printf(" ❌ Embedding Fehler: %v\n", err)
			continue
		}
		// Defensive: an empty Data slice would panic on Data[0] below.
		if len(embResp.Data) == 0 {
			log.Printf(" ❌ Embedding Fehler: leere Antwort vom Embedding-Server\n")
			continue
		}
		_, err = pointsClient.Upsert(ctx, &pb.UpsertPoints{
			CollectionName: config.Cfg.Qdrant.Collection,
			Points: []*pb.PointStruct{
				{
					Id: &pb.PointId{
						PointIdOptions: &pb.PointId_Uuid{
							// Deterministic ID: re-importing the same image
							// description overwrites instead of duplicating.
							Uuid: generateID(entry.Description, entry.FileName),
						},
					},
					Vectors: &pb.Vectors{
						VectorsOptions: &pb.Vectors_Vector{
							Vector: &pb.Vector{Data: embResp.Data[0].Embedding},
						},
					},
					Payload: map[string]*pb.Value{
						"text":   {Kind: &pb.Value_StringValue{StringValue: entry.Description}},
						"source": {Kind: &pb.Value_StringValue{StringValue: entry.FileName}},
						"path":   {Kind: &pb.Value_StringValue{StringValue: entry.FilePath}},
						"type":   {Kind: &pb.Value_StringValue{StringValue: "image"}},
					},
				},
			},
		})
		if err != nil {
			log.Printf(" ❌ Speichern Fehler: %v\n", err)
		} else {
			success++
		}
	}
	fmt.Printf("\n✅ Fertig: %d von %d Bildern importiert\n", success, len(entries))
	fmt.Printf("🌐 Dashboard: http://%s:6333/dashboard\n", config.Cfg.Qdrant.Host)
}