Files
ai-agent/internal/brain/ingest_json.go
Christoph K. 92f520101a sync
2026-03-12 17:34:49 +01:00

119 lines
3.3 KiB
Go
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// ingest_json.go - imports AI-generated image descriptions from a JSON file into Qdrant.
package brain
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
pb "github.com/qdrant/go-client/qdrant"
openai "github.com/sashabaranov/go-openai"
"google.golang.org/grpc/metadata"
"my-brain-importer/internal/config"
)
// ImageEntry is a single record of the JSON output written by
// analyze-images.go.
type ImageEntry struct {
	// FilePath is decoded from the "file_path" key.
	FilePath string `json:"file_path"`
	// FileName is decoded from the "file_name" key.
	FileName string `json:"file_name"`
	// Description is decoded from the "description" key.
	Description string `json:"description"`
}
// RunIngestJSON imports image descriptions from a JSON file into Qdrant.
//
// inputFile must contain a JSON array of ImageEntry objects. The descriptions
// are embedded in fixed-size batches and every batch is upserted into the
// collection named by config.Cfg.Qdrant.Collection.
//
// An unreadable file, invalid JSON, or an empty array terminates the process
// via log.Fatal. A batch whose embedding or upsert fails is logged and
// skipped, so the remaining batches are still attempted; the final summary
// reports how many entries were actually stored.
func RunIngestJSON(inputFile string) {
	fmt.Printf("📂 Lade \"%s\"...\n", inputFile)

	raw, err := os.ReadFile(inputFile)
	if err != nil {
		log.Fatalf("❌ Datei nicht gefunden: %v", err)
	}

	var entries []ImageEntry
	if err := json.Unmarshal(raw, &entries); err != nil {
		log.Fatalf("❌ JSON Fehler: %v", err)
	}
	if len(entries) == 0 {
		log.Fatal("❌ Keine Einträge in JSON")
	}
	fmt.Printf("✅ %d Einträge geladen\n\n", len(entries))

	// The Qdrant API key is sent as outgoing gRPC metadata on this context.
	ctx := context.Background()
	ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)

	conn := config.NewQdrantConn()
	defer conn.Close()

	// ensureCollection is defined elsewhere in this package; presumably it
	// creates the target collection when it does not exist yet.
	ensureCollection(ctx, pb.NewCollectionsClient(conn))
	pointsClient := pb.NewPointsClient(conn)

	embClient := config.NewEmbeddingClient()
	fmt.Printf("🤖 Embedding: %s (%s)\n\n", config.Cfg.Embedding.Model, config.Cfg.Embedding.URL)

	// Batched embedding, mirroring the Markdown ingest logic.
	const batchSize = 10
	success := 0
	for i := 0; i < len(entries); i += batchSize {
		end := i + batchSize
		if end > len(entries) {
			end = len(entries)
		}
		batch := entries[i:end]

		texts := make([]string, len(batch))
		for j, e := range batch {
			texts[j] = e.Description
		}

		fmt.Printf("[%d-%d/%d] 🔄 Embedding-Batch...\n", i+1, end, len(entries))
		embResp, err := embClient.CreateEmbeddings(ctx, openai.EmbeddingRequest{
			Input: texts,
			Model: openai.EmbeddingModel(config.Cfg.Embedding.Model),
		})
		if err != nil {
			log.Printf(" ❌ Embedding Fehler: %v\n", err)
			continue
		}
		// Vectors are paired with entries by position, so the response must
		// contain exactly one vector per input. A longer response would index
		// past the batch; a shorter one would silently drop entries.
		if len(embResp.Data) != len(batch) {
			log.Printf(" ❌ Embedding Fehler: %d Vektoren für %d Texte erhalten\n", len(embResp.Data), len(batch))
			continue
		}

		points := make([]*pb.PointStruct, 0, len(batch))
		for j, emb := range embResp.Data {
			e := batch[j]
			points = append(points, &pb.PointStruct{
				Id: &pb.PointId{
					PointIdOptions: &pb.PointId_Uuid{
						Uuid: generateID(e.Description, e.FileName),
					},
				},
				Vectors: &pb.Vectors{
					VectorsOptions: &pb.Vectors_Vector{
						Vector: &pb.Vector{Data: emb.Embedding},
					},
				},
				Payload: map[string]*pb.Value{
					"text":   {Kind: &pb.Value_StringValue{StringValue: e.Description}},
					"source": {Kind: &pb.Value_StringValue{StringValue: e.FileName}},
					"path":   {Kind: &pb.Value_StringValue{StringValue: e.FilePath}},
					"type":   {Kind: &pb.Value_StringValue{StringValue: "image"}},
				},
			})
		}

		_, err = pointsClient.Upsert(ctx, &pb.UpsertPoints{
			CollectionName: config.Cfg.Qdrant.Collection,
			Points:         points,
			Wait:           boolPtr(true),
		})
		if err != nil {
			log.Printf(" ❌ Speichern Fehler: %v\n", err)
			continue
		}
		// Count what was actually written, not what was requested.
		success += len(points)
	}

	fmt.Printf("\n✅ Fertig: %d von %d Bildern importiert\n", success, len(entries))
	fmt.Printf("🌐 Dashboard: http://%s:6333/dashboard\n", config.Cfg.Qdrant.Host)
}