This commit is contained in:
Christoph K.
2026-03-12 17:34:49 +01:00
parent fa5c15b607
commit 92f520101a
5 changed files with 180 additions and 32 deletions

View File

@@ -51,12 +51,26 @@ func RunIngestJSON(inputFile string) {
fmt.Printf("🤖 Embedding: %s (%s)\n\n", config.Cfg.Embedding.Model, config.Cfg.Embedding.URL)
// Batched embedding identisch zur Markdown-Ingest-Logik
batchSize := 10
success := 0
for i, entry := range entries {
fmt.Printf("[%d/%d] 🔄 %s\n", i+1, len(entries), entry.FileName)
for i := 0; i < len(entries); i += batchSize {
end := i + batchSize
if end > len(entries) {
end = len(entries)
}
batch := entries[i:end]
texts := make([]string, len(batch))
for j, e := range batch {
texts[j] = e.Description
}
fmt.Printf("[%d-%d/%d] 🔄 Embedding-Batch...\n", i+1, end, len(entries))
embResp, err := embClient.CreateEmbeddings(ctx, openai.EmbeddingRequest{
Input: []string{entry.Description},
Input: texts,
Model: openai.EmbeddingModel(config.Cfg.Embedding.Model),
})
if err != nil {
@@ -64,33 +78,38 @@ func RunIngestJSON(inputFile string) {
continue
}
_, err = pointsClient.Upsert(ctx, &pb.UpsertPoints{
CollectionName: config.Cfg.Qdrant.Collection,
Points: []*pb.PointStruct{
{
Id: &pb.PointId{
PointIdOptions: &pb.PointId_Uuid{
Uuid: generateID(entry.Description, entry.FileName),
},
},
Vectors: &pb.Vectors{
VectorsOptions: &pb.Vectors_Vector{
Vector: &pb.Vector{Data: embResp.Data[0].Embedding},
},
},
Payload: map[string]*pb.Value{
"text": {Kind: &pb.Value_StringValue{StringValue: entry.Description}},
"source": {Kind: &pb.Value_StringValue{StringValue: entry.FileName}},
"path": {Kind: &pb.Value_StringValue{StringValue: entry.FilePath}},
"type": {Kind: &pb.Value_StringValue{StringValue: "image"}},
var points []*pb.PointStruct
for j, emb := range embResp.Data {
e := batch[j]
points = append(points, &pb.PointStruct{
Id: &pb.PointId{
PointIdOptions: &pb.PointId_Uuid{
Uuid: generateID(e.Description, e.FileName),
},
},
},
Vectors: &pb.Vectors{
VectorsOptions: &pb.Vectors_Vector{
Vector: &pb.Vector{Data: emb.Embedding},
},
},
Payload: map[string]*pb.Value{
"text": {Kind: &pb.Value_StringValue{StringValue: e.Description}},
"source": {Kind: &pb.Value_StringValue{StringValue: e.FileName}},
"path": {Kind: &pb.Value_StringValue{StringValue: e.FilePath}},
"type": {Kind: &pb.Value_StringValue{StringValue: "image"}},
},
})
}
_, err = pointsClient.Upsert(ctx, &pb.UpsertPoints{
CollectionName: config.Cfg.Qdrant.Collection,
Points: points,
Wait: boolPtr(true),
})
if err != nil {
log.Printf(" ❌ Speichern Fehler: %v\n", err)
} else {
success++
success += len(batch)
}
}