sync
This commit is contained in:
@@ -51,12 +51,26 @@ func RunIngestJSON(inputFile string) {
|
||||
|
||||
fmt.Printf("🤖 Embedding: %s (%s)\n\n", config.Cfg.Embedding.Model, config.Cfg.Embedding.URL)
|
||||
|
||||
// Batched embedding – identisch zur Markdown-Ingest-Logik
|
||||
batchSize := 10
|
||||
success := 0
|
||||
for i, entry := range entries {
|
||||
fmt.Printf("[%d/%d] 🔄 %s\n", i+1, len(entries), entry.FileName)
|
||||
|
||||
for i := 0; i < len(entries); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(entries) {
|
||||
end = len(entries)
|
||||
}
|
||||
batch := entries[i:end]
|
||||
|
||||
texts := make([]string, len(batch))
|
||||
for j, e := range batch {
|
||||
texts[j] = e.Description
|
||||
}
|
||||
|
||||
fmt.Printf("[%d–%d/%d] 🔄 Embedding-Batch...\n", i+1, end, len(entries))
|
||||
|
||||
embResp, err := embClient.CreateEmbeddings(ctx, openai.EmbeddingRequest{
|
||||
Input: []string{entry.Description},
|
||||
Input: texts,
|
||||
Model: openai.EmbeddingModel(config.Cfg.Embedding.Model),
|
||||
})
|
||||
if err != nil {
|
||||
@@ -64,33 +78,38 @@ func RunIngestJSON(inputFile string) {
|
||||
continue
|
||||
}
|
||||
|
||||
_, err = pointsClient.Upsert(ctx, &pb.UpsertPoints{
|
||||
CollectionName: config.Cfg.Qdrant.Collection,
|
||||
Points: []*pb.PointStruct{
|
||||
{
|
||||
Id: &pb.PointId{
|
||||
PointIdOptions: &pb.PointId_Uuid{
|
||||
Uuid: generateID(entry.Description, entry.FileName),
|
||||
},
|
||||
},
|
||||
Vectors: &pb.Vectors{
|
||||
VectorsOptions: &pb.Vectors_Vector{
|
||||
Vector: &pb.Vector{Data: embResp.Data[0].Embedding},
|
||||
},
|
||||
},
|
||||
Payload: map[string]*pb.Value{
|
||||
"text": {Kind: &pb.Value_StringValue{StringValue: entry.Description}},
|
||||
"source": {Kind: &pb.Value_StringValue{StringValue: entry.FileName}},
|
||||
"path": {Kind: &pb.Value_StringValue{StringValue: entry.FilePath}},
|
||||
"type": {Kind: &pb.Value_StringValue{StringValue: "image"}},
|
||||
var points []*pb.PointStruct
|
||||
for j, emb := range embResp.Data {
|
||||
e := batch[j]
|
||||
points = append(points, &pb.PointStruct{
|
||||
Id: &pb.PointId{
|
||||
PointIdOptions: &pb.PointId_Uuid{
|
||||
Uuid: generateID(e.Description, e.FileName),
|
||||
},
|
||||
},
|
||||
},
|
||||
Vectors: &pb.Vectors{
|
||||
VectorsOptions: &pb.Vectors_Vector{
|
||||
Vector: &pb.Vector{Data: emb.Embedding},
|
||||
},
|
||||
},
|
||||
Payload: map[string]*pb.Value{
|
||||
"text": {Kind: &pb.Value_StringValue{StringValue: e.Description}},
|
||||
"source": {Kind: &pb.Value_StringValue{StringValue: e.FileName}},
|
||||
"path": {Kind: &pb.Value_StringValue{StringValue: e.FilePath}},
|
||||
"type": {Kind: &pb.Value_StringValue{StringValue: "image"}},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
_, err = pointsClient.Upsert(ctx, &pb.UpsertPoints{
|
||||
CollectionName: config.Cfg.Qdrant.Collection,
|
||||
Points: points,
|
||||
Wait: boolPtr(true),
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf(" ❌ Speichern Fehler: %v\n", err)
|
||||
} else {
|
||||
success++
|
||||
success += len(batch)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user