/deepask: Multi-step reasoning with iterative RAG search
New Discord command for deep research in three phases:

1. Initial Qdrant search with the original question
2. The LLM generates follow-up questions and searches again (max. 2 iterations)
3. Synthesis of all collected chunks into a comprehensive answer

Available via /deepask or @bot deepask.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
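For illustration, both entry points take a free-form question; the question text below is a made-up example, not part of the commit:

    /deepask frage: Wie hängen die Projekte in meinen Notizen zusammen?
    @bot deepask Wie hängen die Projekte in meinen Notizen zusammen?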
@@ -64,6 +64,13 @@ var (
 			{Type: discordgo.ApplicationCommandOptionString, Name: "frage", Description: "Die Frage", Required: true},
 		},
 	},
+	{
+		Name:        "deepask",
+		Description: "Tiefe Recherche mit Multi-Step Reasoning (mehrere Suchdurchläufe)",
+		Options: []*discordgo.ApplicationCommandOption{
+			{Type: discordgo.ApplicationCommandOptionString, Name: "frage", Description: "Die Frage", Required: true},
+		},
+	},
 	{
 		Name:        "asknobrain",
 		Description: "Stelle eine Frage direkt ans LLM (ohne Wissensdatenbank)",
@@ -449,6 +456,22 @@ func onInteraction(s *discordgo.Session, i *discordgo.InteractionCreate) {
 			return resp
 		})

+	case "deepask":
+		question := data.Options[0].StringValue()
+		channelID := i.ChannelID
+		handleAgentResponse(s, i, func() agents.Response {
+			resp := researchAgent.Handle(agents.Request{
+				Action:  agents.ActionDeepAsk,
+				Args:    []string{question},
+				History: getHistory(channelID),
+			})
+			if resp.RawAnswer != "" {
+				addToHistory(channelID, "user", question)
+				addToHistory(channelID, "assistant", resp.RawAnswer)
+			}
+			return resp
+		})
+
 	case "asknobrain":
 		handleAskNoBrain(s, i, data.Options[0].StringValue())

@@ -990,6 +1013,18 @@ func routeMessage(text, author, channelID string) agents.Response {
 			Source: "discord/mention",
 		})

+	case "deepask":
+		resp := researchAgent.Handle(agents.Request{
+			Action:  agents.ActionDeepAsk,
+			Args:    args,
+			History: getHistory(channelID),
+		})
+		if resp.RawAnswer != "" {
+			addToHistory(channelID, "user", strings.Join(args, " "))
+			addToHistory(channelID, "assistant", resp.RawAnswer)
+		}
+		return resp
+
 	default:
 		resp := researchAgent.Handle(agents.Request{
 			Action: agents.ActionQuery,
@@ -4,6 +4,7 @@ package agents
 const (
 	// Research
 	ActionQuery = "query"
+	ActionDeepAsk = "deepask"

 	// Memory
 	ActionStore = "store"
@@ -20,6 +20,10 @@ func (a *Agent) Handle(req agents.Request) agents.Response {
 	}
 	question := strings.Join(req.Args, " ")

+	if req.Action == agents.ActionDeepAsk {
+		return a.handleDeepAsk(question, req.History)
+	}
+
 	answer, chunks, err := brain.AskQuery(question, req.History)
 	if err != nil {
 		return agents.Response{Error: err, Text: fmt.Sprintf("❌ Fehler: %v", err)}
@@ -37,3 +41,31 @@ func (a *Agent) Handle(req agents.Request) agents.Response {
 	}
 	return agents.Response{Text: sb.String(), RawAnswer: answer}
 }
+
+// handleDeepAsk runs a deep research pass with multi-step reasoning.
+func (a *Agent) handleDeepAsk(question string, history []agents.HistoryMessage) agents.Response {
+	answer, chunks, err := brain.DeepAskQuery(question, history)
+	if err != nil {
+		return agents.Response{Error: err, Text: fmt.Sprintf("❌ Fehler: %v", err)}
+	}
+	if len(chunks) == 0 {
+		return agents.Response{Text: "❌ Keine relevanten Informationen in der Datenbank gefunden.\nFüge mehr Daten mit `/ingest` hinzu."}
+	}
+
+	// Deduplicate sources, keeping the best score per source.
+	seenSources := make(map[string]float32)
+	for _, chunk := range chunks {
+		if existing, ok := seenSources[chunk.Source]; !ok || chunk.Score > existing {
+			seenSources[chunk.Source] = chunk.Score
+		}
+	}
+
+	var sb strings.Builder
+	fmt.Fprintf(&sb, "🔬 **Tiefe Recherche:** _%s_\n\n", question)
+	sb.WriteString(answer)
+	fmt.Fprintf(&sb, "\n\n📚 **Quellen** (%d Chunks aus %d Quellen):\n", len(chunks), len(seenSources))
+	for source, score := range seenSources {
+		fmt.Fprintf(&sb, "• %.1f%% – %s\n", score*100, source)
+	}
+	return agents.Response{Text: sb.String(), RawAnswer: answer}
+}
internal/brain/deepask.go (new file, 224 lines)
@@ -0,0 +1,224 @@
+// deepask.go – multi-step reasoning: iterative RAG search with follow-up questions
+package brain
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"strings"
+	"time"
+
+	openai "github.com/sashabaranov/go-openai"
+	"google.golang.org/grpc/metadata"
+
+	"my-brain-importer/internal/agents"
+	"my-brain-importer/internal/config"
+)
+
+const maxDeepSteps = 3
+
+// DeepAskQuery runs an iterative RAG search:
+// 1. Initial search → the LLM generates follow-up questions
+// 2. Deepening searches with the follow-up questions (max. 2 iterations)
+// 3. Synthesis: all collected chunks → a comprehensive answer
+func DeepAskQuery(question string, history []agents.HistoryMessage) (string, []KnowledgeChunk, error) {
+	start := time.Now()
+
+	ctx := context.Background()
+	ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
+
+	embClient := config.NewEmbeddingClient()
+	chatClient := config.NewChatClient()
+
+	// Deduplicate chunks across all steps.
+	seen := make(map[string]bool)
+	var allChunks []KnowledgeChunk
+
+	addChunks := func(chunks []KnowledgeChunk) int {
+		added := 0
+		for _, c := range chunks {
+			if !seen[c.Text] {
+				seen[c.Text] = true
+				allChunks = append(allChunks, c)
+				added++
+			}
+		}
+		return added
+	}
+
+	// Phase 1: initial search
+	slog.Info("[DeepAsk] Phase 1: Initiale Suche", "frage", question)
+	initialChunks := searchKnowledge(ctx, embClient, question)
+	addChunks(initialChunks)
+
+	if len(allChunks) == 0 {
+		return "", nil, nil
+	}
+
+	// Phase 2: generate follow-up questions and search again (max. 2 iterations)
+	queries := []string{question}
+	for step := 1; step < maxDeepSteps; step++ {
+		followUps := generateFollowUpQueries(ctx, chatClient, question, allChunks)
+		if len(followUps) == 0 {
+			slog.Info("[DeepAsk] Keine Folgefragen generiert, überspringe", "schritt", step)
+			break
+		}
+
+		slog.Info("[DeepAsk] Phase 2: Vertiefung",
+			"schritt", step,
+			"folgefragen", len(followUps),
+			"fragen", followUps,
+		)
+
+		newFound := 0
+		for _, fq := range followUps {
+			chunks := searchKnowledge(ctx, embClient, fq)
+			newFound += addChunks(chunks)
+			queries = append(queries, fq)
+		}

+		if newFound == 0 {
+			slog.Info("[DeepAsk] Keine neuen Chunks gefunden, beende Vertiefung", "schritt", step)
+			break
+		}
+
+		slog.Info("[DeepAsk] Neue Chunks gefunden", "schritt", step, "neu", newFound, "gesamt", len(allChunks))
+	}
+
+	// Phase 3: synthesis – all chunks + question → comprehensive answer
+	slog.Info("[DeepAsk] Phase 3: Synthese", "chunks", len(allChunks), "schritte", len(queries))
+
+	contextText := buildContext(allChunks)
+	coreMemory := LoadCoreMemory()
+
+	systemPrompt := `Du bist ein hilfreicher persönlicher Assistent mit Zugang zu einer umfangreichen Wissensdatenbank.
+Dir wurden Informationen aus mehreren Suchdurchläufen bereitgestellt.
+
+WICHTIGE REGELN:
+- Nutze ALLE bereitgestellten Informationen für eine umfassende Antwort
+- Verbinde Informationen aus verschiedenen Quellen zu einer kohärenten Antwort
+- Ergänze mit eigenem Wissen wenn sinnvoll, kennzeichne es mit "Aus meinem Wissen:"
+- Antworte auf Deutsch
+- Sei gründlich aber strukturiert`
+
+	if coreMemory != "" {
+		systemPrompt += "\n\n## Fakten über den Nutzer:\n" + coreMemory
+	}
+
+	userPrompt := fmt.Sprintf(`Hier sind relevante Informationen aus mehreren Suchdurchläufen in meiner Wissensdatenbank:
+
+%s
+
+Basierend auf ALLEN diesen Informationen, beantworte bitte umfassend folgende Frage:
+%s`, contextText, question)
+
+	msgs := []openai.ChatCompletionMessage{
+		{Role: openai.ChatMessageRoleSystem, Content: systemPrompt},
+	}
+	for _, h := range history {
+		msgs = append(msgs, openai.ChatCompletionMessage{
+			Role:    h.Role,
+			Content: h.Content,
+		})
+	}
+	msgs = append(msgs, openai.ChatCompletionMessage{
+		Role:    openai.ChatMessageRoleUser,
+		Content: userPrompt,
+	})
+
+	stream, err := chatClient.CreateChatCompletionStream(ctx, openai.ChatCompletionRequest{
+		Model:       config.Cfg.Chat.Model,
+		Messages:    msgs,
+		Temperature: 0.7,
+		MaxTokens:   800,
+	})
+	if err != nil {
+		return "", nil, fmt.Errorf("LLM Fehler: %w", err)
+	}
+	defer stream.Close()
+
+	var answer strings.Builder
+	for {
+		response, err := stream.Recv()
+		if err != nil {
+			break
+		}
+		if len(response.Choices) > 0 {
+			answer.WriteString(response.Choices[0].Delta.Content)
+		}
+	}
+
+	slog.Info("[DeepAsk] Abgeschlossen",
+		"dauer", time.Since(start).Round(time.Millisecond),
+		"chunks_gesamt", len(allChunks),
+		"suchanfragen", len(queries),
+		"antwort_zeichen", answer.Len(),
+	)
+
+	return answer.String(), allChunks, nil
+}
+
+// generateFollowUpQueries has the LLM generate follow-up questions based on the results so far.
+// Returns 0–3 follow-up questions.
+func generateFollowUpQueries(ctx context.Context, chatClient *openai.Client, question string, chunks []KnowledgeChunk) []string {
+	contextText := buildContext(chunks)
+
+	prompt := fmt.Sprintf(`Originalfrage: %s
+
+Bisherige Suchergebnisse:
+%s
+
+Generiere 1-3 Folgefragen, die helfen würden, die Originalfrage besser zu beantworten.
+Jede Folgefrage muss auf einer eigenen Zeile stehen und mit "FOLGEFRAGE:" beginnen.
+Wenn die bisherigen Ergebnisse die Frage bereits vollständig beantworten, schreibe: KEINE FOLGEFRAGEN`, question, contextText)
+
+	resp, err := chatClient.CreateChatCompletion(ctx, openai.ChatCompletionRequest{
+		Model: config.Cfg.Chat.Model,
+		Messages: []openai.ChatCompletionMessage{
+			{Role: openai.ChatMessageRoleSystem, Content: "Du generierst präzise Suchfragen für eine Wissensdatenbank. Antworte NUR im vorgegebenen Format."},
+			{Role: openai.ChatMessageRoleUser, Content: prompt},
+		},
+		Temperature: 0.3,
+		MaxTokens:   300,
+	})
+	if err != nil {
+		slog.Warn("[DeepAsk] Folgefragen-Generierung fehlgeschlagen", "fehler", err)
+		return nil
+	}
+	if len(resp.Choices) == 0 {
+		return nil
+	}
+
+	return parseFollowUpQueries(resp.Choices[0].Message.Content)
+}
+
+// parseFollowUpQueries extracts follow-up questions from the LLM response.
+// Expects lines in the form "FOLGEFRAGE: <frage>".
+func parseFollowUpQueries(response string) []string {
+	// Reasoning models: the answer follows the </think> tag.
+	if idx := strings.LastIndex(response, "</think>"); idx >= 0 {
+		response = response[idx+len("</think>"):]
+	}
+
+	if strings.Contains(strings.ToUpper(response), "KEINE FOLGEFRAGEN") {
+		return nil
+	}
+
+	var queries []string
+	for _, line := range strings.Split(response, "\n") {
+		line = strings.TrimSpace(line)
+		upper := strings.ToUpper(line)
+		if strings.HasPrefix(upper, "FOLGEFRAGE:") {
+			q := strings.TrimSpace(line[len("FOLGEFRAGE:"):])
+			if q != "" {
+				queries = append(queries, q)
+			}
+		}
+	}
+
+	// At most 3 follow-up questions.
+	if len(queries) > 3 {
+		queries = queries[:3]
+	}
+	return queries
+}
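The "FOLGEFRAGE:" line protocol above is the contract between generateFollowUpQueries and parseFollowUpQueries. As a minimal sketch of how the parser could be verified, a table-driven test in package brain might look like this; the file name and test inputs are illustrative assumptions, not part of the commit:

    // internal/brain/deepask_test.go (hypothetical)
    package brain

    import (
    	"reflect"
    	"testing"
    )

    func TestParseFollowUpQueries(t *testing.T) {
    	cases := []struct {
    		in   string
    		want []string
    	}{
    		// One follow-up question per line, prefix stripped.
    		{"FOLGEFRAGE: Wann wurde X gestartet?\nFOLGEFRAGE: Wer war beteiligt?",
    			[]string{"Wann wurde X gestartet?", "Wer war beteiligt?"}},
    		// The termination marker yields no follow-ups.
    		{"KEINE FOLGEFRAGEN", nil},
    		// Reasoning models: only text after the last </think> tag is parsed.
    		{"<think>erst nachdenken</think>\nFOLGEFRAGE: Was ist Y?",
    			[]string{"Was ist Y?"}},
    		// More than three questions are capped at three.
    		{"FOLGEFRAGE: a\nFOLGEFRAGE: b\nFOLGEFRAGE: c\nFOLGEFRAGE: d",
    			[]string{"a", "b", "c"}},
    	}
    	for _, c := range cases {
    		if got := parseFollowUpQueries(c.in); !reflect.DeepEqual(got, c.want) {
    			t.Errorf("parseFollowUpQueries(%q) = %v, want %v", c.in, got, c.want)
    		}
    	}
    }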