zwischenstand
This commit is contained in:
@@ -41,7 +41,8 @@ func AskQuery(question string, history []agents.HistoryMessage) (string, []Knowl
|
||||
|
||||
contextText := buildContext(chunks)
|
||||
|
||||
systemPrompt := `Du bist ein hilfreicher persönlicher Assistent.
|
||||
coreMemory := LoadCoreMemory()
|
||||
systemPromptBase := `Du bist ein hilfreicher persönlicher Assistent.
|
||||
Beantworte Fragen primär anhand der bereitgestellten Informationen aus der Wissensdatenbank.
|
||||
Ergänze fehlende Details mit deinem eigenen Wissen, kennzeichne dies aber klar mit "Aus meinem Wissen:".
|
||||
|
||||
@@ -50,6 +51,10 @@ WICHTIGE REGELN:
|
||||
- Ergänze mit eigenem Wissen wenn sinnvoll, kennzeichne es deutlich
|
||||
- Antworte auf Deutsch
|
||||
- Sei präzise und direkt`
|
||||
systemPrompt := systemPromptBase
|
||||
if coreMemory != "" {
|
||||
systemPrompt = systemPromptBase + "\n\n## Fakten über den Nutzer:\n" + coreMemory
|
||||
}
|
||||
|
||||
userPrompt := fmt.Sprintf(`Hier sind die relevanten Informationen aus meiner Wissensdatenbank:
|
||||
|
||||
|
||||
54
internal/brain/core_memory.go
Normal file
54
internal/brain/core_memory.go
Normal file
@@ -0,0 +1,54 @@
|
||||
// core_memory.go – Persistente Kernfakten über den Nutzer (core_memory.md)
|
||||
package brain
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"my-brain-importer/internal/config"
|
||||
)
|
||||
|
||||
// CoreMemoryPath gibt den Pfad zur core_memory.md-Datei zurück.
|
||||
func CoreMemoryPath() string {
|
||||
return filepath.Join(config.Cfg.BrainRoot, "core_memory.md")
|
||||
}
|
||||
|
||||
// LoadCoreMemory liest den Inhalt der core_memory.md-Datei.
|
||||
// Gibt leeren String zurück wenn die Datei nicht existiert.
|
||||
func LoadCoreMemory() string {
|
||||
data, err := os.ReadFile(CoreMemoryPath())
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(string(data))
|
||||
}
|
||||
|
||||
// AppendCoreMemory fügt einen Fakt zur core_memory.md-Datei hinzu.
|
||||
func AppendCoreMemory(text string) error {
|
||||
path := CoreMemoryPath()
|
||||
// Datei erstellen falls nicht vorhanden, sonst anhängen
|
||||
f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("core_memory.md öffnen: %w", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// Führenden Bindestrich ergänzen wenn nicht vorhanden
|
||||
line := strings.TrimSpace(text)
|
||||
if !strings.HasPrefix(line, "-") {
|
||||
line = "- " + line
|
||||
}
|
||||
_, err = fmt.Fprintf(f, "%s\n", line)
|
||||
return err
|
||||
}
|
||||
|
||||
// ShowCoreMemory gibt den Inhalt der core_memory.md als formatierte Nachricht zurück.
|
||||
func ShowCoreMemory() string {
|
||||
content := LoadCoreMemory()
|
||||
if content == "" {
|
||||
return "📭 Keine Kernfakten gespeichert. Nutze `/memory profile <text>` um Fakten hinzuzufügen."
|
||||
}
|
||||
return fmt.Sprintf("🧠 **Kerngedächtnis:**\n```\n%s\n```", content)
|
||||
}
|
||||
@@ -256,3 +256,30 @@ func IngestChatMessage(text, author, source string) error {
|
||||
}
|
||||
|
||||
// boolPtr returns a pointer to a fresh copy of b, e.g. for request fields
// that are declared as *bool.
func boolPtr(b bool) *bool {
	v := b
	return &v
}
|
||||
|
||||
// IngestText speichert einen beliebigen Text mit Quelle und Typ in Qdrant.
|
||||
// Verwendet die gleiche Chunking-Logik wie der Markdown-Ingest.
|
||||
func IngestText(text, source, docType string) error {
|
||||
ctx := context.Background()
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
|
||||
|
||||
embClient := config.NewEmbeddingClient()
|
||||
conn := config.NewQdrantConn()
|
||||
defer conn.Close()
|
||||
|
||||
ensureCollection(ctx, pb.NewCollectionsClient(conn))
|
||||
pointsClient := pb.NewPointsClient(conn)
|
||||
|
||||
var chunks []chunk
|
||||
for _, part := range splitLongSection(text) {
|
||||
part = strings.TrimSpace(part)
|
||||
if len(part) < 20 {
|
||||
continue
|
||||
}
|
||||
chunks = append(chunks, chunk{Text: part, Source: source, Type: docType})
|
||||
}
|
||||
if len(chunks) == 0 {
|
||||
return nil
|
||||
}
|
||||
return ingestChunks(ctx, embClient, pointsClient, chunks)
|
||||
}
|
||||
|
||||
98
internal/brain/ingest_email.go
Normal file
98
internal/brain/ingest_email.go
Normal file
@@ -0,0 +1,98 @@
|
||||
// ingest_email.go – Importiert Emails aus einem IMAP-Ordner in Qdrant
|
||||
package brain
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
pb "github.com/qdrant/go-client/qdrant"
|
||||
"google.golang.org/grpc/metadata"
|
||||
|
||||
"my-brain-importer/internal/agents/tool/email"
|
||||
"my-brain-importer/internal/config"
|
||||
)
|
||||
|
||||
// IngestEmailFolder importiert alle Emails aus einem IMAP-Ordner in Qdrant.
|
||||
// Gibt Anzahl der importierten Emails zurück.
|
||||
// maxEmails = 0 bedeutet: alle (bis max. 500).
|
||||
func IngestEmailFolder(acc config.EmailAccount, folder string, maxEmails uint32) (int, error) {
|
||||
if maxEmails == 0 {
|
||||
maxEmails = 500
|
||||
}
|
||||
|
||||
cl, err := email.ConnectAccount(acc)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("IMAP-Verbindung: %w", err)
|
||||
}
|
||||
defer cl.Close()
|
||||
|
||||
slog.Info("Email-Ingest: Lade Emails", "account", acc.Name, "folder", folder, "max", maxEmails)
|
||||
msgs, err := cl.FetchWithBody(folder, maxEmails)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Emails laden: %w", err)
|
||||
}
|
||||
if len(msgs) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
|
||||
|
||||
embClient := config.NewEmbeddingClient()
|
||||
conn := config.NewQdrantConn()
|
||||
defer conn.Close()
|
||||
|
||||
ensureCollection(ctx, pb.NewCollectionsClient(conn))
|
||||
pointsClient := pb.NewPointsClient(conn)
|
||||
|
||||
var chunks []chunk
|
||||
for _, m := range msgs {
|
||||
text := formatEmailForIngest(m)
|
||||
if len(strings.TrimSpace(text)) < 20 {
|
||||
continue
|
||||
}
|
||||
source := fmt.Sprintf("email/%s/%s", folder, m.Date)
|
||||
chunks = append(chunks, chunk{Text: text, Source: source, Type: "email"})
|
||||
}
|
||||
|
||||
if len(chunks) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
slog.Info("Email-Ingest: Starte Embedding", "emails", len(msgs), "chunks", len(chunks))
|
||||
|
||||
// In Batches von 20 ingesten (Embeddings können langsam sein)
|
||||
ingested := 0
|
||||
for i := 0; i < len(chunks); i += 20 {
|
||||
end := i + 20
|
||||
if end > len(chunks) {
|
||||
end = len(chunks)
|
||||
}
|
||||
batch := chunks[i:end]
|
||||
if err := ingestChunks(ctx, embClient, pointsClient, batch); err != nil {
|
||||
slog.Warn("Email-Ingest Batch-Fehler", "batch_start", i, "fehler", err)
|
||||
continue
|
||||
}
|
||||
ingested += len(batch)
|
||||
slog.Info("Email-Ingest Fortschritt", "ingested", ingested, "total", len(chunks))
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
}
|
||||
|
||||
return ingested, nil
|
||||
}
|
||||
|
||||
// formatEmailForIngest formatiert eine Email als durchsuchbaren Text.
|
||||
func formatEmailForIngest(m email.MessageWithBody) string {
|
||||
var sb strings.Builder
|
||||
fmt.Fprintf(&sb, "Betreff: %s\n", m.Subject)
|
||||
fmt.Fprintf(&sb, "Von: %s\n", m.From)
|
||||
fmt.Fprintf(&sb, "Datum: %s\n", m.Date)
|
||||
if m.Body != "" {
|
||||
sb.WriteString("\n")
|
||||
sb.WriteString(m.Body)
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
82
internal/brain/ingest_pdf.go
Normal file
82
internal/brain/ingest_pdf.go
Normal file
@@ -0,0 +1,82 @@
|
||||
// ingest_pdf.go – Extrahiert Text aus einer PDF-Datei und importiert ihn in Qdrant
|
||||
package brain
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/ledongthuc/pdf"
|
||||
pb "github.com/qdrant/go-client/qdrant"
|
||||
"google.golang.org/grpc/metadata"
|
||||
|
||||
"my-brain-importer/internal/config"
|
||||
)
|
||||
|
||||
// IngestPDF extrahiert Text aus einer PDF-Datei und importiert ihn in Qdrant.
|
||||
// source ist der Anzeigename der Quelle (z.B. Dateiname).
|
||||
// Gibt Anzahl der importierten Chunks zurück.
|
||||
func IngestPDF(filePath, source string) (int, error) {
|
||||
text, err := extractPDFText(filePath)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("PDF-Parsing fehlgeschlagen: %w", err)
|
||||
}
|
||||
|
||||
text = strings.TrimSpace(text)
|
||||
if len(text) < 20 {
|
||||
return 0, fmt.Errorf("kein verwertbarer Text in PDF gefunden")
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
|
||||
|
||||
embClient := config.NewEmbeddingClient()
|
||||
conn := config.NewQdrantConn()
|
||||
defer conn.Close()
|
||||
|
||||
ensureCollection(ctx, pb.NewCollectionsClient(conn))
|
||||
pointsClient := pb.NewPointsClient(conn)
|
||||
|
||||
var chunks []chunk
|
||||
for _, part := range splitLongSection(text) {
|
||||
part = strings.TrimSpace(part)
|
||||
if len(part) < 20 {
|
||||
continue
|
||||
}
|
||||
chunks = append(chunks, chunk{Text: part, Source: source, Type: "pdf"})
|
||||
}
|
||||
|
||||
if len(chunks) == 0 {
|
||||
return 0, fmt.Errorf("kein verwertbarer Inhalt nach Aufteilung")
|
||||
}
|
||||
|
||||
if err := ingestChunks(ctx, embClient, pointsClient, chunks); err != nil {
|
||||
return 0, fmt.Errorf("Ingest fehlgeschlagen: %w", err)
|
||||
}
|
||||
return len(chunks), nil
|
||||
}
|
||||
|
||||
// extractPDFText liest alle Seiten einer PDF-Datei und gibt den Text zurück.
|
||||
func extractPDFText(filePath string) (string, error) {
|
||||
f, r, err := pdf.Open(filePath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var sb strings.Builder
|
||||
totalPages := r.NumPage()
|
||||
for pageNum := 1; pageNum <= totalPages; pageNum++ {
|
||||
page := r.Page(pageNum)
|
||||
if page.V.IsNull() {
|
||||
continue
|
||||
}
|
||||
text, err := page.GetPlainText(nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
sb.WriteString(text)
|
||||
sb.WriteString("\n")
|
||||
}
|
||||
return sb.String(), nil
|
||||
}
|
||||
124
internal/brain/ingest_url.go
Normal file
124
internal/brain/ingest_url.go
Normal file
@@ -0,0 +1,124 @@
|
||||
// ingest_url.go – Fetcht eine URL und importiert den Textinhalt in Qdrant
|
||||
package brain
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
pb "github.com/qdrant/go-client/qdrant"
|
||||
"golang.org/x/net/html"
|
||||
"google.golang.org/grpc/metadata"
|
||||
|
||||
"my-brain-importer/internal/config"
|
||||
)
|
||||
|
||||
// IngestURL fetcht eine URL, extrahiert den Textinhalt und importiert ihn in Qdrant.
|
||||
// Gibt Anzahl der importierten Chunks zurück.
|
||||
func IngestURL(rawURL string) (int, error) {
|
||||
client := &http.Client{Timeout: 30 * time.Second}
|
||||
resp, err := client.Get(rawURL)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("HTTP-Fehler: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return 0, fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
|
||||
}
|
||||
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
var text string
|
||||
if strings.Contains(contentType, "text/html") {
|
||||
text, err = extractHTMLText(resp.Body)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("HTML-Parsing fehlgeschlagen: %w", err)
|
||||
}
|
||||
} else {
|
||||
raw, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) // max 1MB
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Lesen fehlgeschlagen: %w", err)
|
||||
}
|
||||
text = string(raw)
|
||||
}
|
||||
|
||||
text = strings.TrimSpace(text)
|
||||
if len(text) < 20 {
|
||||
return 0, fmt.Errorf("kein verwertbarer Inhalt gefunden")
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
|
||||
|
||||
embClient := config.NewEmbeddingClient()
|
||||
conn := config.NewQdrantConn()
|
||||
defer conn.Close()
|
||||
|
||||
ensureCollection(ctx, pb.NewCollectionsClient(conn))
|
||||
pointsClient := pb.NewPointsClient(conn)
|
||||
|
||||
var chunks []chunk
|
||||
for _, part := range splitLongSection(text) {
|
||||
part = strings.TrimSpace(part)
|
||||
if len(part) < 20 {
|
||||
continue
|
||||
}
|
||||
chunks = append(chunks, chunk{Text: part, Source: rawURL, Type: "url"})
|
||||
}
|
||||
|
||||
if len(chunks) == 0 {
|
||||
return 0, fmt.Errorf("kein verwertbarer Inhalt nach Aufteilung")
|
||||
}
|
||||
|
||||
if err := ingestChunks(ctx, embClient, pointsClient, chunks); err != nil {
|
||||
return 0, fmt.Errorf("Ingest fehlgeschlagen: %w", err)
|
||||
}
|
||||
return len(chunks), nil
|
||||
}
|
||||
|
||||
// extractHTMLText extrahiert sichtbaren Text aus einem HTML-Dokument.
|
||||
func extractHTMLText(r io.Reader) (string, error) {
|
||||
doc, err := html.Parse(r)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
var sb strings.Builder
|
||||
extractTextNode(doc, &sb)
|
||||
// Mehrfach-Leerzeilen reduzieren
|
||||
lines := strings.Split(sb.String(), "\n")
|
||||
var cleaned []string
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if line != "" {
|
||||
cleaned = append(cleaned, line)
|
||||
}
|
||||
}
|
||||
return strings.Join(cleaned, "\n"), nil
|
||||
}
|
||||
|
||||
// skipTags lists the HTML element names whose content is not extracted.
var skipTags = map[string]bool{
	"script":   true,
	"style":    true,
	"noscript": true,
	"head":     true,
	"meta":     true,
	"link":     true,
	"nav":      true,
	"footer":   true,
	"header":   true,
}
|
||||
|
||||
func extractTextNode(n *html.Node, sb *strings.Builder) {
|
||||
if n.Type == html.TextNode {
|
||||
text := strings.TrimSpace(n.Data)
|
||||
if text != "" {
|
||||
sb.WriteString(text)
|
||||
sb.WriteString("\n")
|
||||
}
|
||||
return
|
||||
}
|
||||
if n.Type == html.ElementNode && skipTags[n.Data] {
|
||||
return
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
extractTextNode(c, sb)
|
||||
}
|
||||
}
|
||||
108
internal/brain/knowledge.go
Normal file
108
internal/brain/knowledge.go
Normal file
@@ -0,0 +1,108 @@
|
||||
// knowledge.go – Listet und löscht Einträge in der Qdrant-Wissensdatenbank
|
||||
package brain
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
pb "github.com/qdrant/go-client/qdrant"
|
||||
"google.golang.org/grpc/metadata"
|
||||
|
||||
"my-brain-importer/internal/config"
|
||||
)
|
||||
|
||||
// ListSources gibt alle eindeutigen Quellen in der Wissensdatenbank zurück.
|
||||
// Limit begrenzt die Anzahl der zu scrollenden Punkte (0 = Standard 1000).
|
||||
func ListSources(limit uint32) ([]string, error) {
|
||||
if limit == 0 {
|
||||
limit = 1000
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
|
||||
|
||||
conn := config.NewQdrantConn()
|
||||
defer conn.Close()
|
||||
|
||||
pointsClient := pb.NewPointsClient(conn)
|
||||
|
||||
seen := map[string]bool{}
|
||||
var offset *pb.PointId
|
||||
|
||||
for {
|
||||
req := &pb.ScrollPoints{
|
||||
CollectionName: config.Cfg.Qdrant.Collection,
|
||||
WithPayload: &pb.WithPayloadSelector{
|
||||
SelectorOptions: &pb.WithPayloadSelector_Include{
|
||||
Include: &pb.PayloadIncludeSelector{Fields: []string{"source"}},
|
||||
},
|
||||
},
|
||||
Limit: uint32Ptr(250),
|
||||
}
|
||||
if offset != nil {
|
||||
req.Offset = offset
|
||||
}
|
||||
|
||||
result, err := pointsClient.Scroll(ctx, req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Scroll fehlgeschlagen: %w", err)
|
||||
}
|
||||
|
||||
for _, pt := range result.Result {
|
||||
if src := pt.Payload["source"].GetStringValue(); src != "" {
|
||||
seen[src] = true
|
||||
}
|
||||
}
|
||||
|
||||
if result.NextPageOffset == nil || uint32(len(seen)) >= limit {
|
||||
break
|
||||
}
|
||||
offset = result.NextPageOffset
|
||||
}
|
||||
|
||||
sources := make([]string, 0, len(seen))
|
||||
for s := range seen {
|
||||
sources = append(sources, s)
|
||||
}
|
||||
sort.Strings(sources)
|
||||
return sources, nil
|
||||
}
|
||||
|
||||
// DeleteBySource löscht alle Punkte mit dem gegebenen Quellennamen aus Qdrant.
|
||||
// Gibt Anzahl gelöschter Punkte zurück (Qdrant liefert keine genaue Zahl — gibt 0 zurück wenn erfolgreich).
|
||||
func DeleteBySource(source string) error {
|
||||
ctx := context.Background()
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
|
||||
|
||||
conn := config.NewQdrantConn()
|
||||
defer conn.Close()
|
||||
|
||||
pointsClient := pb.NewPointsClient(conn)
|
||||
|
||||
_, err := pointsClient.Delete(ctx, &pb.DeletePoints{
|
||||
CollectionName: config.Cfg.Qdrant.Collection,
|
||||
Points: &pb.PointsSelector{
|
||||
PointsSelectorOneOf: &pb.PointsSelector_Filter{
|
||||
Filter: &pb.Filter{
|
||||
Must: []*pb.Condition{
|
||||
{
|
||||
ConditionOneOf: &pb.Condition_Field{
|
||||
Field: &pb.FieldCondition{
|
||||
Key: "source",
|
||||
Match: &pb.Match{
|
||||
MatchValue: &pb.Match_Keyword{Keyword: source},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Wait: boolPtr(true),
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
// uint32Ptr returns a pointer to a fresh copy of v, e.g. for request fields
// that are declared as *uint32.
func uint32Ptr(v uint32) *uint32 {
	out := v
	return &out
}
|
||||
Reference in New Issue
Block a user