Email-Triage: Lernen aus IMAP-Ordnern, manuelle Korrektur, reichere Daten
- Automatisches Triage-Lernen aus Archiv-Ordnern im Nacht-Ingest: retention_days=0 (Archiv) → wichtig, retention_days>0 → unwichtig - Drei neue Discord-Commands: /email triage-history, triage-correct, triage-search - StoreDecision speichert jetzt Datum + Body-Zusammenfassung (max 200 Zeichen) - MIME-Multipart-Parsing mit PDF-Attachment-Extraktion (FetchWithBodyAndAttachments) - Deterministische IDs basierend auf Absender+Betreff (idempotente Upserts) - Rueckwaertskompatibles Parsing fuer alte Triage-Eintraege ohne Datum/Body Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -210,6 +210,30 @@ var (
|
||||
Name: "triage",
|
||||
Description: "Letzte 10 Emails klassifizieren und in Wichtig/Unwichtig verschieben",
|
||||
},
|
||||
{
|
||||
Type: discordgo.ApplicationCommandOptionSubCommand,
|
||||
Name: "triage-history",
|
||||
Description: "Letzte Triage-Entscheidungen anzeigen",
|
||||
Options: []*discordgo.ApplicationCommandOption{
|
||||
{Type: discordgo.ApplicationCommandOptionInteger, Name: "anzahl", Description: "Anzahl (Standard: 10)", Required: false},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: discordgo.ApplicationCommandOptionSubCommand,
|
||||
Name: "triage-correct",
|
||||
Description: "Triage-Entscheidung korrigieren (wichtig↔unwichtig umkehren)",
|
||||
Options: []*discordgo.ApplicationCommandOption{
|
||||
{Type: discordgo.ApplicationCommandOptionString, Name: "betreff", Description: "Email-Betreff (Teilstring reicht)", Required: true},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: discordgo.ApplicationCommandOptionSubCommand,
|
||||
Name: "triage-search",
|
||||
Description: "Ähnliche Triage-Entscheidungen suchen",
|
||||
Options: []*discordgo.ApplicationCommandOption{
|
||||
{Type: discordgo.ApplicationCommandOptionString, Name: "query", Description: "Suchbegriff", Required: true},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: discordgo.ApplicationCommandOptionSubCommand,
|
||||
Name: "move",
|
||||
@@ -570,8 +594,17 @@ func handleEmailCommand(s *discordgo.Session, i *discordgo.InteractionCreate) {
|
||||
}
|
||||
|
||||
args := []string{sub.Name}
|
||||
if sub.Name == agents.ActionEmailIngest && len(sub.Options) > 0 {
|
||||
args = append(args, sub.Options[0].StringValue())
|
||||
if len(sub.Options) > 0 {
|
||||
switch sub.Name {
|
||||
case agents.ActionEmailIngest:
|
||||
args = append(args, sub.Options[0].StringValue())
|
||||
case agents.ActionEmailTriageHistory:
|
||||
args = append(args, fmt.Sprintf("%d", sub.Options[0].IntValue()))
|
||||
case agents.ActionEmailTriageCorrect:
|
||||
args = append(args, sub.Options[0].StringValue())
|
||||
case agents.ActionEmailTriageSearch:
|
||||
args = append(args, sub.Options[0].StringValue())
|
||||
}
|
||||
}
|
||||
handleAgentResponse(s, i, func() agents.Response {
|
||||
return toolAgent.Handle(agents.Request{Action: agents.ActionEmail, Args: args})
|
||||
@@ -1215,14 +1248,34 @@ func nightlyIngest(channelID string) {
|
||||
}
|
||||
}
|
||||
|
||||
// Triage-Lernen: Entscheidungen aus Ordnerstruktur ableiten
|
||||
wichtig, unwichtig, learnErr := email.LearnFromFoldersAllAccounts()
|
||||
if learnErr != nil {
|
||||
slog.Error("Triage-Lernen Fehler", "fehler", learnErr)
|
||||
} else if wichtig+unwichtig > 0 {
|
||||
slog.Info("Triage-Lernen abgeschlossen", "wichtig", wichtig, "unwichtig", unwichtig)
|
||||
}
|
||||
|
||||
if channelID == "" {
|
||||
return
|
||||
}
|
||||
msg := ""
|
||||
if len(errs) > 0 {
|
||||
dg.ChannelMessageSend(channelID, fmt.Sprintf("⚠️ Nacht-Ingest: %d Emails importiert, %d Fehler:\n%s",
|
||||
total, len(errs), strings.Join(errs, "\n")))
|
||||
msg = fmt.Sprintf("⚠️ Nacht-Ingest: %d Emails importiert, %d Fehler:\n%s",
|
||||
total, len(errs), strings.Join(errs, "\n"))
|
||||
} else if total > 0 {
|
||||
dg.ChannelMessageSend(channelID, fmt.Sprintf("🗄️ Nacht-Ingest: %d Emails aus Archiv-Ordnern importiert.", total))
|
||||
msg = fmt.Sprintf("🗄️ Nacht-Ingest: %d Emails aus Archiv-Ordnern importiert.", total)
|
||||
}
|
||||
if wichtig+unwichtig > 0 {
|
||||
learnMsg := fmt.Sprintf("🧠 Triage-Lernen: %d wichtig, %d unwichtig gelernt.", wichtig, unwichtig)
|
||||
if msg != "" {
|
||||
msg += "\n" + learnMsg
|
||||
} else {
|
||||
msg = learnMsg
|
||||
}
|
||||
}
|
||||
if msg != "" {
|
||||
dg.ChannelMessageSend(channelID, msg)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -32,5 +32,8 @@ const (
|
||||
ActionEmailRemind = "remind"
|
||||
ActionEmailIngest = "ingest"
|
||||
ActionEmailMove = "move"
|
||||
ActionEmailTriage = "triage"
|
||||
ActionEmailTriage = "triage"
|
||||
ActionEmailTriageHistory = "triage-history"
|
||||
ActionEmailTriageCorrect = "triage-correct"
|
||||
ActionEmailTriageSearch = "triage-search"
|
||||
)
|
||||
|
||||
@@ -3,12 +3,14 @@ package tool
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"my-brain-importer/internal/agents"
|
||||
"my-brain-importer/internal/agents/tool/email"
|
||||
"my-brain-importer/internal/brain"
|
||||
"my-brain-importer/internal/config"
|
||||
"my-brain-importer/internal/triage"
|
||||
)
|
||||
|
||||
// Agent verteilt Tool-Anfragen an spezialisierte Sub-Agenten.
|
||||
@@ -50,8 +52,14 @@ func (a *Agent) handleEmail(req agents.Request) agents.Response {
|
||||
return a.handleEmailMove(req)
|
||||
case agents.ActionEmailTriage:
|
||||
return a.handleEmailTriage()
|
||||
case agents.ActionEmailTriageHistory:
|
||||
return a.handleTriageHistory(req)
|
||||
case agents.ActionEmailTriageCorrect:
|
||||
return a.handleTriageCorrect(req)
|
||||
case agents.ActionEmailTriageSearch:
|
||||
return a.handleTriageSearch(req)
|
||||
default:
|
||||
return agents.Response{Text: fmt.Sprintf("❌ Unbekannte Email-Aktion `%s`. Verfügbar: summary, unread, remind, ingest, move, triage", subAction)}
|
||||
return agents.Response{Text: fmt.Sprintf("❌ Unbekannte Email-Aktion `%s`. Verfügbar: summary, unread, remind, ingest, move, triage, triage-history, triage-correct, triage-search", subAction)}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -147,6 +155,68 @@ func (a *Agent) handleEmailTriage() agents.Response {
|
||||
return agents.Response{Text: "🗂️ **Email-Triage (letzte 10 Emails):**\n\n" + result}
|
||||
}
|
||||
|
||||
// handleTriageHistory zeigt die letzten N Triage-Entscheidungen.
|
||||
func (a *Agent) handleTriageHistory(req agents.Request) agents.Response {
|
||||
limit := uint32(10)
|
||||
if len(req.Args) > 1 && req.Args[1] != "" {
|
||||
if n, err := strconv.ParseUint(req.Args[1], 10, 32); err == nil && n > 0 {
|
||||
limit = uint32(n)
|
||||
}
|
||||
}
|
||||
|
||||
results, err := triage.ListRecent(limit)
|
||||
if err != nil {
|
||||
return agents.Response{Error: err, Text: fmt.Sprintf("❌ Triage-History fehlgeschlagen: %v", err)}
|
||||
}
|
||||
if len(results) == 0 {
|
||||
return agents.Response{Text: "📭 Keine Triage-Entscheidungen gespeichert."}
|
||||
}
|
||||
|
||||
var sb strings.Builder
|
||||
fmt.Fprintf(&sb, "🗂️ **Triage-History (%d Einträge):**\n\n", len(results))
|
||||
for i, r := range results {
|
||||
fmt.Fprintf(&sb, "**%d.** %s\n", i+1, r.Text)
|
||||
}
|
||||
return agents.Response{Text: sb.String()}
|
||||
}
|
||||
|
||||
// handleTriageCorrect korrigiert eine Triage-Entscheidung (wichtig↔unwichtig).
|
||||
func (a *Agent) handleTriageCorrect(req agents.Request) agents.Response {
|
||||
if len(req.Args) < 2 || req.Args[1] == "" {
|
||||
return agents.Response{Text: "❌ Betreff fehlt. Beispiel: `/email triage-correct Newsletter`"}
|
||||
}
|
||||
query := strings.Join(req.Args[1:], " ")
|
||||
|
||||
msg, err := triage.CorrectDecision(query)
|
||||
if err != nil {
|
||||
return agents.Response{Error: err, Text: fmt.Sprintf("❌ Korrektur fehlgeschlagen: %v", err)}
|
||||
}
|
||||
return agents.Response{Text: "✅ " + msg}
|
||||
}
|
||||
|
||||
// handleTriageSearch sucht ähnliche Triage-Entscheidungen.
|
||||
func (a *Agent) handleTriageSearch(req agents.Request) agents.Response {
|
||||
if len(req.Args) < 2 || req.Args[1] == "" {
|
||||
return agents.Response{Text: "❌ Suchbegriff fehlt. Beispiel: `/email triage-search Newsletter`"}
|
||||
}
|
||||
query := strings.Join(req.Args[1:], " ")
|
||||
|
||||
results, err := triage.SearchExtended(query, 10)
|
||||
if err != nil {
|
||||
return agents.Response{Error: err, Text: fmt.Sprintf("❌ Triage-Suche fehlgeschlagen: %v", err)}
|
||||
}
|
||||
if len(results) == 0 {
|
||||
return agents.Response{Text: "📭 Keine ähnlichen Triage-Entscheidungen gefunden."}
|
||||
}
|
||||
|
||||
var sb strings.Builder
|
||||
fmt.Fprintf(&sb, "🔍 **Triage-Suche** (query: `%s`, %d Treffer):\n\n", query, len(results))
|
||||
for i, r := range results {
|
||||
fmt.Fprintf(&sb, "**%d.** [%.0f%%] %s\n", i+1, r.Score*100, r.Text)
|
||||
}
|
||||
return agents.Response{Text: sb.String()}
|
||||
}
|
||||
|
||||
// ResolveArchiveFolder ist die exportierte Version von resolveArchiveFolder für den Discord-Layer.
|
||||
func ResolveArchiveFolder(name string) (imapFolder string, ok bool) {
|
||||
return resolveArchiveFolder(name)
|
||||
|
||||
@@ -2,16 +2,23 @@
|
||||
package email
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/tls"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"mime"
|
||||
"mime/multipart"
|
||||
"mime/quotedprintable"
|
||||
"net/mail"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
imap "github.com/emersion/go-imap/v2"
|
||||
"github.com/emersion/go-imap/v2/imapclient"
|
||||
"github.com/ledongthuc/pdf"
|
||||
|
||||
"my-brain-importer/internal/config"
|
||||
)
|
||||
@@ -144,6 +151,36 @@ func (cl *Client) FetchRecent(n uint32) ([]Message, error) {
|
||||
return parseMessages(msgs), nil
|
||||
}
|
||||
|
||||
// FetchRecentFromFolder holt die letzten n Emails aus einem bestimmten IMAP-Ordner.
|
||||
func (cl *Client) FetchRecentFromFolder(folder string, n uint32) ([]Message, error) {
|
||||
if folder == "" {
|
||||
folder = "INBOX"
|
||||
}
|
||||
|
||||
selectData, err := cl.c.Select(folder, &imap.SelectOptions{ReadOnly: true}).Wait()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("IMAP select %s: %w", folder, err)
|
||||
}
|
||||
if selectData.NumMessages == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
start := uint32(1)
|
||||
if selectData.NumMessages > n {
|
||||
start = selectData.NumMessages - n + 1
|
||||
}
|
||||
|
||||
var seqSet imap.SeqSet
|
||||
seqSet.AddRange(start, selectData.NumMessages)
|
||||
|
||||
msgs, err := cl.c.Fetch(seqSet, &imap.FetchOptions{Envelope: true}).Collect()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("IMAP fetch %s: %w", folder, err)
|
||||
}
|
||||
|
||||
return parseMessages(msgs), nil
|
||||
}
|
||||
|
||||
// FetchUnread holt ungelesene Emails (Envelope-Daten, kein Body).
|
||||
func (cl *Client) FetchUnread() ([]Message, error) {
|
||||
folder := cl.folder
|
||||
@@ -546,6 +583,212 @@ func parseMessage(msg *imapclient.FetchMessageBuffer) Message {
|
||||
return m
|
||||
}
|
||||
|
||||
// FetchWithBodyAndAttachments holt bis zu n Emails aus dem angegebenen Ordner mit Text-Body
|
||||
// und extrahiert Text aus PDF-Anhängen. Der kombinierte Text wird in MessageWithBody.Body gespeichert.
|
||||
// Nutzt stdlib mime/multipart — keine externen Abhängigkeiten außer dem bereits vorhandenen PDF-Parser.
|
||||
func (cl *Client) FetchWithBodyAndAttachments(folder string, n uint32) ([]MessageWithBody, error) {
|
||||
if folder == "" {
|
||||
folder = "INBOX"
|
||||
}
|
||||
|
||||
selectData, err := cl.c.Select(folder, &imap.SelectOptions{ReadOnly: true}).Wait()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("IMAP select %s: %w", folder, err)
|
||||
}
|
||||
if selectData.NumMessages == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
total := selectData.NumMessages
|
||||
start := uint32(1)
|
||||
if total > n {
|
||||
start = total - n + 1
|
||||
}
|
||||
|
||||
// Komplette RFC822-Nachricht fetchen (Header + Body + Attachments)
|
||||
fullMsgSec := &imap.FetchItemBodySection{}
|
||||
|
||||
var result []MessageWithBody
|
||||
batchSize := uint32(50)
|
||||
|
||||
for i := start; i <= total; i += batchSize {
|
||||
end := i + batchSize - 1
|
||||
if end > total {
|
||||
end = total
|
||||
}
|
||||
var seqSet imap.SeqSet
|
||||
seqSet.AddRange(i, end)
|
||||
|
||||
msgs, err := cl.c.Fetch(seqSet, &imap.FetchOptions{
|
||||
Envelope: true,
|
||||
BodySection: []*imap.FetchItemBodySection{fullMsgSec},
|
||||
}).Collect()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("IMAP fetch batch %d-%d: %w", i, end, err)
|
||||
}
|
||||
|
||||
for _, msg := range msgs {
|
||||
if msg.Envelope == nil {
|
||||
continue
|
||||
}
|
||||
m := MessageWithBody{Message: parseMessage(msg)}
|
||||
|
||||
rawMsg := msg.FindBodySection(fullMsgSec)
|
||||
if rawMsg != nil {
|
||||
body, err := extractBodyAndAttachments(rawMsg)
|
||||
if err != nil {
|
||||
slog.Warn("[Email] MIME-Parsing fehlgeschlagen", "betreff", m.Subject, "fehler", err)
|
||||
} else {
|
||||
m.Body = body
|
||||
}
|
||||
}
|
||||
result = append(result, m)
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// extractBodyAndAttachments parst eine rohe RFC822-Nachricht und gibt den kombinierten Text zurück.
|
||||
// Text/plain-Teile werden direkt übernommen, PDF-Anhänge werden in Text extrahiert.
|
||||
func extractBodyAndAttachments(rawMsg []byte) (string, error) {
|
||||
parsed, err := mail.ReadMessage(bytes.NewReader(rawMsg))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("mail parsen: %w", err)
|
||||
}
|
||||
|
||||
contentType := parsed.Header.Get("Content-Type")
|
||||
mediaType, params, err := mime.ParseMediaType(contentType)
|
||||
if err != nil {
|
||||
// Kein valider Content-Type — Body direkt lesen
|
||||
body, readErr := io.ReadAll(parsed.Body)
|
||||
if readErr != nil {
|
||||
return "", fmt.Errorf("body lesen: %w", readErr)
|
||||
}
|
||||
enc := strings.ToLower(parsed.Header.Get("Content-Transfer-Encoding"))
|
||||
return decodeBody(body, enc), nil
|
||||
}
|
||||
|
||||
var parts []string
|
||||
|
||||
if strings.HasPrefix(mediaType, "multipart/") {
|
||||
boundary := params["boundary"]
|
||||
mr := multipart.NewReader(parsed.Body, boundary)
|
||||
for {
|
||||
part, err := mr.NextPart()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
slog.Warn("[Email] Multipart-Teil fehlgeschlagen", "fehler", err)
|
||||
break
|
||||
}
|
||||
|
||||
partContentType := part.Header.Get("Content-Type")
|
||||
partMediaType, _, parseErr := mime.ParseMediaType(partContentType)
|
||||
if parseErr != nil {
|
||||
part.Close()
|
||||
continue
|
||||
}
|
||||
|
||||
enc := strings.ToLower(part.Header.Get("Content-Transfer-Encoding"))
|
||||
data, readErr := io.ReadAll(part)
|
||||
part.Close()
|
||||
if readErr != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
switch {
|
||||
case partMediaType == "text/plain":
|
||||
parts = append(parts, decodeBody(data, enc))
|
||||
case partMediaType == "application/pdf":
|
||||
pdfText, pdfErr := extractPDFTextFromBytes(data, enc)
|
||||
if pdfErr != nil {
|
||||
slog.Warn("[Email] PDF-Anhang konnte nicht gelesen werden", "fehler", pdfErr)
|
||||
} else if pdfText != "" {
|
||||
parts = append(parts, "[PDF-Anhang] "+pdfText)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Einfache (nicht-multipart) Nachricht
|
||||
body, readErr := io.ReadAll(parsed.Body)
|
||||
if readErr != nil {
|
||||
return "", fmt.Errorf("body lesen: %w", readErr)
|
||||
}
|
||||
enc := strings.ToLower(parsed.Header.Get("Content-Transfer-Encoding"))
|
||||
parts = append(parts, decodeBody(body, enc))
|
||||
}
|
||||
|
||||
combined := strings.TrimSpace(strings.Join(parts, "\n"))
|
||||
if len(combined) > 2000 {
|
||||
combined = combined[:2000]
|
||||
}
|
||||
return combined, nil
|
||||
}
|
||||
|
||||
// extractPDFTextFromBytes dekodiert die rohen Anhang-Bytes (ggf. base64) und extrahiert PDF-Text.
|
||||
func extractPDFTextFromBytes(data []byte, enc string) (string, error) {
|
||||
// PDF-Anhänge sind fast immer base64-kodiert
|
||||
var pdfBytes []byte
|
||||
switch enc {
|
||||
case "base64":
|
||||
cleaned := strings.ReplaceAll(strings.TrimSpace(string(data)), "\r\n", "")
|
||||
cleaned = strings.ReplaceAll(cleaned, "\n", "")
|
||||
decoded, err := base64.StdEncoding.DecodeString(cleaned)
|
||||
if err != nil {
|
||||
decoded, err = base64.RawStdEncoding.DecodeString(cleaned)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("base64 dekodieren: %w", err)
|
||||
}
|
||||
}
|
||||
pdfBytes = decoded
|
||||
default:
|
||||
pdfBytes = data
|
||||
}
|
||||
|
||||
// PDF in temporäre Datei schreiben, da die pdf-Bibliothek einen Datei-Pfad erwartet
|
||||
tmp, err := os.CreateTemp("", "email-pdf-*.pdf")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("temp-Datei anlegen: %w", err)
|
||||
}
|
||||
tmpPath := tmp.Name()
|
||||
defer os.Remove(tmpPath)
|
||||
|
||||
if _, err := tmp.Write(pdfBytes); err != nil {
|
||||
tmp.Close()
|
||||
return "", fmt.Errorf("temp-Datei schreiben: %w", err)
|
||||
}
|
||||
tmp.Close()
|
||||
|
||||
return extractPDFTextFromFile(tmpPath)
|
||||
}
|
||||
|
||||
// extractPDFTextFromFile liest alle Seiten einer PDF-Datei und gibt den Plain-Text zurück.
|
||||
// Dupliziert die Logik aus brain/ingest_pdf.go um Import-Zyklen zu vermeiden.
|
||||
func extractPDFTextFromFile(filePath string) (string, error) {
|
||||
f, r, err := pdf.Open(filePath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var sb strings.Builder
|
||||
totalPages := r.NumPage()
|
||||
for pageNum := 1; pageNum <= totalPages; pageNum++ {
|
||||
page := r.Page(pageNum)
|
||||
if page.V.IsNull() {
|
||||
continue
|
||||
}
|
||||
text, err := page.GetPlainText(nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
sb.WriteString(text)
|
||||
sb.WriteString("\n")
|
||||
}
|
||||
return strings.TrimSpace(sb.String()), nil
|
||||
}
|
||||
|
||||
func parseMessages(msgs []*imapclient.FetchMessageBuffer) []Message {
|
||||
result := make([]Message, 0, len(msgs))
|
||||
for _, msg := range msgs {
|
||||
|
||||
@@ -328,7 +328,8 @@ func ClassifyImportance(msg Message, model string) bool {
|
||||
)
|
||||
|
||||
// Entscheidung für künftiges Lernen in Qdrant speichern
|
||||
if err := triage.StoreDecision(msg.Subject, msg.From, isImportant); err != nil {
|
||||
// Body ist bei ClassifyImportance nicht verfügbar (nur Envelope), daher leer.
|
||||
if err := triage.StoreDecision(msg.Subject, msg.From, msg.Date, "", isImportant); err != nil {
|
||||
slog.Warn("[Triage] Entscheidung nicht gespeichert", "fehler", err)
|
||||
}
|
||||
|
||||
@@ -646,3 +647,64 @@ func fallbackList(msgs []Message) string {
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// LearnFromFolders scannt die Archiv-Ordner eines Accounts und speichert Triage-Entscheidungen in Qdrant.
|
||||
// Ordner mit retention_days == 0 (Archiv/dauerhaft) → wichtig, retention_days > 0 → unwichtig.
|
||||
// Pro Ordner werden maximal die letzten 50 Emails verarbeitet.
|
||||
func LearnFromFolders(acc config.EmailAccount) (wichtig, unwichtig int, err error) {
|
||||
if len(acc.ArchiveFolders) == 0 {
|
||||
return 0, 0, nil
|
||||
}
|
||||
|
||||
cl, err := ConnectAccount(acc)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("IMAP verbinden: %w", err)
|
||||
}
|
||||
defer cl.Close()
|
||||
|
||||
for _, af := range acc.ArchiveFolders {
|
||||
isImportant := af.RetentionDays == 0
|
||||
|
||||
msgs, err := cl.FetchWithBodyAndAttachments(af.IMAPFolder, 50)
|
||||
if err != nil {
|
||||
slog.Warn("[Triage-Learn] Ordner nicht lesbar", "ordner", af.IMAPFolder, "fehler", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, m := range msgs {
|
||||
if err := triage.StoreDecision(m.Subject, m.From, m.Date, m.Body, isImportant); err != nil {
|
||||
slog.Warn("[Triage-Learn] Speichern fehlgeschlagen", "betreff", m.Subject, "fehler", err)
|
||||
continue
|
||||
}
|
||||
if isImportant {
|
||||
wichtig++
|
||||
} else {
|
||||
unwichtig++
|
||||
}
|
||||
}
|
||||
|
||||
slog.Info("[Triage-Learn] Ordner verarbeitet",
|
||||
"account", accountLabel(acc),
|
||||
"ordner", af.IMAPFolder,
|
||||
"emails", len(msgs),
|
||||
"wichtig", isImportant,
|
||||
)
|
||||
}
|
||||
|
||||
return wichtig, unwichtig, nil
|
||||
}
|
||||
|
||||
// LearnFromFoldersAllAccounts führt LearnFromFolders für alle konfigurierten Accounts aus.
|
||||
func LearnFromFoldersAllAccounts() (wichtig, unwichtig int, err error) {
|
||||
accounts := config.AllEmailAccounts()
|
||||
for _, acc := range accounts {
|
||||
w, u, accErr := LearnFromFolders(acc)
|
||||
if accErr != nil {
|
||||
slog.Error("[Triage-Learn] Account fehlgeschlagen", "account", accountLabel(acc), "fehler", accErr)
|
||||
continue
|
||||
}
|
||||
wichtig += w
|
||||
unwichtig += u
|
||||
}
|
||||
return wichtig, unwichtig, nil
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
|
||||
pb "github.com/qdrant/go-client/qdrant"
|
||||
openai "github.com/sashabaranov/go-openai"
|
||||
@@ -23,13 +24,31 @@ type TriageResult struct {
|
||||
}
|
||||
|
||||
// StoreDecision speichert eine Triage-Entscheidung in Qdrant.
|
||||
// Bei gleicher Email (deterministischer ID) wird die Entscheidung überschrieben.
|
||||
func StoreDecision(subject, from string, isImportant bool) error {
|
||||
// Bei gleicher Email (Von + Betreff als deterministischer Schlüssel) wird die Entscheidung überschrieben.
|
||||
// date und bodySummary sind optional (leerer String = weglassen).
|
||||
// bodySummary wird auf max. 200 Zeichen gekürzt.
|
||||
func StoreDecision(subject, from, date, bodySummary string, isImportant bool) error {
|
||||
label := "wichtig"
|
||||
if !isImportant {
|
||||
label = "unwichtig"
|
||||
}
|
||||
text := fmt.Sprintf("Email-Triage | Von: %s | Betreff: %s | Entscheidung: %s", from, subject, label)
|
||||
|
||||
// Kopfzeile immer vorhanden
|
||||
header := fmt.Sprintf("Email-Triage | Von: %s | Betreff: %s", from, subject)
|
||||
if date != "" {
|
||||
header += fmt.Sprintf(" | Datum: %s", date)
|
||||
}
|
||||
header += fmt.Sprintf(" | Entscheidung: %s", label)
|
||||
|
||||
// Body-Zusammenfassung anhängen wenn vorhanden
|
||||
text := header
|
||||
if bodySummary != "" {
|
||||
summary := bodySummary
|
||||
if len(summary) > 200 {
|
||||
summary = summary[:200]
|
||||
}
|
||||
text = header + "\nBody: " + summary
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
|
||||
@@ -46,7 +65,9 @@ func StoreDecision(subject, from string, isImportant bool) error {
|
||||
conn := config.NewQdrantConn()
|
||||
defer conn.Close()
|
||||
|
||||
id := triageID(text)
|
||||
// ID basiert auf Von+Betreff — nicht auf dem vollständigen Text, damit
|
||||
// Re-Processing derselben Email den bestehenden Eintrag überschreibt.
|
||||
id := triageID(from + "|" + subject)
|
||||
wait := true
|
||||
_, err = pb.NewPointsClient(conn).Upsert(ctx, &pb.UpsertPoints{
|
||||
CollectionName: config.Cfg.Qdrant.Collection,
|
||||
@@ -132,7 +153,236 @@ func SearchSimilar(query string) []TriageResult {
|
||||
return results
|
||||
}
|
||||
|
||||
func triageID(text string) string {
|
||||
hash := sha256.Sum256([]byte("email_triage:" + text))
|
||||
// ListRecent gibt die letzten N Triage-Entscheidungen aus Qdrant zurück.
|
||||
func ListRecent(limit uint32) ([]TriageResult, error) {
|
||||
if limit == 0 {
|
||||
limit = 10
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
|
||||
|
||||
conn := config.NewQdrantConn()
|
||||
defer conn.Close()
|
||||
|
||||
pointsClient := pb.NewPointsClient(conn)
|
||||
|
||||
var results []TriageResult
|
||||
var offset *pb.PointId
|
||||
|
||||
for {
|
||||
req := &pb.ScrollPoints{
|
||||
CollectionName: config.Cfg.Qdrant.Collection,
|
||||
WithPayload: &pb.WithPayloadSelector{
|
||||
SelectorOptions: &pb.WithPayloadSelector_Enable{Enable: true},
|
||||
},
|
||||
Filter: triageFilter(),
|
||||
Limit: &limit,
|
||||
}
|
||||
if offset != nil {
|
||||
req.Offset = offset
|
||||
}
|
||||
|
||||
result, err := pointsClient.Scroll(ctx, req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scroll fehlgeschlagen: %w", err)
|
||||
}
|
||||
|
||||
for _, pt := range result.Result {
|
||||
text := pt.Payload["text"].GetStringValue()
|
||||
if text != "" {
|
||||
results = append(results, TriageResult{Text: text})
|
||||
}
|
||||
}
|
||||
|
||||
if result.NextPageOffset == nil || uint32(len(results)) >= limit {
|
||||
break
|
||||
}
|
||||
offset = result.NextPageOffset
|
||||
}
|
||||
|
||||
if uint32(len(results)) > limit {
|
||||
results = results[:limit]
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// CorrectDecision sucht eine Triage-Entscheidung per Embedding-Suche und flippt sie (wichtig↔unwichtig).
|
||||
// Gibt eine Bestätigungsmeldung zurück.
|
||||
func CorrectDecision(query string) (string, error) {
|
||||
ctx := context.Background()
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
|
||||
|
||||
embClient := config.NewEmbeddingClient()
|
||||
embResp, err := embClient.CreateEmbeddings(ctx, openai.EmbeddingRequest{
|
||||
Input: []string{query},
|
||||
Model: openai.EmbeddingModel(config.Cfg.Embedding.Model),
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("embedding: %w", err)
|
||||
}
|
||||
|
||||
conn := config.NewQdrantConn()
|
||||
defer conn.Close()
|
||||
|
||||
threshold := float32(0.8)
|
||||
result, err := pb.NewPointsClient(conn).Search(ctx, &pb.SearchPoints{
|
||||
CollectionName: config.Cfg.Qdrant.Collection,
|
||||
Vector: embResp.Data[0].Embedding,
|
||||
Limit: 1,
|
||||
WithPayload: &pb.WithPayloadSelector{
|
||||
SelectorOptions: &pb.WithPayloadSelector_Enable{Enable: true},
|
||||
},
|
||||
ScoreThreshold: &threshold,
|
||||
Filter: triageFilter(),
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("suche fehlgeschlagen: %w", err)
|
||||
}
|
||||
if len(result.Result) == 0 {
|
||||
return "", fmt.Errorf("keine passende Triage-Entscheidung gefunden (Score < 0.8)")
|
||||
}
|
||||
|
||||
hit := result.Result[0]
|
||||
text := hit.Payload["text"].GetStringValue()
|
||||
|
||||
// Text parsen: "Email-Triage | Von: X | Betreff: Y | Entscheidung: Z"
|
||||
from, subject, wasImportant, err := parseTriageText(text)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Alten Eintrag löschen — ID basiert jetzt auf from|subject
|
||||
oldID := triageID(from + "|" + subject)
|
||||
pointsClient := pb.NewPointsClient(conn)
|
||||
if err := deleteByUUID(ctx, pointsClient, oldID); err != nil {
|
||||
return "", fmt.Errorf("löschen fehlgeschlagen: %w", err)
|
||||
}
|
||||
|
||||
// Neuen Eintrag mit geflipptem Label speichern (Datum/Body aus altem Text nicht übertragen)
|
||||
newImportant := !wasImportant
|
||||
if err := StoreDecision(subject, from, "", "", newImportant); err != nil {
|
||||
return "", fmt.Errorf("speichern fehlgeschlagen: %w", err)
|
||||
}
|
||||
|
||||
oldLabel := "wichtig"
|
||||
newLabel := "unwichtig"
|
||||
if newImportant {
|
||||
oldLabel = "unwichtig"
|
||||
newLabel = "wichtig"
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf("Korrigiert: '%s' von %s → %s (vorher: %s)", subject, from, newLabel, oldLabel)
|
||||
slog.Info("[Triage] Entscheidung korrigiert", "betreff", subject, "von", from, "neu", newLabel)
|
||||
return msg, nil
|
||||
}
|
||||
|
||||
// SearchExtended sucht ähnliche Triage-Entscheidungen mit niedrigerem Threshold und konfigurierbarem Limit.
|
||||
func SearchExtended(query string, limit uint64) ([]TriageResult, error) {
|
||||
ctx := context.Background()
|
||||
ctx = metadata.AppendToOutgoingContext(ctx, "api-key", config.Cfg.Qdrant.APIKey)
|
||||
|
||||
embClient := config.NewEmbeddingClient()
|
||||
embResp, err := embClient.CreateEmbeddings(ctx, openai.EmbeddingRequest{
|
||||
Input: []string{query},
|
||||
Model: openai.EmbeddingModel(config.Cfg.Embedding.Model),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("embedding: %w", err)
|
||||
}
|
||||
|
||||
conn := config.NewQdrantConn()
|
||||
defer conn.Close()
|
||||
|
||||
threshold := float32(0.5)
|
||||
result, err := pb.NewPointsClient(conn).Search(ctx, &pb.SearchPoints{
|
||||
CollectionName: config.Cfg.Qdrant.Collection,
|
||||
Vector: embResp.Data[0].Embedding,
|
||||
Limit: limit,
|
||||
WithPayload: &pb.WithPayloadSelector{
|
||||
SelectorOptions: &pb.WithPayloadSelector_Enable{Enable: true},
|
||||
},
|
||||
ScoreThreshold: &threshold,
|
||||
Filter: triageFilter(),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("suche fehlgeschlagen: %w", err)
|
||||
}
|
||||
|
||||
var results []TriageResult
|
||||
for _, hit := range result.Result {
|
||||
text := hit.Payload["text"].GetStringValue()
|
||||
if text != "" {
|
||||
results = append(results, TriageResult{Text: text, Score: hit.Score})
|
||||
}
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// triageFilter gibt den Standard-Filter für Triage-Einträge zurück.
|
||||
func triageFilter() *pb.Filter {
|
||||
return &pb.Filter{
|
||||
Must: []*pb.Condition{{
|
||||
ConditionOneOf: &pb.Condition_Field{
|
||||
Field: &pb.FieldCondition{
|
||||
Key: "type",
|
||||
Match: &pb.Match{
|
||||
MatchValue: &pb.Match_Keyword{Keyword: "email_triage"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}},
|
||||
}
|
||||
}
|
||||
|
||||
// parseTriageText extrahiert Von, Betreff und Entscheidung aus dem gespeicherten Text.
|
||||
// Unterstützt sowohl das alte Format (4 Felder) als auch das neue Format mit optionalem Datum und Body.
|
||||
// Altes Format: "Email-Triage | Von: X | Betreff: Y | Entscheidung: Z"
|
||||
// Neues Format: "Email-Triage | Von: X | Betreff: Y | Datum: D | Entscheidung: Z\nBody: ..."
|
||||
func parseTriageText(text string) (from, subject string, isImportant bool, err error) {
|
||||
// Body-Zeile abtrennen – nur die Header-Zeile parsen
|
||||
headerLine := text
|
||||
if idx := strings.Index(text, "\nBody:"); idx >= 0 {
|
||||
headerLine = text[:idx]
|
||||
}
|
||||
|
||||
parts := strings.Split(headerLine, " | ")
|
||||
if len(parts) < 4 {
|
||||
return "", "", false, fmt.Errorf("ungültiges Triage-Format: %s", text)
|
||||
}
|
||||
from = strings.TrimPrefix(parts[1], "Von: ")
|
||||
subject = strings.TrimPrefix(parts[2], "Betreff: ")
|
||||
|
||||
// Letztes Feld enthält immer "Entscheidung: X", egal ob Datum dazwischen steht
|
||||
lastPart := parts[len(parts)-1]
|
||||
decision := strings.TrimPrefix(lastPart, "Entscheidung: ")
|
||||
isImportant = !strings.Contains(decision, "unwichtig")
|
||||
return from, subject, isImportant, nil
|
||||
}
|
||||
|
||||
// deleteByUUID löscht einen einzelnen Punkt aus Qdrant anhand seiner UUID.
|
||||
func deleteByUUID(ctx context.Context, client pb.PointsClient, uuid string) error {
|
||||
wait := true
|
||||
_, err := client.Delete(ctx, &pb.DeletePoints{
|
||||
CollectionName: config.Cfg.Qdrant.Collection,
|
||||
Points: &pb.PointsSelector{
|
||||
PointsSelectorOneOf: &pb.PointsSelector_Points{
|
||||
Points: &pb.PointsIdsList{
|
||||
Ids: []*pb.PointId{{
|
||||
PointIdOptions: &pb.PointId_Uuid{Uuid: uuid},
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
Wait: &wait,
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
// triageID erzeugt eine deterministische UUID aus einem Schlüssel.
|
||||
// Für Email-Triage sollte der Schlüssel "from|subject" sein, damit
|
||||
// Re-Processing derselben Email (mit ggf. anderem Body/Datum) upsert statt insert auslöst.
|
||||
func triageID(key string) string {
|
||||
hash := sha256.Sum256([]byte("email_triage:" + key))
|
||||
return hex.EncodeToString(hash[:16])
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user