- Automatisches Triage-Lernen aus Archiv-Ordnern im Nacht-Ingest: retention_days=0 (Archiv) → wichtig, retention_days>0 → unwichtig - Drei neue Discord-Commands: /email triage-history, triage-correct, triage-search - StoreDecision speichert jetzt Datum + Body-Zusammenfassung (max 200 Zeichen) - MIME-Multipart-Parsing mit PDF-Attachment-Extraktion (FetchWithBodyAndAttachments) - Deterministische IDs basierend auf Absender+Betreff (idempotente Upserts) - Rueckwaertskompatibles Parsing fuer alte Triage-Eintraege ohne Datum/Body Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
814 lines
21 KiB
Go
814 lines
21 KiB
Go
// email/client.go – IMAP-Client für Email-Abfragen
|
||
package email
|
||
|
||
import (
|
||
"bytes"
|
||
"crypto/tls"
|
||
"encoding/base64"
|
||
"fmt"
|
||
"io"
|
||
"log/slog"
|
||
"mime"
|
||
"mime/multipart"
|
||
"mime/quotedprintable"
|
||
"net/mail"
|
||
"os"
|
||
"strings"
|
||
"time"
|
||
|
||
imap "github.com/emersion/go-imap/v2"
|
||
"github.com/emersion/go-imap/v2/imapclient"
|
||
"github.com/ledongthuc/pdf"
|
||
|
||
"my-brain-importer/internal/config"
|
||
)
|
||
|
||
// Message is the envelope-level view of an email (no body), intended for
// quick overviews.
type Message struct {
	// Subject, From and Date hold the envelope data as plain text.
	Subject, From, Date string
}
|
||
|
||
// SelectMessage koppelt eine Message mit ihrer IMAP-Sequenznummer für UI-Zwecke.
|
||
type SelectMessage struct {
|
||
Message
|
||
SeqNum uint32
|
||
Unread bool // true = \Seen flag nicht gesetzt
|
||
}
|
||
|
||
// MessageWithBody repräsentiert eine Email mit Text-Inhalt (für Datenbankimport).
|
||
type MessageWithBody struct {
|
||
Message
|
||
Body string
|
||
}
|
||
|
||
// Client wraps die IMAP-Verbindung.
|
||
type Client struct {
|
||
c *imapclient.Client
|
||
folder string // INBOX-Ordner (leer = "INBOX")
|
||
}
|
||
|
||
// Connect öffnet eine IMAP-Verbindung mit dem Legacy-Email-Block aus der Config.
|
||
func Connect() (*Client, error) {
|
||
cfg := config.Cfg.Email
|
||
acc := config.EmailAccount{
|
||
Host: cfg.Host,
|
||
Port: cfg.Port,
|
||
User: cfg.User,
|
||
Password: cfg.Password,
|
||
TLS: cfg.TLS,
|
||
StartTLS: cfg.StartTLS,
|
||
Folder: cfg.Folder,
|
||
}
|
||
return ConnectAccount(acc)
|
||
}
|
||
|
||
// ConnectAccount öffnet eine IMAP-Verbindung für einen bestimmten EmailAccount.
|
||
func ConnectAccount(acc config.EmailAccount) (*Client, error) {
|
||
addr := fmt.Sprintf("%s:%d", acc.Host, acc.Port)
|
||
|
||
var (
|
||
c *imapclient.Client
|
||
err error
|
||
)
|
||
|
||
switch {
|
||
case acc.TLS:
|
||
tlsCfg := &tls.Config{ServerName: acc.Host}
|
||
c, err = imapclient.DialTLS(addr, &imapclient.Options{TLSConfig: tlsCfg})
|
||
case acc.StartTLS:
|
||
tlsCfg := &tls.Config{ServerName: acc.Host}
|
||
c, err = imapclient.DialStartTLS(addr, &imapclient.Options{TLSConfig: tlsCfg})
|
||
default:
|
||
c, err = imapclient.DialInsecure(addr, nil)
|
||
}
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP verbinden: %w", err)
|
||
}
|
||
|
||
if err := c.Login(acc.User, acc.Password).Wait(); err != nil {
|
||
c.Close()
|
||
return nil, fmt.Errorf("IMAP login: %w", err)
|
||
}
|
||
|
||
return &Client{c: c, folder: acc.Folder}, nil
|
||
}
|
||
|
||
// Close schließt die Verbindung.
|
||
func (cl *Client) Close() {
|
||
cl.c.Logout().Wait()
|
||
cl.c.Close()
|
||
}
|
||
|
||
// EnsureFolder legt einen IMAP-Ordner an falls er nicht existiert.
|
||
// Strato-kompatibel: ignoriert alle "already exists"-Varianten.
|
||
func (cl *Client) EnsureFolder(folder string) error {
|
||
err := cl.c.Create(folder, nil).Wait()
|
||
if err == nil {
|
||
slog.Info("IMAP: Ordner angelegt", "ordner", folder)
|
||
return nil
|
||
}
|
||
errLower := strings.ToLower(err.Error())
|
||
if strings.Contains(errLower, "already exists") ||
|
||
strings.Contains(errLower, "alreadyexists") ||
|
||
strings.Contains(errLower, "mailbox exists") ||
|
||
strings.Contains(errLower, "exists") {
|
||
return nil // Ordner existiert bereits — kein Fehler
|
||
}
|
||
slog.Error("IMAP: Ordner anlegen fehlgeschlagen", "ordner", folder, "fehler", err)
|
||
return fmt.Errorf("IMAP create folder %s: %w", folder, err)
|
||
}
|
||
|
||
// FetchRecent holt die letzten n Emails (Envelope-Daten, kein Body).
|
||
func (cl *Client) FetchRecent(n uint32) ([]Message, error) {
|
||
folder := cl.folder
|
||
if folder == "" {
|
||
folder = "INBOX"
|
||
}
|
||
|
||
selectData, err := cl.c.Select(folder, &imap.SelectOptions{ReadOnly: true}).Wait()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP select: %w", err)
|
||
}
|
||
if selectData.NumMessages == 0 {
|
||
return nil, nil
|
||
}
|
||
|
||
start := uint32(1)
|
||
if selectData.NumMessages > n {
|
||
start = selectData.NumMessages - n + 1
|
||
}
|
||
|
||
var seqSet imap.SeqSet
|
||
seqSet.AddRange(start, selectData.NumMessages)
|
||
|
||
msgs, err := cl.c.Fetch(seqSet, &imap.FetchOptions{Envelope: true}).Collect()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP fetch: %w", err)
|
||
}
|
||
|
||
return parseMessages(msgs), nil
|
||
}
|
||
|
||
// FetchRecentFromFolder holt die letzten n Emails aus einem bestimmten IMAP-Ordner.
|
||
func (cl *Client) FetchRecentFromFolder(folder string, n uint32) ([]Message, error) {
|
||
if folder == "" {
|
||
folder = "INBOX"
|
||
}
|
||
|
||
selectData, err := cl.c.Select(folder, &imap.SelectOptions{ReadOnly: true}).Wait()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP select %s: %w", folder, err)
|
||
}
|
||
if selectData.NumMessages == 0 {
|
||
return nil, nil
|
||
}
|
||
|
||
start := uint32(1)
|
||
if selectData.NumMessages > n {
|
||
start = selectData.NumMessages - n + 1
|
||
}
|
||
|
||
var seqSet imap.SeqSet
|
||
seqSet.AddRange(start, selectData.NumMessages)
|
||
|
||
msgs, err := cl.c.Fetch(seqSet, &imap.FetchOptions{Envelope: true}).Collect()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP fetch %s: %w", folder, err)
|
||
}
|
||
|
||
return parseMessages(msgs), nil
|
||
}
|
||
|
||
// FetchUnread holt ungelesene Emails (Envelope-Daten, kein Body).
|
||
func (cl *Client) FetchUnread() ([]Message, error) {
|
||
folder := cl.folder
|
||
if folder == "" {
|
||
folder = "INBOX"
|
||
}
|
||
|
||
if _, err := cl.c.Select(folder, &imap.SelectOptions{ReadOnly: true}).Wait(); err != nil {
|
||
return nil, fmt.Errorf("IMAP select: %w", err)
|
||
}
|
||
|
||
searchData, err := cl.c.Search(&imap.SearchCriteria{
|
||
NotFlag: []imap.Flag{imap.FlagSeen},
|
||
}, nil).Wait()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP search: %w", err)
|
||
}
|
||
|
||
seqNums := searchData.AllSeqNums()
|
||
if len(seqNums) == 0 {
|
||
return nil, nil
|
||
}
|
||
|
||
var seqSet imap.SeqSet
|
||
seqSet.AddNum(seqNums...)
|
||
|
||
msgs, err := cl.c.Fetch(seqSet, &imap.FetchOptions{Envelope: true}).Collect()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP fetch: %w", err)
|
||
}
|
||
|
||
return parseMessages(msgs), nil
|
||
}
|
||
|
||
// FetchUnreadSeqNums holt ungelesene Emails und gibt zusätzlich die Sequenznummern zurück.
|
||
// Selektiert den Ordner im Lese-Schreib-Modus (für nachfolgendes Verschieben).
|
||
func (cl *Client) FetchUnreadSeqNums() ([]Message, []uint32, error) {
|
||
folder := cl.folder
|
||
if folder == "" {
|
||
folder = "INBOX"
|
||
}
|
||
|
||
if _, err := cl.c.Select(folder, nil).Wait(); err != nil {
|
||
return nil, nil, fmt.Errorf("IMAP select: %w", err)
|
||
}
|
||
|
||
searchData, err := cl.c.Search(&imap.SearchCriteria{
|
||
NotFlag: []imap.Flag{imap.FlagSeen},
|
||
}, nil).Wait()
|
||
if err != nil {
|
||
return nil, nil, fmt.Errorf("IMAP search: %w", err)
|
||
}
|
||
|
||
seqNums := searchData.AllSeqNums()
|
||
if len(seqNums) == 0 {
|
||
return nil, nil, nil
|
||
}
|
||
|
||
var seqSet imap.SeqSet
|
||
seqSet.AddNum(seqNums...)
|
||
|
||
msgs, err := cl.c.Fetch(seqSet, &imap.FetchOptions{Envelope: true}).Collect()
|
||
if err != nil {
|
||
return nil, nil, fmt.Errorf("IMAP fetch: %w", err)
|
||
}
|
||
|
||
return parseMessages(msgs), seqNums, nil
|
||
}
|
||
|
||
// MoveMessages verschiebt Nachrichten in einen anderen IMAP-Ordner.
|
||
// Der Ordner muss im Lese-Schreib-Modus selektiert sein (via FetchUnreadSeqNums).
|
||
func (cl *Client) MoveMessages(seqNums []uint32, destFolder string) error {
|
||
var seqSet imap.SeqSet
|
||
seqSet.AddNum(seqNums...)
|
||
if _, err := cl.c.Move(seqSet, destFolder).Wait(); err != nil {
|
||
return fmt.Errorf("IMAP move: %w", err)
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// FetchUnreadForSelect gibt ungelesene Emails mit ihren Sequenznummern zurück.
|
||
// Selektiert den Ordner im Lese-Schreib-Modus (für nachfolgendes Verschieben).
|
||
func (cl *Client) FetchUnreadForSelect() ([]SelectMessage, error) {
|
||
folder := cl.folder
|
||
if folder == "" {
|
||
folder = "INBOX"
|
||
}
|
||
|
||
if _, err := cl.c.Select(folder, nil).Wait(); err != nil {
|
||
return nil, fmt.Errorf("IMAP select: %w", err)
|
||
}
|
||
|
||
searchData, err := cl.c.Search(&imap.SearchCriteria{
|
||
NotFlag: []imap.Flag{imap.FlagSeen},
|
||
}, nil).Wait()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP search: %w", err)
|
||
}
|
||
|
||
seqNums := searchData.AllSeqNums()
|
||
if len(seqNums) == 0 {
|
||
return nil, nil
|
||
}
|
||
|
||
var seqSet imap.SeqSet
|
||
seqSet.AddNum(seqNums...)
|
||
|
||
rawMsgs, err := cl.c.Fetch(seqSet, &imap.FetchOptions{Envelope: true}).Collect()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP fetch: %w", err)
|
||
}
|
||
|
||
seqToMsg := make(map[uint32]*imapclient.FetchMessageBuffer, len(rawMsgs))
|
||
for _, m := range rawMsgs {
|
||
if m.Envelope != nil {
|
||
seqToMsg[m.SeqNum] = m
|
||
}
|
||
}
|
||
|
||
result := make([]SelectMessage, 0, len(seqNums))
|
||
for _, sn := range seqNums {
|
||
m, ok := seqToMsg[sn]
|
||
if !ok {
|
||
continue
|
||
}
|
||
result = append(result, SelectMessage{
|
||
Message: parseMessage(m),
|
||
SeqNum: sn,
|
||
Unread: true,
|
||
})
|
||
}
|
||
return result, nil
|
||
}
|
||
|
||
// FetchRecentForSelect gibt die letzten n Emails mit Sequenznummern und Unread-Status zurück.
|
||
// Selektiert den Ordner im Lese-Schreib-Modus (für nachfolgendes Verschieben).
|
||
func (cl *Client) FetchRecentForSelect(n uint32) ([]SelectMessage, error) {
|
||
folder := cl.folder
|
||
if folder == "" {
|
||
folder = "INBOX"
|
||
}
|
||
|
||
selectData, err := cl.c.Select(folder, nil).Wait()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP select: %w", err)
|
||
}
|
||
if selectData.NumMessages == 0 {
|
||
return nil, nil
|
||
}
|
||
|
||
start := uint32(1)
|
||
if selectData.NumMessages > n {
|
||
start = selectData.NumMessages - n + 1
|
||
}
|
||
|
||
var seqSet imap.SeqSet
|
||
seqSet.AddRange(start, selectData.NumMessages)
|
||
|
||
rawMsgs, err := cl.c.Fetch(seqSet, &imap.FetchOptions{Envelope: true, Flags: true}).Collect()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP fetch: %w", err)
|
||
}
|
||
|
||
result := make([]SelectMessage, 0, len(rawMsgs))
|
||
for _, m := range rawMsgs {
|
||
if m.Envelope == nil {
|
||
continue
|
||
}
|
||
unread := true
|
||
for _, f := range m.Flags {
|
||
if f == imap.FlagSeen {
|
||
unread = false
|
||
break
|
||
}
|
||
}
|
||
result = append(result, SelectMessage{
|
||
Message: parseMessage(m),
|
||
SeqNum: m.SeqNum,
|
||
Unread: unread,
|
||
})
|
||
}
|
||
return result, nil
|
||
}
|
||
|
||
// MoveOldMessages verschiebt alle Emails im Ordner, die älter als olderThanDays Tage sind, nach destFolder.
|
||
// Gibt die Anzahl verschobener Nachrichten zurück. olderThanDays <= 0 ist ein No-op.
|
||
func (cl *Client) MoveOldMessages(folder, destFolder string, olderThanDays int) (int, error) {
|
||
if olderThanDays <= 0 {
|
||
return 0, nil
|
||
}
|
||
if folder == "" {
|
||
folder = "INBOX"
|
||
}
|
||
|
||
cutoff := time.Now().AddDate(0, 0, -olderThanDays).Truncate(24 * time.Hour)
|
||
|
||
if _, err := cl.c.Select(folder, nil).Wait(); err != nil {
|
||
return 0, fmt.Errorf("IMAP select %s: %w", folder, err)
|
||
}
|
||
|
||
searchData, err := cl.c.Search(&imap.SearchCriteria{Before: cutoff}, nil).Wait()
|
||
if err != nil {
|
||
return 0, fmt.Errorf("IMAP search: %w", err)
|
||
}
|
||
|
||
seqNums := searchData.AllSeqNums()
|
||
if len(seqNums) == 0 {
|
||
return 0, nil
|
||
}
|
||
|
||
var seqSet imap.SeqSet
|
||
seqSet.AddNum(seqNums...)
|
||
|
||
if _, err := cl.c.Move(seqSet, destFolder).Wait(); err != nil {
|
||
return 0, fmt.Errorf("IMAP move: %w", err)
|
||
}
|
||
|
||
return len(seqNums), nil
|
||
}
|
||
|
||
// MoveSpecificMessages selektiert den Inbox-Ordner und verschiebt die angegebenen Sequenznummern.
|
||
func (cl *Client) MoveSpecificMessages(seqNums []uint32, destFolder string) error {
|
||
folder := cl.folder
|
||
if folder == "" {
|
||
folder = "INBOX"
|
||
}
|
||
if _, err := cl.c.Select(folder, nil).Wait(); err != nil {
|
||
return fmt.Errorf("IMAP select: %w", err)
|
||
}
|
||
return cl.MoveMessages(seqNums, destFolder)
|
||
}
|
||
|
||
// CleanupOldEmails löscht Emails im Ordner, die älter als retentionDays sind.
|
||
// Gibt die Anzahl gelöschter Nachrichten zurück. retentionDays <= 0 ist ein No-op.
|
||
func (cl *Client) CleanupOldEmails(folder string, retentionDays int) (int, error) {
|
||
if retentionDays <= 0 {
|
||
return 0, nil
|
||
}
|
||
if folder == "" {
|
||
folder = "INBOX"
|
||
}
|
||
|
||
cutoff := time.Now().AddDate(0, 0, -retentionDays).Truncate(24 * time.Hour)
|
||
|
||
if _, err := cl.c.Select(folder, nil).Wait(); err != nil {
|
||
return 0, fmt.Errorf("IMAP select %s: %w", folder, err)
|
||
}
|
||
|
||
searchData, err := cl.c.Search(&imap.SearchCriteria{Before: cutoff}, nil).Wait()
|
||
if err != nil {
|
||
return 0, fmt.Errorf("IMAP search: %w", err)
|
||
}
|
||
|
||
seqNums := searchData.AllSeqNums()
|
||
if len(seqNums) == 0 {
|
||
return 0, nil
|
||
}
|
||
|
||
var seqSet imap.SeqSet
|
||
seqSet.AddNum(seqNums...)
|
||
|
||
storeFlags := &imap.StoreFlags{
|
||
Op: imap.StoreFlagsAdd,
|
||
Silent: true,
|
||
Flags: []imap.Flag{imap.FlagDeleted},
|
||
}
|
||
if _, err := cl.c.Store(seqSet, storeFlags, nil).Collect(); err != nil {
|
||
return 0, fmt.Errorf("IMAP store flags: %w", err)
|
||
}
|
||
|
||
if _, err := cl.c.Expunge().Collect(); err != nil {
|
||
return 0, fmt.Errorf("IMAP expunge: %w", err)
|
||
}
|
||
|
||
return len(seqNums), nil
|
||
}
|
||
|
||
// FetchWithBody holt bis zu n Emails aus dem angegebenen Ordner mit Text-Body.
|
||
// Emails werden in Batches von 50 gefetcht um den IMAP-Server nicht zu überlasten.
|
||
func (cl *Client) FetchWithBody(folder string, n uint32) ([]MessageWithBody, error) {
|
||
if folder == "" {
|
||
folder = "INBOX"
|
||
}
|
||
|
||
selectData, err := cl.c.Select(folder, &imap.SelectOptions{ReadOnly: true}).Wait()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP select %s: %w", folder, err)
|
||
}
|
||
if selectData.NumMessages == 0 {
|
||
return nil, nil
|
||
}
|
||
|
||
// Letzte n Nachrichten
|
||
total := selectData.NumMessages
|
||
start := uint32(1)
|
||
if total > n {
|
||
start = total - n + 1
|
||
}
|
||
|
||
bodySec := &imap.FetchItemBodySection{Specifier: imap.PartSpecifierText}
|
||
hdrSec := &imap.FetchItemBodySection{Specifier: imap.PartSpecifierHeader}
|
||
|
||
var result []MessageWithBody
|
||
batchSize := uint32(50)
|
||
|
||
for i := start; i <= total; i += batchSize {
|
||
end := i + batchSize - 1
|
||
if end > total {
|
||
end = total
|
||
}
|
||
var seqSet imap.SeqSet
|
||
seqSet.AddRange(i, end)
|
||
|
||
msgs, err := cl.c.Fetch(seqSet, &imap.FetchOptions{
|
||
Envelope: true,
|
||
BodySection: []*imap.FetchItemBodySection{bodySec, hdrSec},
|
||
}).Collect()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP fetch batch %d-%d: %w", i, end, err)
|
||
}
|
||
|
||
for _, msg := range msgs {
|
||
if msg.Envelope == nil {
|
||
continue
|
||
}
|
||
m := MessageWithBody{Message: parseMessage(msg)}
|
||
|
||
// Content-Transfer-Encoding aus Header lesen
|
||
enc := ""
|
||
if hdr := msg.FindBodySection(hdrSec); hdr != nil {
|
||
for _, line := range strings.Split(string(hdr), "\n") {
|
||
if strings.HasPrefix(strings.ToLower(line), "content-transfer-encoding:") {
|
||
enc = strings.TrimSpace(strings.ToLower(strings.SplitN(line, ":", 2)[1]))
|
||
}
|
||
}
|
||
}
|
||
|
||
if body := msg.FindBodySection(bodySec); body != nil {
|
||
m.Body = decodeBody(body, enc)
|
||
}
|
||
result = append(result, m)
|
||
}
|
||
}
|
||
return result, nil
|
||
}
|
||
|
||
// decodeBody decodes an email body according to its Content-Transfer-Encoding
// and returns it as whitespace-trimmed text of at most 2000 bytes.
//
// enc is compared case-insensitively after trimming. "base64" and
// "quoted-printable" are decoded; every other value leaves raw as is.
// Base64 input that cannot be decoded falls back to the raw bytes. For
// quoted-printable input everything decoded before a decoding error is kept.
// Truncation never cuts a multi-byte UTF-8 character in half.
func decodeBody(raw []byte, enc string) string {
	var text string
	switch strings.ToLower(strings.TrimSpace(enc)) {
	case "base64":
		cleaned := strings.ReplaceAll(strings.TrimSpace(string(raw)), "\r\n", "")
		if decoded, err := base64.StdEncoding.DecodeString(cleaned); err == nil {
			text = string(decoded)
		} else if decoded, err := base64.RawStdEncoding.DecodeString(cleaned); err == nil {
			// Some senders omit the trailing padding.
			text = string(decoded)
		} else {
			text = string(raw) // fallback: raw
		}
	case "quoted-printable":
		// ReadAll hands back whatever was decoded before an error, so a
		// malformed tail does not discard the readable part; the error
		// itself is deliberately ignored.
		decoded, _ := io.ReadAll(quotedprintable.NewReader(bytes.NewReader(raw)))
		text = string(decoded)
	default:
		text = string(raw)
	}

	// Limit the result to maxBodyBytes bytes.
	const maxBodyBytes = 2000
	text = strings.TrimSpace(text)
	if len(text) > maxBodyBytes {
		// Ranging over a string yields the byte offset of every character,
		// so the largest offset <= maxBodyBytes is a safe place to cut.
		cut := 0
		for i := range text {
			if i > maxBodyBytes {
				break
			}
			cut = i
		}
		text = text[:cut]
	}
	return text
}
|
||
|
||
// parseMessage extrahiert eine Message aus einem FetchMessageBuffer.
|
||
func parseMessage(msg *imapclient.FetchMessageBuffer) Message {
|
||
m := Message{
|
||
Subject: msg.Envelope.Subject,
|
||
Date: msg.Envelope.Date.Format("2006-01-02 15:04"),
|
||
}
|
||
if len(msg.Envelope.From) > 0 {
|
||
addr := msg.Envelope.From[0]
|
||
if addr.Name != "" {
|
||
m.From = fmt.Sprintf("%s <%s@%s>", addr.Name, addr.Mailbox, addr.Host)
|
||
} else {
|
||
m.From = fmt.Sprintf("%s@%s", addr.Mailbox, addr.Host)
|
||
}
|
||
}
|
||
return m
|
||
}
|
||
|
||
// FetchWithBodyAndAttachments holt bis zu n Emails aus dem angegebenen Ordner mit Text-Body
|
||
// und extrahiert Text aus PDF-Anhängen. Der kombinierte Text wird in MessageWithBody.Body gespeichert.
|
||
// Nutzt stdlib mime/multipart — keine externen Abhängigkeiten außer dem bereits vorhandenen PDF-Parser.
|
||
func (cl *Client) FetchWithBodyAndAttachments(folder string, n uint32) ([]MessageWithBody, error) {
|
||
if folder == "" {
|
||
folder = "INBOX"
|
||
}
|
||
|
||
selectData, err := cl.c.Select(folder, &imap.SelectOptions{ReadOnly: true}).Wait()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP select %s: %w", folder, err)
|
||
}
|
||
if selectData.NumMessages == 0 {
|
||
return nil, nil
|
||
}
|
||
|
||
total := selectData.NumMessages
|
||
start := uint32(1)
|
||
if total > n {
|
||
start = total - n + 1
|
||
}
|
||
|
||
// Komplette RFC822-Nachricht fetchen (Header + Body + Attachments)
|
||
fullMsgSec := &imap.FetchItemBodySection{}
|
||
|
||
var result []MessageWithBody
|
||
batchSize := uint32(50)
|
||
|
||
for i := start; i <= total; i += batchSize {
|
||
end := i + batchSize - 1
|
||
if end > total {
|
||
end = total
|
||
}
|
||
var seqSet imap.SeqSet
|
||
seqSet.AddRange(i, end)
|
||
|
||
msgs, err := cl.c.Fetch(seqSet, &imap.FetchOptions{
|
||
Envelope: true,
|
||
BodySection: []*imap.FetchItemBodySection{fullMsgSec},
|
||
}).Collect()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("IMAP fetch batch %d-%d: %w", i, end, err)
|
||
}
|
||
|
||
for _, msg := range msgs {
|
||
if msg.Envelope == nil {
|
||
continue
|
||
}
|
||
m := MessageWithBody{Message: parseMessage(msg)}
|
||
|
||
rawMsg := msg.FindBodySection(fullMsgSec)
|
||
if rawMsg != nil {
|
||
body, err := extractBodyAndAttachments(rawMsg)
|
||
if err != nil {
|
||
slog.Warn("[Email] MIME-Parsing fehlgeschlagen", "betreff", m.Subject, "fehler", err)
|
||
} else {
|
||
m.Body = body
|
||
}
|
||
}
|
||
result = append(result, m)
|
||
}
|
||
}
|
||
return result, nil
|
||
}
|
||
|
||
// extractBodyAndAttachments parst eine rohe RFC822-Nachricht und gibt den kombinierten Text zurück.
|
||
// Text/plain-Teile werden direkt übernommen, PDF-Anhänge werden in Text extrahiert.
|
||
func extractBodyAndAttachments(rawMsg []byte) (string, error) {
|
||
parsed, err := mail.ReadMessage(bytes.NewReader(rawMsg))
|
||
if err != nil {
|
||
return "", fmt.Errorf("mail parsen: %w", err)
|
||
}
|
||
|
||
contentType := parsed.Header.Get("Content-Type")
|
||
mediaType, params, err := mime.ParseMediaType(contentType)
|
||
if err != nil {
|
||
// Kein valider Content-Type — Body direkt lesen
|
||
body, readErr := io.ReadAll(parsed.Body)
|
||
if readErr != nil {
|
||
return "", fmt.Errorf("body lesen: %w", readErr)
|
||
}
|
||
enc := strings.ToLower(parsed.Header.Get("Content-Transfer-Encoding"))
|
||
return decodeBody(body, enc), nil
|
||
}
|
||
|
||
var parts []string
|
||
|
||
if strings.HasPrefix(mediaType, "multipart/") {
|
||
boundary := params["boundary"]
|
||
mr := multipart.NewReader(parsed.Body, boundary)
|
||
for {
|
||
part, err := mr.NextPart()
|
||
if err == io.EOF {
|
||
break
|
||
}
|
||
if err != nil {
|
||
slog.Warn("[Email] Multipart-Teil fehlgeschlagen", "fehler", err)
|
||
break
|
||
}
|
||
|
||
partContentType := part.Header.Get("Content-Type")
|
||
partMediaType, _, parseErr := mime.ParseMediaType(partContentType)
|
||
if parseErr != nil {
|
||
part.Close()
|
||
continue
|
||
}
|
||
|
||
enc := strings.ToLower(part.Header.Get("Content-Transfer-Encoding"))
|
||
data, readErr := io.ReadAll(part)
|
||
part.Close()
|
||
if readErr != nil {
|
||
continue
|
||
}
|
||
|
||
switch {
|
||
case partMediaType == "text/plain":
|
||
parts = append(parts, decodeBody(data, enc))
|
||
case partMediaType == "application/pdf":
|
||
pdfText, pdfErr := extractPDFTextFromBytes(data, enc)
|
||
if pdfErr != nil {
|
||
slog.Warn("[Email] PDF-Anhang konnte nicht gelesen werden", "fehler", pdfErr)
|
||
} else if pdfText != "" {
|
||
parts = append(parts, "[PDF-Anhang] "+pdfText)
|
||
}
|
||
}
|
||
}
|
||
} else {
|
||
// Einfache (nicht-multipart) Nachricht
|
||
body, readErr := io.ReadAll(parsed.Body)
|
||
if readErr != nil {
|
||
return "", fmt.Errorf("body lesen: %w", readErr)
|
||
}
|
||
enc := strings.ToLower(parsed.Header.Get("Content-Transfer-Encoding"))
|
||
parts = append(parts, decodeBody(body, enc))
|
||
}
|
||
|
||
combined := strings.TrimSpace(strings.Join(parts, "\n"))
|
||
if len(combined) > 2000 {
|
||
combined = combined[:2000]
|
||
}
|
||
return combined, nil
|
||
}
|
||
|
||
// extractPDFTextFromBytes dekodiert die rohen Anhang-Bytes (ggf. base64) und extrahiert PDF-Text.
|
||
func extractPDFTextFromBytes(data []byte, enc string) (string, error) {
|
||
// PDF-Anhänge sind fast immer base64-kodiert
|
||
var pdfBytes []byte
|
||
switch enc {
|
||
case "base64":
|
||
cleaned := strings.ReplaceAll(strings.TrimSpace(string(data)), "\r\n", "")
|
||
cleaned = strings.ReplaceAll(cleaned, "\n", "")
|
||
decoded, err := base64.StdEncoding.DecodeString(cleaned)
|
||
if err != nil {
|
||
decoded, err = base64.RawStdEncoding.DecodeString(cleaned)
|
||
if err != nil {
|
||
return "", fmt.Errorf("base64 dekodieren: %w", err)
|
||
}
|
||
}
|
||
pdfBytes = decoded
|
||
default:
|
||
pdfBytes = data
|
||
}
|
||
|
||
// PDF in temporäre Datei schreiben, da die pdf-Bibliothek einen Datei-Pfad erwartet
|
||
tmp, err := os.CreateTemp("", "email-pdf-*.pdf")
|
||
if err != nil {
|
||
return "", fmt.Errorf("temp-Datei anlegen: %w", err)
|
||
}
|
||
tmpPath := tmp.Name()
|
||
defer os.Remove(tmpPath)
|
||
|
||
if _, err := tmp.Write(pdfBytes); err != nil {
|
||
tmp.Close()
|
||
return "", fmt.Errorf("temp-Datei schreiben: %w", err)
|
||
}
|
||
tmp.Close()
|
||
|
||
return extractPDFTextFromFile(tmpPath)
|
||
}
|
||
|
||
// extractPDFTextFromFile liest alle Seiten einer PDF-Datei und gibt den Plain-Text zurück.
|
||
// Dupliziert die Logik aus brain/ingest_pdf.go um Import-Zyklen zu vermeiden.
|
||
func extractPDFTextFromFile(filePath string) (string, error) {
|
||
f, r, err := pdf.Open(filePath)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
defer f.Close()
|
||
|
||
var sb strings.Builder
|
||
totalPages := r.NumPage()
|
||
for pageNum := 1; pageNum <= totalPages; pageNum++ {
|
||
page := r.Page(pageNum)
|
||
if page.V.IsNull() {
|
||
continue
|
||
}
|
||
text, err := page.GetPlainText(nil)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
sb.WriteString(text)
|
||
sb.WriteString("\n")
|
||
}
|
||
return strings.TrimSpace(sb.String()), nil
|
||
}
|
||
|
||
func parseMessages(msgs []*imapclient.FetchMessageBuffer) []Message {
|
||
result := make([]Message, 0, len(msgs))
|
||
for _, msg := range msgs {
|
||
if msg.Envelope == nil {
|
||
continue
|
||
}
|
||
m := Message{
|
||
Subject: msg.Envelope.Subject,
|
||
Date: msg.Envelope.Date.Format("2006-01-02 15:04"),
|
||
}
|
||
if len(msg.Envelope.From) > 0 {
|
||
addr := msg.Envelope.From[0]
|
||
if addr.Name != "" {
|
||
m.From = fmt.Sprintf("%s <%s@%s>", addr.Name, addr.Mailbox, addr.Host)
|
||
} else {
|
||
m.From = fmt.Sprintf("%s@%s", addr.Mailbox, addr.Host)
|
||
}
|
||
}
|
||
result = append(result, m)
|
||
}
|
||
return result
|
||
}
|