###############################################################################
# ddg.tcl
# v1.0.6  ©2025 Te[u]K
# Recherche DuckDuckGo pour Eggdrop (!ddg) + raccourcisseur short.io (optionnel)
###############################################################################

if {[info commands ::teuk::ddg::unload] eq "::teuk::ddg::unload"} { ::teuk::ddg::unload }
if { [lindex [split $::version] 1] < 1080404 } { putloglev o * "\00304[ddg - erreur]\003 Eggdrop trop ancien: \00304${::version}\003 (>= 1.8.4 requis). Chargement annulé."; return }
if { [catch { package require Tcl 8.6 } e] } { putloglev o * "\00304[ddg - erreur]\003 Tcl 8.6+ requis. Version actuelle: \00304${::tcl_version}\003. Chargement annulé."; return }
if { [catch { package require http } e] } { putloglev o * "\00304[ddg - erreur]\003 package http requis : \00304$e\003"; return }
if { [catch { package require tls } e] } { putloglev o * "\00304[ddg - erreur]\003 package tls requis pour HTTPS : \00304$e\003"; return }

# Force UTF-8 (interne Tcl + stdout/stderr)
catch { encoding system utf-8 }
catch { fconfigure stdout -encoding utf-8 }
catch { fconfigure stderr -encoding utf-8 }

package provide ddg 1.0.6

###############################################################################
# NAMESPACE
###############################################################################

namespace eval ::teuk::ddg {
    variable version "1.0.6"
    variable debug   3          ;# 0=silence, 1=info, 2=debug, 3=trace

    variable base_url     "https://duckduckgo.com/html/"
    variable max_results  3
    variable max_snippet  160

    variable color_link  "\00312"
    variable color_reset "\003"

    # Bullet: ne dépend pas de l'encodage du fichier source
    variable bullet "\u2022"

    # short.io
    variable short_api_key ""
    variable short_domain  ""
    variable use_shortio   0

    # check HEAD désactivé par défaut (trop lent)
    variable short_check   0
    variable short_timeout 2500

    variable sleep_ms 0

    catch { setudef flag ddg }
}

###############################################################################
# LOGGING
###############################################################################

proc ::teuk::ddg::log {lvl tag msg} {
    if {$::teuk::ddg::debug >= $lvl} { putlog "ddg($tag): $msg" }
}
proc ::teuk::ddg::log1 {msg} { ::teuk::ddg::log 1 INFO  $msg }
proc ::teuk::ddg::log2 {msg} { ::teuk::ddg::log 2 DEBUG $msg }
proc ::teuk::ddg::log3 {msg} { ::teuk::ddg::log 3 TRACE $msg }

proc ::teuk::ddg::_trace_loop {label iters ms} {
    ::teuk::ddg::log3 "loop:$label iters=$iters ms=$ms"
}

###############################################################################
# HTTP INIT
###############################################################################

::teuk::ddg::log1 "http pkg: [package provide http]"
::teuk::ddg::log1 "tls pkg:  [package provide tls]"

catch { ::http::config -useragent "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:146.0) Gecko/20100101 Firefox/146.0" }
catch { ::tls::init -tls1 1 }

###############################################################################
# UNLOAD
###############################################################################

proc ::teuk::ddg::unload {args} {
    set ns [namespace current]
    ::teuk::ddg::log1 "nettoyage namespace $ns"

    foreach b [binds *] {
        if {[llength $b] < 5} { continue }
        set cmd [lindex $b 4]
        if {[string match "${ns}::*" $cmd]} {
            catch { unbind [lindex $b 0] [lindex $b 1] [lindex $b 2] $cmd }
        }
    }

    catch { ::http::unregister httpsddg }
    catch { namespace delete $ns }
}

###############################################################################
# TLS SOCKET
###############################################################################

proc ::teuk::ddg::_tls_socket {args} {
    set async 0; set host ""; set port ""

    set i 0; set n [llength $args]
    while {$i < $n} {
        set a [lindex $args $i]
        if {![string match -* $a]} {
            if {$host eq ""} {
                set host $a
            } elseif {$port eq ""} {
                set port $a
            }
        }
        incr i
    }

    if {$host eq "" || $port eq ""} { error "_tls_socket: args invalides" }

    if {[catch { set s [::socket $host $port] } e]} {
        return -code error "socket error $host:$port -> $e"
    }

    catch { fconfigure $s -translation binary -encoding binary -buffering none }
    if {$async} { catch { fconfigure $s -blocking 0 } }

    if {[catch { ::tls::import $s -servername $host } e]} {
        catch { close $s }
        return -code error "tls::import a échoué: $e"
    }

    return $s
}

catch { ::http::register httpsddg 443 ::teuk::ddg::_tls_socket }
::teuk::ddg::log2 "http::register httpsddg OK"

###############################################################################
# URL
###############################################################################

proc ::teuk::ddg::_rewrite_url {url} {
    if {[string match -nocase "https://*" $url]} {
        return [string map {"https://" "httpsddg://"} $url]
    }
    return $url
}

###############################################################################
# ENTITY / HTML / URL DECODERS
###############################################################################

proc ::teuk::ddg::_decode_entities {s} {
    set res $s
    set res [string map {
        "&nbsp;" " "
        "&amp;"  "&"
        "&lt;"   "<"
        "&gt;"   ">"
        "&quot;" "\""
        "&#39;"  "'"
    } $res]

    while {[regexp -nocase {&#x([0-9a-f]+);?} $res -> hex]} {
        set ch [format %c 0x$hex]
        regsub -nocase "&#x$hex;?" $res $ch res
    }

    while {[regexp {&#([0-9]+);?} $res -> dec]} {
        if {$dec > 1114111} break
        set ch [format %c $dec]
        regsub "&#$dec;?" $res $ch res
    }

    return $res
}

proc ::teuk::ddg::_strip_html {s} {
    regsub -all {<[^>]+>} $s "" s
    return [string trim [::teuk::ddg::_decode_entities $s]]
}

# FIX RACINE UTF-8:
# - on reconstruit des OCTETS (bytearray)
# - puis on décode UTF-8 une fois
proc ::teuk::ddg::_urldecode {s} {
    set t0 [clock milliseconds]

    # '+' -> espace
    regsub -all {\+} $s " " s

    set bytes ""
    set i 0
    set len [string length $s]

    while {$i < $len} {
        set c [string index $s $i]
        if {$c eq "%" && $i+2 < $len} {
            set h [string range $s [expr {$i+1}] [expr {$i+2}]]
            if {[string is xdigit -strict $h]} {
                append bytes [binary format H2 $h]
                incr i 3
                continue
            }
        }
        # caractère ASCII normal
        append bytes $c
        incr i
    }

    # decode UTF-8 (si ça pète, on garde bytes tel quel)
    if {[catch { set out [encoding convertfrom utf-8 $bytes] }]} {
        set out $bytes
    }

    if {$::teuk::ddg::debug >= 3} {
        ::teuk::ddg::_trace_loop "urldecode" $len [expr {[clock milliseconds]-$t0}]
    }
    return $out
}

proc ::teuk::ddg::_normalize_url {url} {
    set url [string trim $url]

    if {[string match {//*} $url]} {
        set url "https:$url"
    }

    # redirect classique DDG
    if {[regexp {duckduckgo\.com/l/\?uddg=([^&]+)} $url -> enc]} {
        set dec [::teuk::ddg::_urldecode $enc]
        if {$dec ne ""} { return $dec }
    }

    # parfois param u3 (ads)
    if {[regexp {[\?&]u3=([^&]+)} $url -> enc3]} {
        set dec3 [::teuk::ddg::_urldecode $enc3]
        if {$dec3 ne ""} { return $dec3 }
    }

    return $url
}

###############################################################################
# SHORT.IO
###############################################################################

proc ::teuk::ddg::_check_head {url} {
    set u [::teuk::ddg::_rewrite_url $url]
    if {[catch { set t [::http::geturl $u -method HEAD -timeout $::teuk::ddg::short_timeout] } e]} {
        return [list 0 $e]
    }
    set c [::http::ncode $t]
    ::http::cleanup $t
    if {$c >= 200 && $c < 400} { return [list 1 $c] }
    return [list 0 $c]
}

proc ::teuk::ddg::_shorten_url {url} {
    if {!$::teuk::ddg::use_shortio} { return $url }
    if {$::teuk::ddg::short_api_key eq ""} { return $url }
    if {$::teuk::ddg::short_domain eq ""} { return $url }

    set api "https://api.short.io/links"
    set api [::teuk::ddg::_rewrite_url $api]
    set payload "{\"domain\":\"$::teuk::ddg::short_domain\",\"originalURL\":\"$url\",\"allowDuplicates\":false}"

    set headers [list \
        accept "application/json" \
        Content-Type "application/json; charset=utf-8" \
        authorization $::teuk::ddg::short_api_key \
    ]

    ::teuk::ddg::log2 "short.io: POST $api pour $url (domain=$::teuk::ddg::short_domain)"

    if {[catch {
        set tok [::http::geturl $api -method POST -headers $headers -type "application/json" -query $payload -timeout 8000]
    } e]} {
        ::teuk::ddg::log3 "short.io: POST error -> $e"
        return $url
    }

    set code [::http::ncode $tok]
    set body [::http::data $tok]
    ::http::cleanup $tok

    if {$code < 200 || $code >= 300} {
        ::teuk::ddg::log3 "short.io: HTTP $code -> fallback"
        return $url
    }

    if {![regexp {\"shortURL\"\s*:\s*\"([^\"]+)\"} $body -> short]} {
        ::teuk::ddg::log3 "short.io: pas de shortURL -> fallback"
        return $url
    }

    if {[string match "http://*" $short]} {
        set short "https://[string range $short 7 end]"
    }

    if {!$::teuk::ddg::short_check} {
        return $short
    }

    lassign [::teuk::ddg::_check_head $short] ok info
    ::teuk::ddg::log3 "short.io check: $short -> $ok ($info)"
    if {$ok} { return $short }

    ::teuk::ddg::log3 "short.io check: KO -> fallback URL longue"
    return $url
}

###############################################################################
# HTTP FETCH
###############################################################################

proc ::teuk::ddg::_fetch_url {url {depth 0}} {
    if {$depth > 5} { return -code error "too many redirects" }

    set u [::teuk::ddg::_rewrite_url $url]
    ::teuk::ddg::log2 "GET $u (depth=$depth)"

    if {[catch { set tok [::http::geturl $u -timeout 15000] } e]} {
        return -code error "http error: $e"
    }

    set code [::http::ncode $tok]
    set stat [::http::status $tok]
    set meta [::http::meta $tok]
    set body [::http::data $tok]
    ::http::cleanup $tok

    ::teuk::ddg::log1 "HTTP $code pour $u (status=$stat len=[string length $body])"

    if {$code >= 300 && $code < 400} {
        set loc ""
        set tloop [clock milliseconds]
        set it 0
        foreach {k v} $meta {
            incr it
            if {[string tolower $k] eq "location"} { set loc $v; break }
        }
        if {$::teuk::ddg::debug >= 3} {
            ::teuk::ddg::_trace_loop "fetch_meta" $it [expr {[clock milliseconds]-$tloop}]
        }
        if {$loc ne ""} {
            ::teuk::ddg::log2 "redirection -> $loc"
            return [::teuk::ddg::_fetch_url $loc [expr {$depth+1}]]
        }
    }

    if {$code < 200 || $code >= 300} { return -code error "http $code" }

    # DDG HTML est en UTF-8 (normalement). On convertit une fois.
    catch { set body [encoding convertfrom utf-8 $body] }

    ::teuk::ddg::log3 "BODY(0..400): [string range $body 0 400]"
    return $body
}

###############################################################################
# PARSE RESULTS
###############################################################################

proc ::teuk::ddg::parse_results {html} {
    if {[string match "*Bots use DuckDuckGo too.*" $html]} {
        ::teuk::ddg::log1 "page anti-bot DDG détectée"
        return {}
    }

    set max   $::teuk::ddg::max_results
    set limit $::teuk::ddg::max_snippet
    set lenHtml [string length $html]

    set positions [regexp -all -inline -indices -nocase -- {result__a} $html]
    ::teuk::ddg::log2 "parse_results: tokens 'result__a' trouvés=[llength $positions]"

    set results {}
    set idx 0

    set tloop [clock milliseconds]
    set iters 0
    foreach pair $positions {
        incr iters
        lassign $pair start end
        if {[llength $results] >= $max} { break }

        set searchStart [expr {$start - 600}]
        if {$searchStart < 0} { set searchStart 0 }

        set before [string range $html $searchStart $start]
        set rel [string last "<a" $before]
        if {$rel < 0} { continue }

        set tagStart [expr {$searchStart + $rel}]
        set tagEnd [string first ">" $html $end]
        if {$tagEnd < 0} { continue }

        set tag [string range $html $tagStart $tagEnd]

        if {![regexp -nocase {class="[^"]*result__a[^"]*"} $tag]} { continue }
        if {![regexp {href="([^"]+)} $tag -> urlHtml]} { continue }

        set url [::teuk::ddg::_normalize_url $urlHtml]

        # éviter de renvoyer des liens DDG moches
        if {[regexp -nocase {^https?://(duckduckgo\.com/|html\.duckduckgo\.com/)} $url]} {
            continue
        }

        if {![string match -nocase "http://*" $url] &&
            ![string match -nocase "https://*" $url]} { continue }

        set innerStart [expr {$tagEnd + 1}]
        set innerEnd   [string first "</a>" $html $innerStart]
        if {$innerEnd < 0} {
            set innerEnd [expr {$innerStart + 300}]
            if {$innerEnd >= $lenHtml} { set innerEnd [expr {$lenHtml - 1}] }
        }

        set titleHtml [string range $html $innerStart [expr {$innerEnd - 1}]]
        set title [::teuk::ddg::_strip_html $titleHtml]

        set winStart $innerEnd
        set winEnd [expr {$winStart + 2000}]
        if {$winEnd >= $lenHtml} { set winEnd [expr {$lenHtml - 1}] }

        set window [string range $html $winStart $winEnd]
        set snip "-"

        if {[regexp -nocase -- {class="[^"]*result__snippet[^"]*"[^>]*>(.*?)</} $window -> snHtml]} {
            set snip [::teuk::ddg::_strip_html $snHtml]
        }

        if {[string length $snip] > $limit} {
            set snip "[string range $snip 0 [expr {$limit-4}]]..."
        }

        if {$title eq "" || $url eq ""} { continue }

        incr idx
        lappend results [list $title $snip $url]
        ::teuk::ddg::log3 "parse_results: résultat #$idx -> title='$title' url='$url'"
    }

    if {$::teuk::ddg::debug >= 3} {
        ::teuk::ddg::_trace_loop "parse_positions" $iters [expr {[clock milliseconds]-$tloop}]
    }

    ::teuk::ddg::log1 "parse_results: [llength $results] résultat(s) trouvé(s)"
    return $results
}

###############################################################################
# SEND IRC
###############################################################################

proc ::teuk::ddg::_send_lines {target lines} {
    set t0 [clock milliseconds]
    set it 0
    foreach l $lines {
        incr it
        if {$l eq ""} { continue }

        # IMPORTANT: ne pas "convertto utf-8" ici => double-encodage assuré
        puthelp "PRIVMSG $target :$l"

        after $::teuk::ddg::sleep_ms
    }
    if {$::teuk::ddg::debug >= 3} {
        ::teuk::ddg::_trace_loop "send_lines" $it [expr {[clock milliseconds]-$t0}]
    }
}

###############################################################################
# PIPELINE
###############################################################################

proc ::teuk::ddg::do_search {nick target query} {
    set q [string trim $query]
    if {$q eq ""} {
        puthelp "NOTICE $nick :Syntaxe: !ddg <recherche>"
        return
    }

    ::teuk::ddg::log1 "CMD !ddg par $nick sur $target : '$q'"

    set qenc [::http::formatQuery q $q]
    set url "${::teuk::ddg::base_url}?$qenc&kl=fr-fr&kp=-2"
    ::teuk::ddg::log2 "URL: $url"

    if {[catch { set body [::teuk::ddg::_fetch_url $url] } e]} {
        ::teuk::ddg::log1 "fetch error: $e"
        puthelp "PRIVMSG $target :Erreur DDG: $e"
        return
    }

    set res [::teuk::ddg::parse_results $body]
    if {[llength $res] == 0} {
        puthelp "PRIVMSG $target :Aucun résultat pour « $q »."
        return
    }

    set lines {}
    set t0 [clock milliseconds]
    set it 0
    foreach r $res {
        incr it
        lassign $r title snip url
        set url  [::teuk::ddg::_normalize_url $url]
        set surl [::teuk::ddg::_shorten_url $url]

        if {$snip eq "-" || $snip eq ""} {
            set line "$::teuk::ddg::bullet $title $::teuk::ddg::color_link\037$surl\037$::teuk::ddg::color_reset"
        } else {
            set line "$::teuk::ddg::bullet $title — $snip $::teuk::ddg::color_link\037$surl\037$::teuk::ddg::color_reset"
        }
        lappend lines $line
    }
    if {$::teuk::ddg::debug >= 3} {
        ::teuk::ddg::_trace_loop "build_lines" $it [expr {[clock milliseconds]-$t0}]
    }

    ::teuk::ddg::_send_lines $target $lines
}

###############################################################################
# BINDS
###############################################################################

proc ::teuk::ddg::cmd_ddg_pub {nick uhost hand chan text} {
    if {![channel get $chan ddg]} {
        ::teuk::ddg::log2 "chan $chan sans +ddg, ignore."
        return
    }

    set q [string trim $text]
    if {$q eq ""} {
        puthelp "PRIVMSG $chan :Syntaxe: !ddg <recherche>"
        return
    }

    if {[catch { ::teuk::ddg::do_search $nick $chan $q } e]} {
        ::teuk::ddg::log1 "erreur interne pub: $e"
        puthelp "PRIVMSG $chan :Erreur interne: $e"
    }
}
bind pub - "!ddg" ::teuk::ddg::cmd_ddg_pub

proc ::teuk::ddg::msg_ddg {nick uhost hand text} {
    if {$text eq ""} {
        puthelp "NOTICE $nick :Syntaxe: ddg <recherche>"
        return
    }
    if {[catch { ::teuk::ddg::do_search $nick $nick $text } e]} {
        ::teuk::ddg::log1 "erreur interne msg: $e"
        puthelp "NOTICE $nick :Erreur interne: $e"
    }
}
bind msg -|- ddg ::teuk::ddg::msg_ddg

###############################################################################
# PARTYLINE
###############################################################################

proc teuk::ddg::setdebug {lvl} {
    if {![string is integer -strict $lvl]} {
        return "usage: .tcl eval teuk::ddg::setdebug <0-3>"
    }
    set ::teuk::ddg::debug $lvl
    return "ddg: debug=$lvl"
}

proc teuk::ddg::setshort {apikey domain} {
    set ::teuk::ddg::short_api_key $apikey
    set ::teuk::ddg::short_domain  $domain
    set ::teuk::ddg::use_shortio   1
    return "ddg: short.io activé (domaine='$domain')"
}

proc teuk::ddg::setshortcheck {onoff} {
    if {![string is integer -strict $onoff]} { return "usage: .tcl eval teuk::ddg::setshortcheck <0|1>" }
    set ::teuk::ddg::short_check $onoff
    return "ddg: short_check=$onoff (0=off par défaut)"
}

###############################################################################
# END
###############################################################################

putlog "ddg: script ddg.tcl v$::teuk::ddg::version chargé (auteur : Te\[u]K). Commande !ddg, chanset +ddg requis."
bind evnt - prerehash ::teuk::ddg::unload
