#!/usr/bin/bash

# Copyright (c) 2020-2024, Windscribe Limited. All rights reserved.
# Usage: dns-leak-protect up <vpn_iface> [allowed_servers...] | down
#
# Installs a DROP-only filter chain (`windscribe_dnsleaks`) into both
# `iptables` and `ip6tables` that blocks UDP/TCP port 53 traffic to every
# OS-default DNS resolver except the ones passed as positional arguments.
# This catches dual-stack hosts where the VPN pushes only IPv4 DNS but the
# physical link still advertises v6 resolvers via RA RDNSS / DHCPv6 — without
# the v6 mirror, applications that resolve through those v6 servers leak
# outside the tunnel even when the v4 nameserver order in /etc/resolv.conf
# is correct.

# Append the system administration directories as a fallback search location;
# entries already present in the inherited PATH keep their priority.
PATH="${PATH}:/usr/local/sbin:/usr/sbin:/sbin"

cmd() {
    # Trace the full command line on stderr, then execute it unchanged. The
    # exit status of the executed command is the exit status of cmd.
    printf '%s\n' "[#] $*" >&2
    "$@"
}

# Drop every entry that appears in the `allowed` array (tunnel DNS) from
# dns_array_v4 / dns_array_v6 in place, so the emitted rules can never
# blacklist a resolver the VPN legitimately uses.
remove_allowed_from_arrays()
{
    # Globals (caller-scoped): reads `allowed`; rewrites `dns_array_v4` and
    # `dns_array_v6` in place, keeping the original order of the survivors.
    #
    # The comparison is case-insensitive: IPv6 hex digits may be written in
    # either case, so an allowed server given as `2001:DB8::1` must still
    # remove a snapshot entry spelled `2001:db8::1`. (Differently-compressed
    # spellings of the same address are still treated as distinct strings.)
    local -a kept
    local ip a skip

    kept=()
    for ip in "${dns_array_v4[@]}"; do
        skip=0
        for a in "${allowed[@]}"; do
            if [[ ${ip,,} == "${a,,}" ]]; then
                skip=1
                break
            fi
        done
        (( skip )) || kept+=("$ip")
    done
    dns_array_v4=("${kept[@]}")

    kept=()
    for ip in "${dns_array_v6[@]}"; do
        skip=0
        for a in "${allowed[@]}"; do
            if [[ ${ip,,} == "${a,,}" ]]; then
                skip=1
                break
            fi
        done
        (( skip )) || kept+=("$ip")
    done
    dns_array_v6=("${kept[@]}")
}

# Surface failures from the iptables/ip6tables toolchain to both stderr (for
# OpenVPN's log capture) and syslog (so they are visible via journalctl even
# when the script is invoked outside of OpenVPN). Returns 0 on purpose:
# logging must not flip a caller's exit code.
log_warn() {
    # Copy for stderr first ...
    printf '%s\n' "dns-leak-protect: WARNING: $*" >&2
    # ... then a best-effort copy for syslog when `logger` is available; its
    # own error output is discarded.
    if command -v logger &>/dev/null; then
        logger -t dns-leak-protect -p user.warning -- "$*" 2>/dev/null
    fi
    # Unconditional success, whatever logger did.
    return 0
}

# Strict IPv6 syntax check. The extraction gate (`[0-9a-fA-F:]+` + `NF>=3`) is
# permissive enough to pass incomplete/malformed tokens (e.g. `2606:4700:4700`,
# `:::::`, or >8 groups) that `ip6tables-restore` rejects. Because a family's
# DROP rules are loaded as one atomic restore batch, a single rejected token
# aborts the whole batch and silently leaves every OS-default v6 resolver open.
# Validate here so a bad entry costs at most one resolver, never the family.
is_valid_ipv6()
{
    # Arguments: $1 - candidate address, bare (no %zone-id, no brackets).
    # Returns:   0 when $1 is a syntactically valid hex-and-colon IPv6
    #            address, 1 otherwise. Forms containing a dotted IPv4 tail are
    #            rejected by the character-class check below.
    local ip="$1"
    [[ -n $ip ]] || return 1
    # Hex digits and colons only (the %zone-id suffix is stripped upstream).
    [[ $ip =~ ^[0-9A-Fa-f:]+$ ]] || return 1

    case "$ip" in
        # "::" is the only legal multi-colon token and may appear at most
        # once, so a run of three or more colons (":::", "1:::2") or a second
        # "::" is malformed.
        *:::* | *::*::*) return 1 ;;
        # A single colon only separates two groups. At either end of the
        # address just the "::" marker is allowed, so ":1::2", "1::2:" and
        # "1:2:3:4:5:6:7:8:" are malformed.
        :[!:]* | *[!:]:) return 1 ;;
    esac

    local compressed=0
    [[ $ip == *::* ]] && compressed=1

    local -a groups
    IFS=':' read -r -a groups <<< "$ip"
    local g count=0
    for g in "${groups[@]}"; do
        # Empty fields come from the "::" marker.
        [[ -z $g ]] && continue
        (( ${#g} <= 4 )) || return 1
        count=$(( count + 1 ))
    done

    if (( compressed )); then
        # "::" stands in for one or more zero groups, so 0..7 explicit groups
        # are valid (8 would leave no room for the compression).
        (( count <= 7 )) || return 1
    else
        # No compression: a full address is exactly 8 groups.
        (( count == 8 )) || return 1
    fi
    return 0
}

# Populate `dns_array_v4` from systemd-resolved. The awk filter strips the
# line that belongs to our tunnel link (matched by the caller-scoped
# `vpn_iface`, e.g. "(utun420)") so we don't blacklist our own VPN DNS server;
# an empty vpn_iface disables the exclusion. Everything after the first colon
# is the address list; `tr -s '[:space:]' '\n'` puts one token per line (kept
# symmetric with the v6 sibling whose `%zone-id` strip depends on per-line
# pattern space) and tolerates any whitespace separator should a future
# resolvectl version switch from spaces to tabs. The dotted-quad regex filters
# out non-v4.
get_os_default_dnsservers_resolvectl_v4()
{
    # Globals: reads the caller-scoped `vpn_iface`; overwrites `dns_array_v4`
    # with the sorted, de-duplicated IPv4 resolvers reported by
    # `resolvectl dns` for every link except the tunnel.
    #
    # systemd-resolved may print a server as `addr[:port][%ifname][#sni]`
    # (e.g. `1.1.1.1#cloudflare-dns.com` for a DNS-over-TLS setup). Those
    # decorations are stripped before the dotted-quad filter; otherwise such
    # a resolver would fail the filter and never be blocked. The strip order
    # follows the suffix order: `#sni` first, then `%ifname`, then `:port`.
    mapfile -t dns_array_v4 < <(
        resolvectl dns |
            awk -v ifc="$vpn_iface" -F: 'ifc == "" || index($1, "(" ifc ")") == 0 {st = index($0,":"); print substr($0,st+1)}' |
            tr -s '[:space:]' '\n' |
            sed -E 's/#.*$//; s/%.*$//; s/:[0-9]+$//' |
            grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' |
            sort -u
    )
}

# Same as the v4 variant, but extract IPv6 tokens. The first colon in each
# resolvectl line is the separator between the link header and the address
# list; subsequent colons belong to the addresses themselves. We strip the
# optional `%zone-id` suffix (e.g. fe80::1%eth0) because ip6tables `-d` wants
# a bare address — the zone-id is routing-layer metadata, not part of the
# on-wire packet. The address shape gate is `[0-9a-fA-F:]+` plus `NF>=3`
# (at least two colons) so we accept `::1`/`fe80::1`/`2606:4700:4700::1111`
# but reject bare hex tokens like `abcd` that match the character class.
#
# The whitespace split MUST use external `tr` rather than the in-sed
# `s/[[:space:]]/\n/g`. The subsequent `s/%.*$//` strip relies on each token
# sitting in its own pattern space — `.*` in GNU sed traverses embedded `\n`
# and `$` anchors to the end of the *whole* pattern space, so a sed-only
# split would drop every token after a zoned `fe80::…%iface` on the same
# line. `-s` squeezes runs of whitespace so tab/space mixes don't yield
# empty tokens.
get_os_default_dnsservers_resolvectl_v6()
{
    # Globals: reads the caller-scoped `vpn_iface`; overwrites `dns_array_v6`
    # with the sorted, de-duplicated IPv6 resolvers reported by
    # `resolvectl dns` for every link except the tunnel.
    #
    # Besides the `%zone-id`, systemd-resolved may decorate a server with a
    # `#sni` name and, for IPv6 with a port, wrap the address in brackets
    # (`[addr]:port`). All of that is removed per token, in this order:
    #   1. `#sni`            (always the last suffix)
    #   2. the leading `[`
    #   3. `]` and whatever follows it (the `:port`)
    #   4. `%zone-id`        (whether it sat inside or outside the brackets)
    # Without steps 1-3 an entry such as `2606:4700:4700::1111#name` fails the
    # hex/colon filter below and would never be blocked.
    mapfile -t dns_array_v6 < <(
        resolvectl dns |
            awk -v ifc="$vpn_iface" -F: 'ifc == "" || index($1, "(" ifc ")") == 0 {st = index($0,":"); print substr($0,st+1)}' |
            tr -s '[:space:]' '\n' |
            sed 's/#.*$//; s/^[[]//; s/[]].*$//; s/%.*$//' |
            grep -E '^[0-9a-fA-F:]+$' |
            awk -F: 'NF>=3' |
            sort -u
    )
}


get_os_default_dnsservers_NMCLI_v4()
{
    # Overwrites `dns_array_v4` with the first whitespace-delimited token
    # found after the first colon of every nmcli row matching IP4.DNS.
    # `nmcli dev list` is tried first and `nmcli dev show` is the fallback;
    # error output of both is discarded. Order and duplicates are kept as
    # nmcli printed them.
    mapfile -t dns_array_v4 < <(
        { nmcli dev list || nmcli dev show; } 2>/dev/null |
            awk '/IP4.DNS/ {
                     value = substr($0, index($0, ":") + 1)
                     n = split(value, tok, " ")
                     if (n > 0 && tok[1]) print tok[1]
                 }'
    )
}

# nmcli typically prints IP6.DNS rows as `IP6.DNS[N]: <addr>` without a zone
# suffix, but the format is unverified across nmcli versions and link-local
# RA RDNSS servers (fe80::/10) are stored with %zone-id internally — some
# builds expose the zone in the IP6.DNS row. We strip `%.*$` defensively so
# `ip6tables-restore` never sees a `-d fe80::1%eth0` token (which would abort
# the entire batch atomically and leave v6 unprotected).
get_os_default_dnsservers_NMCLI_v6()
{
    # Overwrites `dns_array_v6` with the first whitespace-delimited token
    # found after the first colon of every nmcli row matching IP6.DNS, with
    # any `%zone-id` suffix removed. `nmcli dev list` is tried first and
    # `nmcli dev show` is the fallback; error output of both is discarded.
    # Order and duplicates are kept as nmcli printed them.
    mapfile -t dns_array_v6 < <(
        { nmcli dev list || nmcli dev show; } 2>/dev/null |
            awk '/IP6.DNS/ {
                     value = substr($0, index($0, ":") + 1)
                     n = split(value, tok, " ")
                     if (n > 0 && tok[1]) {
                         addr = tok[1]
                         sub(/%.*$/, "", addr)
                         print addr
                     }
                 }'
    )
}

# Build a `*filter`/`COMMIT` blob for one address family and pipe it into the
# matching `-restore -n`. The chain header + OUTPUT jump are always emitted
# (even on an empty blacklist) so `dnsleak_protection_down_family` can find
# them via the marker comment in subsequent runs — same invariant the original
# v4-only script relied on.
#
# `ip6tables-restore`/`ip6tables-save` may be absent on minimal/CLI installs
# that ship only the v4 toolchain (e.g. some container images). We skip the
# v6 branch silently in that case rather than failing the whole script —
# there is no v6 DNS server reachable from the host anyway when the v6
# stack/toolchain is missing, so there's nothing to leak. We require BOTH
# tools (not just `-restore`) so the symmetric `down` path can later tear
# down anything we install — otherwise a partial-toolchain host would end
# up with a chain that can never be removed.
build_and_load_rules()
{
    # Arguments: $1 - address family: "v4", anything else is treated as v6
    #            $2 - tunnel interface name (may be empty)
    # Globals:   reads the caller-scoped dns_array_v4 / dns_array_v6.
    # Returns:   0 when the batch was loaded, or when the v6 save/restore
    #            tools are not both available (v6 is then skipped);
    #            1 when the restore tool rejects the batch.
    local family="$1"
    local vpn_iface="$2"
    local -a servers
    local restore_tool

    if [[ $family == "v4" ]]; then
        servers=("${dns_array_v4[@]}")
        restore_tool="iptables-restore"
    else
        servers=("${dns_array_v6[@]}")
        if ! command -v ip6tables-restore &>/dev/null || ! command -v ip6tables-save &>/dev/null; then
            return 0
        fi
        restore_tool="ip6tables-restore"
    fi

    local marker='-m comment --comment "Windscribe client dns leak protection"'

    # The chain header and the OUTPUT jump are emitted even when the
    # blacklist turns out to be empty.
    local -a batch=(
        '*filter'
        ':windscribe_dnsleaks - [0:0]'
        "-I OUTPUT -j windscribe_dnsleaks $marker"
    )

    # A DNS query only leaks if it physically leaves via a non-tunnel,
    # non-loopback interface. The destination-IP DROPs below cannot tell the
    # egress interface apart, so let loopback and tunnel traffic RETURN to
    # OUTPUT before them — this protects the user's local resolver (127.0.0.1)
    # and the tunnel DNS regardless of how an address ended up in the list.
    batch+=("-A windscribe_dnsleaks -o lo -j RETURN $marker")
    if [[ -n $vpn_iface ]]; then
        batch+=("-A windscribe_dnsleaks -o $vpn_iface -j RETURN $marker")
    fi

    local v4_re='^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$'
    local dnsIp octet valid
    for dnsIp in "${servers[@]}"; do
        # Loopback / unspecified addresses can never egress a physical link.
        if [[ $family == "v4" ]]; then
            [[ $dnsIp == 127.* || $dnsIp == "0.0.0.0" ]] && continue
            # The restore batch is atomic for v4 exactly as it is for v6: one
            # token the tool rejects aborts every rule of the family. Check
            # the dotted-quad shape and the 0..255 octet range so a bad entry
            # costs at most that one resolver.
            valid=1
            if [[ $dnsIp =~ $v4_re ]]; then
                for octet in "${BASH_REMATCH[@]:1}"; do
                    # 10# forces base 10 so "08"/"09" are not parsed as octal.
                    (( 10#$octet <= 255 )) || valid=0
                done
            else
                valid=0
            fi
            if (( ! valid )); then
                log_warn "skipping malformed IPv4 DNS server token: $dnsIp"
                continue
            fi
        else
            [[ $dnsIp == "::1" || $dnsIp == "::" ]] && continue
            # Reject malformed tokens before they reach the atomic restore batch,
            # where a single bad entry would abort the whole family (see
            # is_valid_ipv6). Skipping one token only loses that one resolver.
            if ! is_valid_ipv6 "$dnsIp"; then
                log_warn "skipping malformed IPv6 DNS server token: $dnsIp"
                continue
            fi
        fi
        batch+=("-A windscribe_dnsleaks -d $dnsIp -p udp --dport 53 -j DROP $marker")
        batch+=("-A windscribe_dnsleaks -d $dnsIp -p tcp --dport 53 -j DROP $marker")
    done

    batch+=('COMMIT')

    # `-n` is no-flush mode: the batch is added to the existing ruleset.
    if ! printf '%s\n' "${batch[@]}" | cmd "$restore_tool" -n; then
        # Without this log line an install failure (kernel module missing,
        # malformed token, sysctl-disabled IPv6, etc.) would be invisible to
        # the user, who would assume both stacks are protected.
        log_warn "$restore_tool -n failed for IP$family — DNS leak protection may be inactive for this family"
        return 1
    fi
}

dnsleak_protection_up()
{
    # Arguments: $1 - tunnel interface, $2.. - tunnel DNS servers (allowed).
    # Returns:   1 when called without arguments; otherwise 0 (per-family
    #            load failures are logged, not propagated).
    #
    # Invoked by the DNS-manager scripts (update-resolv-conf /
    # update-network-manager / update-systemd-resolved) at connect time, before
    # the manager swaps system DNS to the tunnel resolver (SetLinkDNS /
    # resolvconf -a runs after us), so the snapshot below is the real pre-VPN OS
    # DNS. OUTPUT rule ordering on a late firewall enable is handled by
    # FirewallController_linux::firewallOnImpl, so no cached/no-arg replay path
    # exists anymore — bail out rather than rebuild an empty chain without args.
    if [[ $# -eq 0 ]]; then
        log_warn "up requires the tunnel interface: dns-leak-protect up <vpn_iface> [allowed_servers...]"
        return 1
    fi

    local -a dns_array_v4=() dns_array_v6=() allowed=()
    local vpn_iface="$1"
    shift
    allowed=("$@")

    # Prefer resolvectl; fall back to nmcli when resolvectl is absent or
    # yields nothing for a family.
    if ! command -v resolvectl &> /dev/null; then
        get_os_default_dnsservers_NMCLI_v4
        get_os_default_dnsservers_NMCLI_v6
    else
        get_os_default_dnsservers_resolvectl_v4
        get_os_default_dnsservers_resolvectl_v6
        if [[ ${#dns_array_v4[@]} -eq 0 ]]; then
            get_os_default_dnsservers_NMCLI_v4
        fi
        if [[ ${#dns_array_v6[@]} -eq 0 ]]; then
            get_os_default_dnsservers_NMCLI_v6
        fi
    fi

    # Append IPv4 default-route nexthops — many home routers act as DNS
    # forwarders on the LAN gateway address without advertising it via DHCP.
    # Skip the tunnel's own default route ($1 == vpn_iface) so the VPN
    # gateway never lands in the blocklist. The v6 equivalent is
    # intentionally not added: the v6 default-route nexthop on RA-configured
    # networks is the router's link-local (fe80::…), which is virtually
    # never used as a DNS server; legitimate v6 forwarders show up in
    # resolvectl/nmcli output via RA RDNSS and are caught above.
    local hexgws hexgw gw known seen
    hexgws=$(awk -v ifc="$vpn_iface" '$2 == "00000000" && $1 != ifc {print $3}' /proc/net/route)
    # Intentionally unquoted: awk prints one gateway per line and word
    # splitting turns that into one loop item each.
    for hexgw in $hexgws; do
        # Anything that is not exactly eight hex digits is ignored.
        [[ $hexgw =~ ^[0-9A-Fa-f]{8}$ ]] || continue
        # The four bytes are emitted in reverse order of the hex string
        # (last hex pair becomes the first octet).
        printf -v gw '%d.%d.%d.%d' \
            "0x${hexgw:6:2}" "0x${hexgw:4:2}" "0x${hexgw:2:2}" "0x${hexgw:0:2}"
        # The gateway is very often already listed as a resolver; adding it
        # again would only duplicate its DROP rules.
        seen=0
        for known in "${dns_array_v4[@]}"; do
            if [[ $known == "$gw" ]]; then
                seen=1
                break
            fi
        done
        (( seen )) || dns_array_v4+=("$gw")
    done

    # Strip the tunnel DNS from the snapshot before it is emitted, so it can
    # never be blacklisted by any later code path.
    remove_allowed_from_arrays

    # Tear down any pre-existing windscribe_dnsleaks chain before installing
    # a fresh one. `iptables-restore -n` is no-flush mode, so without this
    # cleanup a second `up` (e.g. NetworkManager-driven DNS refresh, or
    # recovery from a partial previous run) would stack a new `-I OUTPUT -j
    # windscribe_dnsleaks` and accumulate stale DROP rules. As a side effect
    # this down→up cycle re-`-I`s the OUTPUT jump back to pos 1; if the firewall
    # is already enabled, FirewallController_linux::firewallOnImpl keeps its own
    # windscribe_output jump below ours so the DROPs are not shadowed.
    # build_and_load_rules logs the specifics on failure; surface a per-family
    # summary here too so the user-facing log makes clear which stack may
    # still be leaking.
    dnsleak_protection_down_family "iptables-save" "iptables-restore"
    build_and_load_rules "v4" "$vpn_iface" || log_warn "IPv4 DNS leak protection may be incomplete"
    dnsleak_protection_down_family "ip6tables-save" "ip6tables-restore"
    build_and_load_rules "v6" "$vpn_iface" || log_warn "IPv6 DNS leak protection may be incomplete"
}


# Strip the `windscribe_dnsleaks` chain from one address family by parsing
# the matching `*-save` output. Reused for iptables and ip6tables — both
# share the `*filter ... COMMIT` syntax and the same marker convention.
dnsleak_protection_down_family()
{
    # Arguments: $1 - save tool, $2 - restore tool of one address family.
    # Returns:   0 when either tool is missing or no rule of ours was found;
    #            otherwise the status of the restore pipeline.
    local save_tool="$1"
    local restore_tool="$2"

    command -v "$save_tool" &>/dev/null || return 0
    command -v "$restore_tool" &>/dev/null || return 0

    local entry batch="" have_rules=0 in_filter=0
    while read -r entry; do
        # Track whether the current table is `filter`; any other table header
        # switches the flag off again.
        case "$entry" in
            "*filter"*) in_filter=1 ;;
            "*"*)       in_filter=0 ;;
        esac

        # Keep only the table framing (`*table`, COMMIT) and the -A rules that
        # reference our chain by name: the chain's own rules
        # (`-A windscribe_dnsleaks ...`) and jumps into it
        # (`... -j windscribe_dnsleaks`). Keying on the chain name rather than
        # on the `-m comment --comment "..."` framing stays robust to
        # iptables-nft emitting different comment quoting or argument order —
        # a strict-literal comment match could silently leave the chain
        # installed after `down`, and the next `up` would layer on top via -n
        # mode. Anchoring on `-A windscribe_dnsleaks ` / ` -j windscribe_dnsleaks`
        # instead of a bare substring keeps third-party rules that merely
        # mention the name inside a comment out of the teardown.
        case "$entry" in
            "*"* | COMMIT) ;;
            "-A windscribe_dnsleaks "*) ;;
            *" -j windscribe_dnsleaks" | *" -j windscribe_dnsleaks "*) ;;
            *) continue ;;
        esac
        [[ $entry == "-A"* ]] && have_rules=1

        # Delete the chain itself right before the filter table is committed,
        # i.e. after all of its rules and the jump have been removed.
        if (( in_filter )) && [[ $entry == COMMIT ]]; then
            batch+=$'-X windscribe_dnsleaks\n'
        fi
        # Every kept append (-A) is replayed as the matching delete (-D).
        batch+="${entry/#-A/-D}"$'\n'
    done < <("$save_tool" 2>/dev/null)

    # Only touch the ruleset when at least one rule of ours exists.
    if (( have_rules )); then
        printf '%s' "$batch" | cmd "$restore_tool" -n
    fi
}

dnsleak_protection_down()
{
    # Remove our chain from the IPv4 ruleset first, then from the IPv6 one.
    # The status returned is that of the last (IPv6) teardown.
    local tool_prefix
    for tool_prefix in iptables ip6tables; do
        dnsleak_protection_down_family "${tool_prefix}-save" "${tool_prefix}-restore"
    done
}

main()
{
    local action="$1"
    shift 2>/dev/null

    if [[ $action == "up" ]]; then
        # Remaining args are: vpn_iface [allowed_servers...].
        dnsleak_protection_up "$@"
    elif [[ $action == "down" ]]; then
        dnsleak_protection_down
    else
        echo "Usage: dns-leak-protect up <vpn_iface> [servers_to_allow] | down"
        return 1
    fi
}


# Script entry point: hand the command-line arguments to main unchanged.
main "$@"
