From c0fc949006df6f8d93833f823a4f8b98e4247228 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lo=C5=A1=C5=A5=C3=A1k?= Date: Wed, 1 Apr 2026 13:59:27 +0200 Subject: [PATCH] Initial commit --- README.md | 224 +++++++++++++++++++++++++++++++++- custom-domains.txt | 1 + mikrotik-adlist-builder.py | 243 +++++++++++++++++++++++++++++++++++++ 3 files changed, 466 insertions(+), 2 deletions(-) create mode 100644 custom-domains.txt create mode 100644 mikrotik-adlist-builder.py diff --git a/README.md b/README.md index 0ecc143..c173e2a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,223 @@ -# mikrotik-adlist-builder +# Mikrotik Adlist Builder -Tool for building mikrotik adlists for blocking ands and harmful domains. \ No newline at end of file +`mikrotik-adlist-builder` is a small Python tool for building MikroTik adlists used to block ads and harmful domains. + +It can download multiple blocklists from remote URLs, read local files, extract valid domain names from different formats, merge them, remove duplicates, and write the final output in a MikroTik-friendly format: + +```text +0.0.0.0 example.com +0.0.0.0 ads.example.net +``` + +## Features + +- Supports multiple input sources +- Downloads blocklists from `http://` and `https://` URLs +- Reads local files from: + - relative paths such as `./custom-domains.txt` + - absolute paths + - `file://` URLs +- Supports multiple common blocklist formats: + - ABP-style rules such as `||example.com^` + - hosts file syntax such as `0.0.0.0 example.com` + - plain domain lists such as `example.com` +- Removes duplicates automatically +- Filters out invalid entries +- Writes a merged output file ready for MikroTik adlist import + +## Requirements + +- Python 3.9 or newer + +## Installation + +No external dependencies are required. 
+ +Clone the repository or just save the script locally: + +```bash +chmod +x mikrotik-adlist-builder.py +``` + +You can then run it directly: + +```bash +./mikrotik-adlist-builder.py +``` + +Or with Python: + +```bash +python3 mikrotik-adlist-builder.py +``` + +## Default sources + +The script includes a built-in `DEFAULT_URLS` list. Example: + +```python +DEFAULT_URLS = [ + "https://big.oisd.nl/", + "https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts", + "./custom-domains.txt", +] +``` + +This means the tool can combine public online blocklists with your own local domain list. + +## Usage + +### Use default sources + +```bash +python3 mikrotik-adlist-builder.py +``` + +This will create: + +```text +adlist.txt +``` + +### Specify custom URLs + +```bash +python3 mikrotik-adlist-builder.py \ + -u https://big.oisd.nl/ \ + -u https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts +``` + +### Mix remote and local sources + +```bash +python3 mikrotik-adlist-builder.py \ + -u https://big.oisd.nl/ \ + -u ./custom-domains.txt \ + -u ./my-extra-list.txt +``` + +### Use a local file via `file://` + +```bash +python3 mikrotik-adlist-builder.py \ + -u file:///home/user/blocklists/custom.txt +``` + +### Change output file + +```bash +python3 mikrotik-adlist-builder.py \ + -o mikrotik-adlist.txt +``` + +### Full example + +```bash +python3 mikrotik-adlist-builder.py \ + -u https://big.oisd.nl/ \ + -u https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts \ + -u ./custom-domains.txt \ + -o mikrotik-adlist.txt +``` + +## Supported input formats + +### 1. ABP syntax + +Example: + +```text +||example.com^ +||ads.example.net^ +``` + +Some simplified variants are also accepted, for example: + +```text +||example.com +``` + +### 2. Hosts syntax + +Example: + +```text +0.0.0.0 example.com +127.0.0.1 ads.example.net +``` + +### 3. 
Plain domain syntax + +Example: + +```text +example.com +ads.example.net +tracker.example.org +``` + +## Local custom domain file example + +Example `custom-domains.txt`: + +```text +example-bad-site.com +ads.example.net +tracker.example.org +``` + +You can also mix in hosts-style entries: + +```text +0.0.0.0 bad.example.com +127.0.0.1 ads.badsite.net +``` + +And ABP-style rules: + +```text +||tracker.example.org^ +||ads.example.net^ +``` + +## Output format + +The generated file contains one domain per line in this format: + +```text +0.0.0.0 domain.tld +``` + +Example: + +```text +0.0.0.0 ads.example.com +0.0.0.0 tracker.example.net +0.0.0.0 malware.example.org +``` + +## Import into MikroTik + +The resulting file is intended to be used as a source for a MikroTik adlist or for further processing before import, depending on your RouterOS version and setup. + +## Notes + +- Relative local paths are resolved against the current working directory from which you run the script. +- `file://` paths should normally be absolute. +- Duplicate domains are removed automatically. +- Invalid lines, comments, whitelist rules, localhost-style entries, IPv6 entries, and malformed domains are ignored. + +## Example output messages + +```text +[INFO] Downloading: https://big.oisd.nl/ +[INFO] Domains found: 123456 +[INFO] Downloading: ./custom-domains.txt +[INFO] Domains found: 25 +[OK] Output written to: adlist.txt +[OK] Total unique domains: 123470 +``` + +## License + +Use, modify, and distribute freely as needed. 
#!/usr/bin/env python3
"""Build a MikroTik adlist from several blocklist sources.

Each source is an http(s) URL, a ``file://`` URL, or a local path. Sources
may use ABP syntax (``||example.com^``), hosts syntax
(``0.0.0.0 example.com``), or plain domains (``example.com``). All domains
found are merged, de-duplicated, sorted, and written one per line as
``0.0.0.0 <domain>``.
"""

from __future__ import annotations

import argparse
import gzip
import io
import re
import sys
import urllib.request

from urllib.parse import urlparse, unquote
from pathlib import Path
from typing import Iterable, Optional, Set

DEFAULT_URLS = [
    # Popular public blocklists (ABP and hosts syntax).
    "https://big.oisd.nl/",
    "https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts",

    # Local custom list; any supported syntax may be used in it. The copy
    # committed together with this script contains a single hosts-style
    # entry: "0.0.0.0 ssp.seznam.cz".
    "./custom-domains.txt",
]

# "||domain^" with nothing after the caret.
ABP_DOMAIN_RE = re.compile(r"^\|\|([A-Za-z0-9._-]+)\^$")
HOSTS_SPLIT_RE = re.compile(r"\s+")
# At least two labels, total length 1-253. No label (including the last one)
# may start or end with a hyphen; the last label is 2-63 characters long.
VALID_DOMAIN_RE = re.compile(
    r"^(?=.{1,253}$)(?!-)(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+"
    r"(?!-)[a-z0-9-]{2,63}(?<!-)\.?$",
    re.IGNORECASE,
)

# Address column values that mark a hosts-file line as a block entry.
_HOSTS_ADDRESSES = frozenset(
    {"0.0.0.0", "127.0.0.1", "::1", "::", "255.255.255.255"}
)
# Names that hosts files map to the local machine; they must never be blocked.
_IGNORED_HOSTNAMES = frozenset(
    {"localhost", "localhost.localdomain", "local", "broadcasthost"}
)
_COMMENT_PREFIXES = ("!", "#", "[")
_GZIP_MAGIC = b"\x1f\x8b"
_BOM = "\ufeff"


def _is_local_path(scheme: str) -> bool:
    """Return True when a parsed URL scheme denotes a plain filesystem path."""
    # "C:\lists\x.txt" parses with scheme "c"; a one-letter scheme is a
    # Windows drive letter, not a URL scheme.
    return scheme == "" or (len(scheme) == 1 and scheme.isalpha())


def _read_local_file(path: Path) -> str:
    """Read a local blocklist as UTF-8, replacing undecodable bytes."""
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        return f.read()


def download_text(url: str, timeout: int = 30) -> str:
    """Return the text of one blocklist source.

    Args:
        url: An ``http(s)://`` URL, a ``file://`` URL, or a local path
            (relative or absolute, including Windows drive paths).
        timeout: Network timeout in seconds; used for remote sources only.

    Returns:
        The decoded content of the source.

    Raises:
        OSError: The file cannot be read or the request fails
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    parsed = urlparse(url)

    # Local file via file://
    if parsed.scheme == "file":
        # url2pathname unquotes the path and, on Windows, turns "/C:/x"
        # into "C:\x".
        return _read_local_file(Path(urllib.request.url2pathname(parsed.path)))

    # Local file without scheme, e.g. ./list.txt, list.txt or C:\list.txt
    if _is_local_path(parsed.scheme):
        return _read_local_file(Path(url))

    # Everything else is handed to urllib (HTTP/HTTPS in practice).
    req = urllib.request.Request(
        url,
        headers={
            "User-Agent": "mikrotik-adlist-builder/1.0",
            "Accept-Encoding": "gzip",
        },
    )

    with urllib.request.urlopen(req, timeout=timeout) as response:
        raw = response.read()
        encoding = response.headers.get("Content-Encoding", "").lower()
        charset = response.headers.get_content_charset() or "utf-8"

    # Some servers send gzip data without declaring it, so also check the
    # gzip magic bytes.
    if encoding == "gzip" or raw[:2] == _GZIP_MAGIC:
        raw = gzip.decompress(raw)

    try:
        return raw.decode(charset, errors="replace")
    except LookupError:
        # The server announced a charset Python does not know.
        return raw.decode("utf-8", errors="replace")


def normalize_domain(domain: str) -> Optional[str]:
    """Return *domain* in canonical form, or None if it must not be listed.

    The value is stripped, lower-cased, and freed of a trailing dot and a
    leading ``*.``. Rejected are: empty values, local machine names, values
    containing ``/``, ``\\`` or ``:`` (paths, ports, IPv6), values that are
    not syntactically valid multi-label host names, and IPv4 literals.
    """
    domain = domain.strip().lower().rstrip(".")
    if not domain:
        return None

    if "/" in domain or "\\" in domain:
        return None

    if ":" in domain:
        # Ignore IPv6, ports, and similar entries
        return None

    if domain.startswith("*."):
        domain = domain[2:]

    if domain in _IGNORED_HOSTNAMES:
        return None

    if not VALID_DOMAIN_RE.match(domain):
        return None

    # An all-numeric last label means an IPv4 literal such as 192.168.1.10,
    # which the pattern above cannot tell apart from a host name.
    if domain.rpartition(".")[2].isdigit():
        return None

    return domain


def extract_from_abp_line(line: str) -> Optional[str]:
    """Return the domain blocked by an ABP ``||domain`` rule, or None.

    Accepted forms: ``||example.com^``, ``||example.com`` and either of
    them followed by ``$options``. Rules that carry a path (for example
    ``||example.com/ads.js``) return None: they target single resources,
    and listing the domain would block the whole site.
    """
    # Example: ||example.com^
    m = ABP_DOMAIN_RE.match(line)
    if m:
        return normalize_domain(m.group(1))

    if not line.startswith("||"):
        return None

    # Drop "$options" first so that a "/" inside an option value is not
    # mistaken for a path.
    pattern = line[2:].split("$", 1)[0]
    if "/" in pattern:
        return None

    return normalize_domain(pattern.split("^", 1)[0])


def extract_from_hosts_line(line: str) -> Set[str]:
    """Return all valid domains from one hosts-file line.

    The line counts only when its first field is one of the usual blocking
    addresses (``0.0.0.0``, ``127.0.0.1``, ``::1``, ``::``,
    ``255.255.255.255``). An inline ``#`` comment is ignored. The result is
    empty when the line is not a hosts entry.
    """
    result: Set[str] = set()

    # Remove inline comment
    line = line.split("#", 1)[0].strip()
    if not line:
        return result

    parts = HOSTS_SPLIT_RE.split(line)
    if len(parts) < 2:
        return result

    if parts[0].lower() in _HOSTS_ADDRESSES:
        for item in parts[1:]:
            d = normalize_domain(item)
            if d:
                result.add(d)

    return result


def extract_plain_domain(line: str) -> Optional[str]:
    """Return the domain from a line that holds nothing but a domain.

    Comments, metadata, whitelist rules, ABP rules and lines containing
    whitespace, ``/``, ``^`` or ``$`` return None.
    """
    line = line.strip()
    if not line:
        return None

    if line.startswith(_COMMENT_PREFIXES):
        return None

    if line.startswith("@@"):
        # Ignore whitelist rules
        return None

    if line.startswith(("||", "|")):
        return None

    if any(x in line for x in [" ", "\t", "/", "^", "$"]):
        return None

    return normalize_domain(line)


def extract_domains(text: str) -> Set[str]:
    """Return the set of domains found in a whole blocklist.

    Each line is tried as an ABP rule, then as a hosts entry, then as a
    plain domain; the first parser that yields a result wins. Blank lines,
    comments (``!``, ``#``, ``[``) and whitelist rules (``@@``) are skipped.
    """
    domains: Set[str] = set()

    # A UTF-8 byte order mark would stick to the first entry and make it
    # unparseable.
    text = text.lstrip(_BOM)

    for raw_line in io.StringIO(text):
        line = raw_line.strip()

        if not line:
            continue

        # Skip comments and metadata
        if line.startswith(_COMMENT_PREFIXES):
            continue

        # Skip ABP whitelist rules
        if line.startswith("@@"):
            continue

        # 1) ABP syntax
        d = extract_from_abp_line(line)
        if d:
            domains.add(d)
            continue

        # 2) hosts file syntax
        hosts_domains = extract_from_hosts_line(line)
        if hosts_domains:
            domains.update(hosts_domains)
            continue

        # 3) plain domain syntax
        d = extract_plain_domain(line)
        if d:
            domains.add(d)

    return domains


def build_output(urls: Iterable[str], output_file: str) -> int:
    """Merge all sources and write the adlist.

    A source that cannot be loaded is reported on stderr and skipped, so
    one unreachable list does not abort the run. When no domain at all was
    collected, *output_file* is left untouched: overwriting a working
    adlist with an empty one would silently disable blocking.

    Args:
        urls: Sources accepted by ``download_text``.
        output_file: Path of the file to write.

    Returns:
        The number of unique domains written (0 when nothing was written).

    Raises:
        OSError: *output_file* cannot be written.
    """
    all_domains: Set[str] = set()

    for url in urls:
        print(f"[INFO] Downloading: {url}", file=sys.stderr)
        try:
            text = download_text(url)
        except Exception as exc:
            # Best effort: report the failing source and go on.
            print(f"[ERROR] {url}: {exc}", file=sys.stderr)
            continue

        domains = extract_domains(text)
        print(f"[INFO] Domains found: {len(domains)}", file=sys.stderr)
        all_domains.update(domains)

    if not all_domains:
        print(
            f"[ERROR] No domains collected; {output_file} was not written.",
            file=sys.stderr,
        )
        return 0

    sorted_domains = sorted(all_domains)

    with open(output_file, "w", encoding="utf-8", newline="\n") as f:
        f.writelines(f"0.0.0.0 {domain}\n" for domain in sorted_domains)

    return len(sorted_domains)


def parse_args() -> argparse.Namespace:
    """Parse the command line (``-u/--url`` repeatable, ``-o/--output``)."""
    parser = argparse.ArgumentParser(
        description="Download multiple blocklists from URLs and create one MikroTik adlist in '0.0.0.0 domain' format."
    )
    parser.add_argument(
        "-u",
        "--url",
        action="append",
        dest="urls",
        help="Blocklist URL. Can be used multiple times.",
    )
    parser.add_argument(
        "-o",
        "--output",
        default="adlist.txt",
        help="Output file.",
    )
    return parser.parse_args()


def main() -> int:
    """Run the builder; return 0 on success and 1 when nothing was produced."""
    args = parse_args()
    urls = args.urls or DEFAULT_URLS

    if not urls:
        print("Error: no URLs were provided. Use -u URL or edit DEFAULT_URLS.", file=sys.stderr)
        return 1

    count = build_output(urls, args.output)
    if count == 0:
        # build_output has already reported why nothing was written.
        return 1

    print(f"[OK] Output written to: {args.output}", file=sys.stderr)
    print(f"[OK] Total unique domains: {count}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())