import asyncio
import logging
import re
import urllib.request
import xml.etree.ElementTree as ET
from html import unescape
from html.parser import HTMLParser
import hashlib
from meshcore import MeshCore, EventType
# =====================
# Config
# =====================
SERIAL_PORT = "COM4" # change this to your serial port
CHANNEL_IDXS = [6]#[5, 6] # change this to the index of your target channel
RSS_URL = "https://data.emergency.vic.gov.au/Show?pageId=getIncidentRSS"
POLL_INTERVAL_SEC = 300 # poll interval (seconds)
# Only send items whose description (plain text) contains this keyword (case-insensitive)
KEYWORDS_REQUIRED = ["BUSHFIRE"]
# Only send these fields from the RSS description
FIELDS_TO_SEND = [
"Type",
"Fire District",
"Location",
"Latitude",
"Longitude",
"Status",
"Size",
]
BLOCKED_STATUSES = [
"Under Control",
"Safe",
]
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger("serial_rssbot")
class _HTMLTextExtractor(HTMLParser):
def __init__(self):
super().__init__()
self._chunks: list[str] = []
def handle_data(self, data: str) -> None:
if data:
self._chunks.append(data)
def get_text(self) -> str:
return "".join(self._chunks)
def _strip_html(html: str) -> str:
"""Convert HTML-ish fragments into plain text."""
if not html:
return ""
# Normalize
into newlines so field parsing is reliable.
html = re.sub(r"<\s*br\s*/?\s*>", "\n", html, flags=re.IGNORECASE)
parser = _HTMLTextExtractor()
try:
parser.feed(html)
parser.close()
text = parser.get_text()
except Exception:
text = re.sub(r"<[^>]+>", "\n", html)
text = unescape(text)
# Normalize whitespace but keep newlines as separators.
text = text.replace("\r", "\n")
text = re.sub(r"[ \t\f\v]+", " ", text)
text = re.sub(r"\n+", "\n", text)
return text.strip()
def _fetch_rss_bytes(url: str) -> bytes:
req = urllib.request.Request(
url,
headers={
"User-Agent": "meshcore-rss-bot/1.0",
"Accept": "application/rss+xml, application/xml;q=0.9, */*;q=0.8",
},
method="GET",
)
with urllib.request.urlopen(req, timeout=30) as resp:
return resp.read()
def extract_fields(raw_html: str) -> dict[str, str]:
"""
Extract desired fields from the HTML description.
We strip HTML to text with newlines, then parse lines like "Field: value".
"""
text = _strip_html(raw_html)
lines = [ln.strip() for ln in text.split("\n") if ln.strip()]
result: dict[str, str] = {}
for ln in lines:
m = re.match(r"^([^:]+):\s*(.*)$", ln)
if not m:
continue
key = m.group(1).strip()
val = m.group(2).strip()
# Match desired fields case-insensitively
for wanted in FIELDS_TO_SEND:
if key.lower() == wanted.lower():
result[wanted] = val
break
return result
def _parse_rss_items(xml_bytes: bytes) -> list[dict[str, str]]:
"""Return list of items with keys: id, description, raw_description."""
items: list[dict[str, str]] = []
root = ET.fromstring(xml_bytes)
# RSS 2.0: - ...
channel = root.find("channel") if root.tag.lower().endswith("rss") else root
if channel is None:
return items
for it in channel.findall("item"):
title = (it.findtext("title") or "").strip()
link = (it.findtext("link") or "").strip()
guid = (it.findtext("guid") or "").strip()
pub_date = (it.findtext("pubDate") or "").strip()
desc_html = it.findtext("description") or ""
desc_text = _strip_html(desc_html)
# stable_id = (guid or link or f"{title}|{pub_date}" or desc_text).strip()
base_id = (guid or link or f"{title}|{pub_date}").strip()
# fingerprint only the fields you output
fields = extract_fields(desc_html)
parts = []
for k in FIELDS_TO_SEND:
v = fields.get(k, "")
parts.append(f"{k}={v}")
fingerprint = hashlib.sha1("|".join(parts).encode("utf-8")).hexdigest()[:12]
stable_id = f"{base_id}|{fingerprint}"
items.append(
{
"id": stable_id,
"description": desc_text, # plain text for keyword filtering
"raw_description": desc_html, # original HTML-ish description
}
)
return items
def build_filtered_message(item: dict[str, str]) -> str | None:
# Keyword filter (case-insensitive) on plain text description
desc_text_lower = (item.get("description") or "").lower()
if not any(k.lower() in desc_text_lower for k in KEYWORDS_REQUIRED):
return None
fields = extract_fields(item.get("raw_description") or "")
if not fields:
return None
status_norm = fields.get("Status", "").strip().lower()
blocked_norm = [b.lower() for b in BLOCKED_STATUSES]
if status_norm in blocked_norm:
return None
# if any(b.lower() in status for b in BLOCKED_STATUSES):
# return None
# Keep order exactly as FIELDS_TO_SEND, skip missing
# parts = []
# for k in FIELDS_TO_SEND:
# v = fields.get(k)
# if v:
# parts.append(f"{v}")
parts = []
lat = fields.get("Latitude")
lon = fields.get("Longitude")
for k in FIELDS_TO_SEND:
if k in ("Latitude", "Longitude"):
continue
v = fields.get(k)
if v:
parts.append(v)
# append formatted lat,lon at the end
if lat and lon:
parts.append(f"{lat},{lon}")
if not parts:
return None
return " | ".join(parts)
async def _fetch_items_async() -> list[dict[str, str]]:
xml_bytes = await asyncio.to_thread(_fetch_rss_bytes, RSS_URL)
return _parse_rss_items(xml_bytes)
async def main() -> None:
meshcore = await MeshCore.create_serial(SERIAL_PORT, debug=True)
print(f"Connected on {SERIAL_PORT}")
# await meshcore.start_auto_message_fetching()
seen_ids: set[str] = set()
# Prime seen set so we do not spam old items on startup
try:
initial_items = await _fetch_items_async()
for it in initial_items:
if it.get("id"):
seen_ids.add(it["id"])
_LOGGER.info("Primed %d existing RSS items as seen.", len(seen_ids))
except Exception as ex:
_LOGGER.warning("Failed to prime RSS items: %s", ex)
try:
while True:
try:
items = await _fetch_items_async()
# Feeds are usually newest-first. Send unseen in chronological order.
new_items = [it for it in items if it.get("id") and it["id"] not in seen_ids]
for it in reversed(new_items):
msg_text = build_filtered_message(it)
# Mark as seen even if it does not match filter, so we do not re-check forever
seen_ids.add(it["id"])
if not msg_text:
continue
# _LOGGER.info("Sending filtered RSS item to channel %s", CHANNEL_IDX)
# result = await meshcore.commands.send_chan_msg(CHANNEL_IDX, msg_text)
# if result.type == EventType.ERROR:
# _LOGGER.error("Error sending RSS message: %s", result.payload)
for ch in CHANNEL_IDXS:
_LOGGER.info("Sending filtered RSS item to channel %s", ch)
result = await meshcore.commands.send_chan_msg(ch, msg_text)
if result.type == EventType.ERROR:
_LOGGER.error("Error sending RSS message to channel %s: %s", ch, result.payload)
await asyncio.sleep(5) # seconds
await asyncio.sleep(5)
except Exception as ex:
_LOGGER.warning("RSS poll failed: %s", ex)
await asyncio.sleep(POLL_INTERVAL_SEC)
except KeyboardInterrupt:
print("Stopping...")
finally:
await meshcore.stop_auto_message_fetching()
await meshcore.disconnect()
print("Disconnected")
if __name__ == "__main__":
asyncio.run(main())