# Clarion02WebpageSniffer/WebPageUpdateChecker.py
# Last modified: 2026-04-02 10:34:17 -04:00
# 65 lines, 1.6 KiB, Python
import requests
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import time
import subprocess
# Page whose last-updated date/time stamp is monitored.
URL = "http://clarion02.physics.fsu.edu/~clarion02/index.html"
# Custom User-Agent so the monitor identifies itself to the server.
HEADERS = {
"User-Agent": "Mozilla/5.0 (date-monitor)"
}
# Last date/time stamp scraped from the page.  The "time" placeholder can
# never match a real stamp, so the first poll never fires an alert.
previousTime = "time"
def scrape_date_time():
    """Fetch the monitored page and alert if its date/time stamp is unchanged.

    Scrapes the text that precedes the first <pre> inside the first <p> of
    the page at URL (the page's last-updated stamp).  If the stamp equals
    the one seen on the previous call, the page has stopped updating, so
    ./sendEmailAlert.sh is executed.  Otherwise the new stamp is stored in
    the module-level ``previousTime``.

    Returns None in all cases; network or parse failures are reported to
    stdout and the function returns early without alerting.
    """
    global previousTime
    try:
        response = requests.get(URL, headers=HEADERS, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"[ERROR] Request failed: {e}")
        return

    soup = BeautifulSoup(response.text, "html.parser")
    # The date/time text lives in the first <p>, before its <pre> child.
    p_tag = soup.find("p")
    if not p_tag:
        print("Date/Time not found (no <p> tag).")
        return

    date_text_parts = []
    for child in p_tag.children:
        # Stop when <pre> is encountered; everything after it is page body.
        if getattr(child, "name", None) == "pre":
            break
        if isinstance(child, NavigableString):
            text = child.strip()
            if text:
                date_text_parts.append(text)

    if not date_text_parts:
        print("Date/Time not found.")
        # BUG FIX: previously fell through to the comparison below with
        # date_time unbound, raising NameError.  Bail out instead.
        return

    date_time = " ".join(date_text_parts)
    if date_time == previousTime:
        # Stamp unchanged since the last poll -> page is stale; fire alert.
        subprocess.run(["./sendEmailAlert.sh"])
    else:
        previousTime = date_time
if __name__ == "__main__":
    # Poll every 15 minutes until interrupted with Ctrl-C.
    poll_interval = 900
    try:
        while True:
            scrape_date_time()
            time.sleep(poll_interval)
    except KeyboardInterrupt:
        print("\nStopped.")