# snow-finals-schedule/download.py (Python, 97 lines, 3.7 KiB)

# Kyler Olsen
# July 2025
import requests
import bs4
import datetime
import json
# This script downloads the final exam schedule from Snow College's registrar page
# and parses it to extract the semester and exam times for each course.
# Last tested for Fall 2025 on 2025-07-10
def parse_days(days_str: str) -> list[str]:
    """Convert a human-readable list of weekdays into short day codes.

    Expected inputs look like ``"Mon, Wed, or Fri"`` or ``"Tue or Thu"``.
    Tokens that are not a known three-letter day abbreviation (such as
    ``"or"``) are ignored.

    Args:
        days_str: Text naming one or more weekdays.

    Returns:
        Day codes (``"M"``, ``"T"``, ``"W"``, ``"Th"``, ``"F"``) in the
        order the days appear in ``days_str``.
    """
    day_map = {
        "Mon": "M",
        "Tue": "T",
        "Wed": "W",
        "Thu": "Th",
        "Fri": "F",
    }
    # Treat commas as separators and split on *any* whitespace, so text
    # containing non-breaking spaces, tabs or newlines (or no space after a
    # comma) still yields every day.
    tokens = days_str.replace(",", " ").split()
    return [day_map[token] for token in tokens if token in day_map]
def parse_class_time(time_str) -> tuple[datetime.time, datetime.time]:
    """Parse a class start-time description into a (start, end) time window.

    Expected inputs look like ``"7:00 or 7:30am"`` or ``"*5:00pm or later"``.
    Every token containing ``":"`` is treated as a clock time. A time without
    its own am/pm suffix inherits the suffix found elsewhere in the string
    ("am" if present, otherwise "pm").

    Args:
        time_str: Text describing the class start time(s); a leading ``*``
            is ignored.

    Returns:
        ``(start_time, end_time)``: the first and the last time mentioned.
        When the text contains "later", ``end_time`` is 23:59.

    Raises:
        ValueError: If no time is found or a time token cannot be parsed.
    """
    normalized = time_str.replace("*", "").lower()
    # Glue a detached meridiem ("7:30 am") back onto its time.
    normalized = normalized.replace(" am", "am").replace(" pm", "pm")

    # Suffix shared by times that carry none of their own ("7:00 or 7:30am").
    fallback = "am" if "am" in normalized else "pm"

    stamps = []
    for token in normalized.split():
        if ":" not in token:
            continue
        token = token.strip(",.;")
        # Respect an explicit suffix so ranges crossing noon
        # ("11:30am or 12:00pm") parse correctly.
        if not token.endswith(("am", "pm")):
            token += fallback
        stamps.append(datetime.datetime.strptime(token, "%I:%M%p").time())

    if not stamps:
        raise ValueError(f"No class time found in {time_str!r}")

    start_time = stamps[0]
    end_time = datetime.time(23, 59) if "later" in normalized else stamps[-1]
    return start_time, end_time
def parse_schedule(content) -> tuple[str, list[dict[str, str]], str]:
    """Parse the final-exam HTML page into a list of schedule entries.

    The semester name is read from the first ``h2 strong`` element, and its
    last word is used as the year. Each ``tbody tr`` row may carry a ``th``
    date label (``"<weekday>, <month> <day>"``) that sets the exam date for
    that row and the rows after it. A row with exactly three ``td`` cells is
    read as (class days, class time, exam time). Rows with any other number
    of cells are reported on stdout and skipped.

    Args:
        content: HTML of the final-exam page.

    Returns:
        ``(semester, schedule, earliest_date)`` where ``schedule`` is a list
        of dicts with keys ``day``, ``start_time``, ``end_time`` (``HH:MM``)
        and ``exam_datetime`` (``YYYY-MM-DD HH:MM``), and ``earliest_date``
        is the earliest exam date seen as ``YYYY-MM-DD`` (January 1 of the
        following year if no date label was found).

    Raises:
        ValueError: If the semester heading is missing, or a year, date or
            time cannot be parsed.
    """
    soup = bs4.BeautifulSoup(content, 'html.parser')

    heading = soup.select_one("h2 strong")
    if heading is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("Could not find the semester heading ('h2 strong') in the page.")
    semester = heading.get_text(strip=True)
    year = semester.split()[-1]
    year_number = int(year)

    schedule = []
    date = datetime.date(year_number, 1, 1)
    # Sentinel later than any date in `year`; min() below narrows it down.
    earliest_date = datetime.date(year_number + 1, 1, 1)

    for row in soup.select("tbody tr"):
        date_cell = row.find("th")
        date_text = date_cell.get_text(strip=True) if date_cell is not None else ""
        if date_text:
            # Normalise a stray space before the comma ("Monday , December 8").
            labelled = f"{date_text}, {year}".replace(' ,', ',')
            date = datetime.datetime.strptime(labelled, "%A, %B %d, %Y").date()
            earliest_date = min(earliest_date, date)

        cols = row.find_all("td")
        if len(cols) != 3:
            print("Unexpected number of columns in row:", len(cols))
            continue

        days, class_time, exam_time = (col.get_text(strip=True) for col in cols)
        # Only the start of the exam window ("8:30am - 10:30am") is kept.
        exam_datetime = datetime.datetime.strptime(
            f"{date} {exam_time.split(' - ')[0]}", "%Y-%m-%d %I:%M%p"
        )

        day_codes = parse_days(days)
        if not day_codes:
            continue

        # The class-time window is the same for every day in the row, so
        # parse and format it once.
        start_time, end_time = parse_class_time(class_time)
        start_text = start_time.strftime("%H:%M")
        end_text = end_time.strftime("%H:%M")
        exam_text = exam_datetime.strftime("%Y-%m-%d %H:%M")
        schedule.extend(
            {
                "day": day,
                "start_time": start_text,
                "end_time": end_text,
                "exam_datetime": exam_text,
            }
            for day in day_codes
        )

    return semester, schedule, earliest_date.strftime("%Y-%m-%d")
def download_schedule(url, timeout=30):
    """Download the page at ``url`` and return its body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on an
            unresponsive server.

    Returns:
        The response body (``response.content``).

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to retrieve schedule. Status code: {response.status_code}")
    return response.content
if __name__ == "__main__":
    # Fetch the live registrar page, parse it, and write the result as JSON.
    url = "https://www.snow.edu/offices/registrar/final_exams.html"
    # To debug against a saved copy, read "final_exams.html" from disk
    # instead of downloading.
    content = download_schedule(url)
    semester, schedule, earliest_date = parse_schedule(content)

    payload = {
        "url": url,
        "semester": semester,
        "updated_date": datetime.date.today().strftime("%Y-%m-%d"),
        "earliest_date": earliest_date,
        "schedule": schedule,
    }
    with open("www/final_exams.json", "w") as out_file:
        json.dump(payload, out_file, indent=4)