97 lines
3.7 KiB
Python
97 lines
3.7 KiB
Python
# Kyler Olsen
|
|
# July 2025
|
|
import datetime
import json
import re

import bs4
import requests
|
|
|
|
# This script downloads the final exam schedule from Snow College's registrar
# page and parses it to extract the semester and the exam time for each
# combination of class day and class time.
|
|
|
|
# Last tested for Fall 2025 on 2025-07-10
|
|
|
|
def parse_days(days_str: str) -> list[str]:
    """Convert the text of a class-days cell into single-day codes.

    Example inputs: ``"Mon, Wed, or Fri"`` and ``"Tue or Thu"``.

    Args:
        days_str: Text listing day abbreviations (Mon, Tue, Wed, Thu, Fri),
            separated by commas, whitespace and filler words such as "or".

    Returns:
        The matching day codes ("M", "T", "W", "Th", "F") in the order they
        appear. Tokens that are not a known abbreviation are ignored.
    """
    day_map = {
        "Mon": "M",
        "Tue": "T",
        "Wed": "W",
        "Thu": "Th",
        "Fri": "F",
    }
    # Turn commas into spaces and let the no-argument split() break on any
    # run of whitespace. Text scraped from HTML may contain non-breaking
    # spaces or newlines, or commas with no following space; splitting on a
    # single " " would silently drop those days.
    tokens = days_str.replace(",", " ").split()
    return [day_map[token] for token in tokens if token in day_map]
|
|
|
|
# A clock time such as "7:30", optionally followed by an am/pm marker.
_CLOCK_RE = re.compile(r"(\d{1,2}:\d{2})\s*([ap]m)?", re.IGNORECASE)


def parse_class_time(time_str: str) -> tuple[datetime.time, datetime.time]:
    """Parse a class-time description into a (start, end) pair of times.

    Example inputs: ``"7:00 or 7:30am"`` and ``"*5:00pm or later"``.

    Each clock time may carry its own am/pm marker. A time without a marker
    takes the marker of the next marked time to its right, or of the last
    marked time in the string if none follows. If the string has no marker
    at all, "pm" is assumed.

    Args:
        time_str: Text containing one or more ``H:MM`` times. A leading
            ``*`` and filler words such as "or" are ignored.

    Returns:
        ``(start_time, end_time)``: the first and last clock times found.
        When the text contains "later", the end time is 23:59.

    Raises:
        ValueError: If no clock time is found or a time is not a valid
            12-hour time.
    """
    matches = _CLOCK_RE.findall(time_str)
    if not matches:
        raise ValueError(f"No class time found in {time_str!r}")

    explicit_markers = [marker.lower() for _, marker in matches if marker]
    # "pm" is the historical default for text that has no am/pm marker.
    current_marker = explicit_markers[-1] if explicit_markers else "pm"

    # Walk right to left so an unmarked time picks up the marker that
    # follows it ("7:00 or 7:30am" -> both am), while a range that crosses
    # noon ("11:30am or 12:00pm") keeps each time's own marker.
    parsed = []
    for clock, marker in reversed(matches):
        if marker:
            current_marker = marker.lower()
        parsed.append(
            datetime.datetime.strptime(clock + current_marker, "%I:%M%p").time()
        )
    parsed.reverse()

    start_time = parsed[0]
    if "later" in time_str.lower():
        # Open-ended slot: treat it as running to the end of the day.
        end_time = datetime.time(23, 59)
    else:
        end_time = parsed[-1]
    return start_time, end_time
|
|
|
|
def parse_schedule(content):
    """Parse the final exam schedule page into JSON-ready records.

    Args:
        content: HTML of the schedule page, as accepted by
            ``bs4.BeautifulSoup``.

    Returns:
        A tuple ``(semester, schedule, earliest_date)``:

        * ``semester``: text of the first ``h2 strong`` element; its last
          word is used as the year.
        * ``schedule``: list of dicts with the keys ``"day"``,
          ``"start_time"`` and ``"end_time"`` (``HH:MM``) and
          ``"exam_datetime"`` (``YYYY-MM-DD HH:MM``), one per class day of
          every three-column table row.
        * ``earliest_date``: earliest exam date found, as ``YYYY-MM-DD``.
          If no date header is parsed, this is January 1 of the following
          year.

    Raises:
        ValueError: If the semester heading is missing or does not end in a
            year, or if a date or time in the table cannot be parsed.
    """
    soup = bs4.BeautifulSoup(content, 'html.parser')

    heading = soup.select_one("h2 strong")
    if heading is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise ValueError('Semester heading ("h2 strong") not found in page')
    semester = heading.get_text(strip=True)
    try:
        year_text = semester.split()[-1]
        year = int(year_text)
    except (IndexError, ValueError) as err:
        raise ValueError(
            f"Semester heading does not end with a year: {semester!r}"
        ) from err

    schedule = []
    # Rows without their own date header reuse the most recent date seen.
    date = datetime.date(year, 1, 1)
    # Sentinel later than any date in `year`; min() below replaces it.
    earliest_date = datetime.date(year + 1, 1, 1)

    for row in soup.select("tbody tr"):
        date_label = row.find("th")
        label_text = date_label.get_text(strip=True) if date_label else ""
        if label_text:
            # Append the year and drop any space before a comma so the text
            # matches the "%A, %B %d, %Y" format.
            dated_text = f"{label_text}, {year_text}".replace(' ,', ',')
            date = datetime.datetime.strptime(dated_text, "%A, %B %d, %Y").date()
            earliest_date = min(earliest_date, date)

        cols = row.find_all("td")
        if len(cols) != 3:
            print("Unexpected number of columns in row:", len(cols))
            continue

        days = cols[0].get_text(strip=True)
        class_time = cols[1].get_text(strip=True)
        exam_time = cols[2].get_text(strip=True)

        # Only the start of the exam window (text before " - ") is recorded.
        exam_start = exam_time.split(' - ')[0]
        exam_datetime = datetime.datetime.strptime(
            f"{date} {exam_start}", "%Y-%m-%d %I:%M%p"
        )

        day_codes = parse_days(days)
        if not day_codes:
            continue

        # The class time is the same for every day in the row: parse it once.
        start_time, end_time = parse_class_time(class_time)
        start_text = start_time.strftime("%H:%M")
        end_text = end_time.strftime("%H:%M")
        exam_text = exam_datetime.strftime("%Y-%m-%d %H:%M")
        for day in day_codes:
            schedule.append({
                "day": day,
                "start_time": start_text,
                "end_time": end_text,
                "exam_datetime": exam_text,
            })

    return semester, schedule, earliest_date.strftime("%Y-%m-%d")
|
|
|
|
def download_schedule(url, timeout=30):
    """Download the page at ``url`` and return its raw body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server, passed to ``requests.get``.
            Without a timeout a stalled connection would block the script
            indefinitely.

    Returns:
        The response body as bytes (``response.content``).

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.RequestException: Raised by ``requests`` on
            connection failures or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to retrieve schedule. Status code: {response.status_code}")
    return response.content
|
|
|
|
if __name__ == "__main__":
    # Fetch the live registrar page, parse it, and write the result as JSON.
    url = "https://www.snow.edu/offices/registrar/final_exams.html"
    # For offline debugging, read a saved copy of the page (for example
    # "final_exams.html") into `content` instead of downloading it.
    content = download_schedule(url)

    semester, schedule, earliest_date = parse_schedule(content)

    # Assemble the whole document first so the write step is a single call.
    payload = {
        "url": url,
        "semester": semester,
        "updated_date": datetime.date.today().strftime("%Y-%m-%d"),
        "earliest_date": earliest_date,
        "schedule": schedule,
    }
    with open("www/final_exams.json", "w") as file:
        json.dump(payload, file, indent=4)
|