-
Notifications
You must be signed in to change notification settings - Fork 0
/
auroracle.py
497 lines (417 loc) · 16.9 KB
/
auroracle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
# .:: AurOracle ::.
# A utility to generate course schedules for University of Manitoba students.
#
# Written by Sam Shelton ([email protected])
import os
import time
import threading
import urllib2
import lxml.html as lh
import itertools
import argparse
import re
from math import factorial
import ssl
from modules.sorting import quicksort_sections,get_sorted_daylists
from modules.sorting import compress,prefer_free
from modules.classes import Section,Course
# Maximum number of get_course calls (and therefore web requests) per second.
RATE_LIMIT = 1

# Maps the term names accepted on the command line to Aurora term codes.
# A code is the calendar year followed by 10 (winter), 50 (summer) or
# 90 (fall), e.g. fall 2015 -> "201590".
terms = {
    "fall15": "201590",
    "winter16": "201610",
    "summer16": "201650",
    "fall16": "201690",
    "winter17": "201710",
    "summer17": "201750",
    "fall17": "201790",
    "winter18": "201810",
    "summer18": "201850",
}

# SSL context that skips hostname and certificate verification; it is passed
# to urllib2.urlopen when a course page is downloaded.
# NOTE(review): this turns off TLS verification for those requests - confirm
# it is still needed for aurora.umanitoba.ca before keeping it.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
"""
The function to print schedule data
to the outfile. Debug and error messages
instead use print().
"""
def print_write(text, verbose=False):
    """Write one line of schedule output to the module-level outfile.

    Only schedule data goes through here; debug and error messages use
    print() instead. The verbose argument is accepted but not used.
    """
    outfile.write("%s\n" % (text,))
"""
Print a combination of courses to the
outfile as a calendar.
"""
def print_calendar(comb):
    """Write a combination of sections to the outfile as a weekly calendar.

    The day lists come from get_sorted_daylists(comb) and are rendered as
    columns nine characters wide (the header assumes Monday to Friday).
    Each pass of the loop writes a separator line followed by a three-line
    row holding the next section of every day: course name, section name
    and start time. Days that have run out of sections get blank cells.
    The last separator written doubles as the bottom border.
    """
    column_width = 9
    # Blank cells must be as wide as filled ones or the columns drift.
    blank_cell = " " * column_width

    print_write("********************* CALENDAR **********************")
    print_write("| MONDAY  || TUESDAY ||WEDNESDAY||THURSDAY || FRIDAY  |")

    # One iterator per calendar column.
    day_iters = [iter(day_list) for day_list in get_sorted_daylists(comb)]

    while True:
        print_write("|---------||---------||---------||---------||---------|")
        name_row = []
        section_row = []
        time_row = []
        exhausted = 0  # number of columns with nothing left to print
        for day_iter in day_iters:
            try:
                section = next(day_iter)
            except StopIteration:
                exhausted += 1
                name_cell = section_cell = time_cell = blank_cell
            else:
                name_cell = section.root_course.name.ljust(column_width)
                section_cell = section.name.center(column_width)
                time_cell = time.strftime(
                    "%I:%M %p", section.start_time).center(column_width)
            name_row.append("|" + name_cell + "|")
            section_row.append("|" + section_cell + "|")
            time_row.append("|" + time_cell + "|")

        # Stop once every column is empty (also terminates when there are
        # fewer or more than five day lists).
        if exhausted == len(day_iters):
            break

        print_write("".join(name_row))
        print_write("".join(section_row))
        print_write("".join(time_row))
"""
DECORATOR: Prevents function func
from being called more than max times
per second.
Used to rate limit get_course calls.
"""
def rate_limit(max):
    """Build a decorator allowing the wrapped function at most max calls per second.

    Before each call the wrapper sleeps until at least 1/max seconds of
    wall-clock time have passed since the previous call finished. The lock
    is shared by every function decorated through the same rate_limit(...)
    call, so concurrent callers wait their turn one at a time.

    Args:
        max: Maximum number of calls per second; must be non-zero.

    Returns:
        A decorator. The decorated function takes the same positional and
        keyword arguments as the original and returns its result.

    Raises:
        ZeroDivisionError: if max is 0.
    """
    import functools  # local import: not part of the module's import header

    lock = threading.Lock()
    min_interval = 1.0 / float(max)

    def decorate(func):
        # One-element list so the closure can rebind the value (Python 2
        # has no nonlocal). 0.0 means "never called", so the first call
        # goes through without waiting.
        last_call = [0.0]

        @functools.wraps(func)
        def rate_limited_function(*args, **kwargs):
            # "with" releases the lock even if the sleep is interrupted.
            with lock:
                # time.time() is wall-clock time; time.clock() measured CPU
                # time on Unix, which does not advance while sleeping or
                # waiting on the network.
                remaining = min_interval - (time.time() - last_call[0])
                if remaining > 0:
                    # Never wait longer than one interval, even if the
                    # system clock was set backwards.
                    time.sleep(min(remaining, min_interval))
            try:
                return func(*args, **kwargs)
            finally:
                # A call that raised still counts against the rate.
                last_call[0] = time.time()

        return rate_limited_function

    return decorate
"""
Prints a combination of sections
to the outfile.
"""
def print_section_comb(comb):
    """Write a plain-text listing of a section combination to the outfile.

    A banner line is written first; each section then gets one line with
    its course name, section name, start and end time (12-hour clock) and
    its day attribute.
    """
    clock_format = "%I:%M %p"
    print_write("************** SCHEDULE *******************")
    for section in comb:
        starts = time.strftime(clock_format, section.start_time)
        ends = time.strftime(clock_format, section.end_time)
        pieces = (
            section.root_course.name, " : ", section.name, " ",
            starts, " - ", ends, " ", section.day,
        )
        print_write("".join(pieces))
"""
Retrieves the course from Aurora.
"""
@rate_limit(RATE_LIMIT)
def get_course(name, term, earliest, latest, offlinemode):
    """Build a Course (with its sections and labs) from Aurora's section listing.

    The listing page is read from cache/<term>/<SUBJ>-<CRSE>.html when that
    file exists; otherwise it is downloaded and cached, unless offlinemode
    is set. The rate limit applies to every call, cached or not.

    Args:
        name: Space-separated course string, e.g. "MATH 1500", optionally
            followed by the only section numbers to keep, e.g.
            "MATH 1500 A05 A06". (The main script converts dashes to
            spaces before calling.)
        term: Aurora term code, e.g. "201590".
        earliest: struct_time; sections starting before it are dropped.
            A falsy value disables the check.
        latest: struct_time; sections ending after it are dropped.
            A falsy value disables the check.
        offlinemode: When true, never download; exit if the page is not
            cached.

    Returns:
        The Course. "A" sections are appended to course.sections; "B"
        sections are appended to course.lab.sections (course.lab is created
        on first use and course.haslab set). Sections are skipped when they
        are excluded through the global args.xclude, have no day, a TBD/TBA
        day, or a time that cannot be parsed.

    The process exits (via exit()) when the name is malformed, the download
    yields no document, or offline mode finds no cached page.
    """
    name = name.upper()
    name_parts = name.split()
    if len(name_parts) < 2:
        print("Fatal error: invalid course name \"" + name + "\" (expected e.g. MATH-1500)")
        exit()
    subj = name_parts[0]
    crse = name_parts[1]
    course = Course(subj + " " + crse)
    # Anything after subject and number is a list of sections to keep.
    specific_sections = name_parts[2:]

    # Retrieval
    dpath = "cache/" + term + "/"
    fpath = dpath + subj + "-" + crse + ".html"
    if os.path.exists(fpath):
        # Cached copy
        root = lh.parse(fpath).getroot()
    elif not offlinemode:
        url = "https://aurora.umanitoba.ca/banprod/bwckctlg.p_disp_listcrse?term_in="+term+"&subj_in="+subj+"&crse_in="+crse+"&schd_in=F02"
        request = urllib2.Request(url)
        #request.add_header('User-Agent', "Mozilla/5.0 (X11; U; Linux i686) AppleWebKit/536.16 (KHTML, like Gecko) Chrome/35.0.2049.59 Safari/536.16")
        #request.add_header('Referer', "https://aurora.umanitoba.ca/banprod/bwckctlg.p_disp_course_detail?cat_term_in="+term+"&subj_code_in="+subj+"&crse_numb_in=" + crse)
        response = urllib2.urlopen(request, timeout=30, context=ctx)
        try:
            root = lh.parse(response).getroot()
        finally:
            # Always release the connection, even if parsing fails.
            response.close()
        if root is None:
            print("Fatal error: failed to retrieve data for course "+name)
            exit()
        # Add to cache
        data = lh.tostring(root)
        cache_dir = os.path.dirname(fpath)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        with open(fpath, 'wb') as f:
            f.write(data)
    else:
        print("Offline mode failed: Course " + name + " not found in /cache.")
        exit()

    # "nodes" maps each section title to the row holding its meeting time.
    # The section table looks like this:
    #   <tr><th><a>section title</a></th></tr>
    #   <tr><td>section body</td></tr>
    #   ...
    # We find all title elements (tr/th/a) and associate their titles with
    # the bodies (tr/td), which are two levels up and one sibling down.
    # The elements on Aurora don't use IDs, so the long summary attributes
    # are the safest thing to match on.
    nodes = {}
    sections_table = ".//table[@summary='This layout table is used to present the sections found']/"
    # Downloaded HTML files may have tbody elements inserted by the browser.
    if len(root.xpath(sections_table + "tr")) == 0:
        tbody = "tbody/"
    else:
        tbody = ""

    # NODE EXTRACTION
    titlenodes = root.xpath(sections_table + tbody + "tr/th[@class='ddtitle']/a")
    for title_a in titlenodes:
        # From tr/th/a up to tr; the next tr is the body of the entry.
        body_tr = title_a.getparent().getparent().getnext()
        if body_tr is None or not title_a.text:
            continue
        meeting_rows = body_tr.xpath("./td/table[@summary='This table lists the scheduled meeting times and assigned instructors for this class..']/"+tbody+"tr[2]")
        if not meeting_rows:
            # No meeting-times table for this section: nothing to schedule.
            continue
        nodes[title_a.text] = meeting_rows[0]

    for title, tablenode in nodes.items():
        # The section number is the last three characters of the title.
        section_num = title[-3:]
        # Is a section specified?
        if specific_sections and section_num not in specific_sections:
            continue
        # Only allow courses ("A") and labs ("B")
        if section_num[0] not in ("A", "B"):
            continue
        # Check for exclusion
        if args.xclude and (course.name + " " + section_num) in args.xclude:
            continue
        # Day
        day_cell = tablenode.find("./td[3]")
        section_day = day_cell.text if day_cell is not None else None
        if not section_day or ("TBD" in section_day) or ("TBA" in section_day):
            continue
        # Time
        time_cell = tablenode.find("./td[2]")
        section_time = time_cell.text if time_cell is not None else None
        if not section_time:
            continue
        time_range = _parse_time_range(section_time)
        if time_range is None:
            # e.g. "TBA": a section without a usable time cannot be scheduled.
            continue
        start_time, end_time = time_range
        # Earliest / latest checking
        if (earliest and (start_time < earliest)) or (latest and (end_time > latest)):
            continue
        if section_num[0] == "A":
            # It's a course
            course.sections.append(Section(section_num, start_time, end_time, section_day, course))
        else:
            # It's a lab; create the lab course on first use.
            if not course.haslab:
                course.haslab = True
                course.lab = Course(course.name)
            course.lab.sections.append(Section(section_num, start_time, end_time, section_day, course))
    return course


def _parse_time_range(section_time):
    """Parse a "start - end" time cell into a (start, end) struct_time pair.

    Returns None when the text does not hold two "%I:%M %p" times separated
    by a dash (for example "TBA").
    """
    times = re.split(" *- *", section_time.strip())
    if len(times) < 2:
        return None
    try:
        start_time = time.strptime(times[0], "%I:%M %p")
        end_time = time.strptime(times[1], "%I:%M %p")
    except ValueError:
        return None
    return start_time, end_time
"""
MAIN FUNCTION
"""
def get_valid_combs(number, term_string, m_course_strings, p_course_strings, earliest, latest, offlinemode):
    """Return every conflict-free schedule containing exactly `number` courses.

    All mandatory courses are part of every schedule; the remaining
    number - len(mandatory) slots are filled with each possible selection
    of potential courses in turn.

    Args:
        number: Total number of courses per schedule.
        term_string: Aurora term code passed on to get_course.
        m_course_strings: Names of the mandatory courses.
        p_course_strings: Names of the potential courses.
        earliest, latest, offlinemode: Passed through to get_course.

    Returns:
        A list of the non-empty section combinations produced by
        generate_valid_combinations for every course selection. The list is
        empty when there are fewer potential courses than open slots.

    Raises:
        ValueError: if there are more mandatory courses than `number`
            (itertools.combinations rejects a negative size).
    """
    # Mandatory courses: included in every selection below.
    m_courses = [
        get_course(coursename, term_string, earliest, latest, offlinemode)
        for coursename in m_course_strings
    ]
    # Potential courses: used to fill the remaining slots; every selection
    # of them is exhausted.
    p_courses = [
        get_course(coursename, term_string, earliest, latest, offlinemode)
        for coursename in p_course_strings
    ]

    valid_combs = []
    open_slots = number - len(m_courses)
    # Iterate the combinations directly instead of computing nCr by hand.
    for p_comb in itertools.combinations(p_courses, open_slots):
        courselist = list(p_comb) + m_courses
        for comb in generate_valid_combinations(courselist):
            if len(comb) > 0:
                valid_combs.append(comb)
    return valid_combs
"""
Given a list of courses, generates a list of each
way to take each course (each section), and then
generates the Cartesian product of all of those
subsets. It iterates over the results and returns
those that are valid.
"""
def generate_valid_combinations(courselist):
    """Return the conflict-free ways of picking one section per course.

    Every course contributes its list of sections and, when it has a lab,
    the lab's list of sections as well. The Cartesian product of those
    lists is walked and each tuple accepted by is_valid_combination is
    kept. When the global args.cap is set, the walk stops as soon as that
    many combinations have been accepted.
    """
    section_lists = []
    for course in courselist:
        section_lists.append(course.sections)
        if course.haslab:
            section_lists.append(course.lab.sections)

    accepted = []
    for candidate in itertools.product(*section_lists):
        if not is_valid_combination(candidate):
            continue
        accepted.append(candidate)
        if args.cap and len(accepted) >= args.cap:
            break
    return accepted
"""
Checks combinations for conflicts
"""
def is_valid_combination(sectionlist):
    """Return True when no section in sectionlist conflicts with another.

    Every section is compared, through its conflicts_with method, against
    all the other distinct sections of the combination.
    """
    everything = set(sectionlist)
    clash_found = any(
        section.conflicts_with(other)
        for section in sectionlist
        for other in everything - {section}
    )
    return not clash_found
def runwizard():
    """Interactively ask for the settings normally given on the command line.

    Fills in args.term, args.number, args.must, args.would (asked only when
    the mandatory courses leave slots open), args.earliest and args.latest
    on the global args namespace. Answers are stored as typed; the main
    script validates and converts them afterwards.
    """
    # args.term
    print("Which term do you want to generate schedules for? (example: fall15)")
    while args.term is None:
        answer = raw_input("> ").lower().replace(" ", "")
        if answer in terms:
            args.term = answer
        else:
            print("Not a valid option.")

    # args.number
    print("How many courses are you taking for this term? (example: 3)")
    while args.number is None:
        try:
            answer = int(raw_input("> "))
        except ValueError:
            print("Not a valid number.")
            continue
        # Zero and negative counts make no sense for a schedule.
        if answer <= 0:
            print("Not a valid number.")
            continue
        args.number = answer

    # args.must
    print("List the courses you know you must take, separated by spaces. (example: MATH-1300 COMP-1010)")
    while args.must is None:
        # split() without an argument turns a blank line into an empty list
        # (split(" ") would give [""], i.e. one course with an empty name).
        answer = raw_input("> ").split()
        if len(answer) <= args.number:
            args.must = answer
        else:
            print("Number of courses provided does not match number specified.")

    # args.would
    if len(args.must) < args.number:
        print("List any amount of courses that you'd take to fill the remaining "+str(args.number - len(args.must))+" slots. (example: GEOL-1420 FREN-1152)")
        while args.would is None:
            answer = raw_input("> ").split()
            if (len(answer) + len(args.must)) >= args.number:
                args.would = answer
            else:
                print("You need at least enough to fill the remaining slots.")

    # args.earliest
    print("What is the earliest you want to be in class? If you don't care, just leave it blank. (format: 8:30 AM)")
    answer = raw_input("> ").strip()
    if answer:
        args.earliest = answer

    # args.latest
    print("What is the latest you want to be in class? If you don't care, just leave it blank. (format: 3:30 PM)")
    answer = raw_input("> ").strip()
    if answer:
        args.latest = answer
if __name__ == "__main__":
    # Script entry point: parse the options (or run the wizard when none
    # were given), validate them, generate the schedules and write them to
    # the output file.

    # PARSING
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--number', type=int)
    parser.add_argument('-t', '--term')
    parser.add_argument('-f', '--file')
    parser.add_argument('-m', '--must', nargs='+')
    parser.add_argument('-w', '--would', nargs='+')
    parser.add_argument('-x', '--xclude', nargs='+')
    parser.add_argument('-o', '--offline', action='store_true')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-e', '--earliest')
    parser.add_argument('-l', '--latest')
    parser.add_argument('-c', '--cap', type=int)
    # Optimization args
    parser.add_argument('--prefer-free-days', action='store_true')
    parser.add_argument('--no-compression', action='store_true')
    args = parser.parse_args()

    # Wizard: only when no option at all was given.
    if not any(vars(args).values()):
        runwizard()

    if not args.must and not args.would:
        print("You must specify at least one course.")
        exit()

    # Error checking
    if not args.number:
        if args.would:
            print("Because you used the --would parameter, you must specify a number of courses desired. \nExample: '--number 5'")
            exit()
        else:
            # Only mandatory courses were given: take all of them.
            args.number = len(args.must)

    # Course parsing: "MATH-1500" becomes "MATH 1500", the form get_course splits on.
    if not args.must:
        args.must = []
    if not args.would:
        args.would = []
    args.must = [i.replace("-", " ") for i in args.must]
    args.would = [i.replace("-", " ") for i in args.would]
    if (len(args.would) + len(args.must) < args.number or len(args.must) > args.number):
        print("The number of courses specified does not match the number desired.")
        exit()

    # Exclusion parsing
    if args.xclude:
        args.xclude = [i.replace("-", " ") for i in args.xclude]

    # Convert term names to Aurora term codes
    if not args.term:
        print("You must specify an academic term. \nExample: '--term winter16'")
        exit()
    args.term = args.term.lower()
    if args.term not in terms:
        print("Invalid academic term. \nExample: '--term winter16'")
        exit()
    args.term = terms[args.term]

    # Earliest / latest time parsing
    if args.earliest:
        args.earliest = time.strptime(args.earliest, "%I:%M %p")
    if args.latest:
        args.latest = time.strptime(args.latest, "%I:%M %p")

    # File out
    if args.file:
        args.file = args.file.replace(".txt","") + ".out.txt"
        if os.path.dirname(args.file) and not os.path.exists(os.path.dirname(args.file)):
            os.makedirs(os.path.dirname(args.file))
    else:
        # Construct name from the term code and the course list, truncated
        # to 250 characters (with ".." marking the cut).
        args.file = " ".join(args.must + args.would)
        args.file = args.term + "-" + args.file
        args.file = args.file[:250] + (args.file[250:] and '..')
        args.file = args.file.replace(" ","-") + ".out.txt"

    # print_write() writes to this module-level file object.
    outfile = open(args.file, 'w')
    try:
        # Main call
        print("Generating schedules...")
        valid_combs = get_valid_combs(args.number, args.term, args.must, args.would, args.earliest, args.latest, args.offline)
        if len(valid_combs) == 0:
            print("No courses could be generated. Perhaps your request was too specific?")
            exit()

        # Pre-schedule output
        print_write("- Generated "+str(len(valid_combs))+" schedules.")

        # Optimization
        print("Optimizing...")
        if not args.no_compression:
            valid_combs = compress(valid_combs)
            print_write("- These schedules are sorted by most compression to least compression.")
        if args.prefer_free_days:
            valid_combs = prefer_free(valid_combs)
            print_write("- Schedules with free days are listed first. (--prefer-free-days)")

        # Schedule output
        print("Writing to file...")
        print_write("\n\n")
        for comb in valid_combs:
            print_section_comb(comb)
            print_write("\n")
            print_calendar(comb)
            print_write("\n\n\n\n\n")
        print("Completed. Outputted to \"" + args.file + "\"")
    finally:
        # Runs on normal completion, on exit() and on errors, so the output
        # is always flushed and the handle released.
        outfile.close()