From 71d3bf5194b90661f424a45915a27db95cda6a76 Mon Sep 17 00:00:00 2001 From: Dominic Bieri Date: Wed, 18 Mar 2020 19:03:06 +0100 Subject: [PATCH 1/5] Added parallel downloads, ranges, resuming, more TUI (not flying\!) and extended readme --- .gitignore | 1 + README.md | 26 ++++- vo-scraper.py | 307 ++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 259 insertions(+), 75 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..56ffde9 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +Lecture Recordings diff --git a/README.md b/README.md index a166bd1..b5ba92e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -#### ⚠ **Important**: In order to not overburden ETH servers during the current situation, I highly recommend only downloading videos outside of peak hours, i.e. early in the morning or late at night ⚠ +#### ⚠ **Important**: In order not to overload ETH servers (like UZH) in the current situation, I highly recommend only downloading videos outside of peak hours, i.e. early in the morning or late at night ⚠ *** @@ -32,6 +32,30 @@ To see a list of possible arguments check **For protected lectures** the vo-scraper will ask for your login credentials before downloading the video(s). +### Q: How can I choose what lectures to download? + +#### A: You will be prompted with a list of lectures available for downloading. + +You can either specify single lectures by typing their indices separated by space, or add ranges in haskell syntax, like `1..5` for `1 2 3 4`. +Ranges are upper-bound-exclusive, so `min..max` meaning `[min, max[`. Custom steps, eg. every other week, are supported too: `1..3..10` + +Videos are downloaded in parallel and cancelled downloads are resumed whenever possible. + +You may find these ranges useful: + +| Range | Equivalent | In Words | +| --------| -- | -- | +| `1..5` | `1 2 3 4` | Lecture one to four (the fifth) +| `..5` | `0 1 2 3 4` | All lectures up to four (the fifth) +| `3..` | `3 4 5 6 [...]` | All lectures starting from three (the fourth) +| `..` | all lectures | well... +| `2..4..7` | `2 4 6` | Every other lecture from two to six +| `..2..7` | `0 2 4 6` | Every other lecture until six (when I started paying attention) +| `1..3..` | `1 3 5 [...]` | Every other lecture starting from the second (eg. all the second lectures of the week) +| `..3..` | `0 3 6 [...]` | Every third lecture, starting from the beginning +(_⚠ A trailing 0 in a range is interpreted as "until the end"_) + + ### Q: How do I pass a file with links to multiple lectures? #### A: Use `--file ` diff --git a/vo-scraper.py b/vo-scraper.py index a4ae064..21b96f8 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -19,6 +19,9 @@ Check README.md and LICENSE before using this program. import urllib.request, os, sys, http.client from urllib.request import Request, urlopen from sys import platform +from multiprocessing import Pool, Array +import multiprocessing as mp +import queue, traceback # for mp.Queue import json # For handling json files import argparse # For parsing commandline arguments import getpass # For getting the user password @@ -110,6 +113,8 @@ def print_information(str, type='info', verbose_only=False): type -- The type of information: {info, warning, error} verbose_only -- If true the string will only be printed when the verbose flag is set. Useful for printing debugging info. + + returns: True if line(s) printed, false otherwise """ global print_type_dict @@ -120,9 +125,12 @@ def print_information(str, type='info', verbose_only=False): else: # Print with tag print(print_type_dict[type], str) + return True elif verbose: # Always print with tag print(print_type_dict[type],str) + return True + return False def get_credentials(user, passw): """Gets user credentials and returns them @@ -251,6 +259,8 @@ def vo_scrapper(vo_link, user, passw): global link_counter + global downloader_statuses, print_queue + # Remove `.html` file extension if vo_link.endswith('.html'): vo_link = vo_link[:-5] @@ -273,11 +283,17 @@ def vo_scrapper(vo_link, user, passw): else: # Let user pick videos try: - choice = [int(x) for x in input( - "Enter numbers of the above lectures you want to download separated by space (e.g. 0 5 12 14)\nJust press enter if you don't want to download anything from this lecture\n" + def mkrng(lst): return range(*((lst if len(lst) < 2 or lst[1] != 0 else (lst[0], len(vo_json_data['episodes']))) if len(lst) < 3 else (lst[0], lst[2] if lst[2] != 0 else len(vo_json_data['episodes']), lst[1]-lst[0]))) + + choice = set([flat for sublist in + [[int(x)] if x.isnumeric() else + mkrng(tuple(map(lambda prs:int(prs) if not prs == '' else 0, x.split("..")))) + for x in input( + "Enter numbers or ranges of the lectures you want to download, separated by space (e.g. ..3 3..5..10 12 14..)\nJust press enter if you don't want to download anything from this lecture. Ranges are defined upper bound exclusive as in [from, to[\n" ).split()] - except: - print() + for flat in sublist]) + except e: + print(e) print_information("Exiting...") sys.exit() @@ -299,9 +315,16 @@ def vo_scrapper(vo_link, user, passw): print() print_information("Keyboard interrupt detected, skipping lecture", type='warning') return + + downloader_statuses = []; + downloader_jobs = []; + print_queue = mp.Queue(len(choice)*2) # Collect links and download them + print("Initializing, please wait...") + print("["+" "*len(choice)+"]\r[", end="") for item_nr in choice: + print("*", end='') # Get link to video metadata json file item = vo_json_data['episodes'][item_nr] video_info_link = video_info_prefix+item['id'] @@ -369,50 +392,184 @@ def vo_scrapper(vo_link, user, passw): print_information(video_src_link) # Otherwise download video else: - downloader(file_name, video_src_link) - -def downloader(file_name, video_src_link): + response_head = requests.head(video_src_link) + downloader_statuses.append(mp.Array('i', [-2, int(response_head.headers.get('content-length'))])) + downloader_jobs.append((file_name, video_src_link, len(downloader_jobs))) + sys.stdout.flush() + + nproc = min(len(downloader_jobs), 5) + infolines = [] + os.system('cls' if os.name=='nt' else 'clear') # clear console + with Pool(processes=nproc) as pool: + try: + results = [pool.apply_async(downloader, job, error_callback=lambda exc: infolines.append(((str(exc)+traceback.format_exc(),), {}))) for job in downloader_jobs] + done = False + flipper = 0 + while not done: + running, successful, error = [], [], [] + for i in range(len(results)): + if results[i].ready(): + if results[i].successful(): + successful.append(i) + else: + error.append(i) + else: + running.append(i) + done = (len(successful) + len(error)) == len(downloader_jobs) + print_results_table(downloader_jobs, results, running, successful, error, infolines, flipper % 60 == 0) + flipper += 1 + try: + while True: #generally roughly equivalent to time.sleep(0.2) + infolines.append(print_queue.get(True, 0.2)) + except queue.Empty: + pass + except KeyboardInterrupt: + print_results_table(downloader_jobs, results, running, successful, error, infolines, True, True) + print("STOPPING, please be patient") + pool.terminate() + + # cleanup + print_results_table(downloader_jobs, results, running, successful, error, infolines, True, True) + +# courtesy of https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size +def sizeof_fmt(num, suffix='B'): + for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']: + if abs(num) < 1024.0: + return "%3.1f%s%s" % (num, unit, suffix) + num /= 1024.0 + return "%.1f%s%s" % (num, 'Yi', suffix) + +def print_results_table(jobs, results, running, successful, error, infolines, full = False, last = False): + global download_counter, skip_counter + global downloader_statuses + global old_stats + + new_stats = (len(running), len(successful), len(error), len(infolines)) + download_counter = sum([1 if (results[job].get()) else 0 for job in successful]) + skip_counter = len(successful) - download_counter + full = full or old_stats != new_stats or last + if full: + os.system('cls' if os.name=='nt' else 'clear') # clear console + print("="*10 + " STATUS " + "="*10) + print("Errors: %d" % len(error)) + print("Completed: %d" % len(successful)) + for job in successful: + data = jobs[job] + progress, total = downloader_statuses[data[2]] + print("{0}:\t{1}\t({2})".format( + ': '.join(data[0].split('/')[-2:]), + "Skipped" if progress == -1 else "Completed", + sizeof_fmt(total) + )) + else: + print("\r\x1B[%dA"% (len(running)+1),end='') + print("Downloading (%d / %d) ... (%s / %s total)\x1B[K" % ( + len(running), + len(jobs), + sizeof_fmt(sum([x for x, _ in downloader_statuses])), + sizeof_fmt(sum([x for _, x in downloader_statuses]))) + ) + for job in running: + data = jobs[job] + progress, total = downloader_statuses[data[2]] + if progress == -2: + print("{0}:\tQueued, {1}\x1B[K".format(': '.join(data[0].split('/')[-2:]), sizeof_fmt(total))) + else: + bars = int(50 * progress / total) + print("{0}:\t[{1}{2}{3}]\t({4} / {5})\x1B[K".format( + ': '.join(data[0].split('/')[-2:]), + '=' * max(bars-1,0), '>' if bars > 0 and bars < 50 else '', ' ' * (50-bars), + sizeof_fmt(progress), + sizeof_fmt(total) + )) + if full: + #print("\x1B7", end="") #save cursor + print("Additional Info:") + up = 0 + for args, kwds in infolines: + up += 1 if print_information(*args, **kwds) else 0 + #print("\x1B8", end="") + if not last: + print("\r\x1B[%dA"% (up+1), end='') + old_stats = new_stats + +def downloader(file_name, video_src_link, progress_array): """Downloads the video and gives progress information Keyword arguments: file_name -- Name of the file to write the data to video_src_link -- The link to download the data from + progress_array -- Shared array indicating progress + + returns: false if skipped """ - global download_counter - global skip_counter + #global download_counter + #global skip_counter + global downloader_statuses, print_queue + + com_q = print_queue + progress_array = downloader_statuses[progress_array] + nice_fname = ': '.join(file_name.split('/')[-2:]) - print_information("Video source: " + video_src_link, verbose_only=True) + com_q.put(((nice_fname + " Video source: " + video_src_link, ), {'verbose_only': True})) + com_q.put(((nice_fname + " PID: " + str(os.getpid()), ), {'verbose_only': True})) + + response_head = requests.head(video_src_link) + total_length = int(response_head.headers.get('content-length')) + partial_allowed = response_head.headers.get('accept-range') == 'bytes' + offset = 0 + skip = False + progress_array[0] = -1 + progress_array[1] = total_length # Check if file already exists if os.path.isfile(file_name): - print_information("download skipped - file already exists: " + file_name.split('/')[-1]) - skip_counter += 1 + fsize = os.stat(file_name).st_size + if fsize >= total_length: + com_q.put(((nice_fname + " download skipped - file already exists", ), {})) + else: + com_q.put(((nice_fname + " file may be corrupted: smaller than video length", ), {})) + skip = True + elif os.path.isfile(file_name+".part"): + fsize = os.stat(file_name+".part").st_size + if fsize < total_length: + com_q.put(((nice_fname + " incomplete video file already exists - resuming download of %s.part at %s" % (file_name.split('/')[-1], sizeof_fmt(fsize)), ), {})) + offset = fsize + else: + com_q.put(((nice_fname + " complete part file found: %s.part, fixing up" % file_name.split('/')[-1], ), {})) + os.rename(file_name+".part", file_name) + skip = True # Otherwise download it - else: + if not skip: # cf.: https://stackoverflow.com/questions/15644964/python-progress-bar-and-downloads with open(file_name+".part", "wb") as f: - response = requests.get(video_src_link, stream=True) + addl_headers = {} + if partial_allowed and offset != 0: + addl_headers['range'] = "bytes=%d-" % offset + f.seek(offset, os.SEEK_SET) + response = requests.get(video_src_link, stream=True, headers=addl_headers) total_length = response.headers.get('content-length') - - print_information("Downloading " + file_name.split('/')[-1] + " (%.2f" % (int(total_length)/1024/1024) + " MiB)") + + #print_information("Downloading " + file_name.split('/')[-1] + " (%.2f" % (int(total_length)/1024/1024) + " MiB)") if total_length is None: # We received no content length header f.write(response.content) else: # Download file and show progress bar - dl = 0 total_length = int(total_length) + dl = offset for data in response.iter_content(chunk_size=4096): dl += len(data) f.write(data) - done = int(50 * dl / total_length) - sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50-done)) ) - sys.stdout.flush() - print() + progress_array[0] = dl + #done = int(50 * dl / total_length) + #sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50-done)) ) + #sys.stdout.flush() + #print() os.rename(file_name+".part", file_name) - print_information("Downloaded file: " + file_name.split('/')[-1]) - download_counter += 1 + #print_information("Downloaded file: " + file_name.split('/')[-1]) + return not skip def check_connection(): """Checks connection to video.ethz.ch and if it fails then also to the internet""" @@ -613,56 +770,58 @@ def print_usage(): # # =============================================================== -# Setup parser -parser = setup_arg_parser() -args = parser.parse_args() - -# Apply commands from input -apply_args(args) - -# Store where to print video source -if print_src and args.print_src: - file_to_print_src_to = args.print_src - -# Collect lecture links -links = list() -if args.file: - links += read_links_from_file(args.file) - -# Append links passed through the command line: -links += args.lecture_link - -# Extract username and password from "link" -lecture_objects = list() -lecture_objects += [tuple((link.split(' ') + ['',''])[:3]) for link in links] # This gives us tuples of size 3, where user and pw can be empty - -# Print basic usage and exit if no lecture links are passed -if not links: - print_usage() - sys.exit() - -# Connection check -if not args.skip_connection_check: - check_connection() -else: - print_information("Connection check skipped.", verbose_only=True) - -# Update check -if not args.skip_update_check: - check_update() -else: - print_information("Update check skipped.", verbose_only=True) - -# Run scraper for every link provided -for (link, user, password) in lecture_objects: - print_information("Currently selected: " + link, verbose_only=True) - if "video.ethz.ch" not in link: - print_information("Looks like the provided link does not go to 'videos.ethz.ch' and has therefore been skipped. Make sure that it is correct: " + link, type='warning') +if __name__ == '__main__': + + # Setup parser + parser = setup_arg_parser() + args = parser.parse_args() + + # Apply commands from input + apply_args(args) + + # Store where to print video source + if print_src and args.print_src: + file_to_print_src_to = args.print_src + + # Collect lecture links + links = list() + if args.file: + links += read_links_from_file(args.file) + + # Append links passed through the command line: + links += args.lecture_link + + # Extract username and password from "link" + lecture_objects = list() + lecture_objects += [tuple((link.split(' ') + ['',''])[:3]) for link in links] # This gives us tuples of size 3, where user and pw can be empty + + # Print basic usage and exit if no lecture links are passed + if not links: + print_usage() + sys.exit() + + # Connection check + if not args.skip_connection_check: + check_connection() else: - vo_scrapper(link, user, password) - print() + print_information("Connection check skipped.", verbose_only=True) + + # Update check + if not args.skip_update_check: + check_update() + else: + print_information("Update check skipped.", verbose_only=True) + + # Run scraper for every link provided + for (link, user, password) in lecture_objects: + print_information("Currently selected: " + link, verbose_only=True) + if "video.ethz.ch" not in link: + print_information("Looks like the provided link does not go to 'videos.ethz.ch' and has therefore been skipped. Make sure that it is correct: " + link, type='warning') + else: + vo_scrapper(link, user, password) + print() -# Print summary and exit -print_information(str(link_counter) + " files found, " + str(download_counter) + " downloaded and " + str(skip_counter) + " skipped") -if platform == "win32": - input('\nEOF') # So Windows users also see the output (apparently) + # Print summary and exit + print_information(str(link_counter) + " files found, " + str(download_counter) + " downloaded and " + str(skip_counter) + " skipped") + if platform == "win32": + input('\nEOF') # So Windows users also see the output (apparently) -- GitLab From 90707a00d065967075436c5714b1d089c3826777 Mon Sep 17 00:00:00 2001 From: bierido Date: Wed, 18 Mar 2020 19:06:28 +0100 Subject: [PATCH 2/5] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index b5ba92e..8425135 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,7 @@ You may find these ranges useful: | `..2..7` | `0 2 4 6` | Every other lecture until six (when I started paying attention) | `1..3..` | `1 3 5 [...]` | Every other lecture starting from the second (eg. all the second lectures of the week) | `..3..` | `0 3 6 [...]` | Every third lecture, starting from the beginning + (_⚠ A trailing 0 in a range is interpreted as "until the end"_) -- GitLab From ba9ad978fd3c66a323648f7ff658322f9ca87015 Mon Sep 17 00:00:00 2001 From: Dominic Bieri Date: Thu, 2 Apr 2020 20:14:50 +0200 Subject: [PATCH 3/5] bump to latest version --- vo-scraper.py | 725 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 490 insertions(+), 235 deletions(-) diff --git a/vo-scraper.py b/vo-scraper.py index 21b96f8..a45f1a0 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -54,12 +54,15 @@ gitlab_repo_page = "https://gitlab.ethz.ch/tgeorg/vo-scraper/" gitlab_issue_page = gitlab_repo_page+"issues" gitlab_changelog_page = gitlab_repo_page+"-/tags/v" remote_version_link = gitlab_repo_page+"raw/master/VERSION" -program_version = '1.1' +program_version = '1.2' # For web requests user_agent = 'Mozilla/5.0' cookie_jar = requests.cookies.RequestsCookieJar() +# Store video sources in global list +video_src_collection = list() + # For stats link_counter = 0 download_counter = 0 @@ -68,21 +71,25 @@ skip_counter = 0 # series_metadata_suffix = ".series-metadata.json" video_info_prefix = "https://video.ethz.ch/.episode-video.json?recordId=" -directory_prefix = "Lecture Recordings/" +directory_prefix = "Lecture Recordings" + os.sep # Default quality video_quality = "high" +# Boolean flags download_all = False verbose = False - print_src = False +use_multithread = True + +# Location of text files file_to_print_src_to = "" +history_file = "" quality_dict = { - 'low' : 0, + 'high' : 0, 'medium': 1, - 'high' : 2 + 'low' : 2 } class bcolors: @@ -113,7 +120,7 @@ def print_information(str, type='info', verbose_only=False): type -- The type of information: {info, warning, error} verbose_only -- If true the string will only be printed when the verbose flag is set. Useful for printing debugging info. - + returns: True if line(s) printed, false otherwise """ global print_type_dict @@ -215,33 +222,100 @@ def pretty_print_episodes(vo_json_data, selected): """Prints the episode numbers that match `selected`""" # Get length of longest strings for nice formatting when printing nr_length = len(" Nr.") + max_date_length = max([len(str(episode['createdAt'][:-6])) for episode in vo_json_data['episodes']]) max_title_length = max([len(episode['title']) for episode in vo_json_data['episodes']]) max_lecturer_length = max([len(str(episode['createdBy'])) for episode in vo_json_data['episodes']]) + # Print header + print_information( + " Nr." + + " | " + + "Date".ljust(max_date_length) + + " | " + + "Name".ljust(max_title_length) + + " | " + + "Lecturer".ljust(max_lecturer_length) + ) + # Print the selected episodes for episode_nr in selected: episode = vo_json_data['episodes'][episode_nr] print_information( "%3d".ljust(nr_length) % episode_nr + " | " + + episode['createdAt'][:-6].ljust(max_date_length) + + " | " + episode['title'].ljust(max_title_length) + " | " + str(episode['createdBy']).ljust(max_lecturer_length) - + " | " + - episode['createdAt'][:-6] ) +def make_range(item, max_episode_number): + """ + + Keyword arguments: + item -- a string in the form of 'x..z' or 'x..y..z' + max_episode_number -- The highest episode number to have an upperbound for the range of episodes + + Returns: + A range from x to z, with step size y, 1 if y wasn't provided + """ + if len(item.split('..')) == 2: + # user passed something like 'x..z', so step size is 1 + lower_bound, upper_bound = item.split('..') + step = 1 + else: + # user passed something like 'x..y..z', so step size is y + lower_bound, step, upper_bound = item.split('..') + + # set the bounds to the outer limits if no number was passed + lower_bound = int(lower_bound) if lower_bound else 0 + upper_bound = int(upper_bound) if upper_bound else max_episode_number + + step = int(step) + return range(lower_bound, upper_bound+1, step) + +def get_user_choice(max_episode_number): + """ + Prompts the user to pick multiple episodes and returns them + + Keyword arguments: + max_episode_number -- The highest episode number to have an upperbound for the range of episodes + + Returns: + A list containg the user picked choices + """ + # Prompt user + user_input = input( + "Enter numbers of the above lectures you want to download separated by space (e.g. 0 5 12 14)\nJust press enter if you don't want to download anything from this lecture\n" + ).split() + choice = list() + for elem in user_input: + if elem.isnumeric(): + choice.append(int(elem)) + else: + choice += make_range(elem, max_episode_number) + + # make elements of `choice` unique + choice = set(choice) + # sort them, to download in order and not randomly + choice = sorted(choice) + + return choice def vo_scrapper(vo_link, user, passw): """ Gets the list of all available videos for a lecture. Allows user to select multiple videos. - Afterwards passes the links to the video source to `downloader()` + Returns the selected episodes Keyword arguments: vo_link -- The link to the lecture user -- The username passed from a text file passw -- The password passed from a text file + + Returns: + A tuple consisting out of the filename and the video_src_link """ global user_agent global download_all @@ -250,17 +324,12 @@ def vo_scrapper(vo_link, user, passw): global quality_dict global cookie_jar - global print_src - global file_to_print_src_to - global series_metadata_suffix global video_info_prefix global directory_prefix global link_counter - global downloader_statuses, print_queue - # Remove `.html` file extension if vo_link.endswith('.html'): vo_link = vo_link[:-5] @@ -283,24 +352,16 @@ def vo_scrapper(vo_link, user, passw): else: # Let user pick videos try: - def mkrng(lst): return range(*((lst if len(lst) < 2 or lst[1] != 0 else (lst[0], len(vo_json_data['episodes']))) if len(lst) < 3 else (lst[0], lst[2] if lst[2] != 0 else len(vo_json_data['episodes']), lst[1]-lst[0]))) - - choice = set([flat for sublist in - [[int(x)] if x.isnumeric() else - mkrng(tuple(map(lambda prs:int(prs) if not prs == '' else 0, x.split("..")))) - for x in input( - "Enter numbers or ranges of the lectures you want to download, separated by space (e.g. ..3 3..5..10 12 14..)\nJust press enter if you don't want to download anything from this lecture. Ranges are defined upper bound exclusive as in [from, to[\n" - ).split()] - for flat in sublist]) - except e: - print(e) + choice = get_user_choice(max(range(len(vo_json_data['episodes'])))) + except KeyboardInterrupt: + print() print_information("Exiting...") sys.exit() # Print the user's choice if not choice: print_information("No videos selected") - return # Nothing to do anymore + return list() # Nothing to do anymore else: print_information("You selected:") pretty_print_episodes(vo_json_data, choice) @@ -315,13 +376,11 @@ def vo_scrapper(vo_link, user, passw): print() print_information("Keyboard interrupt detected, skipping lecture", type='warning') return - - downloader_statuses = []; - downloader_jobs = []; - print_queue = mp.Queue(len(choice)*2) - # Collect links and download them - print("Initializing, please wait...") + local_video_src_collection = list() + + # Collect links for download + print("\r\nInitializing {vo_link}, please wait...") print("["+" "*len(choice)+"]\r[", end="") for item_nr in choice: print("*", end='') @@ -354,222 +413,124 @@ def vo_scrapper(vo_link, user, passw): versions.append((counter, vid_version['res']['w']*vid_version['res']['h'])) print_information(str(counter) + ": " + "%4d" %vid_version['res']['w'] + "x" + "%4d" %vid_version['res']['h'], verbose_only=True) counter += 1 - versions.sort(key=lambda tup: tup[1]) - # Now it's sorted: low -> medium -> high + versions.sort(key=lambda tup: tup[1], reverse=True) + # Now it's sorted: high -> medium -> low # Get video src url from json - video_src_link = video_json_data['streams'][0]['sources']['mp4'][versions[quality_dict[video_quality]][0]]['src'] + try: # try/except block to handle cases were not all three types of quality exist + video_src_link = video_json_data['streams'][0]['sources']['mp4'][versions[quality_dict[video_quality]][0]]['src'] + except IndexError: + print_information("Requested quality \"" + video_quality + "\" not available. Skipping episode!", type='error') + continue - lecture_titel = vo_json_data['title'] - video_title = vo_json_data["episodes"][item_nr]["title"] + lecture_title = vo_json_data['title'] + episode_title = vo_json_data["episodes"][item_nr]["title"] # If video and lecture title overlap, remove lecture title from video title - if video_title.startswith(lecture_titel): - video_title = video_title[len(lecture_titel):] - # Append date - video_title = item['createdAt'][:-6]+video_title + if episode_title.startswith(lecture_title): + episode_title = episode_title[len(lecture_title):] - # Create directory for video if it does not already exist - directory = directory_prefix + lecture_titel +"/" - if not os.path.isdir(directory): - os.makedirs(directory) - print_information("This folder was generated: " + directory, verbose_only=True) - else: - print_information("This folder already exists: " + directory, verbose_only=True) + # Extract episode name before adding the date to episode_title + episode_name = item['createdAt'][:-6] + " " + lecture_title + episode_title + + # Append date + episode_title = item['createdAt'][:-6]+episode_title # Filename is `directory/