From f8041519c5d636bfddcd754b82028648f14fd0b3 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Thu, 2 Apr 2020 18:44:10 +0200 Subject: [PATCH 1/4] Add possibility to pass ranges during episode selection --- README.md | 20 ++++++++++++++++++ vo-scraper.py | 58 +++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a166bd1..8d384cc 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,26 @@ To see a list of possible arguments check **For protected lectures** the vo-scraper will ask for your login credentials before downloading the video(s). +### Q: How can I choose which episodes of a lecture to download? + +#### A: You will be prompted with the list of episodes available for downloading for each lecture. + +You can either specify single episodes by typing their indices separated by space, or add ranges in Haskell-like syntax, like `1..5` for `1 2 3 4 5`. +Ranges are upper-bound-inclusive. Custom step sizes are supported too, e.g. `1..3..10` for `1 4 7 10` (the middle value is the step size). + +You may find these examples of ranges useful: + +| Range | Equivalent | In Words | +|-----------|-----------------|-------------------------------------------------------------------------------------------| +| `1..4` | `1 2 3 4` | Episodes one to four | +| `..4` | `0 1 2 3 4` | All episodes up to four (the fifth) | +| `3..` | `3 4 5 6 [...]` | All episodes starting from three (the fourth) | +| `..` | `0 1 2 3 [...]` | All episodes | +| `2..2..6` | `2 4 6` | Every other episode from two to six | +| `..2..6` | `0 2 4 6` | Every other episode until six (when I started paying attention) | +| `1..2..` | `1 3 5 [...]` | Every other episode starting from the second (i.e. all the second episodes of the week) | +| `..3..` | `0 3 6 [...]` | Every third episode, starting from the beginning | + ### Q: How do I pass a file with links to multiple lectures? 
#### A: Use `--file ` diff --git a/vo-scraper.py b/vo-scraper.py index 7afd47a..d496ba7 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -241,6 +241,58 @@ def pretty_print_episodes(vo_json_data, selected): str(episode['createdBy']).ljust(max_lecturer_length) ) +def make_range(item, max_episode_number): + """ + + Keyword arguments: + item -- a string in the form of 'x..z' or 'x..y..z' + max_episode_number -- The highest episode number to have an upperbound for the range of episodes + + Returns: + A range from x to z, with step size y, 1 if y wasn't provided + """ + if len(item.split('..')) == 2: + # user passed something like 'x..z', so step size is 1 + lower_bound, upper_bound = item.split('..') + step = 1 + else: + # user passed something like 'x..y..z', so step size is y + lower_bound, step, upper_bound = item.split('..') + + # set the bounds to the outer limits if no number was passed + lower_bound = int(lower_bound) if lower_bound else 0 + upper_bound = int(upper_bound) if upper_bound else max_episode_number + + step = int(step) + return range(lower_bound, upper_bound+1, step) + +def get_user_choice(max_episode_number): + """ + Prompts the user to pick multiple episodes and returns them + + Keyword arguments: + max_episode_number -- The highest episode number to have an upperbound for the range of episodes + + Returns: + A list containg the user picked choices + """ + # Prompt user + user_input = input( + "Enter numbers of the above lectures you want to download separated by space (e.g. 
0 5 12 14)\nJust press enter if you don't want to download anything from this lecture\n" + ).split() + choice = list() + for elem in user_input: + if elem.isnumeric(): + choice.append(int(elem)) + else: + choice += make_range(elem, max_episode_number) + + # make elements of `choice` unique + choice = set(choice) + # sort them, to download in order and not randomly + choice = sorted(choice) + + return choice def vo_scrapper(vo_link, user, passw): """ @@ -291,10 +343,8 @@ def vo_scrapper(vo_link, user, passw): else: # Let user pick videos try: - choice = [int(x) for x in input( - "Enter numbers of the above lectures you want to download separated by space (e.g. 0 5 12 14)\nJust press enter if you don't want to download anything from this lecture\n" - ).split()] - except: + choice = get_user_choice(max(range(len(vo_json_data['episodes'])))) + except KeyboardInterrupt: print() print_information("Exiting...") sys.exit() -- GitLab From e50a0dd0ac19a3701a7d4b6281040cf28a2a8cca Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 4 Sep 2020 19:26:15 +0200 Subject: [PATCH 2/4] Print HTTP response code for login on verbose Prints the HTTP response code when requesting a cookie for accessing videos secured with "ETH" --- vo-scraper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vo-scraper.py b/vo-scraper.py index d496ba7..bfa9e1e 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -169,6 +169,7 @@ def acquire_login_cookie(protection, vo_link, user, passw): # Request login-cookie r = requests.post("https://video.ethz.ch/j_security_check", headers=headers, data=data) + print_information(f"Received response: {r.status_code}", verbose_only=True) # Put login cookie in cookie_jar cookie_jar = r.cookies -- GitLab From 2ff8edcad989282f53ba5dcbfb5201e4d244da3b Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 4 Sep 2020 19:28:15 +0200 Subject: [PATCH 3/4] Update headers for cookie request Something changed in the backend, so the "Referer" is now 
required when trying to acquire a cookie for "ETH" protected videos --- vo-scraper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vo-scraper.py b/vo-scraper.py index bfa9e1e..0a932eb 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -164,7 +164,7 @@ def acquire_login_cookie(protection, vo_link, user, passw): (user, passw) = get_credentials(user, passw) # Setup headers and content to send - headers = { "Content-Type": "application/x-www-form-urlencoded", "CSRF-Token": "undefined", 'User-Agent': user_agent} + headers = {"User-Agent": user_agent, "Referer": vo_link+".html"} data = { "__charset__": "utf-8", "j_validate": True, "j_username": user, "j_password": passw} # Request login-cookie -- GitLab From e9d69276e138a03a732630f85609d2e8d2b58017 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Sat, 5 Sep 2020 17:18:15 +0200 Subject: [PATCH 4/4] Bump version number --- VERSION | 2 +- vo-scraper.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index ea710ab..cb174d5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.2 \ No newline at end of file +1.2.1 \ No newline at end of file diff --git a/vo-scraper.py b/vo-scraper.py index 0a932eb..9d8fbda 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -51,7 +51,7 @@ gitlab_repo_page = "https://gitlab.ethz.ch/tgeorg/vo-scraper/" gitlab_issue_page = gitlab_repo_page+"issues" gitlab_changelog_page = gitlab_repo_page+"-/tags/v" remote_version_link = gitlab_repo_page+"raw/master/VERSION" -program_version = '1.2' +program_version = '1.2.1' # For web requests user_agent = 'Mozilla/5.0' -- GitLab