From a3d5913e67716b8f75fda52150210d127a0a39e3 Mon Sep 17 00:00:00 2001 From: rnhmjoj Date: Tue, 7 Apr 2020 19:42:33 +0200 Subject: [PATCH] elearning.py: add magic sync feature --- python/elearning.py | 49 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/python/elearning.py b/python/elearning.py index 638c4c1..b579e1f 100755 --- a/python/elearning.py +++ b/python/elearning.py @@ -18,14 +18,23 @@ from requests.utils import unquote, urlparse from bs4 import BeautifulSoup +# combines raw descriptions and default values +formatter = type('CustomFormatter', + (argparse.RawDescriptionHelpFormatter, + argparse.ArgumentDefaultsHelpFormatter), {}) + parser = argparse.ArgumentParser( - formatter_class=argparse.RawDescriptionHelpFormatter, + formatter_class=formatter, description=''' Download all video lessons from an elearning course. The videos are taken at the original quality and encoded using h.265 slow profile, 96kb/s opus for audio, via ffmpeg. + You can run the program multiple times to keep the archive + in sync with elearning: existing files won't be replaced or + downloaded again, even if you have renamed them. + If authentication is required the EUSER,EPASS variables are tried for logging in, otherwise they will be prompted. Only Kaltura videos are supported (dual screen and captions @@ -56,7 +65,7 @@ parser.add_argument('--link-only', '-l', action='store_true', parser.add_argument('--json', '-j', action='store_true', help='print the video metadata in JSON') parser.add_argument('--directory', '-d', metavar='DIR', - type=str, default=pathlib.Path(), + default='.', type=pathlib.Path, help='directory where to save the videos. 
defaults to' ' the currenct directory if not given') parser.add_argument('--ffmpeg', '-f', metavar='ARG', @@ -233,7 +242,7 @@ def extract_ids(page, partner_id=None): return partner_id, entry_id -def save_video(infos, args): +def save_video(infos, files, args): ''' Download and convert the video using ffmpeg and x265. @@ -262,10 +271,18 @@ def save_video(infos, args): inputs.extend(['-i', url]) maps.extend(['-map', str(i) + (':v' if i > 0 else '')]) + # video ids, used to check for existing files + ids = ','.join(i['id'] for i in infos) + + if ids in files: + printr('# already downloaded "{description}"'.format_map(info)) + printr('# skipping', end='\n\n') + return + ffmpeg = [ 'ffmpeg', '-hide_banner', '-loglevel', 'error', - '-stats', '-n', + '-stats', '-y' ] + inputs + maps + args.ffmpeg + [ # video '-c:v', 'libx265', '-preset', 'slow', '-crf', '23', @@ -277,7 +294,7 @@ def save_video(infos, args): '-metadata', 'title=' + info['description'], '-metadata', 'AUTHOR=' + info['userId'], '-metadata', 'DATE=' + info['createdAt'], - '-metadata', 'IDS=' + ','.join(i['id'] for i in infos), + '-metadata', 'IDS=' + ids, # output (dir / filename).with_suffix('.mkv') @@ -291,6 +308,23 @@ def save_video(infos, args): printr() +def get_filenames(dir): + ''' + This is where the magic happens. This extracts the `IDS` + tag from the downloaded videos and builds a dictionary + ids -> filename. Checking these ids we can avoid downloading + existing videos even if they were renamed. 
+ ''' + files = {} + for file in dir.glob('*.mkv'): + ffprobe = ['ffprobe', file, '-show_format', '-of', 'json'] + output = subprocess.run(ffprobe, capture_output=True).stdout + metadata = json.loads(output)['format'] + files[metadata['tags']['IDS']] = file + + return files + + def main(args): course = ('{base_url}/course' '/view.php?id={course_id}'.format_map(vars(args))) @@ -303,6 +337,9 @@ def main(args): links.append(li.find('a')['href']) printr('* {} videos found!\n'.format(len(links) or 'no')) + # filenames of already saved videos + files = get_filenames(args.directory) + partner = None output = [] for i, link in enumerate(links[args.skip:], start=args.skip): @@ -327,7 +364,7 @@ def main(args): if args.json: output.append(info) else: - save_video(info, args) + save_video(info, files, args) if args.json: print(json.dumps(output))