From a3d5913e67716b8f75fda52150210d127a0a39e3 Mon Sep 17 00:00:00 2001
From: rnhmjoj <rnhmjoj@inventati.org>
Date: Tue, 7 Apr 2020 19:42:33 +0200
Subject: [PATCH] elearning.py: add magic sync feature

---
 python/elearning.py | 49 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 6 deletions(-)

diff --git a/python/elearning.py b/python/elearning.py
index 638c4c1..b579e1f 100755
--- a/python/elearning.py
+++ b/python/elearning.py
@@ -18,14 +18,23 @@ from requests.utils import unquote, urlparse
 from bs4            import BeautifulSoup
 
 
+# combines raw descriptions and default values
+formatter = type('CustomFormatter',
+                 (argparse.RawDescriptionHelpFormatter,
+                  argparse.ArgumentDefaultsHelpFormatter), {})
+
 parser = argparse.ArgumentParser(
-    formatter_class=argparse.RawDescriptionHelpFormatter,
+    formatter_class=formatter,
     description='''
     Download all video lessons from an elearning course.
 
     The videos are taken at the original quality and encoded
     using h.265 slow profile, 96kb/s opus for audio, via ffmpeg.
 
+    You can run the program multiple times to keep the archive
+    in sync with elearning: existing files won't be replaced or
+    downloaded again, even if you have renamed them.
+
     If authentication is required the EUSER,EPASS variables
     are tried for logging in, otherwise they will be prompted.
     Only Kaltura videos are supported (dual screen and captions
@@ -56,7 +65,7 @@ parser.add_argument('--link-only', '-l', action='store_true',
 parser.add_argument('--json', '-j', action='store_true',
                     help='print the video metadata in JSON')
 parser.add_argument('--directory', '-d', metavar='DIR',
-                    type=str, default=pathlib.Path(),
+                    default='.', type=pathlib.Path,
                     help='directory where to save the videos. defaults to'
                          ' the currenct directory if not given')
 parser.add_argument('--ffmpeg', '-f', metavar='ARG',
@@ -233,7 +242,7 @@ def extract_ids(page, partner_id=None):
     return partner_id, entry_id
 
 
-def save_video(infos, args):
+def save_video(infos, files, args):
     '''
     Download and convert the video
     using ffmpeg and x265.
@@ -262,10 +271,18 @@ def save_video(infos, args):
         inputs.extend(['-i', url])
         maps.extend(['-map', str(i) + (':v' if i > 0 else '')])
 
+    # video ids, used to check for existing files
+    ids = ','.join(i['id'] for i in infos)
+
+    if ids in files:
+        printr('# already downloaded "{description}"'.format_map(info))
+        printr('# skipping', end='\n\n')
+        return
+
     ffmpeg = [
         'ffmpeg', '-hide_banner',
         '-loglevel', 'error',
-        '-stats', '-n',
+        '-stats', '-y'
     ] + inputs + maps + args.ffmpeg + [
         # video
         '-c:v', 'libx265', '-preset', 'slow', '-crf', '23',
@@ -277,7 +294,7 @@ def save_video(infos, args):
         '-metadata', 'title='  + info['description'],
         '-metadata', 'AUTHOR=' + info['userId'],
         '-metadata', 'DATE='   + info['createdAt'],
-        '-metadata', 'IDS='    + ','.join(i['id'] for i in infos),
+        '-metadata', 'IDS='    + ids,
 
         # output
         (dir / filename).with_suffix('.mkv')
@@ -291,6 +308,23 @@ def save_video(infos, args):
     printr()
 
 
+def get_filenames(dir):
+    '''
+    Map the `IDS` tag of every *.mkv in `dir` to its path, so
+    already-downloaded videos are recognised even if renamed.
+    Files ffprobe can't read or that lack the tag are skipped.
+    '''
+    files = {}
+    for file in dir.glob('*.mkv'):
+        ffprobe = ['ffprobe', file, '-show_format', '-of', 'json']
+        output = subprocess.run(ffprobe, capture_output=True).stdout
+        tags = json.loads(output or '{}').get('format', {}).get('tags', {})
+        if 'IDS' in tags:
+            files[tags['IDS']] = file
+
+    return files
+
+
 def main(args):
     course = ('{base_url}/course'
               '/view.php?id={course_id}'.format_map(vars(args)))
@@ -303,6 +337,9 @@ def main(args):
         links.append(li.find('a')['href'])
     printr('* {} videos found!\n'.format(len(links) or 'no'))
 
+    # filenames of already saved videos
+    files = get_filenames(args.directory)
+
     partner = None
     output = []
     for i, link in enumerate(links[args.skip:], start=args.skip):
@@ -327,7 +364,7 @@ def main(args):
         if args.json:
             output.append(info)
         else:
-            save_video(info, args)
+            save_video(info, files, args)
 
     if args.json:
         print(json.dumps(output))