elearning.py: add magic sync feature

Michele Guerini Rocco 2020-04-07 19:42:33 +02:00
parent 837d8a1717
commit a3d5913e67
Signed by: rnhmjoj
GPG Key ID: BFBAF4C975F76450


@@ -18,14 +18,23 @@ from requests.utils import unquote, urlparse
from bs4 import BeautifulSoup
# combines raw descriptions and default values
formatter = type('CustomFormatter',
(argparse.RawDescriptionHelpFormatter,
argparse.ArgumentDefaultsHelpFormatter), {})
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
formatter_class=formatter,
description='''
Download all video lessons from an elearning course.
The videos are taken at the original quality and encoded
using the h.265 slow preset, 96kb/s opus for audio, via ffmpeg.
You can run the program multiple times to keep the archive
in sync with elearning: existing files won't be replaced or
downloaded again, even if you have renamed them.
If authentication is required, the EUSER, EPASS variables
are tried for logging in; otherwise you will be prompted for them.
Only Kaltura videos are supported (dual screen and captions
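A side note on the CustomFormatter created with type() at the top of this hunk: the call is just a compact way of mixing the two argparse help formatters. A minimal equivalent sketch (not part of the commit):

# Equivalent to the type() call: an empty class inheriting from both
# formatters, so the help output keeps the raw description layout and
# still shows argument defaults.
import argparse

class CustomFormatter(argparse.RawDescriptionHelpFormatter,
                      argparse.ArgumentDefaultsHelpFormatter):
    pass

parser = argparse.ArgumentParser(formatter_class=CustomFormatter,
                                 description='...')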
@@ -56,7 +65,7 @@ parser.add_argument('--link-only', '-l', action='store_true',
parser.add_argument('--json', '-j', action='store_true',
help='print the video metadata in JSON')
parser.add_argument('--directory', '-d', metavar='DIR',
type=str, default=pathlib.Path(),
default='.', type=pathlib.Path,
help='directory where to save the videos. defaults to'
' the current directory if not given')
parser.add_argument('--ffmpeg', '-f', metavar='ARG',
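A note on the --directory change above: argparse only passes string defaults through `type`, so the new `default='.', type=pathlib.Path` form yields a pathlib.Path both for the default and for user-supplied values. A minimal sketch with a throwaway parser (not part of the commit):

import argparse
import pathlib

p = argparse.ArgumentParser()
p.add_argument('--directory', '-d', default='.', type=pathlib.Path)
# the string default is converted by `type`, just like a command-line value
assert isinstance(p.parse_args([]).directory, pathlib.Path)
assert isinstance(p.parse_args(['-d', '/tmp']).directory, pathlib.Path)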
@@ -233,7 +242,7 @@ def extract_ids(page, partner_id=None):
return partner_id, entry_id
def save_video(infos, args):
def save_video(infos, files, args):
'''
Download and convert the video
using ffmpeg and x265.
@@ -262,10 +271,18 @@ def save_video(infos, args):
inputs.extend(['-i', url])
maps.extend(['-map', str(i) + (':v' if i > 0 else '')])
# video ids, used to check for existing files
ids = ','.join(i['id'] for i in infos)
if ids in files:
printr('# already downloaded "{description}"'.format_map(info))
printr('# skipping', end='\n\n')
return
ffmpeg = [
'ffmpeg', '-hide_banner',
'-loglevel', 'error',
'-stats', '-n',
'-stats', '-y'
] + inputs + maps + args.ffmpeg + [
# video
'-c:v', 'libx265', '-preset', 'slow', '-crf', '23',
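For clarity, the key used for the skip check above is just the comma-joined entry ids of every stream in a lesson. A small illustration with made-up ids (the real ones come from Kaltura):

# hypothetical dual-screen recording with two made-up entry ids
infos = [{'id': '1_aaaa1111'}, {'id': '1_bbbb2222'}]
ids = ','.join(i['id'] for i in infos)   # -> '1_aaaa1111,1_bbbb2222'

# `files` maps such keys to the .mkv files already on disk, so the
# membership test succeeds exactly when this lesson was downloaded
# before, no matter what the file has since been renamed to.
already_saved = ids in {'1_aaaa1111,1_bbbb2222': 'lesson-01.mkv'}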
@@ -277,7 +294,7 @@ def save_video(infos, args):
'-metadata', 'title=' + info['description'],
'-metadata', 'AUTHOR=' + info['userId'],
'-metadata', 'DATE=' + info['createdAt'],
'-metadata', 'IDS=' + ','.join(i['id'] for i in infos),
'-metadata', 'IDS=' + ids,
# output
(dir / filename).with_suffix('.mkv')
@@ -291,6 +308,23 @@ def save_video(infos, args):
printr()
def get_filenames(dir):
'''
This is where the magic happens: it extracts the `IDS`
tag from the downloaded videos and builds a dictionary
ids -> filename. By checking these ids we can avoid
downloading existing videos even if they were renamed.
'''
files = {}
for file in dir.glob('*.mkv'):
ffprobe = ['ffprobe', file, '-show_format', '-of', 'json']
output = subprocess.run(ffprobe, capture_output=True).stdout
metadata = json.loads(output)['format']
files[metadata['tags']['IDS']] = file
return files
def main(args):
course = ('{base_url}/course'
'/view.php?id={course_id}'.format_map(vars(args)))
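get_filenames relies on the JSON that `ffprobe -show_format -of json` prints for each archived file. A trimmed, hypothetical example of that output (made-up values, keeping only the keys the function actually reads):

# roughly what json.loads(output) yields for a file written by save_video
ffprobe_output = {
    'format': {
        'filename': 'Lesson 1.mkv',
        'format_name': 'matroska,webm',
        'tags': {
            'title': 'Lesson 1',
            'IDS': '1_aaaa1111,1_bbbb2222',
        }
    }
}
metadata = ffprobe_output['format']
assert metadata['tags']['IDS'] == '1_aaaa1111,1_bbbb2222'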
@@ -303,6 +337,9 @@
links.append(li.find('a')['href'])
printr('* {} videos found!\n'.format(len(links) or 'no'))
# filenames of already saved videos
files = get_filenames(args.directory)
partner = None
output = []
for i, link in enumerate(links[args.skip:], start=args.skip):
@@ -327,7 +364,7 @@
if args.json:
output.append(info)
else:
save_video(info, args)
save_video(info, files, args)
if args.json:
print(json.dumps(output))