elearning.py: add magic sync feature

Michele Guerini Rocco 2020-04-07 19:42:33 +02:00
parent 837d8a1717
commit a3d5913e67
Signed by: rnhmjoj
GPG Key ID: BFBAF4C975F76450


@@ -18,14 +18,23 @@ from requests.utils import unquote, urlparse
from bs4 import BeautifulSoup
# combines raw descriptions and default values
formatter = type('CustomFormatter',
(argparse.RawDescriptionHelpFormatter,
argparse.ArgumentDefaultsHelpFormatter), {})
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
formatter_class=formatter,
description='''
Download all video lessons from an elearning course.
The videos are taken at the original quality and encoded
using the h.265 slow preset, 96kb/s opus for audio, via ffmpeg.
You can run the program multiple times to keep the archive
in sync with elearning: existing files won't be replaced or
downloaded again, even if you have renamed them.
If authentication is required, the EUSER, EPASS variables
are tried for logging in; otherwise you will be prompted for them.
Only Kaltura videos are supported (dual screen and captions
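A side note on the CustomFormatter created with type() at the top of this hunk: the call is just a compact way of mixing the two argparse help formatters. A minimal equivalent sketch (not part of the commit):

# Equivalent to the type() call: an empty class inheriting from both
# formatters, so the help output keeps the raw description layout and
# still shows argument defaults.
import argparse

class CustomFormatter(argparse.RawDescriptionHelpFormatter,
                      argparse.ArgumentDefaultsHelpFormatter):
    pass

parser = argparse.ArgumentParser(formatter_class=CustomFormatter,
                                 description='...')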
@@ -56,7 +65,7 @@ parser.add_argument('--link-only', '-l', action='store_true',
parser.add_argument('--json', '-j', action='store_true',
help='print the video metadata in JSON')
parser.add_argument('--directory', '-d', metavar='DIR',
type=str, default=pathlib.Path(),
default='.', type=pathlib.Path,
help='directory where to save the videos. defaults to'
' the current directory if not given')
parser.add_argument('--ffmpeg', '-f', metavar='ARG',
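A note on the --directory change above: argparse only passes string defaults through `type`, so the new `default='.', type=pathlib.Path` form yields a pathlib.Path both for the default and for user-supplied values. A minimal sketch with a throwaway parser (not part of the commit):

import argparse
import pathlib

p = argparse.ArgumentParser()
p.add_argument('--directory', '-d', default='.', type=pathlib.Path)
# the string default is converted by `type`, just like a command-line value
assert isinstance(p.parse_args([]).directory, pathlib.Path)
assert isinstance(p.parse_args(['-d', '/tmp']).directory, pathlib.Path)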
@@ -233,7 +242,7 @@ def extract_ids(page, partner_id=None):
return partner_id, entry_id
def save_video(infos, args):
def save_video(infos, files, args):
'''
Download and convert the video
using ffmpeg and x265.
@@ -262,10 +271,18 @@ def save_video(infos, args):
inputs.extend(['-i', url])
maps.extend(['-map', str(i) + (':v' if i > 0 else '')])
# video ids, used to check for existing files
ids = ','.join(i['id'] for i in infos)
if ids in files:
printr('# already downloaded "{description}"'.format_map(info))
printr('# skipping', end='\n\n')
return
ffmpeg = [
'ffmpeg', '-hide_banner',
'-loglevel', 'error',
'-stats', '-n',
'-stats', '-y'
] + inputs + maps + args.ffmpeg + [
# video
'-c:v', 'libx265', '-preset', 'slow', '-crf', '23',
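For clarity, the key used for the skip check above is just the comma-joined entry ids of every stream in a lesson. A small illustration with made-up ids (the real ones come from Kaltura):

# hypothetical dual-screen recording with two made-up entry ids
infos = [{'id': '1_aaaa1111'}, {'id': '1_bbbb2222'}]
ids = ','.join(i['id'] for i in infos)   # -> '1_aaaa1111,1_bbbb2222'

# `files` maps such keys to the .mkv files already on disk, so the
# membership test succeeds exactly when this lesson was downloaded
# before, no matter what the file has since been renamed to.
already_saved = ids in {'1_aaaa1111,1_bbbb2222': 'lesson-01.mkv'}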
@@ -277,7 +294,7 @@ def save_video(infos, args):
'-metadata', 'title=' + info['description'],
'-metadata', 'AUTHOR=' + info['userId'],
'-metadata', 'DATE=' + info['createdAt'],
'-metadata', 'IDS=' + ','.join(i['id'] for i in infos),
'-metadata', 'IDS=' + ids,
# output
(dir / filename).with_suffix('.mkv')
@@ -291,6 +308,23 @@ def save_video(infos, args):
printr()
def get_filenames(dir):
'''
This is where the magic happens: it extracts the `IDS`
tag from the downloaded videos and builds a dictionary
ids -> filename. By checking these ids we can avoid
downloading existing videos even if they were renamed.
'''
files = {}
for file in dir.glob('*.mkv'):
ffprobe = ['ffprobe', file, '-show_format', '-of', 'json']
output = subprocess.run(ffprobe, capture_output=True).stdout
metadata = json.loads(output)['format']
files[metadata['tags']['IDS']] = file
return files
def main(args):
course = ('{base_url}/course'
'/view.php?id={course_id}'.format_map(vars(args)))
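get_filenames relies on the JSON that `ffprobe -show_format -of json` prints for each archived file. A trimmed, hypothetical example of that output (made-up values, keeping only the keys the function actually reads):

# roughly what json.loads(output) yields for a file written by save_video
ffprobe_output = {
    'format': {
        'filename': 'Lesson 1.mkv',
        'format_name': 'matroska,webm',
        'tags': {
            'title': 'Lesson 1',
            'IDS': '1_aaaa1111,1_bbbb2222',
        }
    }
}
metadata = ffprobe_output['format']
assert metadata['tags']['IDS'] == '1_aaaa1111,1_bbbb2222'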
@@ -303,6 +337,9 @@
links.append(li.find('a')['href'])
printr('* {} videos found!\n'.format(len(links) or 'no'))
# filenames of already saved videos
files = get_filenames(args.directory)
partner = None
output = []
for i, link in enumerate(links[args.skip:], start=args.skip):
@@ -327,7 +364,7 @@
if args.json:
output.append(info)
else:
save_video(info, args)
save_video(info, files, args)
if args.json:
print(json.dumps(output))