elearning.py: add magic sync feature
This commit is contained in:
parent
837d8a1717
commit
a3d5913e67
@ -18,14 +18,23 @@ from requests.utils import unquote, urlparse
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
# combines raw descriptions and default values
|
||||
formatter = type('CustomFormatter',
|
||||
(argparse.RawDescriptionHelpFormatter,
|
||||
argparse.ArgumentDefaultsHelpFormatter), {})
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
formatter_class=formatter,
|
||||
description='''
|
||||
Download all video lessons from an elearning course.
|
||||
|
||||
The videos are taken at the original quality and encoded
|
||||
using h.265 slow profile, 96kb/s opus for audio, via ffmpeg.
|
||||
|
||||
You can run the program multiple times to keep the archive
|
||||
in sync with elearning: existing files won't be replaced or
|
||||
downloaded again, even if you have renamed them.
|
||||
|
||||
If authentication is required the EUSER,EPASS variables
|
||||
are tried for logging in, otherwise they will be prompted.
|
||||
Only Kaltura videos are supported (dual screen and captions
|
||||
@ -56,7 +65,7 @@ parser.add_argument('--link-only', '-l', action='store_true',
|
||||
parser.add_argument('--json', '-j', action='store_true',
|
||||
help='print the video metadata in JSON')
|
||||
parser.add_argument('--directory', '-d', metavar='DIR',
|
||||
type=str, default=pathlib.Path(),
|
||||
default='.', type=pathlib.Path,
|
||||
help='directory where to save the videos. defaults to'
|
||||
' the currenct directory if not given')
|
||||
parser.add_argument('--ffmpeg', '-f', metavar='ARG',
|
||||
@ -233,7 +242,7 @@ def extract_ids(page, partner_id=None):
|
||||
return partner_id, entry_id
|
||||
|
||||
|
||||
def save_video(infos, args):
|
||||
def save_video(infos, files, args):
|
||||
'''
|
||||
Download and convert the video
|
||||
using ffmpeg and x265.
|
||||
@ -262,10 +271,18 @@ def save_video(infos, args):
|
||||
inputs.extend(['-i', url])
|
||||
maps.extend(['-map', str(i) + (':v' if i > 0 else '')])
|
||||
|
||||
# video ids, used to check for existing files
|
||||
ids = ','.join(i['id'] for i in infos)
|
||||
|
||||
if ids in files:
|
||||
printr('# already downloaded "{description}"'.format_map(info))
|
||||
printr('# skipping', end='\n\n')
|
||||
return
|
||||
|
||||
ffmpeg = [
|
||||
'ffmpeg', '-hide_banner',
|
||||
'-loglevel', 'error',
|
||||
'-stats', '-n',
|
||||
'-stats', '-y'
|
||||
] + inputs + maps + args.ffmpeg + [
|
||||
# video
|
||||
'-c:v', 'libx265', '-preset', 'slow', '-crf', '23',
|
||||
@ -277,7 +294,7 @@ def save_video(infos, args):
|
||||
'-metadata', 'title=' + info['description'],
|
||||
'-metadata', 'AUTHOR=' + info['userId'],
|
||||
'-metadata', 'DATE=' + info['createdAt'],
|
||||
'-metadata', 'IDS=' + ','.join(i['id'] for i in infos),
|
||||
'-metadata', 'IDS=' + ids,
|
||||
|
||||
# output
|
||||
(dir / filename).with_suffix('.mkv')
|
||||
@ -291,6 +308,23 @@ def save_video(infos, args):
|
||||
printr()
|
||||
|
||||
|
||||
def get_filenames(dir):
    '''
    This is where the magic happens. This extracts the `IDS`
    tag from the downloaded videos and builds a dictionary
    ids -> filename. Checking these ids we can avoid downloading
    existing videos even if they were renamed.

    Only the `*.mkv` files directly inside `dir` (a pathlib.Path)
    are inspected, each one through an `ffprobe` subprocess.

    Files that ffprobe cannot describe, or that carry no `IDS`
    tag (e.g. videos that were not saved by this program), are
    skipped instead of aborting the whole run.
    '''
    files = {}

    for file in dir.glob('*.mkv'):
        # str() keeps the command usable where subprocess does
        # not accept path-like arguments
        ffprobe = ['ffprobe', str(file), '-show_format', '-of', 'json']
        output = subprocess.run(ffprobe, capture_output=True).stdout

        try:
            metadata = json.loads(output)
        except ValueError:
            # no usable JSON on stdout: unreadable or corrupted file
            continue

        # 'format' is missing when ffprobe fails to open the file,
        # 'tags'/'IDS' when the file was not tagged by save_video
        tags = metadata.get('format', {}).get('tags', {})
        ids = tags.get('IDS')
        if ids is not None:
            files[ids] = file

    return files
|
||||
|
||||
|
||||
def main(args):
|
||||
course = ('{base_url}/course'
|
||||
'/view.php?id={course_id}'.format_map(vars(args)))
|
||||
@ -303,6 +337,9 @@ def main(args):
|
||||
links.append(li.find('a')['href'])
|
||||
printr('* {} videos found!\n'.format(len(links) or 'no'))
|
||||
|
||||
# filenames of already saved videos
|
||||
files = get_filenames(args.directory)
|
||||
|
||||
partner = None
|
||||
output = []
|
||||
for i, link in enumerate(links[args.skip:], start=args.skip):
|
||||
@ -327,7 +364,7 @@ def main(args):
|
||||
if args.json:
|
||||
output.append(info)
|
||||
else:
|
||||
save_video(info, args)
|
||||
save_video(info, files, args)
|
||||
|
||||
if args.json:
|
||||
print(json.dumps(output))
|
||||
|
Loading…
Reference in New Issue
Block a user