over-video/over-video.py

#! /usr/bin/env python3
# encoding: utf-8

import json
import os
import over
import pathlib
import re
import tempfile
import time
import version
import aux

# shorthand for the command class every template below is built from
Command = over.cmd.Command

# --------------------------------------------------
# source pixel formats for which the x264 branch forces yuv420p output
X264_BANNED_PIXFMTS = {"bgr24", "yuv422p"}

# --------------------------------------------------

# see doc/command_assembler.png
#
# Upper-case words (INFILE, QUALITY, VFILTER, ...) are placeholders: the main
# loop assigns them as attributes of the Command (a string, a list, another
# Command or None) after calling reset().
command = over.types.ndict()
command.identify = Command("ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", "INFILE")
command.normalize_prepass = Command("ffmpeg", "-i", "INFILE", "-max_muxing_queue_size", "512", "-filter:a", "loudnorm=I=-16:TP=-1.5:LRA=11:print_format=json", "-f", "null", "/dev/null")
# CUT_FROM (-ss) and CUT_TO (-to) are both placed before -i so that they are
# both input options measured on the source timeline. With -ss before -i and
# -to after it, ffmpeg resets timestamps after seeking and -to then acts like
# a duration, i.e. "--cut 25 35" would produce 35 s of output, not 10 s.
command.encode_generic = Command("ffmpeg", "FPS", "CUT_FROM", "CUT_TO", "-i", "INFILE", "-max_muxing_queue_size", "512", "MAP", "VIDEO", "AUDIO", "-sn", "OUTFILE")
command.sub_vorbis = Command("-codec:a", "libvorbis", "-qscale:a", "QUALITY", "NORMALIZE")
command.sub_pcm = Command("-codec:a", "pcm_s16le", "NORMALIZE")
command.sub_theora = Command("-codec:v", "libtheora", "-qscale:v", "QUALITY", "VFILTER")
command.sub_x264 = Command("PIXFMT", "-codec:v", "libx264", "-preset", "PRESET", "-crf", "QUALITY", "-profile:v", "high", "-level", "4.2", "VFILTER")
command.sub_x265 = Command("PIXFMT", "-codec:v", "libx265", "-preset", "PRESET", "-crf", "QUALITY", "VFILTER")
command.sub_normalize = Command("-filter:a", "LOUDNORM_INCANTATION", "-ar", "48k")
command.sub_vfilter = Command("-filter:v", "ARGS")
command.force_yuv420p = Command("-pix_fmt", "yuv420p")
command.sub_copy_audio = Command("-codec:a", "copy")
command.sub_copy_video = Command("-codec:v", "copy")

# --------------------------------------------------

if __name__ == "__main__":
	# application object; the config-file feature is switched on
	main = over.app.Main(
		"over-video",
		version.str,
		"AO-JSL",
		features={"config_file": True}
	)

	# ---- codec selection and quality ----
	main.add_option(
		"audio",
		"Audio codec to use, either <M>vorbis<.>, <M>pcm<.>, <M>copy<.> or <M>drop<.>.",
		str, ["vorbis"],
		abbr="a", count=1
	)
	main.add_option(
		"audio-quality",
		"Audio encoding quality with <M>-1<.> being the worst and <M>10<.> being the best.",
		float, [4],
		abbr="q", count=1
	)
	main.add_option(
		"video",
		"Video codec to use, either <M>x265<.>, <M>x264<.>, <M>theora<.>, <M>copy<.> or <M>drop<.>.",
		str, ["x264"],
		abbr="v", count=1
	)
	main.add_option(
		"video-preset",
		"Video encoding preset, if supported by the selected encoder.",
		str, ["slow"],
		abbr="P", count=1
	)
	main.add_option(
		"video-quality",
		"Video encoding quality (CRF). Use <M>0<.>-<M>10<.> for Theora (<M>0<.> being the lowest, <M>5<.>-<M>7<.> is generally watchable) and <M>0<.>-<M>51<.> for x264/5 (<M>0<.> being lossless, <M>18<.>-<M>28<.> is reasonable).",
		float, [22],
		abbr="Q", count=1
	)

	# ---- processing switches ----
	main.add_option(
		"context",
		"Use .over-video file in CWD, if available, to remember encoding parameters per-directory.",
		bool, [True],
		abbr="C"
	)
	main.add_option(
		"normalize",
		"Normalize the audio track without clipping. May use dynamic range compression.",
		bool, [True],
		abbr="n"
	)

	# ---- raw ffmpeg pass-through ----
	main.add_option(
		"ffmpeg-vfilter",
		'Raw ffmpeg -filter:v options, e.g. "<M>scale=1280:trunc(ow/a/2)*2,transpose=dir=1<.>"',
		str,
		abbr="F", count=1
	)
	main.add_option(
		"ffmpeg-map",
		"Raw ffmpeg <c>-map<.> options, e.g. <W>--<g>map<.> <M>0:1<.> <W>--<g>map<.> <M>0:2<.>. This is a drop-in fix until we get proper stream selection.",
		str,
		abbr="M", overwrite=False, count=1
	)
	main.add_option(
		"cut",
		"Start and end timestamps of the portion to cut out. Uses native ffmpeg <c>-ss<.> and <c>-to<.> format, so it's either seconds from start or <M>[<HH>:]<MM>:<SS>[.<<m>...]<.>. Example: <W>--<g>cut<.> <M>25 35<.> uses 10 seconds of video starting at 25s, <W>--<g>cut<.> <M>1:10:45 1:23:54.5<.> uses video from 4245s to 5034.5s.",
		over.callback.strings,
		abbr="X", count=2
	)
	main.add_option(
		"fps",
		"Override input framerate.",
		float,
		abbr="f", count=1
	)

	# ---- housekeeping ----
	main.add_option(
		"move-source",
		"Move source file to this directory after conversion. Use an empty string to disable.",
		str, ["processed"],
		count=1
	)

	# ---- run-mode flags, kept out of the config file ----
	main.add_option(
		"dump-commands",
		"Print ffmpeg commands that would be executed. If <W>--<g>normalize<.> is in effect, the normalization pre-pass will still be performed so that the proper volume correction can be computed.",
		bool, [False],
		abbr="D", in_cfg_file=False
	)
	main.add_option(
		"probe",
		"Print the raw JSON output of ffprobe and exit.",
		bool, [False],
		abbr="p", in_cfg_file=False
	)
	main.add_option(
		"armed",
		"Perform the suggested action.",
		bool, [False],
		abbr="A", in_cfg_file=False
	)

	# ---- help text sections ----
	main.add_doc(
		"Description",
		["Over-Video is a simple video converter."]
	)
	main.add_doc(
		"Good encoder settings",
		[
			"<W>x264<.>: <W>--<g>video<.> <M>x264<.> <W>--<g>video-preset<.> <M>slow<.> <W>--<g>video-quality<.> <M>22<.>",
			"<W>x265<.>: <W>--<g>video<.> <M>x265<.> <W>--<g>video-preset<.> <M>medium<.> <W>--<g>video-quality<.> <M>20<.>"
		]
	)

	main.setup()

	# --------------------------------------------------
	# cfg checks

	files = over.types.ndict()
	audio_words = []
	video_words = []
	files.container = "mkv"

	if main.cfg.context:
		# NOTE(review): the list presumably names options that are kept out of
		# the per-directory context file - confirm against aux.update_cfg_context
		aux.update_cfg_context(main, ["context", "armed", "probe", "dump-commands", "ffmpeg-map"])

	# validate the codec choices before anything below relies on them, so that
	# a typo fails right away instead of after a settings summary was printed
	if main.cfg.audio not in ("drop", "copy", "pcm", "vorbis"):
		raise ValueError("unknown audio codec: %s" %(main.cfg.audio))

	if main.cfg.video not in ("drop", "copy", "theora", "x264", "x265"):
		raise ValueError("unknown video codec: %s" %(main.cfg.video))

	# human-readable summary of the audio settings
	if main.cfg.audio in ("copy", "drop"):
		audio_words.append("<c>%s<.>" %(main.cfg.audio))
	else:
		audio_words.append("<g>codec<.>=<M>%s<.>" %(main.cfg.audio))

		# only the vorbis template has a QUALITY placeholder
		if main.cfg.audio == "vorbis":
			audio_words.append("<g>quality<.>=<M>%.1f<.>" %(main.cfg.audio_quality))

		if main.cfg.normalize:
			audio_words.append("<c>normalize<.>")

	# human-readable summary of the video settings
	if main.cfg.video in ("copy", "drop"):
		video_words.append("<c>%s<.>" %(main.cfg.video))

	else:
		video_words.append("<g>codec<.>=<M>%s<.>" %(main.cfg.video))
		video_words.append("<g>quality<.>=<M>%.1f<.>" %(main.cfg.video_quality))

		# only the x264/x265 templates have a PRESET placeholder
		if main.cfg.video_preset and main.cfg.video in ("x264", "x265"):
			video_words.append("<g>preset<.>=<M>%s<.>" %(main.cfg.video_preset))

		if main.cfg.ffmpeg_vfilter:
			video_words.append("<g>vfilter<.>=<M>%s<.>" %(main.cfg.ffmpeg_vfilter))

	# audio-only output gets an audio container; everything else stays mkv
	if main.cfg.video == "drop":
		if main.cfg.audio == "pcm":
			files.container = "wav"
		elif main.cfg.audio == "vorbis":
			files.container = "ogg"

	main.print("settings", main.print.tl.start, end=":\n")
	main.print("audio: %s" %(", ".join(audio_words)))
	main.print("video: %s" %(", ".join(video_words)))
	main.print("container: <g>type<.>=<M>%s<.>" %(files.container))

	if main.cfg.move_source:
		main.print("move source files to <W>%s<.>/" %(main.cfg.move_source))

	if not main.targets:
		main.print("no files specified", main.print.tl.warn)

	for tgt in main.targets:
		print()

		# work out every path involved in processing this target:
		#   infile         - the source as given on the command line
		#   tmpfile        - scratch name in the CWD that ffmpeg encodes into
		#   outfile        - final name: source stem + container extension, next to the source
		#   move_infile_to - where the source goes afterwards (None = leave it alone)
		container_suffix = "." + files.container

		files.infile = aux.to_Path(tgt)
		source = files.infile

		files.tmpfile = aux.to_Path(tempfile.mktemp(suffix=container_suffix, dir="."))
		files.outfile = source.parent / (str(source.stem) + container_suffix)

		if main.cfg.move_source:
			files.move_infile_to = aux.to_Path(main.cfg.move_source) / source.name
		else:
			files.move_infile_to = None

		# skip anything that does not exist or is a directory
		target_missing = not os.path.exists(tgt)

		if target_missing or os.path.isdir(tgt):
			main.print("target <y>%s<.> <r>is not a readable file<.>, skipping" %(tgt), main.print.tl.fail)
			continue

		original_filesize = over.text.Unit(source.stat().st_size, "o")
		main.print("processing <W>%s<.> (%s)" %(tgt, original_filesize), main.print.tl.start)

		# --------------------------------------------------
		# identify the input file

		command.identify.reset()
		command.identify.INFILE = "file:" + str(files.infile)
		command.identify.run()
		identify_raw = command.identify.get_all_output().decode("utf-8")
		identify_dict = json.loads(identify_raw)

		if main.cfg.probe:
			print(identify_raw)
			continue

		info = over.types.ndict()

		try:
			info.duration = float(identify_dict["format"]["duration"])

			video_streams = [s for s in identify_dict["streams"] if s["codec_type"] == "video"]
			audio_streams = [s for s in identify_dict["streams"] if s["codec_type"] == "audio"]

			amount_vs = len(video_streams)
			amount_as = len(audio_streams)

			if amount_vs > 1:
				main.print('detected <r>%d<.> video streams, picking the "best" one (see man 1 ffmpeg, section STREAM SELECTION)' %(amount_vs), main.print.tl.warn)

			if amount_as > 1:
				main.print('detected <y>%d<.> audio streams, picking the "best" one (see man 1 ffmpeg, section STREAM SELECTION)' %(amount_as), main.print.tl.warn)

			if video_streams:
				# ffmpeg picks the stream with the highest pixel count and then the lowest index
				video_streams.sort(key=lambda s: s["width"] * s["height"], reverse=True)
				video = video_streams[0]
				info.video_codec = video["codec_name"]
				info.video_size_x = video["width"]
				info.video_size_y = video["height"]
				info.video_fps = over.text.Unit(aux.parse_fps(video["r_frame_rate"]), "Hz")

				if "bit_rate" in video:
					info.video_bitrate = over.text.Unit(video["bit_rate"], "b/s")
				elif "tags" in video and "BPS" in video["tags"]:
					info.video_bitrate = over.text.Unit(int(video["tags"]["BPS"]), "b/s")
				else:
					info.video_bitrate = "<R>??<.>"
				info.pixel_fmt = video["pix_fmt"]
			else:
				info.video_fps = 30 # faked for progress bars

			if audio_streams:
				# ffmpeg picks the stream with the most channels and then the lowest index
				audio_streams.sort(key=lambda s: s["channels"], reverse=True)
				audio = audio_streams[0]
				info.audio_codec = audio["codec_name"]
				info.audio_channels = audio["channels"]
				info.audio_samplerate = over.text.Unit(audio["sample_rate"], "Hz")
				info.audio_language = audio["tags"]["language"] if "tags" in audio and "language" in audio["tags"] else "und"
				info.audio_bitrate = over.text.Unit(audio["bit_rate"], "b/s") if "bit_rate" in audio else "<R>??<.>"

		except:
			main.print("exception while reading identify_dict, dump follows", main.print.tl.fail)
			print(identify_dict)
			raise

		if video_streams:
			main.print("<m>video<.>: size=<M>%d<.>x<M>%d<.> px, framerate=%s, codec=%s, bitrate=%s" %(info.video_size_x, info.video_size_y, info.video_fps, info.video_codec, info.video_bitrate))
		else:
			main.print("<m>video<.>: <y>None<.>", main.print.tl.warn)

		if audio_streams:
			main.print("<c>audio<.>: channels=<C>%d<.>, samplerate=%s, codec=%s, bitrate=%s, language=%s" %(info.audio_channels, info.audio_samplerate, info.audio_codec, info.audio_bitrate, info.audio_language))
		else:
			main.print("<c>audio<.>: <y>None<.>", main.print.tl.warn)

		# --------------------------------------------------
		# normalization pre-pass

		# Only the pcm and vorbis templates have a NORMALIZE placeholder; "copy"
		# and "drop" never apply the filter, so measuring loudness for them would
		# just decode the whole file for nothing.
		wants_normalization = main.cfg.normalize and main.cfg.audio not in ("drop", "copy")

		if audio_streams and wants_normalization and (main.cfg.armed or main.cfg.dump_commands):
			main.print("running normalization pre-pass")

			command.normalize_prepass.reset()
			command.normalize_prepass.INFILE = "file:" + str(files.infile)
			command.normalize_prepass.run(stderr=True)

			pb = over.text.ProgressBar(
				"§%a [§=a>§ A] §sa (Trem=§TA)",
				{
					"a": {
						"unit": "s",
						"top": info.duration,
						"precision": 1,
						"min_width_raw": 0,
						"min_width_rate": 0,
						"min_width_time": 0
					}
				}
			)

			# info.video_fps is either a Unit-like object exposing .value or, when
			# the framerate was faked for an input without video, a plain number
			fps = getattr(info.video_fps, "value", info.video_fps)

			pb.render()
			output_buffer = []

			# poll ffmpeg's output: truthy = new data, None = process finished,
			# anything else (no new data yet) = keep waiting
			while True:
				time.sleep(.25)

				out = command.normalize_prepass.get_output()

				if out:
					output_buffer.append(out)

					# a chunk may contain "frame=" without the complete counter
					# (e.g. cut off mid-line), so only update on a full match
					frame_match = re.search(rb"frame= *(\d+) ", out)

					if frame_match:
						# frames / fps = seconds processed so far
						pb.set("a", int(frame_match.group(1)) / fps)
						pb.render()

				elif out is None:
					break

			pb.end()

			output = b"".join(output_buffer)

			# decode the JSON dump from loudnorm; exactly one {...} object is expected
			if output.count(b"{") == 1 and output.count(b"}") == 1:
				loudnorm_dict = json.loads(output[output.index(b"{"):].decode("ascii"))
				info.loudnorm = over.types.ndict({k: aux.float_or_string(v) for k, v in loudnorm_dict.items()})
			else:
				main.print("<r>unexpected ffmpeg output<.>, dump follows", main.print.tl.fail, suffix=":\n")
				print(output.decode("utf-8"))
				raise RuntimeError("could not find the loudnorm JSON report in the ffmpeg output")

			main.print("detected true peak %.1f dB" %(info.loudnorm.input_tp))

			# second-pass filter: same targets as the pre-pass plus the measured values
			info.normalize_command = command.sub_normalize
			info.normalize_command.reset()
			info.normalize_command.LOUDNORM_INCANTATION = "loudnorm=I=-16:TP=-1.5:LRA=11:measured_I=%.02f:measured_LRA=%.02f:measured_TP=%.02f:measured_thresh=%.02f:offset=%.02f:linear=true" %(info.loudnorm.input_i, info.loudnorm.input_lra, info.loudnorm.input_tp, info.loudnorm.input_thresh, info.loudnorm.target_offset)

		else:
			info.normalize_command = None

		# --------------------------------------------------
		# main command assembly

		# start from a clean generic template and fill in its placeholders
		encode_cmd = command.encode_generic
		encode_cmd.reset()

		encode_cmd.INFILE = "file:" + str(files.infile)
		encode_cmd.OUTFILE = files.tmpfile

		# optional framerate override
		if main.cfg.fps:
			encode_cmd.FPS = ["-r", main.cfg.fps]
		else:
			encode_cmd.FPS = None

		# optional cut: both timestamps or neither
		if main.cfg.cut:
			encode_cmd.CUT_FROM = ["-ss", main.cfg.cut[0]]
			encode_cmd.CUT_TO = ["-to", main.cfg.cut[1]]
		else:
			encode_cmd.CUT_FROM = None
			encode_cmd.CUT_TO = None

		# audio part
		audio_codec = main.cfg.audio

		if audio_codec == "copy":
			encode_cmd.AUDIO = command.sub_copy_audio
		elif audio_codec == "drop":
			encode_cmd.AUDIO = "-an"
		elif audio_codec in ("pcm", "vorbis"):
			audio_cmd = command.sub_pcm if audio_codec == "pcm" else command.sub_vorbis
			audio_cmd.reset()

			# only the vorbis template takes a quality setting
			if audio_codec == "vorbis":
				audio_cmd.QUALITY = main.cfg.audio_quality

			audio_cmd.NORMALIZE = info.normalize_command
			encode_cmd.AUDIO = audio_cmd

		# optional raw video filter, shared by all encoding video branches
		if not main.cfg.ffmpeg_vfilter:
			info.vfilter_command = None
		else:
			info.vfilter_command = command.sub_vfilter
			info.vfilter_command.reset()
			info.vfilter_command.ARGS = main.cfg.ffmpeg_vfilter

		# optional raw stream mapping: one "-map X" pair per configured value
		if main.cfg.ffmpeg_map:
			info.map_command = [word for m in main.cfg.ffmpeg_map for word in ("-map", m)]
		else:
			info.map_command = None

		encode_cmd.MAP = info.map_command

		# video part
		video_codec = main.cfg.video

		if video_codec == "copy":
			encode_cmd.VIDEO = command.sub_copy_video
		elif video_codec == "drop":
			encode_cmd.VIDEO = "-vn"
		elif video_codec == "theora":
			theora_cmd = command.sub_theora
			theora_cmd.reset()
			theora_cmd.QUALITY = main.cfg.video_quality
			theora_cmd.VFILTER = info.vfilter_command

			encode_cmd.VIDEO = theora_cmd
		elif video_codec in ("x264", "x265"):
			video_cmd = command.sub_x264 if video_codec == "x264" else command.sub_x265
			video_cmd.reset()
			video_cmd.QUALITY = main.cfg.video_quality
			video_cmd.PRESET = main.cfg.video_preset
			video_cmd.VFILTER = info.vfilter_command

			# only x264 checks the source pixel format; x265 never overrides it
			if video_codec == "x264" and info.pixel_fmt in X264_BANNED_PIXFMTS:
				main.print("source pixel format <r>%s<.> is incompatible with x264, forcing <y>yuv420p<.>" %(info.pixel_fmt), main.print.tl.warn)
				video_cmd.PIXFMT = command.force_yuv420p
			else:
				video_cmd.PIXFMT = None

			encode_cmd.VIDEO = video_cmd

		# --------------------------------------------------
		# run the command iff armed

		# announce what is (or would be) done; the full command line is only
		# shown when armed or when --dump-commands was given
		if main.cfg.dump_commands or main.cfg.armed:
			cmd = " ".join(encode_cmd.dump(pretty=True))

			if main.cfg.armed:
				main.print("executing <W>%s<.>" %(cmd), main.print.tl.start)
			else:
				main.print("will execute <W>%s<.>" %(cmd))
		else:
			main.print("will encode into <W>%s<.>" %(files.tmpfile))

		if main.cfg.armed:
			# info.video_fps is either a Unit-like object exposing .value or, when
			# the framerate was faked for an input without video, a plain number;
			# reading .value unconditionally would crash on the latter
			fps = getattr(info.video_fps, "value", info.video_fps)

			pb = over.text.ProgressBar(
				"§%f §rs [§=f>§ F] §sf (§ss) (Sest=§zs, Trem=§TF)",
				{
					"f": {
						"unit": "f",
						"top": int(fps * info.duration), # expected total number of frames
						"precision": 1,
						"min_width_rate": 9
					},
					"s": {
						"unit": "o", # octets are cool
						"top": None, # size is unknown at the start but will be estimated during updates
						"precision": 1,
						"min_width_raw": 9,
						"min_width_rate": 11
					}
				}
			)

			encode_cmd.run(stderr=True)

			# poll ffmpeg's output: truthy = new data, None = process finished,
			# anything else (no new data yet) = just refresh the size and redraw
			while True:
				time.sleep(.25)

				out = encode_cmd.get_output()

				if out:
					# a chunk may contain "frame=" without the complete counter
					# (e.g. cut off mid-line), so only update on a full match
					frame_match = re.search(rb"frame= *(\d+) ", out)

					if frame_match:
						pb.set("f", int(frame_match.group(1)))

				elif out is None:
					break

				try:
					pb.set("s", files.tmpfile.stat().st_size)
				except FileNotFoundError: # a race condition with ffmpeg
					pass

				pb.render()

			pb.end()

			# check the exit status before touching the output file: a failed run
			# may not have created it, and stat() would then hide the real error
			if encode_cmd.returncode != 0:
				main.print("<r>encoding failed<.>, ffmpeg returned <y>%d<.>" %(encode_cmd.returncode), main.print.tl.fail)
				raise RuntimeError("ffmpeg returned %d" %(encode_cmd.returncode))

			new_filesize = over.text.Unit(files.tmpfile.stat().st_size, "o")
			main.print("encoding finished: %s -> %s" %(original_filesize, new_filesize), main.print.tl.done)

		# --------------------------------------------------
		# shuffle files around

		# make sure the directory for processed sources exists (armed) or
		# announce that it would be created (dry run)
		if main.cfg.move_source:
			move_to_dir = pathlib.Path(main.cfg.move_source)

			if not move_to_dir.is_dir():
				if main.cfg.armed:
					main.print("creating directory <W>%s<.>" %(move_to_dir), main.print.tl.start)
					# parents=True allows a nested --move-source path; exist_ok=True
					# tolerates the directory appearing between the check and the call
					move_to_dir.mkdir(parents=True, exist_ok=True)
				else:
					main.print("will create directory <W>%s<.>" %(move_to_dir))

		# move the source out of the way first ...
		if files.move_infile_to:
			if main.cfg.armed:
				main.print("moving <W>%s<.> -> <W>%s<.>" %(files.infile, files.move_infile_to), main.print.tl.start)
				files.infile.rename(files.move_infile_to)
			else:
				main.print("will move <W>%s<.> -> <W>%s<.>" %(files.infile, files.move_infile_to))

		# ... then give the encoded temp file its final name
		if main.cfg.armed:
			main.print("moving <W>%s<.> -> <W>%s<.>" %(files.tmpfile, files.outfile), main.print.tl.start)
			files.tmpfile.rename(files.outfile)
		else:
			main.print("will move <W>%s<.> -> <W>%s<.>" %(files.tmpfile, files.outfile))