rewrote normalization to use loudnorm in order to avoid clipping
This commit is contained in:
parent
d12eb050a7
commit
f092e852da
3 changed files with 74 additions and 81 deletions
8
aux.py
8
aux.py
|
@ -59,3 +59,11 @@ def update_cfg_context(main, ignore=[]):
|
|||
added = context_file_write.update_config(context_file_header, (main.name, main.version))
|
||||
if added:
|
||||
main.print("added to <m>.over-video<.>: %s" %(", ".join("<g>%s<.>" %(o.name) for o in added)))
|
||||
|
||||
# --------------------------------------------------
|
||||
|
||||
def float_or_string(a):
    """
    Return `a` converted to a float if possible, otherwise `a` unchanged.

    Intended for values that may be numeric strings (e.g. "-16.50") mixed
    with non-numeric ones (e.g. "dynamic"): the former come back as floats,
    the latter are passed through untouched.

    @param a: any value; typically a str
    @return: float(a) when the conversion succeeds, else the original object
    """

    try:
        return float(a)
    except (TypeError, ValueError, OverflowError):
        # Only swallow genuine conversion failures. A bare `except:` would
        # also eat KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return a
|
||||
|
|
141
over-video.py
141
over-video.py
|
@ -9,8 +9,7 @@ import re
|
|||
import tempfile
|
||||
import time
|
||||
import version
|
||||
|
||||
from aux import parse_fps, to_Path, update_cfg_context
|
||||
import aux
|
||||
|
||||
Command = over.cmd.Command
|
||||
|
||||
|
@ -22,14 +21,14 @@ X264_BANNED_PIXFMTS = {"bgr24", "yuv422p"}
|
|||
# see doc/command_assembler.png
|
||||
command = over.types.ndict()
|
||||
command.identify = Command("ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", "INFILE")
|
||||
command.normalize_prepass = Command("ffmpeg", "-i", "INFILE", "-max_muxing_queue_size", "512", "-filter:a", "volumedetect", "-f", "null", "/dev/null")
|
||||
command.normalize_prepass = Command("ffmpeg", "-i", "INFILE", "-max_muxing_queue_size", "512", "-filter:a", "loudnorm=I=-16:TP=-1.5:LRA=11:print_format=json", "-f", "null", "/dev/null")
|
||||
command.encode_generic = Command("ffmpeg", "FPS", "CUT_FROM", "-i", "INFILE", "-max_muxing_queue_size", "512", "CUT_TO", "MAP", "VIDEO", "AUDIO", "-sn", "OUTFILE")
|
||||
command.sub_vorbis = Command("-codec:a", "libvorbis", "-qscale:a", "QUALITY", "NORMALIZE")
|
||||
command.sub_pcm = Command("-codec:a", "pcm_s16le", "NORMALIZE")
|
||||
command.sub_theora = Command("-codec:v", "libtheora", "-qscale:v", "QUALITY", "VFILTER")
|
||||
command.sub_x264 = Command("PIXFMT", "-codec:v", "libx264", "-preset", "PRESET", "-crf", "QUALITY", "-profile:v", "high", "-level", "4.2", "VFILTER")
|
||||
command.sub_x265 = Command("PIXFMT", "-codec:v", "libx265", "-preset", "PRESET", "-crf", "QUALITY", "VFILTER")
|
||||
command.sub_normalize = Command("-filter:a", "VOLUME")
|
||||
command.sub_normalize = Command("-filter:a", "LOUDNORM_INCANTATION", "-ar", "48k")
|
||||
command.sub_vfilter = Command("-filter:v", "ARGS")
|
||||
command.force_yuv420p = Command("-pix_fmt", "yuv420p")
|
||||
command.sub_copy_audio = Command("-codec:a", "copy")
|
||||
|
@ -45,9 +44,7 @@ if __name__ == "__main__":
|
|||
main.add_option("video-preset", "Video encoding preset, if supported by the selected encoder.", str, ["slow"], abbr="P", count=1)
|
||||
main.add_option("video-quality", "Video encoding quality (CRF). Use <M>0<.>-<M>10<.> for Theora (<M>0<.> being the lowest, <M>5<.>-<M>7<.> is generally watchable) and <M>0<.>-<M>51<.> for x264/5 (<M>0<.> being lossless, <M>18<.>-<M>28<.> is reasonable).", float, [22], abbr="Q", count=1)
|
||||
main.add_option("context", "Use .over-video file in CWD, if available, to remember encoding parameters per-directory.", bool, [True], abbr="C")
|
||||
main.add_option("normalize", "Normalize the audio track.", bool, [True], abbr="n")
|
||||
main.add_option("normalize-target", "Target mean volume to target.", float, [-20.0], count=1)
|
||||
main.add_option("normalize-override", "Volume correction to use instead of computing the required value in a (lengthy) pre-pass.", float, [0.0], abbr="N", count=1, in_cfg_file=False)
|
||||
main.add_option("normalize", "Normalize the audio track without clipping. May use dynamic range compression.", bool, [True], abbr="n")
|
||||
main.add_option("ffmpeg-vfilter", 'Raw ffmpeg -filter:v options, e.g. "<M>scale=1280:trunc(ow/a/2)*2,transpose=dir=1<.>"', str, abbr="F", count=1)
|
||||
main.add_option("ffmpeg-map", "Raw ffmpeg <c>-map<.> options, e.g. <W>--<g>map<.> <M>0:1<.> <W>--<g>map<.> <M>0:2<.>. This is a drop-in fix until we get proper stream selection.", str, abbr="M", overwrite=False, count=1)
|
||||
main.add_option("cut", "Start and end timestamps of the portion to cut out. Uses native ffmpeg <c>-ss<.> and <c>-to<.> format, so it's either seconds from start or <M>[<HH>:]<MM>:<SS>[.<<m>...]<.>. Example: <W>--<g>cut<.> <M>25 35<.> uses 10 seconds of video starting at 25s, <W>--<g>cut<.> <M>1:10:45 1:23:54.5<.> uses video from 4245s to 5034.5s.", over.callback.strings, abbr="X", count=2)
|
||||
|
@ -71,7 +68,7 @@ if __name__ == "__main__":
|
|||
files.container = "mkv"
|
||||
|
||||
if main.cfg.context:
|
||||
update_cfg_context(main, ["context", "armed", "probe", "dump-commands", "ffmpeg-map", "normalize-override"])
|
||||
aux.update_cfg_context(main, ["context", "armed", "probe", "dump-commands", "ffmpeg-map"])
|
||||
|
||||
if main.cfg.audio in ("copy", "drop"):
|
||||
audio_words.append("<c>%s<.>" %(main.cfg.audio))
|
||||
|
@ -81,10 +78,8 @@ if __name__ == "__main__":
|
|||
if main.cfg.audio == "vorbis":
|
||||
audio_words.append("<g>quality<.>=<M>%.1f<.>" %(main.cfg.audio_quality))
|
||||
|
||||
if main.cfg.normalize_override != 0:
|
||||
audio_words.append("<g>adjust_volume<.>=<M>%.1f dB<.>" %(main.cfg.normalize_override))
|
||||
elif main.cfg.normalize:
|
||||
audio_words.append("<g>normalize<.>=<M>%.1f dB<.>" %(main.cfg.normalize_target))
|
||||
if main.cfg.normalize:
|
||||
audio_words.append("<b>normalize<.>")
|
||||
|
||||
if main.cfg.video in ("copy", "drop"):
|
||||
video_words.append("<c>%s<.>" %(main.cfg.video))
|
||||
|
@ -125,10 +120,10 @@ if __name__ == "__main__":
|
|||
for tgt in main.targets:
|
||||
print()
|
||||
|
||||
files.infile = to_Path(tgt)
|
||||
files.tmpfile = to_Path(tempfile.mktemp(suffix="." + files.container, dir="."))
|
||||
files.infile = aux.to_Path(tgt)
|
||||
files.tmpfile = aux.to_Path(tempfile.mktemp(suffix="." + files.container, dir="."))
|
||||
files.outfile = files.infile.parent / (str(files.infile.stem) + "." + files.container)
|
||||
files.move_infile_to = to_Path(main.cfg.move_source) / files.infile.name if main.cfg.move_source else None
|
||||
files.move_infile_to = aux.to_Path(main.cfg.move_source) / files.infile.name if main.cfg.move_source else None
|
||||
|
||||
if not os.path.exists(tgt) or os.path.isdir(tgt):
|
||||
main.print("target <y>%s<.> <r>is not a readable file<.>, skipping" %(tgt), main.print.tl.fail)
|
||||
|
@ -174,7 +169,7 @@ if __name__ == "__main__":
|
|||
info.video_codec = video["codec_name"]
|
||||
info.video_size_x = video["width"]
|
||||
info.video_size_y = video["height"]
|
||||
info.video_fps = over.text.Unit(parse_fps(video["r_frame_rate"]), "Hz")
|
||||
info.video_fps = over.text.Unit(aux.parse_fps(video["r_frame_rate"]), "Hz")
|
||||
|
||||
if "bit_rate" in video:
|
||||
info.video_bitrate = over.text.Unit(video["bit_rate"], "b/s")
|
||||
|
@ -215,73 +210,63 @@ if __name__ == "__main__":
|
|||
# normalization pre-pass
|
||||
|
||||
if audio_streams and main.cfg.normalize and (main.cfg.armed or main.cfg.dump_commands) and (not main.cfg.audio == "drop"):
|
||||
if main.cfg.normalize_override == 0.0:
|
||||
main.print("running normalization pre-pass")
|
||||
|
||||
command.normalize_prepass.reset()
|
||||
command.normalize_prepass.INFILE = "file:" + str(files.infile)
|
||||
command.normalize_prepass.run(stderr=True)
|
||||
|
||||
pb = over.text.ProgressBar(
|
||||
"§%a [§=a>§ A] §sa (Trem=§TA)",
|
||||
{
|
||||
"a": {
|
||||
"unit": "s",
|
||||
"top": info.duration,
|
||||
"precision": 1,
|
||||
"min_width_raw": 0,
|
||||
"min_width_rate": 0,
|
||||
"min_width_time": 0
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
pb.render()
|
||||
output_buffer = []
|
||||
|
||||
while True:
|
||||
time.sleep(.25)
|
||||
|
||||
out = command.normalize_prepass.get_output()
|
||||
|
||||
if out:
|
||||
output_buffer.append(out)
|
||||
|
||||
if b"frame=" in out:
|
||||
frame_id = re.findall(b"frame= *(\d+) ", out)[0]
|
||||
pb.set("a", int(frame_id) / info.video_fps.value)
|
||||
pb.render()
|
||||
|
||||
elif out is None:
|
||||
break
|
||||
|
||||
pb.end()
|
||||
|
||||
output = b"".join(output_buffer)
|
||||
|
||||
if b"mean_volume: " in output:
|
||||
info.mean_volume = float(re.findall(b"mean_volume: (-?\d+\.\d+) dB", output)[0])
|
||||
info.max_correction = -float(re.findall(b"max_volume: (-?\d+\.\d+) dB", output)[0])
|
||||
info.volume_correction = main.cfg.normalize_target - info.mean_volume
|
||||
else:
|
||||
main.print("<r>unexpected ffmpeg output<.>, dump follows", main.print.tl.fail, suffix=":\n")
|
||||
print(output.decode("utf-8"))
|
||||
raise RuntimeError
|
||||
|
||||
info.volume_correction = min(info.volume_correction, info.max_correction)
|
||||
main.print("detected volume %.1f dB, correction %.1f dB, max. correction %.1f dB" %(info.mean_volume, info.volume_correction, info.max_correction))
|
||||
|
||||
if info.volume_correction > info.max_correction:
|
||||
d = info.volume_correction - info.max_correction
|
||||
main.print("suggested correction is %.1f dB above the stream's maximum and will cause clipping" %(d), main.print.tl.warn)
|
||||
main.print("running normalization pre-pass")
|
||||
|
||||
command.normalize_prepass.reset()
|
||||
command.normalize_prepass.INFILE = "file:" + str(files.infile)
|
||||
command.normalize_prepass.run(stderr=True)
|
||||
|
||||
pb = over.text.ProgressBar(
|
||||
"§%a [§=a>§ A] §sa (Trem=§TA)",
|
||||
{
|
||||
"a": {
|
||||
"unit": "s",
|
||||
"top": info.duration,
|
||||
"precision": 1,
|
||||
"min_width_raw": 0,
|
||||
"min_width_rate": 0,
|
||||
"min_width_time": 0
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
pb.render()
|
||||
output_buffer = []
|
||||
|
||||
while True:
|
||||
time.sleep(.25)
|
||||
|
||||
out = command.normalize_prepass.get_output()
|
||||
|
||||
if out:
|
||||
output_buffer.append(out)
|
||||
|
||||
if b"frame=" in out:
|
||||
frame_id = re.findall(b"frame= *(\d+) ", out)[0]
|
||||
pb.set("a", int(frame_id) / info.video_fps.value)
|
||||
pb.render()
|
||||
|
||||
elif out is None:
|
||||
break
|
||||
|
||||
pb.end()
|
||||
|
||||
output = b"".join(output_buffer)
|
||||
|
||||
# decode the JSON dump from loudnorm
|
||||
if output.count(b"{") == 1 and output.count(b"}") == 1:
|
||||
loudnorm_dict = json.loads(output[output.index(b"{"):].decode("ascii"))
|
||||
info.loudnorm = over.types.ndict({k: aux.float_or_string(v) for k, v in loudnorm_dict.items()})
|
||||
else:
|
||||
info.volume_correction = main.cfg.normalize_override
|
||||
main.print("using user-supplied volume correction <M>%.1f dB<.>" %(info.volume_correction))
|
||||
main.print("<r>unexpected ffmpeg output<.>, dump follows", main.print.tl.fail, suffix=":\n")
|
||||
print(output.decode("utf-8"))
|
||||
raise RuntimeError
|
||||
|
||||
main.print("detected true peak %.1f dB" %(info.loudnorm.input_tp))
|
||||
|
||||
info.normalize_command = command.sub_normalize
|
||||
info.normalize_command.reset()
|
||||
info.normalize_command.VOLUME = "volume=%.2fdB" %(info.volume_correction)
|
||||
info.normalize_command.LOUDNORM_INCANTATION = "loudnorm=I=-16:TP=-1.5:LRA=11:measured_I=%.02f:measured_LRA=%.02f:measured_TP=%.02f:measured_thresh=%.02f:offset=%.02f:linear=true" %(info.loudnorm.input_i, info.loudnorm.input_lra, info.loudnorm.input_tp, info.loudnorm.input_thresh, info.loudnorm.target_offset)
|
||||
|
||||
else:
|
||||
info.normalize_command = None
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# encoding: utf-8
|
||||
|
||||
major = 1 # VERSION_MAJOR_IDENTIFIER
|
||||
minor = 101 # VERSION_MINOR_IDENTIFIER
|
||||
# VERSION_LAST_MM 1.101
|
||||
patch = 3 # VERSION_PATCH_IDENTIFIER
|
||||
minor = 102 # VERSION_MINOR_IDENTIFIER
|
||||
# VERSION_LAST_MM 1.102
|
||||
patch = 0 # VERSION_PATCH_IDENTIFIER
|
||||
str = ".".join(str(v) for v in (major, minor, patch))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue