find-orphans/find-orphans.py
2022-03-21 12:51:00 +01:00

110 lines
3.4 KiB
Python
Executable file

#! /usr/bin/env python3
# encoding: utf-8
# library imports
import fnmatch
import glob
import os
import over
import re
import sys
import time
# local imports
import version
# --------------------------------------------------
# Matches the "obj <path> <md5> <mtime>" lines of a Portage CONTENTS file.
CONTENTS_OBJ_RE = re.compile(r"obj (.+) [0-9a-f]+ \d+")


def parse_find_output(raw_output, on_decode_error=None):
    """Turn the raw output of ``find ... -print0`` into a set of absolute paths.

    raw_output: bytes, filenames terminated by NUL characters.
    on_decode_error: optional callable invoked with the raw bytes of every
        name that is not valid UTF-8; such names are skipped.

    Returns a set of normalized absolute paths (str).
    """
    names = set()

    for raw_name in raw_output.split(b"\x00"):
        # -print0 terminates (rather than separates) names, so splitting
        # always yields at least one empty chunk; it is not a filename.
        if not raw_name:
            continue

        try:
            name = raw_name.decode("utf-8")
        except UnicodeDecodeError:
            if on_decode_error is not None:
                on_decode_error(raw_name)
            # Skip the entry instead of re-adding a stale name.
            continue

        # find already prefixes every path with its start point, so the
        # name only needs to be made absolute relative to the cwd.
        names.add(os.path.abspath(name))

    return names


def parse_contents(text):
    """Return the paths of all ``obj`` entries in the text of a CONTENTS file."""
    return CONTENTS_OBJ_RE.findall(text)


def filter_files(files, patterns):
    """Return the subset of ``files`` (a set) matching none of the shell globs in ``patterns``."""
    ignored = set()

    for pattern in patterns:
        ignored.update(fnmatch.filter(files, pattern))

    return files - ignored


def format_output(files, null_separated):
    """Return the sorted ``files`` as one string, each name followed by NUL or a newline.

    Every name (including the last one) is terminated by the chosen
    character, so NUL mode never emits a stray newline that would become
    part of the final filename for consumers such as ``xargs -0``.
    """
    terminator = "\x00" if null_separated else "\n"
    return "".join(name + terminator for name in sorted(files))


if __name__ == "__main__":
    # NOTE(review): "find-orhpans" looks like a typo of "find-orphans". It is
    # left unchanged because, with config_file enabled, the application name
    # may determine where the config file is looked up - confirm before renaming.
    main = over.app.Main("find-orhpans", version.str, "AO-JSL", features={"config_file": True})
    main.add_option("filter", "Shell globs that should be filtered out.", over.callback.strings, abbr="f", count=1, overwrite=False)
    main.add_option("null-separated", "Output each filename separated with a null character. Otherwise use a newline.", bool, [False], abbr="0", count=1)
    main.add_doc("Description", ["Finds files on Gentoo systems that aren't owned by any installed package and lists them."])
    main.setup()

    # works around bug over:#15 (https://git.covalent.cz/overwatch/over/issues/15)
    if not main.cfg.filter:
        main.cfg.filter = ["*/.keep", "*.pyc", "/lib*/modules/*", "*.pyo", "/usr/share/mime/*", "/usr/local/*", "/usr/src/*"]

    if not main.targets:
        main.log.fail("specify at least one directory to work on, e.g. <M>/usr<.> or <M>/lib64<.>")
        main.exit(1)

    # --------------------------------------------------
    # Create a list of all files

    threads = [] # {proc, output_buffer, root}

    for root in main.targets:
        proc = over.cmd.Command("find", root, "-type", "f", "-print0")
        proc.run()
        threads.append({"proc": proc, "output_buffer": [], "root": root})

    main.log.begin("gathering a list of all files using %d processes", len(threads))

    # Poll every process until none of them returns any more output.
    while True:
        at_least_one_lives = False

        for thread in threads:
            chunk = thread["proc"].get_output()

            if chunk is not None:
                thread["output_buffer"].append(chunk)
                at_least_one_lives = True

        if not at_least_one_lives:
            break

        time.sleep(.25)

    main.log.begin("post-processing file list")

    def report_decode_error(raw_name):
        main.log.fail("UTF-8 decoding failed: <R>%s<.>", repr(raw_name))

    all_files = set()

    for thread in threads:
        output = b"".join(thread["output_buffer"])
        all_files.update(parse_find_output(output, report_decode_error))

    main.log.done("found %d files", len(all_files))

    # --------------------------------------------------
    # create a list of owned files

    main.log.begin("gathering a list of files owned by installed packages")

    owned_files = set()

    for contents_filename in glob.glob("/var/db/pkg/*/*/CONTENTS"):
        # Explicit encoding so the result does not depend on the locale;
        # undecodable bytes are replaced rather than aborting the whole run.
        with open(contents_filename, encoding="utf-8", errors="replace") as contents_file:
            owned_files.update(parse_contents(contents_file.read()))

    unowned_files = all_files - owned_files

    main.log.done("found %d owned files", len(owned_files))
    main.log.info("there are %d unowned files", len(unowned_files))

    # --------------------------------------------------
    # apply filters

    main.log.begin("applying filters")

    # The targets themselves are also used as patterns, as before.
    files_to_output = filter_files(unowned_files, main.cfg.filter + main.targets)

    color = "y" if files_to_output else "g"
    main.log.done("found <%s>%d unowned files<.>", color, len(files_to_output))

    # --------------------------------------------------
    # output

    if files_to_output:
        sys.stdout.write(format_output(files_to_output, main.cfg.null_separated))