#! /usr/bin/env python3
# encoding: utf-8

"""List files that are not owned by any installed package on a Gentoo system.

For every target directory given on the command line a ``find`` process
collects all regular files. The result is compared against the ``obj``
entries of every ``/var/db/pkg/*/*/CONTENTS`` file, the remaining paths are
run through the configured shell-glob filters and whatever is left is printed
to standard output.
"""

# library imports
import fnmatch
import glob
import os
import over
import re
import sys
import time

# local imports
import version

# --------------------------------------------------

# Matches the "obj" entries of a CONTENTS file and captures the path. The path
# is followed by a hexadecimal digest and a decimal number (presumably the
# checksum and mtime recorded by the package manager).
_OBJ_ENTRY = re.compile(r"obj (.+) [0-9a-f]+ \d+")


def _decode_find_output(raw: bytes):
    """Convert NUL-separated ``find -print0`` output into absolute paths.

    Args:
        raw: the complete output of one ``find`` process.

    Returns:
        A ``(paths, undecodable)`` tuple. ``paths`` is a set of normalised
        absolute paths, ``undecodable`` is a list of the raw names that are
        not valid UTF-8 and were therefore left out of ``paths``.
    """

    paths = set()
    undecodable = []

    for raw_name in raw.split(b"\x00"):
        # -print0 terminates every name with NUL, so the split always ends
        # with an empty item. It must not be turned into a path.
        if not raw_name:
            continue

        try:
            name = raw_name.decode("utf-8")
        except UnicodeDecodeError:
            undecodable.append(raw_name)
            continue

        # find prints each path prefixed with the target exactly as it was
        # given, so relative names are relative to the working directory.
        # NOTE(review): assumes find ran in this process's working directory.
        paths.add(os.path.abspath(name))

    return paths, undecodable


def _read_owned_files(contents_path: str):
    """Return the paths of all ``obj`` entries in one CONTENTS file.

    The file is decoded as UTF-8 to be consistent with how the output of
    ``find`` is decoded. Undecodable bytes are replaced rather than raised on;
    such a path cannot match anything in the file list anyway.
    """

    with open(contents_path, encoding="utf-8", errors="replace") as contents:
        return _OBJ_ENTRY.findall(contents.read())


def _apply_filters(paths, patterns):
    """Return ``paths`` without those matching any of the shell globs in ``patterns``."""

    ignored = set()

    for pattern in patterns:
        ignored.update(fnmatch.filter(paths, pattern))

    return paths - ignored


# --------------------------------------------------

if __name__ == "__main__":
    # NOTE(review): "find-orhpans" looks like a typo of "find-orphans". It is
    # left untouched because the name may be used to locate the config file.
    main = over.app.Main("find-orhpans", version.str, "AO-JSL", features={"config_file": True})
    main.add_option("filter", "Shell globs that should be filtered out.", over.callback.strings, abbr="f", count=1, overwrite=False)
    main.add_option("null-separated", "Output each filename separated with a null character. Otherwise use a newline.", bool, [False], abbr="0", count=1)
    main.add_doc("Description", ["Finds files on Gentoo systems that aren't owned by any installed package and lists them."])
    main.setup()

    # works around bug over:#15 (https://git.covalent.cz/overwatch/over/issues/15)
    # The default filters are applied by hand when none are configured.
    if not main.cfg.filter:
        main.cfg.filter = ["*/.keep", "*.pyc", "/lib*/modules/*", "*.pyo", "/usr/share/mime/*", "/usr/local/*", "/usr/src/*"]

    if not main.targets:
        main.log.fail("specify at least one directory to work on, e.g. /usr<.> or /lib64<.>")
        main.exit(1)

    # --------------------------------------------------
    # create a list of all files

    jobs = []  # one {proc, output_buffer} per target

    for root in main.targets:
        proc = over.cmd.Command("find", root, "-type", "f", "-print0")
        proc.run()
        jobs.append({"proc": proc, "output_buffer": []})

    main.log.begin("gathering a list of all files using %d processes", len(jobs))

    # Poll all processes until none of them hands over any more output.
    # NOTE(review): get_output() is treated as returning None once a process
    # has nothing more to deliver - confirm against the over.cmd documentation.
    while True:
        at_least_one_lives = False

        for job in jobs:
            chunk = job["proc"].get_output()

            if chunk is not None:
                job["output_buffer"].append(chunk)
                at_least_one_lives = True

        if not at_least_one_lives:
            break

        time.sleep(.25)

    main.log.begin("post-processing file list")

    all_files = set()

    for job in jobs:
        paths, undecodable = _decode_find_output(b"".join(job["output_buffer"]))
        all_files.update(paths)

        for raw_name in undecodable:
            main.log.fail("UTF-8 decoding failed: %s<.>", repr(raw_name))

    main.log.done("found %d files", len(all_files))

    # --------------------------------------------------
    # create a list of owned files

    main.log.begin("gathering a list of files owned by installed packages")

    owned_files = set()

    for contents_filename in glob.glob("/var/db/pkg/*/*/CONTENTS"):
        owned_files.update(_read_owned_files(contents_filename))

    unowned_files = all_files - owned_files

    main.log.done("found %d owned files", len(owned_files))
    main.log.info("there are %d unowned files", len(unowned_files))

    # --------------------------------------------------
    # apply filters

    main.log.begin("applying filters")

    # The targets themselves are used as patterns as well, so that a target
    # path never shows up in the report.
    files_to_output = _apply_filters(unowned_files, main.cfg.filter + main.targets)

    color = "y" if files_to_output else "g"
    main.log.done("found <%s>%d unowned files<.>", color, len(files_to_output))

    # --------------------------------------------------
    # output

    if files_to_output:
        names = sorted(files_to_output)

        if main.cfg.null_separated:
            # No trailing newline here: a NUL-aware consumer (e.g. xargs -0)
            # would treat it as a part of the last filename.
            sys.stdout.write("\x00".join(names))
        else:
            print("\n".join(names))