#! /usr/bin/env python3
# encoding: utf-8

"""List files that are not owned by any installed Gentoo package.

For every directory given as a target, a `find <dir> -type f -print0` process
is started. The combined result is compared against the `obj` entries found in
/var/db/pkg/*/*/CONTENTS, the configured shell-glob filters are applied, and
whatever remains is printed to standard output.
"""

# library imports
import fnmatch
import glob
import os
import over
import re
import sys
import time

# local imports
import version

# --------------------------------------------------

if __name__ == "__main__":
    # NOTE(review): "Orhpans" looks like a typo for "Orphans". It is left
    # untouched because the name is handed to over.app.Main together with the
    # config_file feature and may be used to locate the config file - confirm
    # before renaming.
    main = over.app.Main("Find Orhpans", version.str, "AO-JSL", features={"config_file": True})
    # NOTE(review): earlier form of the declaration below, without
    # overwrite=False; presumably related to the over:#15 workaround - confirm.
    #main.add_option("filter", "Shell globs that should be filtered out.", over.callback.strings, abbr="f", count=1)
    main.add_option("filter", "Shell globs that should be filtered out.", over.callback.strings, abbr="f", count=1, overwrite=False)
    main.add_option("null-separated", "Output each filename separated with a null character. Otherwise use a newline.", bool, [False], abbr="0", count=1)
    main.add_doc("Description", ["Finds files on Gentoo systems that aren't owned by any installed package and lists them."])
    main.setup()

    # works around bug over:#15 (https://git.covalent.cz/overwatch/over/issues/15)
    # The option is declared without a default, so the default globs are
    # filled in here whenever no filter ended up in the configuration.
    if not main.cfg.filter:
        main.cfg.filter = ["*.pyc", "/lib*/modules/*", "*.pyo", "/usr/share/mime/*", "/usr/local/*", "/usr/src/*"]

    if not main.targets:
        # NOTE(review): the "<.>" closers below have no matching opening tag;
        # verify the message against over's print markup.
        main.print("specify at least one directory to work on, e.g. /usr<.> or /lib64<.>", main.print.tl.fail)
        main.exit(1)

    # --------------------------------------------------
    # Create a list of all files

    all_files = set()
    finders = []  # one dict per target: {proc, output_buffer, root}

    for root in main.targets:
        proc = over.cmd.Command("find", root, "-type", "f", "-print0")
        proc.run()
        finders.append({"proc": proc, "output_buffer": [], "root": root})

    main.print("gathering a list of all files using %d processes" %(len(finders)), main.print.tl.start)

    # Poll every find process until none of them hands back any more output.
    # A None from get_output() is treated as "this process is done".
    while True:
        at_least_one_lives = False

        for finder in finders:
            chunk = finder["proc"].get_output()

            if chunk is not None:
                finder["output_buffer"].append(chunk)
                at_least_one_lives = True

        if not at_least_one_lives:
            break

        time.sleep(.25)

    main.print("post-processing file list")

    for finder in finders:
        output = b"".join(finder["output_buffer"]).decode("utf-8")
        # Paths are made absolute and normalized so that they can be compared
        # with the paths read from the CONTENTS files below.
        base = os.path.abspath(finder["root"])
        all_files.update(os.path.normpath(os.path.join(base, f)) for f in output.split("\x00") if f)

    main.print("found %d files" %(len(all_files)), main.print.tl.done)

    # --------------------------------------------------
    # create a list of owned files

    main.print("gathering a list of files owned by installed packages", main.print.tl.start)

    # Raw string: "\d" in a plain string literal is an invalid escape sequence.
    # The greedy (.+) keeps paths containing spaces in one piece.
    obj_entry = re.compile(r"obj (.+) [0-9a-f]+ \d+")
    owned_files = set()

    for contents_filename in glob.glob("/var/db/pkg/*/*/CONTENTS"):
        # One CONTENTS file exists per matched package directory; close each
        # handle explicitly instead of leaving it to the garbage collector.
        with open(contents_filename) as contents_file:
            owned_files.update(obj_entry.findall(contents_file.read()))

    unowned_files = all_files - owned_files

    main.print("found %d owned files" %(len(owned_files)), main.print.tl.done)
    main.print("there are %d unowned files" %(len(unowned_files)))

    # --------------------------------------------------
    # apply filters

    main.print("applying filters", main.print.tl.start)

    unowned_files_ignored = set()

    for pattern in main.cfg.filter:
        unowned_files_ignored.update(fnmatch.filter(unowned_files, pattern))

    files_to_output = unowned_files - unowned_files_ignored

    color = "y" if files_to_output else "g"
    main.print("found <%s>%d unowned files<.>" %(color, len(files_to_output)), main.print.tl.done)

    # --------------------------------------------------
    # output

    if files_to_output:
        joiner = "\x00" if main.cfg.null_separated else "\n"
        # Terminate the last entry with the separator as well. In newline mode
        # this is exactly what print() emitted before; in null mode it stops
        # a stray "\n" from being appended to the final filename.
        print(joiner.join(sorted(files_to_output)), end=joiner)