From 8da671a4bbcfe2cfb36013a6a80d55aa8e6dd73b Mon Sep 17 00:00:00 2001
From: Martinez
Date: Thu, 12 Feb 2015 15:29:31 +0100
Subject: [PATCH] finished

---
Review notes (free text in this position is ignored by git am):
* The line structure of this patch was rebuilt from a copy whose newlines
  and indentation had been collapsed. Indentation is assumed to be four
  spaces and blank context lines were placed so that every hunk matches
  its header counts. Check both against the target file before applying,
  or apply with `git apply --ignore-whitespace`; if the target is indented
  with tabs, re-indent the added lines to match.
* The added lines differ from the original commit: paths printed by find
  are resolved with os.path.abspath() (joining them onto the root a second
  time duplicated a relative root), each CONTENTS file is closed after
  reading, the regex is a raw string compiled once, the filter loop
  variable no longer shadows the builtin filter(), and every printed name
  is terminated by the separator. Hunk counts and the diffstat reflect
  these changes; the index line still carries the original blob ids.

 find-orphans.py | 65 +++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 58 insertions(+), 7 deletions(-)

diff --git a/find-orphans.py b/find-orphans.py
index 93c6d0a..eb0e582 100755
--- a/find-orphans.py
+++ b/find-orphans.py
@@ -2,6 +2,8 @@
 # encoding: utf-8
 
 # library imports
+import fnmatch
+import glob
 import os
 import over
 import re
@@ -23,8 +25,9 @@ _print = over.core.textui.Output('Find Orphans')
 
 if __name__ == '__main__':
     main = over.core.app.Main('Find Orphans', None, 'AWARE-Overwatch Joint Software License')
-    main.add_option('filter', 'str', ['/home/*', '/var/*', '*.pyc'], 'Shell globs that should be filtered out.', plural=True)
+    main.add_option('filter', 'str', ['/home/*', '/var/*', '*.pyc', '/lib*/modules/*', '*.pyo'], 'Shell globs that should be filtered out.', plural=True)
     main.add_option('null-separated', 'bool', False, 'Output each filename separated with a null character. Otherwise use a newline.', short_name='0')
+    main.add_option('output', 'bool', False, 'Output filenames to stdout. Otherwise just counts them.')
     main.add_help('Description', ['Finds files on Gentoo systems that aren\'t owned by any installed package and lists them.'])
     main.enable_help('h')
     main.parse()
@@ -34,17 +37,17 @@ if __name__ == '__main__':
         main.exit(silent=True)
 
     # --------------------------------------------------
-    # Create a file list
+    # Create a list of all files
 
-    file_list = []
+    all_files = set()
     threads = [] # {proc, output_buffer}
 
     for root in main.targets:
-        proc = over.core.cmd.Command('find', root, '-print0')
+        proc = over.core.cmd.Command('find', root, '-type', 'f', '-print0')
         proc.run()
-        threads.append({'proc': proc, 'output_buffer': []})
+        threads.append({'proc': proc, 'output_buffer': [], 'root': root})
 
-    _print('gathering file list using %d processes' %(len(threads)), prefix.start)
+    _print('gathering a list of all files using %d processes' %(len(threads)), prefix.start)
 
     while True:
         at_least_one_lives = False
@@ -57,10 +60,58 @@ if __name__ == '__main__':
                 at_least_one_lives = True
 
         if at_least_one_lives:
-            time.sleep(1)
+            time.sleep(.25)
         else:
             break
 
     _print('post-processing file list')
 
+    for thread in threads:
+        output = b''.join(thread['output_buffer']).decode('utf-8')
+        # find prints every path prefixed with its start point exactly as given,
+        # so resolve against the working directory rather than joining onto the
+        # root again (assumes the find child shares this process's cwd).
+        all_files.update(os.path.abspath(f) for f in output.split('\x00') if f)
 
+    _print('found %d files' %(len(all_files)), prefix.done)
+
+    # --------------------------------------------------
+    # create a list of owned files
+
+    _print('gathering a list of files owned by installed packages', prefix.start)
+
+    owned_files = set()
+    # an owned file is recorded as: obj <path> <hex digest> <decimal number>
+    obj_entry = re.compile(r'obj (.+) [0-9a-f]+ \d+')
+
+    for contents_filename in glob.glob('/var/db/pkg/*/*/CONTENTS'):
+        with open(contents_filename) as contents:
+            owned_files.update(obj_entry.findall(contents.read()))
+
+    unowned_files = all_files - owned_files
+
+    _print('found %d owned files' %(len(owned_files)), prefix.done)
+    _print('there are %d unowned files' %(len(unowned_files)))
+
+    # --------------------------------------------------
+    # apply filters
+
+    _print('applying filters', prefix.start)
+
+    unowned_files_ignored = set()
+
+    for pattern in main.cfg.filter:
+        unowned_files_ignored.update(fnmatch.filter(unowned_files, pattern))
+
+    files_to_output = unowned_files - unowned_files_ignored
+
+    _print('found §y%d unowned files§/' %(len(files_to_output)), prefix.done)
+
+    # --------------------------------------------------
+    # output
+
+    if main.cfg.output:
+        # terminate every name so the last entry does not pick up print()'s
+        # trailing newline when the names are null-separated
+        terminator = '\x00' if main.cfg.null_separated else '\n'
+        print(''.join(f + terminator for f in sorted(files_to_output)), end='')