This commit is contained in:
Martinez 2015-02-12 15:29:31 +01:00
parent 242896ba37
commit 8da671a4bb

View file

@ -2,6 +2,8 @@
# encoding: utf-8 # encoding: utf-8
# library imports # library imports
import fnmatch
import glob
import os import os
import over import over
import re import re
@ -23,8 +25,9 @@ _print = over.core.textui.Output('Find Orphans')
if __name__ == '__main__': if __name__ == '__main__':
main = over.core.app.Main('Find Orphans', None, 'AWARE-Overwatch Joint Software License') main = over.core.app.Main('Find Orphans', None, 'AWARE-Overwatch Joint Software License')
main.add_option('filter', 'str', ['/home/*', '/var/*', '*.pyc'], 'Shell globs that should be filtered out.', plural=True) main.add_option('filter', 'str', ['/home/*', '/var/*', '*.pyc', '/lib*/modules/*', '*.pyo'], 'Shell globs that should be filtered out.', plural=True)
main.add_option('null-separated', 'bool', False, 'Output each filename separated with a null character. Otherwise use a newline.', short_name='0') main.add_option('null-separated', 'bool', False, 'Output each filename separated with a null character. Otherwise use a newline.', short_name='0')
main.add_option('output', 'bool', False, 'Output filenames to stdout. Otherwise just counts them.')
main.add_help('Description', ['Finds files on Gentoo systems that aren\'t owned by any installed package and lists them.']) main.add_help('Description', ['Finds files on Gentoo systems that aren\'t owned by any installed package and lists them.'])
main.enable_help('h') main.enable_help('h')
main.parse() main.parse()
@ -34,17 +37,17 @@ if __name__ == '__main__':
main.exit(silent=True) main.exit(silent=True)
# -------------------------------------------------- # --------------------------------------------------
# Create a file list # Create a list of all files
file_list = [] all_files = set()
threads = [] # {proc, output_buffer} threads = [] # {proc, output_buffer}
for root in main.targets: for root in main.targets:
proc = over.core.cmd.Command('find', root, '-print0') proc = over.core.cmd.Command('find', root, '-type', 'f', '-print0')
proc.run() proc.run()
threads.append({'proc': proc, 'output_buffer': []}) threads.append({'proc': proc, 'output_buffer': [], 'root': root})
_print('gathering file list using %d processes' %(len(threads)), prefix.start) _print('gathering a list of all files using %d processes' %(len(threads)), prefix.start)
while True: while True:
at_least_one_lives = False at_least_one_lives = False
@ -57,10 +60,50 @@ if __name__ == '__main__':
at_least_one_lives = True at_least_one_lives = True
if at_least_one_lives: if at_least_one_lives:
time.sleep(1) time.sleep(.25)
else: else:
break break
_print('post-processing file list') _print('post-processing file list')
# Turn the raw NUL-separated `find` output of every process into absolute,
# normalized paths and merge them into all_files.
for thread in threads:
    # The buffer holds bytes chunks; join before decoding so a multi-byte
    # character split across two chunks still decodes. Strict decoding means
    # a filename that is not valid UTF-8 raises UnicodeDecodeError here.
    output = b''.join(thread['output_buffer']).decode('utf-8')

    # find prints every result already prefixed with the start point exactly
    # as it was given on the command line. Joining that onto the absolute
    # root again duplicated the root for relative targets
    # ('sub' -> '<cwd>/sub/sub/file'), so resolve the printed path directly.
    # abspath() also normalizes the result.
    # NOTE(review): assumes the find processes were started in this
    # process's current working directory - confirm against over.core.cmd.
    all_files.update(os.path.abspath(f) for f in output.split('\x00') if f)

_print('found %d files' %(len(all_files)), prefix.done)
# --------------------------------------------------
# create a list of owned files

_print('gathering a list of files owned by installed packages', prefix.start)

# One record per line: "obj <path> <hex digits> <integer>" (presumably the
# checksum and mtime). Anchoring to whole lines keeps a path that merely
# contains the text "obj " from producing a bogus match; the greedy (.+)
# still copes with paths that contain spaces.
obj_entry = re.compile(r'^obj (.+) [0-9a-f]+ \d+$', re.MULTILINE)

owned_files = set()

for contents_filename in glob.glob('/var/db/pkg/*/*/CONTENTS'):
    # Read as UTF-8 so the names compare equal to the UTF-8-decoded find
    # output regardless of the locale; the with-block closes each file
    # instead of leaking one handle per installed package.
    with open(contents_filename, encoding='utf-8') as contents_file:
        owned_files.update(obj_entry.findall(contents_file.read()))

unowned_files = all_files - owned_files

_print('found %d owned files' %(len(owned_files)), prefix.done)
_print('there are %d unowned files' %(len(unowned_files)))
# --------------------------------------------------
# apply filters

_print('applying filters', prefix.start)

# Every unowned path that matches at least one configured shell glob.
unowned_files_ignored = set()

# The loop variable is called `pattern` rather than `filter` so the builtin
# filter() is not shadowed (and left rebound) at module level.
for pattern in main.cfg.filter:
    # fnmatch matches the whole path and '*' also spans '/' characters, so
    # '/home/*' covers everything below /home.
    unowned_files_ignored.update(fnmatch.filter(unowned_files, pattern))

files_to_output = unowned_files - unowned_files_ignored

_print('found §y%d unowned files§/' %(len(files_to_output)), prefix.done)
# --------------------------------------------------
# output

if main.cfg.output:
    # Terminate every filename (as `find -print0` does) instead of joining
    # them and letting print() append a newline: in NUL mode that newline
    # became part of the last filename for consumers such as `xargs -0`.
    # Newline mode output is unchanged for non-empty results; an empty
    # result now prints nothing rather than a blank line.
    terminator = '\x00' if main.cfg.null_separated else '\n'
    print(''.join(name + terminator for name in sorted(files_to_output)), end='')