#! /bin/env python3
# encoding: utf-8
"""List files that are not recorded in any installed package's CONTENTS file.

One ``find`` process is started per target directory, the results are
compared against the ``obj`` entries found in /var/db/pkg/*/*/CONTENTS,
user-configurable shell globs are filtered out and the remaining paths are
written to stdout.
"""

# library imports
import fnmatch
import glob
import os
import over
import re
import sys
import time

# --------------------------------------------------

prefix = over.core.textui.prefix
_print = over.core.textui.Output('Find Orphans')

# Matches a CONTENTS record of the form "obj <path> <hex digits> <digits>"
# and captures <path>. Raw string so that "\d" is not an invalid str escape.
_OWNED_OBJ_RE = re.compile(r'obj (.+) [0-9a-f]+ \d+')

# Seconds to wait between two polls of the running find processes.
_POLL_INTERVAL = .25

# --------------------------------------------------
# Functions

def _parse_find_output(raw):
    """Turn the raw bytes emitted by ``find ... -print0`` into a set of paths.

    Paths are made absolute and normalised relative to the current working
    directory. ``find`` already prefixes every result with the start point it
    was given, so the start point must not be prepended a second time.
    NOTE(review): assumes over.core.cmd.Command runs find in this process's
    working directory - confirm.
    """
    decoded = raw.decode('utf-8')

    # -print0 terminates every name with NUL, so the last chunk is empty
    return {os.path.abspath(name) for name in decoded.split('\x00') if name}


def _read_owned_files(contents_glob='/var/db/pkg/*/*/CONTENTS'):
    """Return the set of paths listed as ``obj`` in all matching CONTENTS files."""
    owned = set()

    for contents_filename in glob.glob(contents_glob):
        # context manager so that the handle is closed right away instead of
        # leaking one open file per installed package
        with open(contents_filename) as contents_file:
            owned.update(_OWNED_OBJ_RE.findall(contents_file.read()))

    return owned


def _apply_filters(paths, patterns):
    """Return ``paths`` without the entries matching any of the shell globs."""
    ignored = set()

    for pattern in patterns:
        ignored.update(fnmatch.filter(paths, pattern))

    return paths - ignored


def _write_paths(paths, terminator):
    """Write the sorted paths to stdout, each one followed by ``terminator``.

    Every name (including the last one) is terminated, and nothing at all is
    written for an empty set. This keeps NUL-separated output clean for
    consumers such as ``xargs -0``, which would otherwise see a trailing
    newline glued to the last filename.
    """
    sys.stdout.write(''.join(path + terminator for path in sorted(paths)))

# --------------------------------------------------
# Classes

# --------------------------------------------------

if __name__ == '__main__':
    main = over.core.app.Main('Find Orphans', None, 'AWARE-Overwatch Joint Software License')
    main.add_option('filter', 'str', ['/home/*', '/var/*', '*.pyc', '/lib*/modules/*', '*.pyo', '/usr/share/mime/*', '/usr/local/*'], 'Shell globs that should be filtered out.', plural=True)
    main.add_option('null-separated', 'bool', False, 'Output each filename separated with a null character. Otherwise use a newline.', short_name='0')
    main.add_option('output', 'bool', True, 'Output filenames to stdout. Otherwise just counts them.')
    main.add_help('Description', ['Finds files on Gentoo systems that aren\'t owned by any installed package and lists them.'])
    main.enable_help('h')
    main.parse()

    if not main.targets:
        _print('specify at least one directory to work on, e.g. \'/\'', prefix.fail)
        main.exit(silent=True)

    # --------------------------------------------------
    # Create a list of all files

    all_files = set()
    finders = [] # {proc, output_buffer}

    for root in main.targets:
        proc = over.core.cmd.Command('find', root, '-type', 'f', '-print0')
        proc.run()
        finders.append({'proc': proc, 'output_buffer': []})

    _print('gathering a list of all files using %d processes' %(len(finders)), prefix.start)

    # Poll every process until all of them return None from get_output();
    # presumably None means "finished, nothing left to read" - verify against
    # over.core.cmd.Command.
    while True:
        at_least_one_lives = False

        for finder in finders:
            chunk = finder['proc'].get_output()

            if chunk is not None:
                finder['output_buffer'].append(chunk)
                at_least_one_lives = True

        if not at_least_one_lives:
            break

        time.sleep(_POLL_INTERVAL)

    _print('post-processing file list')

    for finder in finders:
        all_files.update(_parse_find_output(b''.join(finder['output_buffer'])))

    _print('found %d files' %(len(all_files)), prefix.done)

    # --------------------------------------------------
    # create a list of owned files

    _print('gathering a list of files owned by installed packages', prefix.start)

    owned_files = _read_owned_files()
    unowned_files = all_files - owned_files

    _print('found %d owned files' %(len(owned_files)), prefix.done)
    _print('there are %d unowned files' %(len(unowned_files)))

    # --------------------------------------------------
    # apply filters

    _print('applying filters', prefix.start)

    files_to_output = _apply_filters(unowned_files, main.cfg.filter)

    _print('found §y%d unowned files§/' %(len(files_to_output)), prefix.done)

    # --------------------------------------------------
    # output

    if main.cfg.output:
        _write_paths(files_to_output, '\x00' if main.cfg.null_separated else '\n')