find-orphans/find-orphans.py
2015-02-12 15:29:31 +01:00

109 lines
3.3 KiB
Python
Executable file

#! /bin/env python3
# encoding: utf-8
# library imports
import fnmatch
import glob
import os
import over
import re
import sys
import time
# --------------------------------------------------
# Short alias for the `over` text-UI prefix markers; the script below passes
# prefix.start, prefix.done and prefix.fail as the second argument to _print.
prefix = over.core.textui.prefix
# Output object created with the name string 'Find Orphans'. It is called as
# _print(text) or _print(text, prefix.X) for every status message below.
# NOTE(review): the destination stream is decided inside `over` - presumably
# not stdout, since stdout carries the file listing; confirm.
_print = over.core.textui.Output('Find Orphans')
# --------------------------------------------------
# Functions
# --------------------------------------------------
# Classes
# --------------------------------------------------
# Matches the "obj <path> <hex digest> <number>" records of a CONTENTS file.
# Anchored to the start of a line so that text inside other record types
# (e.g. a "sym" line whose path happens to contain "obj ") is never picked up.
OWNED_OBJ_RE = re.compile(r'^obj (.+) [0-9a-f]+ \d+', re.MULTILINE)


def absolute_paths(find_output):
    """Convert NUL-separated `find -print0` output into a set of absolute paths.

    `find` prints every hit already prefixed with the start directory it was
    given, so each entry is resolved against the current working directory
    only; joining it onto the start directory again would duplicate that
    directory for relative start directories.

    find_output: decoded text, entries separated by '\\x00'.
    Returns a set of normalized absolute path strings (empty entries skipped).
    """
    return {os.path.abspath(name) for name in find_output.split('\x00') if name}


def owned_paths(contents_text):
    """Return the paths of all "obj" records found in CONTENTS file text."""
    return set(OWNED_OBJ_RE.findall(contents_text))


def drop_filtered(paths, patterns):
    """Return the members of the set `paths` matching none of the shell globs."""
    ignored = set()
    for pattern in patterns:
        ignored.update(fnmatch.filter(paths, pattern))
    return paths - ignored


def format_listing(paths, null_separated):
    """Render `paths` sorted, each one terminated by NUL or by a newline.

    Every entry (including the last) gets the terminator, which is what
    consumers such as `xargs -0` expect; an empty input yields an empty string.
    """
    terminator = '\x00' if null_separated else '\n'
    return ''.join(path + terminator for path in sorted(paths))


if __name__ == '__main__':
    main = over.core.app.Main('Find Orphans', None, 'AWARE-Overwatch Joint Software License')
    main.add_option('filter', 'str', ['/home/*', '/var/*', '*.pyc', '/lib*/modules/*', '*.pyo'], 'Shell globs that should be filtered out.', plural=True)
    main.add_option('null-separated', 'bool', False, 'Output each filename separated with a null character. Otherwise use a newline.', short_name='0')
    main.add_option('output', 'bool', False, 'Output filenames to stdout. Otherwise just counts them.')
    main.add_help('Description', ['Finds files on Gentoo systems that aren\'t owned by any installed package and lists them.'])
    main.enable_help('h')
    main.parse()

    if not main.targets:
        _print('specify at least one directory to work on, e.g. \'/\'', prefix.fail)
        main.exit(silent=True)

    # --------------------------------------------------
    # Create a list of all files

    all_files = set()
    threads = []  # one dict per running `find`: {proc, output_buffer}

    # Start one `find` per target directory; they all run concurrently.
    for root in main.targets:
        proc = over.core.cmd.Command('find', root, '-type', 'f', '-print0')
        proc.run()
        threads.append({'proc': proc, 'output_buffer': []})

    _print('gathering a list of all files using %d processes' %(len(threads)), prefix.start)

    # Poll every process until none of them hands back output any more.
    # NOTE(review): presumably get_output() returns None once a process has
    # finished and been drained - confirm against over.core.cmd.Command.
    while True:
        at_least_one_lives = False

        for thread in threads:
            tmp = thread['proc'].get_output()

            if tmp is not None:
                thread['output_buffer'].append(tmp)
                at_least_one_lives = True

        if at_least_one_lives:
            time.sleep(.25)
        else:
            break

    _print('post-processing file list')

    for thread in threads:
        # Chunks are joined as bytes first so a multi-byte character split
        # across two chunks is still decoded correctly.
        output = b''.join(thread['output_buffer']).decode('utf-8')
        all_files.update(absolute_paths(output))

    _print('found %d files' %(len(all_files)), prefix.done)

    # --------------------------------------------------
    # create a list of owned files

    _print('gathering a list of files owned by installed packages', prefix.start)

    owned_files = set()

    for contents_filename in glob.glob('/var/db/pkg/*/*/CONTENTS'):
        # Read as UTF-8 to match the decoding of the `find` output above, and
        # close the handle right away - there is one such file per package.
        with open(contents_filename, encoding='utf-8') as contents_file:
            owned_files.update(owned_paths(contents_file.read()))

    unowned_files = all_files - owned_files

    _print('found %d owned files' %(len(owned_files)), prefix.done)
    _print('there are %d unowned files' %(len(unowned_files)))

    # --------------------------------------------------
    # apply filters

    _print('applying filters', prefix.start)

    files_to_output = drop_filtered(unowned_files, main.cfg.filter)

    _print('found §y%d unowned files§/' %(len(files_to_output)), prefix.done)

    # --------------------------------------------------
    # output

    if main.cfg.output:
        # Written directly instead of print(): print() would append a newline
        # to the last NUL-separated record and emit a blank record when there
        # is nothing to list.
        sys.stdout.write(format_listing(files_to_output, main.cfg.null_separated))