import
This commit is contained in:
commit
7e93ffe71f
4 changed files with 150 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
over
|
||||||
|
__pycache__
|
129
dedup.py
Executable file
129
dedup.py
Executable file
|
@ -0,0 +1,129 @@
|
||||||
|
#! /usr/bin/env python3
|
||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
|
# library imports
|
||||||
|
import datetime
|
||||||
|
import glob
|
||||||
|
import hashlib
|
||||||
|
import shutil
|
||||||
|
import os
|
||||||
|
import over
|
||||||
|
|
||||||
|
# local imports
|
||||||
|
import version
|
||||||
|
|
||||||
|
# --------------------------------------------------
|
||||||
|
# Exceptions
|
||||||
|
|
||||||
|
class ConfigurationError(Exception):
    """Raised when the program is configured incorrectly."""
|
||||||
|
|
||||||
|
# --------------------------------------------------
|
||||||
|
# Functions
|
||||||
|
|
||||||
|
# --------------------------------------------------
|
||||||
|
# Classes
|
||||||
|
|
||||||
|
class UniqueStore:
    """A flat directory of files deduplicated by size and content hash.

    Files are stored under names of the form
    "<original-name> <mtime> <hash-prefix>.<suffix>".  An in-memory index
    keyed first by file size, then by raw digest, makes duplicate
    detection cheap: only equally-sized files are ever hash-compared.
    """

    # Hash input files in chunks of this size so arbitrarily large files
    # do not have to fit in memory at once.
    _CHUNK_SIZE = 1 << 20

    def __init__(self, path, algo):
        """Open (and index) the store located at *path*.

        path -- directory holding the deduplicated files
        algo -- hashlib algorithm name, e.g. "sha256" or "sha512"
        """
        self.path = path
        self.hasher = getattr(hashlib, algo)
        self.log = over.text.Output("store")

        self.store = {}  # size: {raw digest: filename in store}

        self.log("loading storage", self.log.tl.start)

        i = 0
        for file in os.listdir(path):
            # Index existing store content without copying files onto
            # themselves (write_enabled=False).
            self.add(os.path.join(path, file), write_enabled=False)
            i += 1

        self.log("loaded %d blocks" %(i), self.log.tl.done)

    def _digest(self, path):
        """Hash the file at *path* in chunks; return the hasher object.

        Replaces the previous ``self.hasher(open(path, "rb").read())``
        which leaked the file handle and read whole files into memory.
        """
        hasher = self.hasher()
        with open(path, "rb") as f:
            while True:
                chunk = f.read(self._CHUNK_SIZE)
                if not chunk:
                    break
                hasher.update(chunk)
        return hasher

    def add(self, path, write_enabled=True, verbose=False):
        """
        Adds a unique file to the storage. Returns True iff the file was new, False otherwise.

        path          -- file to consider for inclusion
        write_enabled -- if False, only index the file, do not copy it
        verbose       -- log the action taken for this file

        @while adding a file to the hashed store
        """
        stat = os.stat(path)
        size = stat.st_size
        hasher = self._digest(path)
        hash_raw = hasher.digest()
        hash_hr = hasher.hexdigest()[:16]  # short human-readable prefix

        is_new = False

        if size in self.store:
            suspects = self.store[size]

            if hash_raw in suspects:
                store_path = suspects[hash_raw]
                if verbose: self.log("%s (%s) is already known as %s" %(path, hash_hr, store_path))
            else:
                is_new = True
        else:
            is_new = True

        if is_new:
            filename_src = os.path.basename(path)
            filename_src_name, filename_src_suffix = os.path.splitext(filename_src)
            # splitext keeps the leading dot on the suffix; drop it here
            # because the store filename template adds its own.
            if "." in filename_src_suffix: filename_src_suffix = filename_src_suffix[1:]
            mtime = datetime.datetime.fromtimestamp(stat.st_mtime)
            filename_str = "%s %s %s.%s" %(
                filename_src_name,
                mtime.strftime("%Y-%m-%d %H:%M:%S"),
                hash_hr,
                filename_src_suffix
            )

            if size not in self.store:
                self.store[size] = {}

            self.store[size][hash_raw] = filename_str

            if write_enabled:
                shutil.copy(path, os.path.join(self.path, filename_str))

            if verbose:
                self.log("%s (%s) added" %(path, hash_hr), self.log.tl.done)

        return is_new
|
||||||
|
|
||||||
|
# --------------------------------------------------
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    main = over.app.Main("dedup", version.str, "AO-JSL")
    # was "unqie" -- typo in the user-facing help text
    main.add_option("store", "Directory to put unique files into.", str, count=1, abbr="s")
    main.add_option("algo", "Hashing operation to use, use anything supported by hashlib. Either <M>sha256<.> or <M>sha512<.> is recommended, though. The latter is significantly faster on 64bit CPUs.", str, ["sha512"], count=1, abbr="a")
    main.add_option("verbose", "Describe the action taken with each input file.", bool, [False], abbr="v")
    main.add_doc("Description", ["Imports files from directory <m>targets<.> into the <W>--<g>output<.> flat directory, making sure these are unique. The files are all renamed to <c>original-filename mtime hash.suffix<.>."])
    main.setup()

    if not main.cfg.store:
        main.print("no <W>--<g>store<.> specified", main.print.tl.fail)
        main.exit(1)

    store_dir = os.path.realpath(main.cfg.store)

    if not os.path.exists(store_dir):
        main.print("creating <y>%s<.>" %(store_dir), main.print.tl.exec)
        # makedirs: also create missing parent directories (plain mkdir
        # would fail if the parent does not exist yet)
        os.makedirs(store_dir)

    # check writability too, so the error message below is accurate
    if not os.path.isdir(store_dir) or not os.access(store_dir, os.W_OK):
        main.print("<r>%s<.> is not a writable store directory" %(store_dir), main.print.tl.fail)
        main.exit(1)

    store = UniqueStore(store_dir, main.cfg.algo)

    for src in main.targets:
        if main.cfg.verbose:
            main.print("processing %s" %(src))

        # recursive=True is required for "**" to actually descend into
        # subdirectories; without it the pattern matched one level only
        files = [f for f in glob.glob(os.path.join(os.path.realpath(src), "**"), recursive=True) if os.path.isfile(f)]

        for file in files:
            store.add(file, verbose=main.cfg.verbose)
|
11
install.sh
Executable file
11
install.sh
Executable file
|
@ -0,0 +1,11 @@
|
||||||
|
#! /bin/bash
# Install dedup: copy the Python sources into a private libdir and expose
# the entry point on $PATH via a symlink.
#
# Usage: ./install.sh [DESTDIR]
#   DESTDIR (optional) is prepended to all paths, for staged installs.

set -e  # abort on the first failed command instead of continuing blindly

ROOT="${1}"
LIBDIR="${ROOT}/usr/lib/over/dedup"
BINDIR="${ROOT}/usr/bin"
BIN="dedup"

mkdir -p "${LIBDIR}"
cp *.py "${LIBDIR}"
mkdir -p "${BINDIR}"
# -f: replace an existing link so re-running the installer is idempotent
ln -sf "${LIBDIR}/${BIN}.py" "${BINDIR}/${BIN}"
|
8
version.py
Normal file
8
version.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
#! /usr/bin/env python3
# encoding: utf-8

# Version of the dedup package, assembled from its numeric components.
# The *_IDENTIFIER / VERSION_LAST_MM comments are markers consumed by
# release tooling; keep them intact.

major = 0 # VERSION_MAJOR_IDENTIFIER
minor = 0 # VERSION_MINOR_IDENTIFIER
# VERSION_LAST_MM 0.0
patch = 0 # VERSION_PATCH_IDENTIFIER

# NOTE: the module deliberately exports the name ``str`` (shadowing the
# builtin within this module); callers read it as ``version.str``.
str = "%d.%d.%d" % (major, minor, patch)
|
Loading…
Add table
Add a link
Reference in a new issue