renamed to HaSt
This commit is contained in:
parent
7e93ffe71f
commit
52b8cda647
3 changed files with 71 additions and 23 deletions
|
@ -6,6 +6,7 @@ import datetime
|
|||
import glob
|
||||
import hashlib
|
||||
import shutil
|
||||
import re
|
||||
import os
|
||||
import over
|
||||
|
||||
|
@ -21,6 +22,23 @@ class ConfigurationError(Exception):
|
|||
# --------------------------------------------------
|
||||
# Functions
|
||||
|
||||
def walk(entry, callback, *args, **kwargs):
    """
    For each file contained under the `entry` point,
    calls callback(file, *args, **kwargs).

    Every path is resolved with os.path.realpath before it is examined, so
    symbolic links are followed and the callback receives resolved paths.
    Anything that is not a directory (including `entry` itself) is handed
    to the callback. Each real directory is descended into at most once,
    so a symlink pointing back at one of its ancestors cannot make the
    traversal recurse forever.

    @while processing a directory tree
    """

    # Resolved directories already descended into; guards against symlink cycles.
    seen_dirs = set()

    def visit(path):
        path = os.path.realpath(path)

        if not os.path.isdir(path):
            callback(path, *args, **kwargs)
            return

        if path in seen_dirs:
            # Reached again through a symlink: its files were already reported.
            return

        seen_dirs.add(path)

        for name in os.listdir(path):
            visit(os.path.join(path, name))

    visit(entry)
|
||||
|
||||
# --------------------------------------------------
|
||||
# Classes
|
||||
|
||||
|
@ -30,19 +48,24 @@ class UniqueStore:
|
|||
self.hasher = getattr(hashlib, algo)
|
||||
self.log = over.text.Output("store")
|
||||
|
||||
self.count_new = 0
|
||||
self.count_skipped = 0
|
||||
self.count_total = 0
|
||||
|
||||
self.store = {} # size: {hash: path in store}
|
||||
|
||||
self.log("loading storage", self.log.tl.start)
|
||||
|
||||
i = 0
|
||||
for file in os.listdir(path):
|
||||
self.add(os.path.join(path, file), write_enabled=False)
|
||||
i += 1
|
||||
self.add(os.path.join(path, file), preload=True)
|
||||
self.count_total += 1
|
||||
|
||||
self.log("loaded %d blocks" %(i), self.log.tl.done)
|
||||
self.log("loaded %d blocks" %(self.count_total), self.log.tl.done)
|
||||
|
||||
def add(self, path, write_enabled=True, verbose=False):
|
||||
def log_stats(self):
    """
    Reports the store's counters through its logger.

    Emits one line built from count_new ("added"), count_skipped
    ("skipped") and count_total ("holding").
    """

    emit = self.log
    counters = (self.count_new, self.count_skipped, self.count_total)

    emit("added %d, skipped %d, holding %d objects" % counters)
|
||||
|
||||
def add(self, path, preload=False, dt_from_name=False, verbose=False):
|
||||
"""
|
||||
Adds a unique file to the storage. Returns True iff the file was new, False otherwise.
|
||||
|
||||
|
@ -72,35 +95,59 @@ class UniqueStore:
|
|||
filename_src = os.path.basename(path)
|
||||
filename_src_name, filename_src_suffix = os.path.splitext(filename_src)
|
||||
if "." in filename_src_suffix: filename_src_suffix = filename_src_suffix[1:]
|
||||
mtime = datetime.datetime.fromtimestamp(stat.st_mtime)
|
||||
filename_str = "%s %s %s.%s" %(
|
||||
filename_src_name,
|
||||
mtime.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
hash_hr,
|
||||
filename_src_suffix
|
||||
)
|
||||
|
||||
mtime = None
|
||||
|
||||
if dt_from_name:
|
||||
try:
|
||||
dt_raw = re.findall("([12][0-9]{3}[01][0-9][0-3][0-9])[_\-:,. ]([012][0-9][0-5][0-9][0-5][0-9])", filename_src_name)[0]
|
||||
dt_raw = " ".join(dt_raw)
|
||||
mtime = datetime.datetime.strptime(dt_raw, "%Y%m%d %H%M%S")
|
||||
except:
|
||||
pass
|
||||
|
||||
if not mtime:
|
||||
mtime = datetime.datetime.fromtimestamp(stat.st_mtime)
|
||||
|
||||
if preload:
|
||||
filename_str = filename_src
|
||||
else:
|
||||
filename_str = "%s %s %s.%s" %(
|
||||
filename_src_name,
|
||||
mtime.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
hash_hr,
|
||||
filename_src_suffix
|
||||
)
|
||||
|
||||
if size not in self.store:
|
||||
self.store[size] = {}
|
||||
|
||||
self.store[size][hash_raw] = filename_str
|
||||
|
||||
if write_enabled:
|
||||
if not preload:
|
||||
shutil.copy(path, os.path.join(self.path, filename_str))
|
||||
shutil.copystat(path, os.path.join(self.path, filename_str))
|
||||
self.count_new += 1
|
||||
self.count_total += 1
|
||||
|
||||
if verbose:
|
||||
self.log("%s (%s) added" %(path, hash_hr), self.log.tl.done)
|
||||
|
||||
elif not preload:
|
||||
self.count_skipped += 1
|
||||
|
||||
return is_new
|
||||
|
||||
# --------------------------------------------------
|
||||
|
||||
if __name__ == "__main__":
|
||||
main = over.app.Main("dedup", version.str, "AO-JSL")
|
||||
main.add_option("store", "Directory to put unqie files into.", str, count=1, abbr="s")
|
||||
main = over.app.Main("hast", version.str, "AO-JSL")
|
||||
main.add_option("store", "Directory to put unique files into.", str, count=1, abbr="s")
|
||||
main.add_option("algo", "Hashing operation to use, use anything supported by hashlib. Either <M>sha256<.> or <M>sha512<.> is recommended, though. The latter is significantly faster on 64bit CPUs.", str, ["sha512"], count=1, abbr="a")
|
||||
main.add_option("datetime-from-name", "Extract file datetime from its name if it matches IMG_YYYYmmdd_HHMMSS.", bool, [True], abbr="d")
|
||||
main.add_option("stats", "Print stats before exiting.", bool, [True])
|
||||
main.add_option("verbose", "Describe the action taken with each input file.", bool, [False], abbr="v")
|
||||
main.add_doc("Description", ["Imports files from directory <m>targets<.> into the <W>--<g>output<.> flat directory, making sure these are unique. The files are all renamed to <c>original-filename mtime hash.suffix<.>."])
|
||||
main.add_doc("Description", ["Hashed storage - imports files from directory <m>targets<.> into the <W>--<g>output<.> flat directory, making sure these are unique. The files are all renamed to <c>original-filename mtime hash.suffix<.>."])
|
||||
main.setup()
|
||||
|
||||
if not main.cfg.store:
|
||||
|
@ -123,7 +170,7 @@ if __name__ == "__main__":
|
|||
if main.cfg.verbose:
|
||||
main.print("processing %s" %(src))
|
||||
|
||||
files = [f for f in glob.glob(os.path.join(os.path.realpath(src), "**")) if os.path.isfile(f)]
|
||||
|
||||
for file in files:
|
||||
store.add(file, verbose=main.cfg.verbose)
|
||||
walk(src, store.add, verbose=main.cfg.verbose, dt_from_name=main.cfg.datetime_from_name)
|
||||
|
||||
if main.cfg.stats:
|
||||
store.log_stats()
|
|
@ -1,9 +1,10 @@
|
|||
#! /bin/bash
|
||||
|
||||
NAME="hast"
|
||||
ROOT="${1}"
|
||||
LIBDIR="${ROOT}/usr/lib/over/dedup"
|
||||
LIBDIR="${ROOT}/usr/lib/over/${NAME}"
|
||||
BINDIR="${ROOT}/usr/bin"
|
||||
BIN="dedup"
|
||||
BIN="${NAME}"
|
||||
|
||||
mkdir -p "${LIBDIR}"
|
||||
cp *.py "${LIBDIR}"
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# encoding: utf-8
|
||||
|
||||
major = 0 # VERSION_MAJOR_IDENTIFIER
|
||||
minor = 0 # VERSION_MINOR_IDENTIFIER
|
||||
minor = 1 # VERSION_MINOR_IDENTIFIER
|
||||
# VERSION_LAST_MM 0.0
|
||||
patch = 0 # VERSION_PATCH_IDENTIFIER
|
||||
str = ".".join(str(v) for v in (major, minor, patch))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue