Module refinery.units.meta.dedup
Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from refinery.units import Unit, Arg
class dedup(Unit):
    """
    Deduplicates a sequence of multiple inputs. The deduplication is limited to the current `refinery.lib.frame`.

    Chunks are considered duplicates when their MD5 digests are equal. Chunks that are not visible
    are passed through untouched and take no part in the deduplication. When the count switch is
    set, the first occurrence of every distinct visible chunk is emitted with the number of its
    occurrences stored in the meta variable `count`.
    """

    def __init__(self, count: Arg.Switch('-c', help='Store the count of each deduplicated chunk.') = False):
        super().__init__(count=count)

    def filter(self, chunks):
        """
        Yield the deduplicated contents of `chunks`.

        Without the count option, each distinct visible chunk is yielded as soon as it is first
        seen. With the count option, distinct visible chunks are yielded only after the input is
        exhausted, because the final count is not known before then; they are emitted in order of
        first occurrence. Invisible chunks are always yielded immediately.
        """
        from hashlib import md5

        if not self.args.count:
            seen = set()
            for chunk in chunks:
                if not chunk.visible:
                    yield chunk
                    continue
                digest = md5(chunk).digest()
                if digest not in seen:
                    seen.add(digest)
                    yield chunk
            return

        # Maps digest -> [first chunk with that digest, number of occurrences]. The input is
        # walked exactly once so that this also works when `chunks` is a one-shot iterator, and
        # the key is the digest rather than the chunk so that chunks need not be hashable.
        tally = {}
        for chunk in chunks:
            if not chunk.visible:
                yield chunk
                continue
            digest = md5(chunk).digest()
            entry = tally.get(digest)
            if entry is None:
                tally[digest] = [chunk, 1]
            else:
                entry[1] += 1
        # Dictionaries preserve insertion order, so output follows first occurrence.
        for chunk, count in tally.values():
            chunk.meta['count'] = count
            yield chunk
Classes
class dedup (count=False)
-
Deduplicates a sequence of multiple inputs. The deduplication is limited to the current `refinery.lib.frame`.
Expand source code Browse git
class dedup(Unit):
    """
    Deduplicates a sequence of multiple inputs. The deduplication is limited to the current `refinery.lib.frame`.

    Chunks are considered duplicates when their MD5 digests are equal. Chunks that are not visible
    are passed through untouched and take no part in the deduplication. When the count switch is
    set, the first occurrence of every distinct visible chunk is emitted with the number of its
    occurrences stored in the meta variable `count`.
    """

    def __init__(self, count: Arg.Switch('-c', help='Store the count of each deduplicated chunk.') = False):
        super().__init__(count=count)

    def filter(self, chunks):
        """
        Yield the deduplicated contents of `chunks`.

        Without the count option, each distinct visible chunk is yielded as soon as it is first
        seen. With the count option, distinct visible chunks are yielded only after the input is
        exhausted, because the final count is not known before then; they are emitted in order of
        first occurrence. Invisible chunks are always yielded immediately.
        """
        from hashlib import md5

        if not self.args.count:
            seen = set()
            for chunk in chunks:
                if not chunk.visible:
                    yield chunk
                    continue
                digest = md5(chunk).digest()
                if digest not in seen:
                    seen.add(digest)
                    yield chunk
            return

        # Maps digest -> [first chunk with that digest, number of occurrences]. The input is
        # walked exactly once so that this also works when `chunks` is a one-shot iterator, and
        # the key is the digest rather than the chunk so that chunks need not be hashable.
        tally = {}
        for chunk in chunks:
            if not chunk.visible:
                yield chunk
                continue
            digest = md5(chunk).digest()
            entry = tally.get(digest)
            if entry is None:
                tally[digest] = [chunk, 1]
            else:
                entry[1] += 1
        # Dictionaries preserve insertion order, so output follows first occurrence.
        for chunk, count in tally.values():
            chunk.meta['count'] = count
            yield chunk
Ancestors
Class variables
var required_dependencies
var optional_dependencies
Inherited members