Module refinery.units.misc.xkey
Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from collections import Counter
from refinery import Unit, Arg
class xkey(Unit):
"""
The unit expects encrypted input which was encrypted byte-wise with a polyalphabetic key, and
where the plaintext also has one letter that occurs with overwhelming frequency. This is often
the case for the zero byte in binary formats such as PE files, and the space character in text
files. Based on this assumption, the unit computes the most likely key. This can be useful to
decrypt PE and uncompressed text files that were encrypted byte-wise using a short key.
"""
def __init__(
self,
range: Arg.Bounds(help='range of length values to try in Python slice syntax, the default is {default}.') = slice(1, 32),
):
super().__init__(range=range)
def process(self, data: bytearray):
score = 0
guess = None
bounds: slice = self.args.range
data = memoryview(data)
n = len(data)
start = bounds.start or 1
stop = min(bounds.stop or n, n)
if bounds.step is not None:
step = bounds.step
if bounds.start is None:
start *= step
else:
step = 1
self.log_debug(F'received input range [{bounds.start}:{bounds.stop}:{bounds.step}], using [{start}:{stop}:{step}]')
for _count in range(start, stop, step):
_guess = [Counter(data[j::_count]).most_common(1)[0] for j in range(_count)]
_score = sum(letter_count for _, letter_count in _guess) / n
# This scaling accounts for the smaller probability of larger keys. The scaling power is arbitrary and has
# been chosen as 5 based on in-the-wild examples. For illustration, consider that a key of length equal to
# the input can be chosen such that the input is annihilated entirely, so that would always yield the best
# score. However, we are looking for an annihilating sequence of relatively small length.
_score = _score * ((n - _count) / (n - 1)) ** 5
logmsg = F'got score {_score * 100:5.2f}% for length {_count}'
if _score > score:
self.log_info(logmsg)
score = _score
guess = bytearray(value for value, _ in _guess)
else:
self.log_debug(logmsg)
return guess
Classes
class xkey (range=slice(1, 32, None))
-
The unit expects encrypted input which was encrypted byte-wise with a polyalphabetic key, and where the plaintext also has one letter that occurs with overwhelming frequency. This is often the case for the zero byte in binary formats such as PE files, and the space character in text files. Based on this assumption, the unit computes the most likely key. This can be useful to decrypt PE and uncompressed text files that were encrypted byte-wise using a short key.
Expand source code Browse git
class xkey(Unit): """ The unit expects encrypted input which was encrypted byte-wise with a polyalphabetic key, and where the plaintext also has one letter that occurs with overwhelming frequency. This is often the case for the zero byte in binary formats such as PE files, and the space character in text files. Based on this assumption, the unit computes the most likely key. This can be useful to decrypt PE and uncompressed text files that were encrypted byte-wise using a short key. """ def __init__( self, range: Arg.Bounds(help='range of length values to try in Python slice syntax, the default is {default}.') = slice(1, 32), ): super().__init__(range=range) def process(self, data: bytearray): score = 0 guess = None bounds: slice = self.args.range data = memoryview(data) n = len(data) start = bounds.start or 1 stop = min(bounds.stop or n, n) if bounds.step is not None: step = bounds.step if bounds.start is None: start *= step else: step = 1 self.log_debug(F'received input range [{bounds.start}:{bounds.stop}:{bounds.step}], using [{start}:{stop}:{step}]') for _count in range(start, stop, step): _guess = [Counter(data[j::_count]).most_common(1)[0] for j in range(_count)] _score = sum(letter_count for _, letter_count in _guess) / n # This scaling accounts for the smaller probability of larger keys. The scaling power is arbitrary and has # been chosen as 5 based on in-the-wild examples. For illustration, consider that a key of length equal to # the input can be chosen such that the input is annihilated entirely, so that would always yield the best # score. However, we are looking for an annihilating sequence of relatively small length. _score = _score * ((n - _count) / (n - 1)) ** 5 logmsg = F'got score {_score * 100:5.2f}% for length {_count}' if _score > score: self.log_info(logmsg) score = _score guess = bytearray(value for value, _ in _guess) else: self.log_debug(logmsg) return guess
Ancestors
Subclasses
Class variables
var required_dependencies
var optional_dependencies
Inherited members