Module refinery.units.pattern.dnsdomain
Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from refinery.units.pattern import PatternExtractorBase
def _lps(maxlen):
"""
Brute force regular expression pattern for a length prefixed domain name component.
"""
return BR'|'.join(BR'\x%02x[a-z0-9\-\_]{%d}' % (d, d) for d in range(1, maxlen + 1))
class dnsdomain(PatternExtractorBase):
"""
Extracts domain names in the format as they appear in DNS requests. This
can be used as a quick and dirty way to extract domains from PCAP files,
for example.
"""
_DOMAIN_CHARACTERS = (
B'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
B'abcdefghijklmnopqrstuvwxyz'
B'0123456789-_'
)
_DOMAIN_PATTERN = BR'(?:%s){1,20}(?:%s)\b' % (_lps(0xFF), _lps(25))
def process(self, data):
def transform(match):
match = bytearray(match[0])
pos = 0
while pos < len(match):
length = match[pos]
match[pos] = 0x2E
if len(match) < length + pos:
return None
if any(x not in self._DOMAIN_CHARACTERS for x in match[pos + 1 : pos + length]):
return None
pos += 1 + length
return match[1:]
yield from self.matches_filtered(memoryview(data), self._DOMAIN_PATTERN, transform)
Classes
class dnsdomain (min=1, max=None, len=None, stripspace=False, duplicates=False, longest=False, take=None)
-
Extracts domain names in the format as they appear in DNS requests. This can be used as a quick and dirty way to extract domains from PCAP files, for example.
Expand source code Browse git
class dnsdomain(PatternExtractorBase): """ Extracts domain names in the format as they appear in DNS requests. This can be used as a quick and dirty way to extract domains from PCAP files, for example. """ _DOMAIN_CHARACTERS = ( B'ABCDEFGHIJKLMNOPQRSTUVWXYZ' B'abcdefghijklmnopqrstuvwxyz' B'0123456789-_' ) _DOMAIN_PATTERN = BR'(?:%s){1,20}(?:%s)\b' % (_lps(0xFF), _lps(25)) def process(self, data): def transform(match): match = bytearray(match[0]) pos = 0 while pos < len(match): length = match[pos] match[pos] = 0x2E if len(match) < length + pos: return None if any(x not in self._DOMAIN_CHARACTERS for x in match[pos + 1 : pos + length]): return None pos += 1 + length return match[1:] yield from self.matches_filtered(memoryview(data), self._DOMAIN_PATTERN, transform)
Ancestors
Class variables
var required_dependencies
var optional_dependencies
Inherited members