Module refinery.lib.unrar.headers

Header structures, enums, and constants for RAR archive formats.

Expand source code Browse git
"""
Header structures, enums, and constants for RAR archive formats.
"""
from __future__ import annotations

import enum
import itertools
import struct

from dataclasses import dataclass
from datetime import datetime, timezone
from typing import NamedTuple

from refinery.lib.types import buf

# Archive signatures ("magic" byte strings), one per format generation.
# The hex dump of each value is given on the right.
RAR_HEADER_V15 = b'Rar!\x1a\x07\x00'      # 52 61 72 21 1A 07 00
RAR_HEADER_V50 = b'Rar!\x1a\x07\x01\x00'  # 52 61 72 21 1A 07 01 00
RAR_HEADER_V14 = b'RE~^'                  # 52 45 7E 5E


class RarFormat(enum.IntEnum):
    """
    Archive format generations distinguished by the header parsers in this module.
    """
    # Parsed by parse_header14_main / parse_header14_file.
    RARFMT14 = 14
    # Parsed by parse_header15.
    RARFMT15 = 15
    # Parsed by parse_header50.
    RARFMT50 = 50


# Fixed structure sizes in bytes.
# Length of the RAR 1.5-4.x signature (same as len(RAR_HEADER_V15)).
SIZEOF_MARKHEAD3 = 7
# Length of the RAR 5.0 signature (same as len(RAR_HEADER_V50)).
SIZEOF_MARKHEAD5 = 8
# RAR 1.4 main header, including the 4-byte signature.
SIZEOF_MAINHEAD14 = 7
# Fixed part of a RAR 1.4 file header; the file name follows it.
SIZEOF_FILEHEAD14 = 21
# Smallest valid RAR 1.5-4.x block header (CRC, type, flags, size).
SIZEOF_SHORTBLOCKHEAD = 7


class HeaderType(enum.IntEnum):
    """
    Block header type codes. The HEAD_* members with small values are the codes that
    parse_header50 dispatches on; the HEAD3_* members are the raw type bytes found in
    RAR 1.5-4.x headers. parse_header15 translates some HEAD3_* codes to their HEAD_*
    counterpart via _HEAD3_TO_5.
    """
    HEAD_MARK = 0x00
    HEAD_MAIN = 0x01
    HEAD_FILE = 0x02
    HEAD_SERVICE = 0x03
    HEAD_CRYPT = 0x04
    HEAD_ENDARC = 0x05
    # Returned by the parsers when a header cannot be interpreted.
    HEAD_UNKNOWN = 0xFF
    HEAD3_MARK = 0x72
    HEAD3_MAIN = 0x73
    HEAD3_FILE = 0x74
    HEAD3_CMT = 0x75
    HEAD3_AV = 0x76
    HEAD3_OLDSERVICE = 0x77
    HEAD3_PROTECT = 0x78
    HEAD3_SIGN = 0x79
    HEAD3_SERVICE = 0x7A
    HEAD3_ENDARC = 0x7B


# Maps RAR 1.5-4.x header type codes to the format-independent codes so that the
# rest of the parser can treat both generations uniformly. Codes that are not
# listed here are left untranslated by parse_header15.
_HEAD3_TO_5 = {
    HeaderType.HEAD3_MAIN: HeaderType.HEAD_MAIN,
    HeaderType.HEAD3_FILE: HeaderType.HEAD_FILE,
    HeaderType.HEAD3_SERVICE: HeaderType.HEAD_SERVICE,
    HeaderType.HEAD3_ENDARC: HeaderType.HEAD_ENDARC,
}


class MHD(enum.IntEnum):
    """
    Flag bits of the RAR 1.5-4.x main archive header, as tested in parse_header15.
    """
    VOLUME = 0x0001
    COMMENT = 0x0002
    LOCK = 0x0004
    SOLID = 0x0008
    PACK_COMMENT = 0x0010
    # Same value as PACK_COMMENT, so this member is an enum alias of it.
    NEWNUMBERING = 0x0010
    AV = 0x0020
    PROTECT = 0x0040
    # Tested by parse_header15 to set RarMainHeader.is_encrypted.
    PASSWORD = 0x0080
    FIRSTVOLUME = 0x0100


class LHD(enum.IntEnum):
    """
    Flag bits of RAR 1.5-4.x file and service headers, as tested in parse_header15.
    """
    SPLIT_BEFORE = 0x0001
    SPLIT_AFTER = 0x0002
    PASSWORD = 0x0004
    COMMENT = 0x0008
    SOLID = 0x0010
    # Bits 5-7 form a 3-bit field; parse_header15 computes the window size as
    # 0x10000 << field for every value except the all-ones DIRECTORY marker.
    WINDOWMASK = 0x00E0
    WINDOW64 = 0x0000
    WINDOW128 = 0x0020
    WINDOW256 = 0x0040
    WINDOW512 = 0x0060
    WINDOW1024 = 0x0080
    WINDOW2048 = 0x00A0
    WINDOW4096 = 0x00C0
    # Same value as WINDOWMASK, so this member is an enum alias of it.
    DIRECTORY = 0x00E0
    # The header carries the high 32 bits of the packed and unpacked sizes.
    LARGE = 0x0100
    # The name field uses the encoded Unicode representation.
    UNICODE = 0x0200
    # A salt of SIZE_SALT30 bytes follows the name.
    SALT = 0x0400
    VERSION = 0x0800
    # Extended time fields follow (see _parse_ext_time).
    EXTTIME = 0x1000
    SKIP_IF_UNKNOWN = 0x4000
    # For other block types: a 4-byte data size follows the short header.
    LONG_BLOCK = 0x8000


class EARC(enum.IntEnum):
    """
    Flag bits of the RAR 1.5-4.x end-of-archive header, as tested in parse_header15.
    """
    NEXT_VOLUME = 0x0001
    # A 4-byte data CRC is stored in the header.
    DATACRC = 0x0002
    REVSPACE = 0x0004
    # A 2-byte volume number is stored in the header.
    VOLNUMBER = 0x0008


class HFL(enum.IntEnum):
    """
    Generic block flags shared by all RAR 5.0 headers, as tested in parse_header50.
    """
    # The header stores the size of an extra area.
    EXTRA = 0x0001
    # The header stores the size of a data area that follows the header.
    DATA = 0x0002
    SKIPIFUNKNOWN = 0x0004
    SPLITBEFORE = 0x0008
    SPLITAFTER = 0x0010
    CHILD = 0x0020
    INHERITED = 0x0040


class MHFL(enum.IntEnum):
    """
    Flag bits of the RAR 5.0 main archive header, as tested in parse_header50.
    """
    VOLUME = 0x0001
    # A variable-length volume number follows the flags.
    VOLNUMBER = 0x0002
    SOLID = 0x0004
    PROTECT = 0x0008
    LOCK = 0x0010


class FHFL(enum.IntEnum):
    """
    Flag bits specific to RAR 5.0 file and service headers, as tested in parse_header50.
    """
    DIRECTORY = 0x0001
    # A 4-byte Unix modification time is stored in the header.
    UTIME = 0x0002
    # A 4-byte CRC32 is stored in the header.
    CRC32 = 0x0004
    # The unpacked size is not known.
    UNPUNKNOWN = 0x0008


class EHFL(enum.IntEnum):
    """
    Flag bits of the RAR 5.0 end-of-archive header, as tested in parse_header50.
    """
    NEXTVOLUME = 0x0001


class CHFL(enum.IntEnum):
    """
    Flag bits of the RAR 5.0 archive encryption header, as tested in parse_header50.
    """
    # A password check value and its checksum are stored in the header.
    CRYPT_PSWCHECK = 0x0001


class FCI(enum.IntEnum):
    """
    Layout of the RAR 5.0 compression information value decoded in parse_header50.
    Note that METHOD_SHIFT and DICT_SHIFT are shift counts, not bit masks.
    """
    # Low 6 bits: algorithm version; parse_header50 adds 50 to obtain unp_ver.
    ALGO_MASK = 0x003F
    SOLID = 0x0040
    METHOD_SHIFT = 7
    METHOD_MASK = 0x0380
    DICT_SHIFT = 10
    DICT_MASK = 0x3C00


# Record types in the extra area of a RAR 5.0 file header (see _parse_extra50).
FHEXTRA_CRYPT = 0x01
FHEXTRA_HASH = 0x02
FHEXTRA_HTIME = 0x03
FHEXTRA_REDIR = 0x05

# Hash type inside an FHEXTRA_HASH record.
FHEXTRA_HASH_BLAKE2 = 0x00

# Flag bits inside an FHEXTRA_HTIME record: time format and which times are present.
FHEXTRA_HTIME_UNIXTIME = 0x01
FHEXTRA_HTIME_MTIME = 0x02
FHEXTRA_HTIME_CTIME = 0x04
FHEXTRA_HTIME_ATIME = 0x08

# Flag bits inside an FHEXTRA_CRYPT record.
FHEXTRA_CRYPT_PSWCHECK = 0x01
FHEXTRA_CRYPT_HASHMAC = 0x02


# Sizes in bytes of the cryptographic fields read by the header parsers.
SIZE_SALT50 = 16
SIZE_SALT30 = 8
SIZE_INITV = 16
SIZE_PSWCHECK = 8
SIZE_PSWCHECK_CSUM = 4
CRYPT_BLOCK_SIZE = 16
# NOTE(review): not referenced in this module; presumably the upper bound for the
# lg2_count field of the encryption records - confirm against the crypt module.
CRYPT5_KDF_LG2_COUNT_MAX = 24


class CryptMethod(enum.IntEnum):
    """
    Encryption scheme of an entry. For RAR 1.5-4.x the scheme is derived from the
    unpack version by _detect_crypt_method_15; RAR 5.0 entries use CRYPT_RAR50.
    """
    CRYPT_NONE = 0
    CRYPT_RAR13 = 1
    CRYPT_RAR15 = 2
    CRYPT_RAR20 = 3
    CRYPT_RAR30 = 4
    CRYPT_RAR50 = 5


class HostSystem(enum.IntEnum):
    """
    Host operating system codes. The HOST5_* members are compared against RAR 5.0
    headers in parse_header50, the HOST_* members against RAR 1.5-4.x headers in
    parse_header15. The two numbering schemes overlap: because enum members with equal
    values are aliases, HOST_MSDOS is an alias of HOST5_WINDOWS and HOST_OS2 is an
    alias of HOST5_UNIX. Only compare a value against the members of the scheme that
    matches the archive format it came from.
    """
    HOST5_WINDOWS = 0
    HOST5_UNIX = 1
    HOST_MSDOS = 0
    HOST_OS2 = 1
    HOST_WIN32 = 2
    HOST_UNIX = 3
    HOST_MACOS = 4
    HOST_BEOS = 5


class HostSystemType(enum.IntEnum):
    """
    Coarse host system family that the parsers derive from the host OS code.
    """
    HSYS_WINDOWS = 0
    HSYS_UNIX = 1
    # Default of RarFileEntry.hs_type; kept when the host OS code is not recognized.
    HSYS_UNKNOWN = 2


class FileSystemRedirect(enum.IntEnum):
    """
    Kind of file system redirection (link or copy) recorded for an entry.
    """
    FSREDIR_NONE = 0
    FSREDIR_UNIXSYMLINK = 1
    FSREDIR_WINSYMLINK = 2
    FSREDIR_JUNCTION = 3
    FSREDIR_HARDLINK = 4
    FSREDIR_FILECOPY = 5


class HashType(enum.IntEnum):
    """
    Kind of checksum stored for an entry.
    """
    HASH_NONE = 0
    # Set for RAR 1.4 entries; the 16-bit value is stored in RarFileEntry.crc32.
    HASH_RAR14 = 1
    HASH_CRC32 = 2
    # The digest is stored in RarFileEntry.hash_digest.
    HASH_BLAKE2 = 3


# Number of digest bytes read from an FHEXTRA_HASH record of type FHEXTRA_HASH_BLAKE2.
BLAKE2_DIGEST_SIZE = 32


def dos_datetime(dostime: int) -> datetime | None:
    """
    Turn a packed 32-bit DOS timestamp into a naive datetime. The upper 16 bits hold
    the date (year offset from 1980, month, day), the lower 16 bits hold the time
    (hour, minute, seconds divided by two). Returns None when the fields do not form
    a valid date or time.
    """
    dos_date = (dostime >> 16) & 0xFFFF
    dos_time = dostime & 0xFFFF
    fields = (
        1980 + ((dos_date >> 9) & 0x7F),
        # A zero month or day is bumped to 1 rather than rejected.
        ((dos_date >> 5) & 0x0F) or 1,
        (dos_date & 0x1F) or 1,
        (dos_time >> 11) & 0x1F,
        (dos_time >> 5) & 0x3F,
        # Seconds have a 2-second granularity; out-of-range values are capped at 59.
        min(2 * (dos_time & 0x1F), 59),
    )
    try:
        return datetime(*fields)
    except (ValueError, OverflowError):
        return None


def decode_rar4_filename(name_bytes: bytes, enc_data: bytes) -> str:
    """
    Reconstruct the Unicode file name of a RAR 4.x entry.

    `name_bytes` is the plain single-byte name and `enc_data` is the compact encoding
    that accompanies it. The first byte of `enc_data` is a high byte shared by several
    opcodes. After that, flag bytes supply four 2-bit opcodes each, consumed from the
    most significant bits downwards:

      0: one byte from enc_data becomes the character.
      1: one byte from enc_data is combined with the shared high byte.
      2: two bytes from enc_data form a little-endian 16-bit character.
      3: a run of characters is copied from name_bytes, optionally adding a
         correction to the low byte and applying the shared high byte.

    Decoding stops silently when enc_data is exhausted. Without enc_data, the plain
    name is returned decoded as latin-1.
    """
    if not enc_data:
        return name_bytes.decode('latin-1')

    enc_size = len(enc_data)
    high = enc_data[0] << 8
    cursor = 1
    # Number of characters produced so far; also the read offset into name_bytes.
    produced = 0
    pending_bits = 0
    flags = 0
    chars = []

    while cursor < enc_size:
        if not pending_bits:
            flags = enc_data[cursor]
            cursor += 1
            pending_bits = 8

        # Every opcode needs at least one more byte of encoded data.
        if cursor >= enc_size:
            break

        opcode = flags >> 6
        if opcode == 0:
            chars.append(chr(enc_data[cursor]))
            cursor += 1
            produced += 1
        elif opcode == 1:
            chars.append(chr(enc_data[cursor] + high))
            cursor += 1
            produced += 1
        elif opcode == 2:
            if cursor + 1 >= enc_size:
                break
            chars.append(chr(enc_data[cursor] + (enc_data[cursor + 1] << 8)))
            cursor += 2
            produced += 1
        else:
            length = enc_data[cursor]
            cursor += 1
            if length & 0x80:
                if cursor >= enc_size:
                    break
                correction = enc_data[cursor]
                cursor += 1
                # Slicing clips the run at the end of name_bytes.
                run = name_bytes[produced:produced + (length & 0x7F) + 2]
                chars.extend(chr(((b + correction) & 0xFF) + high) for b in run)
            else:
                run = name_bytes[produced:produced + length + 2]
                chars.extend(map(chr, run))
            produced += len(run)

        flags = (flags << 2) & 0xFF
        pending_bits -= 2

    return ''.join(chars)


class RarMainHeader(NamedTuple):
    """
    Parsed main archive header. All fields have defaults so that an empty header can
    be created when the data is too short.
    """
    # Raw flag value as stored in the archive.
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    # Only set from the RAR 1.5-4.x main header flags; parse_header50 always stores False.
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Only read from RAR 5.0 headers; 0 otherwise.
    vol_number: int = 0


class RarEndArchiveHeader(NamedTuple):
    """
    Parsed end-of-archive header.
    """
    # Whether the end-of-archive flags announce a following volume.
    next_volume: bool
    # None when the header does not carry the value (always None for RAR 5.0).
    data_crc: int | None
    # None when the header does not carry the value (always None for RAR 5.0).
    vol_number: int | None


@dataclass(repr=False)
class RarFileEntry:
    """
    Describes one file or service record of a RAR archive. The header parsers of this
    module create an instance with default values and fill in whatever the respective
    archive format provides.
    """
    # Identification and sizes.
    name: str = ''
    size: int = 0
    packed_size: int = 0
    # Modification, creation and access time, where available.
    date: datetime | None = None
    ctime: datetime | None = None
    atime: datetime | None = None
    # Compression method and basic attributes.
    method: int = 0
    is_dir: bool = False
    is_encrypted: bool = False
    # Integrity information.
    crc32: int = 0
    hash_type: int = HashType.HASH_NONE
    hash_digest: buf = b''
    # Host system and unpacking parameters.
    host_os: int = 0
    hs_type: int = HostSystemType.HSYS_UNKNOWN
    unp_ver: int = 0
    win_size: int = 0
    solid: bool = False
    split_before: bool = False
    split_after: bool = False
    # Encryption parameters.
    crypt_method: int = CryptMethod.CRYPT_NONE
    salt: buf = b''
    init_v: buf = b''
    lg2_count: int = 0
    psw_check: buf = b''
    use_psw_check: bool = False
    hash_key: buf = b''
    use_hash_key: bool = False
    # Link information.
    redir_type: int = FileSystemRedirect.FSREDIR_NONE
    redir_name: str = ''
    # Raw header information.
    is_service: bool = False
    header_type: int = HeaderType.HEAD_FILE
    header_flags: int = 0
    file_flags: int = 0
    # Location of the packed data; filled in by parse_headers.
    _volume_index: int = 0
    _data_offset: int = 0
    _data_size: int = 0
    unknown_unp_size: bool = False

    def __repr__(self):
        if self.is_dir:
            kind = 'dir'
        else:
            kind = 'file'
        suffix = ''
        if self.is_encrypted:
            suffix = ' [encrypted]'
        return F'<RarFileEntry:{kind}:{self.name}{suffix}>'


class RarCryptHeader(NamedTuple):
    """
    Parsed RAR 5.0 archive encryption header.
    """
    # Passed together with the salt to the key derivation in parse_headers.
    lg2_count: int
    salt: buf
    # False when no check value is present or when its checksum does not match.
    use_psw_check: bool
    psw_check: buf
    # Filled in by parse_headers with the bytes that follow the encryption header.
    header_iv: buf = b''


class RawHeaderReader:
    """
    Minimal cursor-based reader for little-endian header fields. Reads that would run
    past the end of the buffer do not raise: the fixed-width readers return 0 and
    leave the cursor untouched, and getb returns a shortened view.
    """

    def __init__(self, data: bytes | bytearray | memoryview):
        self.data = memoryview(data)
        self.pos = 0

    def _unpack(self, fmt: str, width: int) -> int:
        """
        Unpack one integer of the given struct format and byte width at the cursor.
        """
        start = self.pos
        if start + width > len(self.data):
            return 0
        value, = struct.unpack_from(fmt, self.data, start)
        self.pos = start + width
        return value

    def get1(self) -> int:
        start = self.pos
        if start >= len(self.data):
            return 0
        value = self.data[start]
        self.pos = start + 1
        return value

    def get2(self) -> int:
        start = self.pos
        if start + 2 > len(self.data):
            return 0
        low = self.data[start]
        high = self.data[start + 1]
        self.pos = start + 2
        return low | (high << 8)

    def get4(self) -> int:
        return self._unpack('<I', 4)

    def get8(self) -> int:
        return self._unpack('<Q', 8)

    def getv(self) -> int:
        """
        Read a RAR5 variable-length integer: 7 payload bits per byte, least
        significant group first, with the top bit marking continuation. Returns 0 if
        the data ends or ten bytes pass without a terminating byte; the cursor stays
        behind the bytes that were consumed.
        """
        limit = len(self.data)
        value = 0
        for shift in range(0, 64, 7):
            if self.pos >= limit:
                break
            octet = self.data[self.pos]
            self.pos += 1
            value += (octet & 0x7F) << shift
            if not (octet & 0x80):
                return value
        return 0

    def getv_size(self, pos: int | None = None) -> int:
        """
        Count the bytes of the variable-length integer at `pos` (default: the cursor)
        without moving the cursor. Returns 0 if no terminating byte is found.
        """
        start = self.pos if pos is None else pos
        for index in range(start, len(self.data)):
            if not (self.data[index] & 0x80):
                return index - start + 1
        return 0

    def getb(self, size: int) -> memoryview:
        stop = min(self.pos + size, len(self.data))
        chunk = self.data[self.pos:stop]
        self.pos = stop
        return chunk

    def remaining(self) -> int:
        return max(0, len(self.data) - self.pos)

    def set_pos(self, pos: int):
        self.pos = pos

    def get_pos(self) -> int:
        return self.pos


def _detect_crypt_method_15(unp_ver: int) -> int:
    """
    Select the encryption scheme of a RAR 1.5-4.x entry from its unpack version.
    Versions without a dedicated scheme use CRYPT_RAR30.
    """
    legacy_schemes = {
        13: CryptMethod.CRYPT_RAR13,
        15: CryptMethod.CRYPT_RAR15,
        20: CryptMethod.CRYPT_RAR20,
        26: CryptMethod.CRYPT_RAR20,
    }
    return legacy_schemes.get(unp_ver, CryptMethod.CRYPT_RAR30)


def parse_header14_main(data: bytes | memoryview) -> RarMainHeader:
    """
    Read the RAR 1.4 main archive header. `data` must start at the 4-byte RE~^
    signature; the flag byte is the last byte of the 7-byte header. If fewer than
    7 bytes are available, a header with default values is returned.
    """
    if len(data) < SIZEOF_MAINHEAD14:
        return RarMainHeader()
    bits = data[6]
    return RarMainHeader(
        flags=bits,
        is_volume=(bits & 0x01) != 0,
        comment_in_header=(bits & 0x02) != 0,
        is_locked=(bits & 0x04) != 0,
        is_solid=(bits & 0x08) != 0,
    )


def parse_header14_file(data: bytes | memoryview, pos: int) -> tuple[RarFileEntry, int] | None:
    """
    Read the RAR 1.4 file header that starts at offset `pos`.
    Returns the entry together with the offset of the next header, or None when the
    fixed header part or the file name does not fit into the remaining data.

    Fixed header part (21 bytes, little-endian), followed by the file name:
      u32 packed size
      u32 unpacked size
      u16 CRC16
      u16 header size (not used by this parser)
      u32 file time (DOS format)
      u8  file attributes
      u8  flags (bit 2 = encrypted)
      u8  unpack version (not used by this parser; unp_ver is always set to 15)
      u8  name size
      u8  method

    The packed data is assumed to follow the file name directly.
    """
    name_start = pos + SIZEOF_FILEHEAD14
    if name_start > len(data):
        return None

    (
        packed,
        unpacked,
        crc16,
        _head_size,
        dos_time,
        attributes,
        flag_byte,
        _unp_ver,
        name_length,
        method,
    ) = struct.unpack_from('<IIHHIBBBBB', data, pos)

    name_end = name_start + name_length
    if name_end > len(data):
        return None

    raw_name = bytes(data[name_start:name_end])
    is_encrypted = (flag_byte & 0x04) != 0

    entry = RarFileEntry(
        header_type=HeaderType.HEAD_FILE,
        name=raw_name.rstrip(b'\x00').decode('latin-1'),
        size=unpacked,
        packed_size=packed,
        date=dos_datetime(dos_time),
        method=method,
        is_encrypted=is_encrypted,
        crypt_method=CryptMethod.CRYPT_RAR13 if is_encrypted else CryptMethod.CRYPT_NONE,
        hash_type=HashType.HASH_RAR14,
        crc32=crc16,
        unp_ver=15,
        win_size=0x10000,
        file_flags=attributes,
        is_dir=(attributes & 0x10) != 0,
    )
    entry._data_offset = name_end
    entry._data_size = packed

    return entry, name_end + packed


def parse_header15(raw: RawHeaderReader) -> tuple:
    """
    Parse a RAR 1.5-4.x header block from the given RawHeaderReader, which must be
    positioned at the start of the block.

    Returns (header_type, header_size, flags, parsed_object, next_block_delta).
    parsed_object is RarMainHeader, RarFileEntry, RarEndArchiveHeader, or None.
    next_block_delta is the distance from the start of this block to the next one.

    A block whose size field is smaller than the minimal header, or whose type byte is
    not a known HeaderType, is reported as HEAD_UNKNOWN instead of raising, so that
    callers can skip it.
    """
    raw.get2()  # header CRC; read to advance the cursor, not verified here
    header_type_raw = raw.get1()
    flags = raw.get2()
    head_size = raw.get2()

    if head_size < SIZEOF_SHORTBLOCKHEAD:
        return HeaderType.HEAD_UNKNOWN, head_size, flags, None, head_size

    try:
        header_type = HeaderType(header_type_raw)
    except ValueError:
        # Unlisted block types are skipped like any other unhandled block below.
        header_type = HeaderType.HEAD_UNKNOWN
    header_type = _HEAD3_TO_5.get(header_type, header_type)

    if header_type == HeaderType.HEAD_MAIN:
        # Two further fields (2 and 4 bytes) are skipped without interpretation.
        raw.get2()
        raw.get4()
        mh = RarMainHeader(
            flags=flags,
            is_volume=bool(flags & MHD.VOLUME),
            is_solid=bool(flags & MHD.SOLID),
            is_locked=bool(flags & MHD.LOCK),
            is_protected=bool(flags & MHD.PROTECT),
            is_encrypted=bool(flags & MHD.PASSWORD),
            first_volume=bool(flags & MHD.FIRSTVOLUME),
            new_numbering=bool(flags & MHD.NEWNUMBERING),
            comment_in_header=bool(flags & MHD.COMMENT),
            vol_number=0,
        )
        return header_type, head_size, flags, mh, head_size

    elif header_type in (HeaderType.HEAD_FILE, HeaderType.HEAD_SERVICE):
        hd = RarFileEntry()
        hd.header_type = header_type
        hd.header_flags = flags
        hd.is_service = (header_type == HeaderType.HEAD_SERVICE)

        hd.split_before = bool(flags & LHD.SPLIT_BEFORE)
        hd.split_after = bool(flags & LHD.SPLIT_AFTER)
        hd.is_encrypted = bool(flags & LHD.PASSWORD)
        hd.solid = (not hd.is_service) and bool(flags & LHD.SOLID)
        # The window field doubles as directory marker when all three bits are set.
        hd.is_dir = (flags & LHD.WINDOWMASK) == LHD.DIRECTORY
        hd.win_size = 0 if hd.is_dir else 0x10000 << ((flags & LHD.WINDOWMASK) >> 5)

        data_size = raw.get4()
        low_unp_size = raw.get4()
        hd.host_os = raw.get1()
        hd.hash_type = HashType.HASH_CRC32
        hd.crc32 = raw.get4()
        file_time = raw.get4()
        hd.unp_ver = raw.get1()
        # The method is stored as an ASCII digit.
        hd.method = raw.get1() - 0x30

        name_size = raw.get2()
        hd.file_flags = raw.get4()

        if hd.is_encrypted:
            hd.crypt_method = _detect_crypt_method_15(hd.unp_ver)

        if hd.host_os in (HostSystem.HOST_UNIX, HostSystem.HOST_BEOS):
            hd.hs_type = HostSystemType.HSYS_UNIX
        elif hd.host_os <= HostSystem.HOST_BEOS:
            hd.hs_type = HostSystemType.HSYS_WINDOWS

        # Unix file mode with the symbolic link type bits set.
        if hd.host_os == HostSystem.HOST_UNIX and (hd.file_flags & 0xF000) == 0xA000:
            hd.redir_type = FileSystemRedirect.FSREDIR_UNIXSYMLINK

        # Old archives mark directories only through the directory attribute bit.
        if hd.unp_ver < 20 and (hd.file_flags & 0x10):
            hd.is_dir = True

        if flags & LHD.LARGE:
            high_pack = raw.get4()
            high_unp = raw.get4()
            hd.unknown_unp_size = (low_unp_size == 0xFFFFFFFF and high_unp == 0xFFFFFFFF)
        else:
            high_pack = 0
            high_unp = 0
            hd.unknown_unp_size = (low_unp_size == 0xFFFFFFFF)

        hd.packed_size = (high_pack << 32) | data_size
        hd.size = (high_unp << 32) | low_unp_size

        read_name_size = min(name_size, raw.remaining())
        name_bytes = bytes(raw.getb(read_name_size))

        if hd.is_service:
            hd.name = name_bytes.rstrip(b'\x00').decode('latin-1', errors='replace')
        elif flags & LHD.UNICODE:
            null_pos = name_bytes.find(b'\x00')
            if null_pos < 0:
                # Without a separator, the whole field is the name stored as UTF-8.
                # Fall back to latin-1 so that malformed names still decode.
                try:
                    hd.name = name_bytes.decode('utf-8')
                except UnicodeDecodeError:
                    hd.name = name_bytes.decode('latin-1')
            else:
                ascii_part = name_bytes[:null_pos]
                enc_part = name_bytes[null_pos + 1:]
                decoded = decode_rar4_filename(ascii_part, enc_part) if enc_part else ''
                # Use the plain name if there is no encoded part or it yields nothing.
                hd.name = decoded or ascii_part.decode('latin-1')
        else:
            hd.name = name_bytes.rstrip(b'\x00').decode('latin-1')

        if flags & LHD.SALT:
            hd.salt = raw.getb(SIZE_SALT30)

        hd.date = dos_datetime(file_time)

        if flags & LHD.EXTTIME:
            _parse_ext_time(raw, hd, file_time)

        next_delta = head_size + hd.packed_size
        return header_type, head_size, flags, hd, next_delta

    elif header_type == HeaderType.HEAD_ENDARC:
        data_crc = raw.get4() if flags & EARC.DATACRC else None
        vol_number = raw.get2() if flags & EARC.VOLNUMBER else None
        eh = RarEndArchiveHeader(
            next_volume=bool(flags & EARC.NEXT_VOLUME),
            data_crc=data_crc,
            vol_number=vol_number,
        )
        return header_type, head_size, flags, eh, head_size

    else:
        # Any other block: skip the header and, if announced, the attached data.
        next_delta = head_size
        if flags & LHD.LONG_BLOCK:
            next_delta += raw.get4()
        return header_type, head_size, flags, None, next_delta


def _parse_ext_time(raw: RawHeaderReader, hd: RarFileEntry, file_time: int):
    """
    Parse RAR 1.5-4.x extended time fields and store the results in `hd`.

    A 16-bit flag word holds four 4-bit descriptors, from the most significant nibble
    downwards, for modification, creation, access and archive time. Within a
    descriptor, bit 3 marks the time as present, bit 2 adds one second to the DOS
    time (which has only two-second resolution), and bits 0-1 give the number of
    bytes (0-3) of a sub-second remainder in 100 ns units, most significant bytes
    only. Every time except the modification time is preceded by its own 4-byte DOS
    time; the modification time refines `file_time`.

    The archive time is consumed but not stored. Times whose DOS value is invalid
    are skipped, but their bytes are still consumed so that later fields stay aligned.
    """
    from datetime import timedelta

    if raw.remaining() < 2:
        return
    ext_flags = raw.get2()
    for i in range(4):
        rmode = (ext_flags >> ((3 - i) * 4)) & 0xF
        if not (rmode & 8):
            continue
        if i == 0:
            base_time = file_time
        else:
            base_time = raw.get4() if raw.remaining() >= 4 else 0

        # Always consume the remainder bytes before deciding whether the time is usable.
        count = rmode & 3
        remainder = 0
        for j in range(count):
            remainder |= raw.get1() << ((j + 3 - count) * 8)

        dt = dos_datetime(base_time)
        if dt is None:
            continue
        if rmode & 4:
            dt += timedelta(seconds=1)
        if remainder:
            # 100 ns units; datetime resolves microseconds only.
            dt += timedelta(microseconds=remainder // 10)

        if i == 0:
            hd.date = dt
        elif i == 1:
            hd.ctime = dt
        elif i == 2:
            hd.atime = dt


def parse_header50(data: bytes | memoryview, offset: int = 0) -> tuple:
    """
    Parse the RAR 5.0 header block at the start of `data`. The `offset` parameter is
    accepted for compatibility and is not used.

    Returns (header_type, header_total_size, parsed_object, next_block_delta).
    parsed_object is RarCryptHeader, RarMainHeader, RarFileEntry, RarEndArchiveHeader,
    or None for block types that are not interpreted. next_block_delta is the header
    size plus the size of the data area, if any.

    If the size field is missing or zero, (HEAD_UNKNOWN, 0, None, 0) is returned. A
    block whose type code is not a known HeaderType is reported as HEAD_UNKNOWN with
    valid sizes instead of raising, so that callers can skip it.
    """
    raw = RawHeaderReader(data)

    raw.get4()  # header CRC32; read to advance the cursor, not verified here
    size_bytes = raw.getv_size(4)
    block_size = raw.getv()

    if block_size == 0 or size_bytes == 0:
        return HeaderType.HEAD_UNKNOWN, 0, None, 0

    # The size field counts everything that follows it within the header.
    header_size = 4 + size_bytes + block_size

    try:
        header_type = HeaderType(raw.getv())
    except ValueError:
        header_type = HeaderType.HEAD_UNKNOWN
    block_flags = raw.getv()

    extra_size = 0
    if block_flags & HFL.EXTRA:
        extra_size = raw.getv()

    data_size = 0
    if block_flags & HFL.DATA:
        data_size = raw.getv()

    next_delta = header_size + data_size

    if header_type == HeaderType.HEAD_CRYPT:
        raw.getv()  # presumably the encryption version; not evaluated here
        enc_flags = raw.getv()
        use_psw_check = bool(enc_flags & CHFL.CRYPT_PSWCHECK)
        lg2_count = raw.get1()
        salt = raw.getb(SIZE_SALT50)
        psw_check = b''
        if use_psw_check:
            psw_check = raw.getb(SIZE_PSWCHECK)
            csum = raw.getb(SIZE_PSWCHECK_CSUM)
            import hashlib
            digest = hashlib.sha256(psw_check).digest()
            # A damaged check value must not be used to reject passwords.
            if csum != digest[:SIZE_PSWCHECK_CSUM]:
                use_psw_check = False
        ch = RarCryptHeader(
            lg2_count=lg2_count,
            salt=salt,
            use_psw_check=use_psw_check,
            psw_check=psw_check,
        )
        return header_type, header_size, ch, next_delta

    elif header_type == HeaderType.HEAD_MAIN:
        arc_flags = raw.getv()
        is_volume = bool(arc_flags & MHFL.VOLUME)
        vol_number = 0
        if arc_flags & MHFL.VOLNUMBER:
            vol_number = raw.getv()

        mh = RarMainHeader(
            flags=arc_flags,
            is_volume=is_volume,
            is_solid=bool(arc_flags & MHFL.SOLID),
            is_locked=bool(arc_flags & MHFL.LOCK),
            is_protected=bool(arc_flags & MHFL.PROTECT),
            is_encrypted=False,
            first_volume=is_volume and vol_number == 0,
            new_numbering=True,
            comment_in_header=False,
            vol_number=vol_number,
        )
        return header_type, header_size, mh, next_delta

    elif header_type in (HeaderType.HEAD_FILE, HeaderType.HEAD_SERVICE):
        hd = RarFileEntry()
        hd.header_type = header_type
        hd.header_flags = block_flags
        hd.is_service = (header_type == HeaderType.HEAD_SERVICE)

        hd.packed_size = data_size
        hd.file_flags = raw.getv()
        hd.size = raw.getv()
        hd.unknown_unp_size = bool(hd.file_flags & FHFL.UNPUNKNOWN)

        raw.getv()  # presumably the file attributes; not stored
        hd.is_dir = bool(hd.file_flags & FHFL.DIRECTORY)

        if hd.file_flags & FHFL.UTIME:
            unix_time = raw.get4()
            try:
                hd.date = datetime.fromtimestamp(unix_time, tz=timezone.utc)
            except (OSError, OverflowError, ValueError):
                pass

        if hd.file_flags & FHFL.CRC32:
            hd.hash_type = HashType.HASH_CRC32
            hd.crc32 = raw.get4()

        comp_info = raw.getv()
        hd.method = (comp_info >> FCI.METHOD_SHIFT) & 7
        hd.unp_ver = (comp_info & FCI.ALGO_MASK) + 50
        # Service records are never solid, same as in parse_header15.
        hd.solid = (not hd.is_service) and bool(comp_info & FCI.SOLID)
        hd.win_size = 0 if hd.is_dir else 0x20000 << ((comp_info >> FCI.DICT_SHIFT) & 0xF)

        hd.host_os = raw.getv()
        if hd.host_os == HostSystem.HOST5_UNIX:
            hd.hs_type = HostSystemType.HSYS_UNIX
        elif hd.host_os == HostSystem.HOST5_WINDOWS:
            hd.hs_type = HostSystemType.HSYS_WINDOWS

        name_size = raw.getv()
        name_bytes = bytes(raw.getb(name_size))
        hd.name = name_bytes.rstrip(b'\x00').decode('utf-8', errors='replace')

        hd.split_before = bool(block_flags & HFL.SPLITBEFORE)
        hd.split_after = bool(block_flags & HFL.SPLITAFTER)

        if extra_size > 0:
            _parse_extra50(raw, hd, extra_size, header_size)

        return hd.header_type, header_size, hd, next_delta

    elif header_type == HeaderType.HEAD_ENDARC:
        arc_flags = raw.getv()
        eh = RarEndArchiveHeader(
            next_volume=bool(arc_flags & EHFL.NEXTVOLUME),
            data_crc=None,
            vol_number=None,
        )
        return header_type, header_size, eh, next_delta

    return header_type, header_size, None, next_delta


def _parse_extra50(
    raw: RawHeaderReader,
    hd: RarFileEntry,
    extra_size: int,
    header_size: int,
):
    """
    Parse the extra area of a RAR5 file/service header and store the results in `hd`.

    The extra area occupies the last `extra_size` bytes of the header, which is
    `header_size` bytes long and starts at offset 0 of `raw`. It is a sequence of
    records, each consisting of a size, a type and type-specific data. Records of
    unknown type are skipped.

    The reader may contain more data than just this header, so parsing is limited to
    the header itself; the bytes that follow it are packed data, not records.
    """
    from datetime import timedelta

    extra_start = header_size - extra_size
    if extra_start < raw.get_pos():
        return
    raw.set_pos(extra_start)
    extra_end = min(header_size, raw.get_pos() + raw.remaining())

    def read_time(is_unix: bool) -> datetime | None:
        # Unix times are 32-bit seconds since 1970; otherwise the value is a 64-bit
        # Windows FILETIME, counting 100 ns units since 1601-01-01 UTC.
        if is_unix:
            seconds = raw.get4()
            try:
                return datetime.fromtimestamp(seconds, tz=timezone.utc)
            except (OSError, OverflowError, ValueError):
                return None
        ticks = raw.get8()
        if not ticks:
            return None
        try:
            return datetime(1601, 1, 1, tzinfo=timezone.utc) + timedelta(microseconds=ticks // 10)
        except OverflowError:
            return None

    while extra_end - raw.get_pos() >= 2:
        field_size = raw.getv()
        left = extra_end - raw.get_pos()
        if field_size <= 0 or left <= 0 or field_size > left:
            break
        # The size covers the type field and the data, but not the size field itself.
        next_pos = raw.get_pos() + field_size
        field_type = raw.getv()

        if field_type == FHEXTRA_CRYPT:
            enc_version = raw.getv()
            if enc_version <= 0:  # CRYPT_VERSION = 0
                enc_flags = raw.getv()
                hd.use_psw_check = bool(enc_flags & FHEXTRA_CRYPT_PSWCHECK)
                hd.use_hash_key = bool(enc_flags & FHEXTRA_CRYPT_HASHMAC)
                hd.lg2_count = raw.get1()
                hd.salt = raw.getb(SIZE_SALT50)
                hd.init_v = raw.getb(SIZE_INITV)
                if hd.use_psw_check:
                    hd.psw_check = raw.getb(SIZE_PSWCHECK)
                    csum = raw.getb(SIZE_PSWCHECK_CSUM)
                    import hashlib
                    digest = hashlib.sha256(hd.psw_check).digest()
                    # A damaged check value must not be used to reject passwords.
                    hd.use_psw_check = (csum == digest[:SIZE_PSWCHECK_CSUM])
                    # An all-zero check value in a service record is not trusted.
                    if hd.is_service and hd.psw_check == b'\x00' * SIZE_PSWCHECK:
                        hd.use_psw_check = False
                hd.crypt_method = CryptMethod.CRYPT_RAR50
                hd.is_encrypted = True

        elif field_type == FHEXTRA_HASH:
            hash_type = raw.getv()
            if hash_type == FHEXTRA_HASH_BLAKE2:
                hd.hash_type = HashType.HASH_BLAKE2
                hd.hash_digest = raw.getb(BLAKE2_DIGEST_SIZE)

        elif field_type == FHEXTRA_HTIME:
            time_flags = raw.getv()
            is_unix = bool(time_flags & FHEXTRA_HTIME_UNIXTIME)
            # The times are stored in this order; an unusable value keeps the old one.
            for flag, attribute in (
                (FHEXTRA_HTIME_MTIME, 'date'),
                (FHEXTRA_HTIME_CTIME, 'ctime'),
                (FHEXTRA_HTIME_ATIME, 'atime'),
            ):
                if not time_flags & flag:
                    continue
                stamp = read_time(is_unix)
                if stamp is not None:
                    setattr(hd, attribute, stamp)

        elif field_type == FHEXTRA_REDIR:
            hd.redir_type = raw.getv()
            raw.getv()  # presumably redirection flags; not stored
            redir_name_size = raw.getv()
            redir_name = bytes(raw.getb(redir_name_size))
            hd.redir_name = redir_name.decode('utf-8', errors='replace')

        # Continue at the next record regardless of how much of this one was read.
        raw.set_pos(next_pos)


def parse_headers(
    data: bytes | memoryview,
    fmt: RarFormat,
    password: str | None = None,
) -> tuple[
    RarMainHeader | None,
    list[RarFileEntry],
    RarEndArchiveHeader | None,
    RarCryptHeader | None,
]:
    """
    Parse all headers from a RAR volume.
    Returns (main_header, file_entries, end_header, crypt_header).

    `data` must start at the archive signature and `fmt` selects the header layout.
    For every file or service entry, the private fields _data_offset and _data_size
    are set to the location of its packed data within `data`; _volume_index is
    always set to 0 here.

    Parsing stops quietly, returning what was collected so far, when the data is
    exhausted, a header cannot be interpreted, the end-of-archive header is reached,
    or encrypted headers are encountered without a password. The `password` is only
    used for encrypted headers. For RAR 5.0 archives whose encryption header carries
    a usable password check value, a wrong password raises RarInvalidPassword.
    """
    pos = 0
    main_header = None
    entries: list[RarFileEntry] = []
    end_header = None
    crypt_header = None
    # Becomes True once all following headers are stored encrypted.
    encrypted = False
    # RAR 5.0 header key, derived once on first use.
    _hdr_key = None

    view = memoryview(data)

    # Skip the archive signature.
    if fmt == RarFormat.RARFMT50:
        pos = SIZEOF_MARKHEAD5
    elif fmt == RarFormat.RARFMT15:
        pos = SIZEOF_MARKHEAD3
    elif fmt == RarFormat.RARFMT14:
        pos = 0

    # RAR 1.4: a main header followed by a plain sequence of file headers.
    if fmt == RarFormat.RARFMT14:
        main_header = parse_header14_main(view)
        pos = SIZEOF_MAINHEAD14
        while pos < len(data):
            result = parse_header14_file(view, pos)
            if result is None:
                break
            entry, next_pos = result
            entry._volume_index = 0
            entries.append(entry)
            # Guard against headers that would not advance the position.
            if next_pos <= pos:
                break
            pos = next_pos
        return main_header, entries, end_header, crypt_header

    while pos < len(data):
        remaining = view[pos:]
        if len(remaining) < SIZEOF_SHORTBLOCKHEAD:
            break

        if encrypted:
            if crypt_header is not None:
                # RAR 5.0 encrypted headers: each header is preceded by its own IV.
                if password is None or fmt != RarFormat.RARFMT50:
                    break
                if _hdr_key is None:
                    from refinery.lib.unrar.crypt import rar5_pbkdf2, rar5_psw_check
                    _hdr_key, _, psw_check_value = rar5_pbkdf2(
                        password, crypt_header.salt, crypt_header.lg2_count)
                    if crypt_header.use_psw_check:
                        computed = rar5_psw_check(psw_check_value)
                        if computed != crypt_header.psw_check:
                            from refinery.lib.unrar import RarInvalidPassword
                            raise RarInvalidPassword
                if len(remaining) < SIZE_INITV:
                    break
                iv = remaining[:SIZE_INITV]
                # Everything after the IV is decrypted, not just the header.
                enc_data = remaining[SIZE_INITV:]
                if not enc_data:
                    break
                # Zero-pad to a whole number of cipher blocks.
                if pad := -len(enc_data) % CRYPT_BLOCK_SIZE:
                    enc_data = bytearray(enc_data)
                    enc_data.extend(itertools.repeat(0, pad))
                from Cryptodome.Cipher import AES
                cipher = AES.new(_hdr_key, AES.MODE_CBC, iv=iv)
                dec_data = cipher.decrypt(enc_data)
                header_type, header_size, parsed, next_delta = parse_header50(dec_data)
                if header_size == 0:
                    break
                data_size = next_delta - header_size
                # The encrypted header occupies a whole number of cipher blocks.
                enc_header_size = header_size
                if enc_header_size % CRYPT_BLOCK_SIZE:
                    enc_header_size += CRYPT_BLOCK_SIZE - enc_header_size % CRYPT_BLOCK_SIZE
                abs_data_offset = pos + SIZE_INITV + enc_header_size

                if header_type in (HeaderType.HEAD_FILE, HeaderType.HEAD_SERVICE):
                    if isinstance(parsed, RarFileEntry):
                        parsed._volume_index = 0
                        parsed._data_offset = abs_data_offset
                        parsed._data_size = data_size
                        entries.append(parsed)
                elif header_type == HeaderType.HEAD_MAIN:
                    if isinstance(parsed, RarMainHeader):
                        main_header = parsed
                elif header_type == HeaderType.HEAD_ENDARC:
                    if isinstance(parsed, RarEndArchiveHeader):
                        end_header = parsed
                    break

                pos = abs_data_offset + data_size
                continue
            elif fmt == RarFormat.RARFMT15:
                # RAR 1.5-4.x encrypted headers: each header is preceded by a salt
                # from which both key and IV are derived.
                if password is None:
                    break
                if len(remaining) < SIZE_SALT30 + CRYPT_BLOCK_SIZE:
                    break
                salt = remaining[:SIZE_SALT30]
                from refinery.lib.unrar.crypt import rar3_kdf
                key, iv = rar3_kdf(password, salt)
                enc_data = remaining[SIZE_SALT30:]
                # Zero-pad to a whole number of cipher blocks.
                if pad := -len(enc_data) % CRYPT_BLOCK_SIZE:
                    enc_data = bytearray(enc_data)
                    enc_data.extend(itertools.repeat(0, pad))
                from Cryptodome.Cipher import AES
                cipher = AES.new(key, AES.MODE_CBC, iv=iv)
                dec_data = cipher.decrypt(enc_data)
                raw = RawHeaderReader(dec_data)
                header_type, header_size, flags, parsed, next_delta = parse_header15(raw)
                if header_size == 0:
                    break
                # The encrypted header occupies a whole number of cipher blocks.
                enc_header_size = header_size
                if enc_header_size % CRYPT_BLOCK_SIZE:
                    enc_header_size += CRYPT_BLOCK_SIZE - enc_header_size % CRYPT_BLOCK_SIZE
                abs_data_offset = pos + SIZE_SALT30 + enc_header_size
                data_size = next_delta - header_size

                if header_type in (HeaderType.HEAD_FILE, HeaderType.HEAD_SERVICE):
                    if isinstance(parsed, RarFileEntry):
                        parsed._volume_index = 0
                        parsed._data_offset = abs_data_offset
                        parsed._data_size = data_size
                        entries.append(parsed)
                elif header_type == HeaderType.HEAD_MAIN:
                    if isinstance(parsed, RarMainHeader):
                        main_header = parsed
                elif header_type == HeaderType.HEAD_ENDARC:
                    if isinstance(parsed, RarEndArchiveHeader):
                        end_header = parsed
                    break

                pos = abs_data_offset + data_size
                continue
            else:
                break

        # Plain (unencrypted) header.
        if fmt == RarFormat.RARFMT50:
            header_type, header_size, parsed, next_delta = parse_header50(remaining)
        elif fmt == RarFormat.RARFMT15:
            raw = RawHeaderReader(remaining)
            header_type, header_size, flags, parsed, next_delta = parse_header15(raw)
        else:
            break

        if header_size == 0:
            break

        if header_type == HeaderType.HEAD_CRYPT:
            if isinstance(parsed, RarCryptHeader):
                # Remember the bytes that follow the encryption header; all
                # subsequent headers are encrypted.
                iv_start = pos + next_delta
                iv_end = iv_start + SIZE_INITV
                header_iv = view[iv_start:iv_end] if iv_end <= len(view) else b''
                crypt_header = parsed._replace(header_iv=header_iv)
                encrypted = True

        elif header_type == HeaderType.HEAD_MAIN:
            if isinstance(parsed, RarMainHeader):
                main_header = parsed
                if main_header.is_encrypted:
                    encrypted = True

        elif header_type in (HeaderType.HEAD_FILE, HeaderType.HEAD_SERVICE):
            if isinstance(parsed, RarFileEntry):
                parsed._volume_index = 0
                parsed._data_offset = pos + header_size
                parsed._data_size = parsed.packed_size
                entries.append(parsed)

        elif header_type == HeaderType.HEAD_ENDARC:
            if isinstance(parsed, RarEndArchiveHeader):
                end_header = parsed
            break

        # Guard against headers that would not advance the position.
        if next_delta <= 0:
            break
        pos += next_delta

    return main_header, entries, end_header, crypt_header

Functions

def dos_datetime(dostime)

Convert a DOS-format date/time to a Python datetime.

Expand source code Browse git
def dos_datetime(dostime: int) -> datetime | None:
    """
    Turn a packed 32-bit DOS timestamp into a naive Python datetime.

    The upper 16 bits carry the date (7-bit year offset from 1980, 4-bit month,
    5-bit day) and the lower 16 bits carry the time (5-bit hour, 6-bit minute,
    5-bit count of two-second units). A month or day of zero is promoted to 1,
    and the seconds value is capped at 59. Any other out-of-range field makes
    the function return None instead of raising.
    """
    try:
        date_bits = (dostime >> 16) & 0xFFFF
        time_bits = dostime & 0xFFFF
        fields = (
            1980 + ((date_bits >> 9) & 0x7F),
            ((date_bits >> 5) & 0x0F) or 1,
            (date_bits & 0x1F) or 1,
            (time_bits >> 11) & 0x1F,
            (time_bits >> 5) & 0x3F,
            # Seconds are stored halved; 30 and 31 would yield 60 and 62.
            min(2 * (time_bits & 0x1F), 59),
        )
        return datetime(*fields)
    except (ValueError, OverflowError):
        return None
def decode_rar4_filename(name_bytes, enc_data)

Decode a RAR 4.x Unicode filename from the encoded representation. The name_bytes is the ASCII portion; enc_data contains the encoding flags and high bytes. Returns the decoded Unicode filename.

Expand source code Browse git
def decode_rar4_filename(name_bytes: bytes, enc_data: bytes) -> str:
    """
    Reconstruct a RAR 4.x Unicode filename from its compact encoding.

    name_bytes is the single-byte form of the name and enc_data is the encoded
    stream that follows it. If enc_data is empty, name_bytes is simply decoded
    as latin-1. Otherwise the first byte of enc_data is a shared high byte and
    the rest is a sequence of flag bytes, each holding four 2-bit opcodes
    (most significant pair first), interleaved with operands:

      0: one operand byte, emitted as a code point below 0x100
      1: one operand byte, combined with the shared high byte
      2: two operand bytes forming a little-endian 16-bit code point
      3: a run copied from name_bytes; bit 7 of the length byte selects a
         variant that adds a correction byte to each copied byte (modulo 256)
         and applies the shared high byte

    Decoding stops quietly as soon as an operand would lie outside enc_data.
    """
    if not enc_data:
        return name_bytes.decode('latin-1')

    enc_len = len(enc_data)
    ascii_len = len(name_bytes)
    high = enc_data[0] << 8
    src = 1         # read cursor into enc_data
    dst = 0         # number of characters accounted for so far
    pending = 0     # current flag byte, consumed two bits at a time
    bits_left = 0
    out = []

    while src < enc_len:
        if not bits_left:
            pending = enc_data[src]
            src += 1
            bits_left = 8

        # Every opcode needs at least one operand byte.
        if src >= enc_len:
            break

        mode = pending >> 6
        if mode == 3:
            run = enc_data[src]
            src += 1
            if run & 0x80:
                if src >= enc_len:
                    break
                delta = enc_data[src]
                src += 1
                stop = min(dst + (run & 0x7F) + 2, ascii_len)
                if stop > dst:
                    out.extend(chr(((b + delta) & 0xFF) + high) for b in name_bytes[dst:stop])
                    dst = stop
            else:
                stop = min(dst + run + 2, ascii_len)
                if stop > dst:
                    out.extend(map(chr, name_bytes[dst:stop]))
                    dst = stop
        elif mode == 2:
            if src + 1 >= enc_len:
                break
            out.append(chr(enc_data[src] | (enc_data[src + 1] << 8)))
            src += 2
            dst += 1
        else:
            low = enc_data[src]
            out.append(chr((low | high) if mode == 1 else low))
            src += 1
            dst += 1

        pending = (pending << 2) & 0xFF
        bits_left -= 2

    return ''.join(out)
def parse_header14_main(data)

Parse a RAR 1.4 main archive header from the full 7-byte header (including the 4-byte magic RE~^).

Expand source code Browse git
def parse_header14_main(data: bytes | memoryview) -> RarMainHeader:
    """
    Build a RarMainHeader from a RAR 1.4 main archive header.

    data must start at the 4-byte RE~^ magic; the flags byte is read from
    offset 6 of this buffer. If fewer than SIZEOF_MAINHEAD14 bytes are
    available, a default-constructed RarMainHeader is returned.
    """
    if len(data) >= SIZEOF_MAINHEAD14:
        flag_byte = data[6]
        volume_bit = flag_byte & 0x01
        comment_bit = flag_byte & 0x02
        lock_bit = flag_byte & 0x04
        solid_bit = flag_byte & 0x08
        return RarMainHeader(
            flags=flag_byte,
            is_volume=bool(volume_bit),
            is_solid=bool(solid_bit),
            is_locked=bool(lock_bit),
            comment_in_header=bool(comment_bit),
        )
    return RarMainHeader()
def parse_header14_file(data, pos)

Parse a RAR 1.4 file header at the given position. Returns (entry, next_pos) or None if data exhausted.

RAR 1.4 file header layout (21 bytes + name): 4 bytes LE: DataSize (packed size) 4 bytes LE: UnpSize 2 bytes LE: CRC16 2 bytes LE: HeadSize 4 bytes LE: FileTime (DOS format) 1 byte: FileAttr 1 byte: Flags (bit 2 = encrypted) 1 byte: UnpVer raw (2 → version 13, else version 10) 1 byte: NameSize 1 byte: Method N bytes: FileName

Expand source code Browse git
def parse_header14_file(data: bytes | memoryview, pos: int) -> tuple[RarFileEntry, int] | None:
    """
    Read one RAR 1.4 file header starting at offset pos of data.

    Returns a pair (entry, next_pos), where next_pos is the offset immediately
    after the packed data of this entry, or None when the fixed header or the
    file name does not fit into the remaining data.

    On-disk layout (21 fixed bytes followed by the name):
      4 bytes LE: DataSize (packed size)
      4 bytes LE: UnpSize
      2 bytes LE: CRC16
      2 bytes LE: HeadSize
      4 bytes LE: FileTime (DOS format)
      1 byte:    FileAttr
      1 byte:    Flags (bit 2 = encrypted)
      1 byte:    UnpVer raw
      1 byte:    NameSize
      1 byte:    Method
      N bytes:   FileName
    """
    name_start = pos + SIZEOF_FILEHEAD14
    if name_start > len(data):
        return None

    # NOTE(review): HeadSize and the raw UnpVer byte are decoded but not used;
    # the header length is derived from NameSize and unp_ver is fixed to 15
    # below. Confirm this is intentional before relying on either field.
    (
        packed, unpacked, crc16, _head_size, dos_time,
        attr, flag_byte, _unp_ver_raw, name_len, method,
    ) = struct.unpack_from('<IIHHIBBBBB', data, pos)

    name_end = name_start + name_len
    if name_end > len(data):
        return None

    raw_name = bytes(data[name_start:name_end])
    has_password = bool(flag_byte & 0x04)

    entry = RarFileEntry()
    entry.header_type = HeaderType.HEAD_FILE
    entry.name = raw_name.rstrip(b'\x00').decode('latin-1')
    entry.size = unpacked
    entry.packed_size = packed
    entry.date = dos_datetime(dos_time)
    entry.method = method
    entry.is_encrypted = has_password
    if has_password:
        entry.crypt_method = CryptMethod.CRYPT_RAR13
    else:
        entry.crypt_method = CryptMethod.CRYPT_NONE
    entry.hash_type = HashType.HASH_RAR14
    entry.crc32 = crc16
    entry.unp_ver = 15
    entry.win_size = 0x10000
    entry.file_flags = attr
    entry.is_dir = bool(attr & 0x10)

    # The packed data follows the name directly.
    entry._data_offset = name_end
    entry._data_size = packed

    return entry, name_end + packed
def parse_header15(raw)

Parse a RAR 1.5-4.x header block from the given RawHeaderReader. Returns (header_type, header_size, flags, parsed_object, next_block_delta). parsed_object is RarMainHeader, RarFileEntry, RarEndArchiveHeader, or None.

Expand source code Browse git
def parse_header15(raw: RawHeaderReader) -> tuple:
    """
    Parse a RAR 1.5-4.x header block from the given RawHeaderReader.

    The reader is consumed sequentially starting at its current position, which
    must be the first byte of the block.

    Returns (header_type, header_size, flags, parsed_object, next_block_delta):
      header_type       HeaderType of the block; the RAR 1.5-4.x type bytes for
                        main, file, service and end-of-archive blocks are mapped
                        to the unified HEAD_* members via _HEAD3_TO_5.
      header_size       the HeadSize field as stored in the block.
      flags             the 16-bit block flags.
      parsed_object     RarMainHeader, RarFileEntry, RarEndArchiveHeader, or
                        None for block types that are not interpreted.
      next_block_delta  distance from the start of this block to the next one
                        (header size plus any attached data).

    If HeadSize is smaller than SIZEOF_SHORTBLOCKHEAD the block is reported as
    HEAD_UNKNOWN without further parsing. A type byte that is not a member of
    HeaderType raises ValueError.
    """
    # The leading 2-byte field is skipped; presumably the header CRC, which is
    # not verified here.
    raw.get2()
    header_type_raw = raw.get1()
    flags = raw.get2()
    head_size = raw.get2()

    if head_size < SIZEOF_SHORTBLOCKHEAD:
        return HeaderType.HEAD_UNKNOWN, head_size, flags, None, head_size

    header_type = HeaderType(header_type_raw)
    header_type = _HEAD3_TO_5.get(header_type, header_type)

    if header_type == HeaderType.HEAD_MAIN:
        # Two reserved fields (2 + 4 bytes) are consumed but not interpreted.
        raw.get2()
        raw.get4()
        is_volume = bool(flags & MHD.VOLUME)
        is_solid = bool(flags & MHD.SOLID)
        is_locked = bool(flags & MHD.LOCK)
        is_protected = bool(flags & MHD.PROTECT)
        is_encrypted = bool(flags & MHD.PASSWORD)
        first_volume = bool(flags & MHD.FIRSTVOLUME)
        new_numbering = bool(flags & MHD.NEWNUMBERING)
        comment_in_header = bool(flags & MHD.COMMENT)

        mh = RarMainHeader(
            flags=flags,
            is_volume=is_volume,
            is_solid=is_solid,
            is_locked=is_locked,
            is_protected=is_protected,
            is_encrypted=is_encrypted,
            first_volume=first_volume,
            new_numbering=new_numbering,
            comment_in_header=comment_in_header,
            vol_number=0,
        )
        return header_type, head_size, flags, mh, head_size

    elif header_type in (HeaderType.HEAD_FILE, HeaderType.HEAD_SERVICE):
        hd = RarFileEntry()
        hd.header_type = header_type
        hd.header_flags = flags
        hd.is_service = (header_type == HeaderType.HEAD_SERVICE)

        hd.split_before = bool(flags & LHD.SPLIT_BEFORE)
        hd.split_after = bool(flags & LHD.SPLIT_AFTER)
        hd.is_encrypted = bool(flags & LHD.PASSWORD)
        hd.solid = (not hd.is_service) and bool(flags & LHD.SOLID)
        # All three window bits set marks a directory rather than a window size.
        hd.is_dir = (flags & LHD.WINDOWMASK) == LHD.DIRECTORY
        hd.win_size = 0 if hd.is_dir else 0x10000 << ((flags & LHD.WINDOWMASK) >> 5)

        data_size = raw.get4()
        low_unp_size = raw.get4()
        hd.host_os = raw.get1()
        hd.hash_type = HashType.HASH_CRC32
        hd.crc32 = raw.get4()
        file_time = raw.get4()
        hd.unp_ver = raw.get1()
        # The method byte is stored with an offset of 0x30.
        hd.method = raw.get1() - 0x30

        name_size = raw.get2()
        hd.file_flags = raw.get4()

        if hd.is_encrypted:
            hd.crypt_method = _detect_crypt_method_15(hd.unp_ver)

        if hd.host_os in (HostSystem.HOST_UNIX, HostSystem.HOST_BEOS):
            hd.hs_type = HostSystemType.HSYS_UNIX
        elif hd.host_os <= HostSystem.HOST_BEOS:
            hd.hs_type = HostSystemType.HSYS_WINDOWS

        if hd.host_os == HostSystem.HOST_UNIX and (hd.file_flags & 0xF000) == 0xA000:
            hd.redir_type = FileSystemRedirect.FSREDIR_UNIXSYMLINK

        # For unpack versions below 20 the directory bit of the file attributes
        # is also honoured.
        if hd.unp_ver < 20 and (hd.file_flags & 0x10):
            hd.is_dir = True

        large_file = bool(flags & LHD.LARGE)
        if large_file:
            # The upper 32 bits of both sizes follow as two extra fields.
            high_pack = raw.get4()
            high_unp = raw.get4()
            hd.unknown_unp_size = (low_unp_size == 0xFFFFFFFF and high_unp == 0xFFFFFFFF)
        else:
            high_pack = 0
            high_unp = 0
            hd.unknown_unp_size = (low_unp_size == 0xFFFFFFFF)

        hd.packed_size = (high_pack << 32) | data_size
        hd.size = (high_unp << 32) | low_unp_size

        # Never read past the end of the available header bytes.
        read_name_size = min(name_size, raw.remaining())
        name_bytes = bytes(raw.getb(read_name_size))

        if not hd.is_service:
            if flags & LHD.UNICODE:
                # The single-byte name and the encoded Unicode data are
                # separated by a zero byte.
                null_pos = name_bytes.find(b'\x00')
                if null_pos >= 0 and null_pos + 1 < len(name_bytes):
                    ascii_part = name_bytes[:null_pos]
                    enc_part = name_bytes[null_pos + 1:]
                    hd.name = decode_rar4_filename(ascii_part, enc_part)
                else:
                    hd.name = name_bytes.split(b'\x00', 1)[0].decode('latin-1')
            else:
                hd.name = name_bytes.rstrip(b'\x00').decode('latin-1')
        else:
            hd.name = name_bytes.rstrip(b'\x00').decode('latin-1', errors='replace')

        if flags & LHD.SALT:
            hd.salt = raw.getb(SIZE_SALT30)

        hd.date = dos_datetime(file_time)

        if flags & LHD.EXTTIME:
            _parse_ext_time(raw, hd, file_time)

        next_delta = head_size + hd.packed_size
        return header_type, head_size, flags, hd, next_delta

    elif header_type == HeaderType.HEAD_ENDARC:
        next_vol = bool(flags & EARC.NEXT_VOLUME)
        data_crc_present = bool(flags & EARC.DATACRC)
        vol_number_present = bool(flags & EARC.VOLNUMBER)
        # Optional fields appear in this order when their flag is set.
        data_crc = raw.get4() if data_crc_present else None
        vol_number = raw.get2() if vol_number_present else None
        eh = RarEndArchiveHeader(
            next_volume=next_vol,
            data_crc=data_crc,
            vol_number=vol_number,
        )
        return header_type, head_size, flags, eh, head_size

    else:
        # Uninterpreted block: skip the header and, for long blocks, the
        # attached data whose 32-bit size follows the common fields.
        next_delta = head_size
        if flags & LHD.LONG_BLOCK:
            next_delta += raw.get4()
        return header_type, head_size, flags, None, next_delta
def parse_header50(data, offset=0)

Parse a RAR 5.0 header block. Returns (header_type, header_total_size, parsed_object, next_block_delta).

Expand source code Browse git
def parse_header50(data: bytes | memoryview, offset: int = 0) -> tuple:
    """
    Parse one RAR 5.0 header block located at the start of data.

    Returns (header_type, header_total_size, parsed_object, next_block_delta).
    header_total_size covers the 4 leading bytes, the size field and the block
    body; next_block_delta additionally includes the data area announced by
    the block flags. parsed_object is a RarCryptHeader, RarMainHeader,
    RarFileEntry, RarEndArchiveHeader, or None for other block types.

    A block whose size field is empty or zero yields
    (HeaderType.HEAD_UNKNOWN, 0, None, 0). A type value that is not a member
    of HeaderType raises ValueError.

    NOTE(review): the offset parameter is accepted but never used by this
    function; parsing always starts at the beginning of data.
    """
    reader = RawHeaderReader(data)

    # The first four bytes are skipped; presumably the header CRC, which is
    # not verified here.
    reader.get4()
    vint_len = reader.getv_size(4)
    body_len = reader.getv()

    if vint_len == 0 or body_len == 0:
        return HeaderType.HEAD_UNKNOWN, 0, None, 0

    total = 4 + vint_len + body_len

    kind = HeaderType(reader.getv())
    hflags = reader.getv()

    # Optional size fields, present only when the matching flag is set.
    extra_len = reader.getv() if hflags & HFL.EXTRA else 0
    payload_len = reader.getv() if hflags & HFL.DATA else 0

    delta = total + payload_len

    if kind == HeaderType.HEAD_CRYPT:
        reader.getv()  # one variable-length field is consumed and ignored
        crypt_flags = reader.getv()
        check_ok = bool(crypt_flags & CHFL.CRYPT_PSWCHECK)
        lg2 = reader.get1()
        salt_value = reader.getb(SIZE_SALT50)
        check_value = b''
        if check_ok:
            check_value = reader.getb(SIZE_PSWCHECK)
            stored_sum = reader.getb(SIZE_PSWCHECK_CSUM)
            import hashlib
            # The check value is only trusted if its truncated SHA-256
            # matches the stored checksum.
            expected_sum = hashlib.sha256(check_value).digest()[:SIZE_PSWCHECK_CSUM]
            check_ok = not (stored_sum != expected_sum)
        crypt = RarCryptHeader(
            lg2_count=lg2,
            salt=salt_value,
            use_psw_check=check_ok,
            psw_check=check_value,
        )
        return kind, total, crypt, delta

    if kind == HeaderType.HEAD_MAIN:
        arc_flags = reader.getv()
        volume = bool(arc_flags & MHFL.VOLUME)
        number = reader.getv() if arc_flags & MHFL.VOLNUMBER else 0
        main = RarMainHeader(
            flags=arc_flags,
            is_volume=volume,
            is_solid=bool(arc_flags & MHFL.SOLID),
            is_locked=bool(arc_flags & MHFL.LOCK),
            is_protected=bool(arc_flags & MHFL.PROTECT),
            is_encrypted=False,
            first_volume=volume and number == 0,
            new_numbering=True,
            comment_in_header=False,
            vol_number=number,
        )
        return kind, total, main, delta

    if kind in (HeaderType.HEAD_FILE, HeaderType.HEAD_SERVICE):
        entry = RarFileEntry()
        entry.header_type = kind
        entry.header_flags = hflags
        entry.is_service = (kind == HeaderType.HEAD_SERVICE)
        entry.split_before = bool(hflags & HFL.SPLITBEFORE)
        entry.split_after = bool(hflags & HFL.SPLITAFTER)

        entry.packed_size = payload_len
        entry.file_flags = reader.getv()
        entry.size = reader.getv()
        entry.unknown_unp_size = bool(entry.file_flags & FHFL.UNPUNKNOWN)

        reader.getv()  # one variable-length field is consumed and ignored
        entry.is_dir = bool(entry.file_flags & FHFL.DIRECTORY)

        if entry.file_flags & FHFL.UTIME:
            stamp = reader.get4()
            try:
                entry.date = datetime.fromtimestamp(stamp, tz=timezone.utc)
            except (OSError, OverflowError, ValueError):
                # An unrepresentable timestamp leaves the date untouched.
                pass

        if entry.file_flags & FHFL.CRC32:
            entry.hash_type = HashType.HASH_CRC32
            entry.crc32 = reader.get4()

        packing = reader.getv()
        entry.method = (packing >> FCI.METHOD_SHIFT) & 7
        entry.unp_ver = (packing & FCI.ALGO_MASK) + 50
        entry.solid = bool(packing & FCI.SOLID)
        if entry.is_dir:
            entry.win_size = 0
        else:
            entry.win_size = 0x20000 << ((packing >> FCI.DICT_SHIFT) & 0xF)

        entry.host_os = reader.getv()
        if entry.host_os == HostSystem.HOST5_UNIX:
            entry.hs_type = HostSystemType.HSYS_UNIX
        elif entry.host_os == HostSystem.HOST5_WINDOWS:
            entry.hs_type = HostSystemType.HSYS_WINDOWS

        name_len = reader.getv()
        encoded_name = bytes(reader.getb(name_len))
        entry.name = encoded_name.rstrip(b'\x00').decode('utf-8', errors='replace')

        if extra_len > 0:
            _parse_extra50(reader, entry, extra_len, total)

        return entry.header_type, total, entry, delta

    if kind == HeaderType.HEAD_ENDARC:
        arc_flags = reader.getv()
        end = RarEndArchiveHeader(
            next_volume=bool(arc_flags & EHFL.NEXTVOLUME),
            data_crc=None,
            vol_number=None,
        )
        return kind, total, end, delta

    return kind, total, None, delta
def parse_headers(data, fmt, password=None)

Parse all headers from a RAR volume. Returns (main_header, file_entries, end_header, crypt_header).

Expand source code Browse git
def parse_headers(
    data: bytes | memoryview,
    fmt: RarFormat,
    password: str | None = None,
) -> tuple[
    RarMainHeader | None,
    list[RarFileEntry],
    RarEndArchiveHeader | None,
    RarCryptHeader | None,
]:
    """
    Parse all headers from a RAR volume.
    Returns (main_header, file_entries, end_header, crypt_header).

    data is the complete volume including its signature, and fmt selects the
    header layout. Every returned file entry has _volume_index set to 0 and
    _data_offset / _data_size describing where its packed data lives in data.

    password is only needed for archives with encrypted headers. Without it,
    parsing stops at the first encrypted header and whatever was collected up
    to that point is returned. For RAR 5.0 archives whose crypt header carries
    a password check value, a wrong password raises RarInvalidPassword.

    Parsing also stops at the end-of-archive block, when fewer than
    SIZEOF_SHORTBLOCKHEAD bytes remain, or when a header reports a size of zero.
    """
    pos = 0
    main_header = None
    entries: list[RarFileEntry] = []
    end_header = None
    crypt_header = None
    # Set once a block announces that all following headers are encrypted.
    encrypted = False
    # RAR 5.0 header key, derived lazily and reused for every encrypted header.
    _hdr_key = None

    view = memoryview(data)

    # Skip the format signature at the start of the volume.
    if fmt == RarFormat.RARFMT50:
        pos = SIZEOF_MARKHEAD5
    elif fmt == RarFormat.RARFMT15:
        pos = SIZEOF_MARKHEAD3
    elif fmt == RarFormat.RARFMT14:
        pos = 0

    if fmt == RarFormat.RARFMT14:
        # RAR 1.4: one main header followed by a plain sequence of file headers.
        main_header = parse_header14_main(view)
        pos = SIZEOF_MAINHEAD14
        while pos < len(data):
            result = parse_header14_file(view, pos)
            if result is None:
                break
            entry, next_pos = result
            entry._volume_index = 0
            entries.append(entry)
            # Guard against a header that would not advance the position.
            if next_pos <= pos:
                break
            pos = next_pos
        return main_header, entries, end_header, crypt_header

    while pos < len(data):
        remaining = view[pos:]
        if len(remaining) < SIZEOF_SHORTBLOCKHEAD:
            break

        if encrypted:
            if crypt_header is not None:
                # RAR 5.0 encrypted headers: each one is stored as an IV
                # followed by the AES-CBC encrypted header bytes.
                if password is None or fmt != RarFormat.RARFMT50:
                    break
                if _hdr_key is None:
                    from refinery.lib.unrar.crypt import rar5_pbkdf2, rar5_psw_check
                    _hdr_key, _, psw_check_value = rar5_pbkdf2(
                        password, crypt_header.salt, crypt_header.lg2_count)
                    if crypt_header.use_psw_check:
                        computed = rar5_psw_check(psw_check_value)
                        if computed != crypt_header.psw_check:
                            from refinery.lib.unrar import RarInvalidPassword
                            raise RarInvalidPassword
                if len(remaining) < SIZE_INITV:
                    break
                iv = remaining[:SIZE_INITV]
                enc_data = remaining[SIZE_INITV:]
                if not enc_data:
                    break
                # Everything after the IV is decrypted, zero-padded to a whole
                # number of cipher blocks; only the leading header is parsed.
                if pad := -len(enc_data) % CRYPT_BLOCK_SIZE:
                    enc_data = bytearray(enc_data)
                    enc_data.extend(itertools.repeat(0, pad))
                from Cryptodome.Cipher import AES
                cipher = AES.new(_hdr_key, AES.MODE_CBC, iv=iv)
                dec_data = cipher.decrypt(enc_data)
                header_type, header_size, parsed, next_delta = parse_header50(dec_data)
                if header_size == 0:
                    break
                data_size = next_delta - header_size
                # On disk the encrypted header occupies a whole number of
                # cipher blocks, so round its size up before locating the data.
                enc_header_size = header_size
                if enc_header_size % CRYPT_BLOCK_SIZE:
                    enc_header_size += CRYPT_BLOCK_SIZE - enc_header_size % CRYPT_BLOCK_SIZE
                abs_data_offset = pos + SIZE_INITV + enc_header_size

                if header_type in (HeaderType.HEAD_FILE, HeaderType.HEAD_SERVICE):
                    if isinstance(parsed, RarFileEntry):
                        parsed._volume_index = 0
                        parsed._data_offset = abs_data_offset
                        parsed._data_size = data_size
                        entries.append(parsed)
                elif header_type == HeaderType.HEAD_MAIN:
                    if isinstance(parsed, RarMainHeader):
                        main_header = parsed
                elif header_type == HeaderType.HEAD_ENDARC:
                    if isinstance(parsed, RarEndArchiveHeader):
                        end_header = parsed
                    break

                pos = abs_data_offset + data_size
                continue
            elif fmt == RarFormat.RARFMT15:
                # RAR 1.5-4.x encrypted headers: each one is stored as a salt
                # followed by the AES-CBC encrypted header bytes; key and IV
                # are both derived from password and salt.
                if password is None:
                    break
                if len(remaining) < SIZE_SALT30 + CRYPT_BLOCK_SIZE:
                    break
                salt = remaining[:SIZE_SALT30]
                from refinery.lib.unrar.crypt import rar3_kdf
                key, iv = rar3_kdf(password, salt)
                enc_data = remaining[SIZE_SALT30:]
                if pad := -len(enc_data) % CRYPT_BLOCK_SIZE:
                    enc_data = bytearray(enc_data)
                    enc_data.extend(itertools.repeat(0, pad))
                from Cryptodome.Cipher import AES
                cipher = AES.new(key, AES.MODE_CBC, iv=iv)
                dec_data = cipher.decrypt(enc_data)
                raw = RawHeaderReader(dec_data)
                header_type, header_size, flags, parsed, next_delta = parse_header15(raw)
                if header_size == 0:
                    break
                # Round the header size up to whole cipher blocks, as above.
                enc_header_size = header_size
                if enc_header_size % CRYPT_BLOCK_SIZE:
                    enc_header_size += CRYPT_BLOCK_SIZE - enc_header_size % CRYPT_BLOCK_SIZE
                abs_data_offset = pos + SIZE_SALT30 + enc_header_size
                data_size = next_delta - header_size

                if header_type in (HeaderType.HEAD_FILE, HeaderType.HEAD_SERVICE):
                    if isinstance(parsed, RarFileEntry):
                        parsed._volume_index = 0
                        parsed._data_offset = abs_data_offset
                        parsed._data_size = data_size
                        entries.append(parsed)
                elif header_type == HeaderType.HEAD_MAIN:
                    if isinstance(parsed, RarMainHeader):
                        main_header = parsed
                elif header_type == HeaderType.HEAD_ENDARC:
                    if isinstance(parsed, RarEndArchiveHeader):
                        end_header = parsed
                    break

                pos = abs_data_offset + data_size
                continue
            else:
                break

        # Unencrypted header at the current position.
        if fmt == RarFormat.RARFMT50:
            header_type, header_size, parsed, next_delta = parse_header50(remaining)
        elif fmt == RarFormat.RARFMT15:
            raw = RawHeaderReader(remaining)
            header_type, header_size, flags, parsed, next_delta = parse_header15(raw)
        else:
            break

        if header_size == 0:
            break

        if header_type == HeaderType.HEAD_CRYPT:
            if isinstance(parsed, RarCryptHeader):
                # Record the SIZE_INITV bytes that directly follow the crypt
                # block (empty if the volume ends before that); from here on
                # all headers are treated as encrypted.
                iv_start = pos + next_delta
                iv_end = iv_start + SIZE_INITV
                header_iv = view[iv_start:iv_end] if iv_end <= len(view) else b''
                crypt_header = parsed._replace(header_iv=header_iv)
                encrypted = True

        elif header_type == HeaderType.HEAD_MAIN:
            if isinstance(parsed, RarMainHeader):
                main_header = parsed
                if main_header.is_encrypted:
                    encrypted = True

        elif header_type in (HeaderType.HEAD_FILE, HeaderType.HEAD_SERVICE):
            if isinstance(parsed, RarFileEntry):
                parsed._volume_index = 0
                parsed._data_offset = pos + header_size
                parsed._data_size = parsed.packed_size
                entries.append(parsed)

        elif header_type == HeaderType.HEAD_ENDARC:
            if isinstance(parsed, RarEndArchiveHeader):
                end_header = parsed
            break

        # A non-positive delta would loop forever on the same block.
        if next_delta <= 0:
            break
        pos += next_delta

    return main_header, entries, end_header, crypt_header

Classes

class RarFormat (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class RarFormat(enum.IntEnum):
    """
    Archive format generations distinguished by the header parsers.
    """
    RARFMT14 = 14  # headers parsed by parse_header14_main / parse_header14_file
    RARFMT15 = 15  # headers parsed by parse_header15 (RAR 1.5-4.x)
    RARFMT50 = 50  # headers parsed by parse_header50 (RAR 5.0)

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var RARFMT14

The type of the None singleton.

var RARFMT15

The type of the None singleton.

var RARFMT50

The type of the None singleton.

class HeaderType (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class HeaderType(enum.IntEnum):
    """
    Block type identifiers for RAR headers.

    The HEAD_* members are the unified type values that both header parsers
    report. The HEAD3_* members are the raw type bytes of RAR 1.5-4.x blocks;
    parse_header15 translates the main, file, service and end-of-archive ones
    to their HEAD_* counterparts through _HEAD3_TO_5.
    """
    # Unified values
    HEAD_MARK = 0
    HEAD_MAIN = 1
    HEAD_FILE = 2
    HEAD_SERVICE = 3
    HEAD_CRYPT = 4
    HEAD_ENDARC = 5
    HEAD_UNKNOWN = 0xff

    # Raw RAR 1.5-4.x type bytes
    HEAD3_MARK = 0x72
    HEAD3_MAIN = 0x73
    HEAD3_FILE = 0x74
    HEAD3_CMT = 0x75
    HEAD3_AV = 0x76
    HEAD3_OLDSERVICE = 0x77
    HEAD3_PROTECT = 0x78
    HEAD3_SIGN = 0x79
    HEAD3_SERVICE = 0x7a
    HEAD3_ENDARC = 0x7b

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var HEAD_MARK

The type of the None singleton.

var HEAD_MAIN

The type of the None singleton.

var HEAD_FILE

The type of the None singleton.

var HEAD_SERVICE

The type of the None singleton.

var HEAD_CRYPT

The type of the None singleton.

var HEAD_ENDARC

The type of the None singleton.

var HEAD_UNKNOWN

The type of the None singleton.

var HEAD3_MARK

The type of the None singleton.

var HEAD3_MAIN

The type of the None singleton.

var HEAD3_FILE

The type of the None singleton.

var HEAD3_CMT

The type of the None singleton.

var HEAD3_AV

The type of the None singleton.

var HEAD3_OLDSERVICE

The type of the None singleton.

var HEAD3_PROTECT

The type of the None singleton.

var HEAD3_SIGN

The type of the None singleton.

var HEAD3_SERVICE

The type of the None singleton.

var HEAD3_ENDARC

The type of the None singleton.

class MHD (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class MHD(enum.IntEnum):
    """
    Bit flags of the RAR 1.5-4.x main archive header, as tested by
    parse_header15. PACK_COMMENT and NEWNUMBERING share one bit, so
    NEWNUMBERING is an alias of PACK_COMMENT.
    """
    VOLUME = 1 << 0
    COMMENT = 1 << 1
    LOCK = 1 << 2
    SOLID = 1 << 3
    PACK_COMMENT = 1 << 4
    NEWNUMBERING = 1 << 4
    AV = 1 << 5
    PROTECT = 1 << 6
    PASSWORD = 1 << 7
    FIRSTVOLUME = 1 << 8

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var VOLUME

The type of the None singleton.

var COMMENT

The type of the None singleton.

var LOCK

The type of the None singleton.

var SOLID

The type of the None singleton.

var PACK_COMMENT

The type of the None singleton.

var NEWNUMBERING

The type of the None singleton.

var AV

The type of the None singleton.

var PROTECT

The type of the None singleton.

var PASSWORD

The type of the None singleton.

var FIRSTVOLUME

The type of the None singleton.

class LHD (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class LHD(enum.IntEnum):
    """
    Bit flags of RAR 1.5-4.x file and service headers, as tested by
    parse_header15.

    Bits 5-7 (WINDOWMASK) form a 3-bit field. parse_header15 derives the
    window size from it as 0x10000 shifted left by the field value, and treats
    the all-ones value (DIRECTORY, an alias of WINDOWMASK) as a directory.
    """
    SPLIT_BEFORE = 1 << 0
    SPLIT_AFTER = 1 << 1
    PASSWORD = 1 << 2
    COMMENT = 1 << 3
    SOLID = 1 << 4

    # 3-bit window field, stored in bits 5-7
    WINDOWMASK = 7 << 5
    WINDOW64 = 0 << 5
    WINDOW128 = 1 << 5
    WINDOW256 = 2 << 5
    WINDOW512 = 3 << 5
    WINDOW1024 = 4 << 5
    WINDOW2048 = 5 << 5
    WINDOW4096 = 6 << 5
    DIRECTORY = 7 << 5

    LARGE = 1 << 8
    UNICODE = 1 << 9
    SALT = 1 << 10
    VERSION = 1 << 11
    EXTTIME = 1 << 12
    SKIP_IF_UNKNOWN = 1 << 14
    LONG_BLOCK = 1 << 15

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var SPLIT_BEFORE

The type of the None singleton.

var SPLIT_AFTER

The type of the None singleton.

var PASSWORD

The type of the None singleton.

var COMMENT

The type of the None singleton.

var SOLID

The type of the None singleton.

var WINDOWMASK

The type of the None singleton.

var WINDOW64

The type of the None singleton.

var WINDOW128

The type of the None singleton.

var WINDOW256

The type of the None singleton.

var WINDOW512

The type of the None singleton.

var WINDOW1024

The type of the None singleton.

var WINDOW2048

The type of the None singleton.

var WINDOW4096

The type of the None singleton.

var DIRECTORY

The type of the None singleton.

var LARGE

The type of the None singleton.

var UNICODE

The type of the None singleton.

var SALT

The type of the None singleton.

var VERSION

The type of the None singleton.

var EXTTIME

The type of the None singleton.

var SKIP_IF_UNKNOWN

The type of the None singleton.

var LONG_BLOCK

The type of the None singleton.

class EARC (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class EARC(enum.IntEnum):
    """
    Bit flags of the RAR 1.5-4.x end-of-archive block, as tested by
    parse_header15.
    """
    NEXT_VOLUME = 1 << 0
    DATACRC = 1 << 1    # a 4-byte data CRC field is present
    REVSPACE = 1 << 2
    VOLNUMBER = 1 << 3  # a 2-byte volume number field is present

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var NEXT_VOLUME

The type of the None singleton.

var DATACRC

The type of the None singleton.

var REVSPACE

The type of the None singleton.

var VOLNUMBER

The type of the None singleton.

class HFL (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class HFL(enum.IntEnum):
    """
    Generic block flags of RAR 5.0 headers, as tested by parse_header50.
    """
    EXTRA = 1 << 0          # an extra-area size field is present
    DATA = 1 << 1           # a data-area size field is present
    SKIPIFUNKNOWN = 1 << 2
    SPLITBEFORE = 1 << 3
    SPLITAFTER = 1 << 4
    CHILD = 1 << 5
    INHERITED = 1 << 6

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var EXTRA

The type of the None singleton.

var DATA

The type of the None singleton.

var SKIPIFUNKNOWN

The type of the None singleton.

var SPLITBEFORE

The type of the None singleton.

var SPLITAFTER

The type of the None singleton.

var CHILD

The type of the None singleton.

var INHERITED

The type of the None singleton.

class MHFL (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class MHFL(enum.IntEnum):
    """
    Archive flags of the RAR 5.0 main header, as tested by parse_header50.
    """
    VOLUME = 1 << 0
    VOLNUMBER = 1 << 1  # a volume number field follows the flags
    SOLID = 1 << 2
    PROTECT = 1 << 3
    LOCK = 1 << 4

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var VOLUME

The type of the None singleton.

var VOLNUMBER

The type of the None singleton.

var SOLID

The type of the None singleton.

var PROTECT

The type of the None singleton.

var LOCK

The type of the None singleton.

class FHFL (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class FHFL(enum.IntEnum):
    """
    File flags of RAR 5.0 file and service headers, as tested by
    parse_header50.
    """
    DIRECTORY = 1 << 0
    UTIME = 1 << 1       # a 4-byte Unix timestamp field is present
    CRC32 = 1 << 2       # a 4-byte CRC32 field is present
    UNPUNKNOWN = 1 << 3

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var DIRECTORY

The type of the None singleton.

var UTIME

The type of the None singleton.

var CRC32

The type of the None singleton.

var UNPUNKNOWN

The type of the None singleton.

class EHFL (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class EHFL(enum.IntEnum):
    """
    Flags of the RAR 5.0 end-of-archive header, as tested by parse_header50.
    """
    NEXTVOLUME = 1 << 0

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var NEXTVOLUME

The type of the None singleton.

class CHFL (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class CHFL(enum.IntEnum):
    """
    Flags of the RAR 5.0 crypt header, as tested by parse_header50.
    """
    CRYPT_PSWCHECK = 1 << 0  # password check value and its checksum are present

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var CRYPT_PSWCHECK

The type of the None singleton.

class FCI (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class FCI(enum.IntEnum):
    """
    Masks and shift amounts for a packed integer field. The layout implied by the values is:
    bits 0-5 (ALGO_MASK), bit 6 (SOLID), three bits starting at METHOD_SHIFT (METHOD_MASK) and
    four bits starting at DICT_SHIFT (DICT_MASK). Note that the two *_SHIFT members are bit
    offsets, not masks.
    """
    # Low six bits.
    ALGO_MASK = 0x3F
    # Single bit directly above the algorithm bits.
    SOLID = 1 << 6
    # Three-bit field at offset 7.
    METHOD_SHIFT = 7
    METHOD_MASK = 0x7 << 7
    # Four-bit field at offset 10.
    DICT_SHIFT = 10
    DICT_MASK = 0xF << 10

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var ALGO_MASK

The type of the None singleton.

var SOLID

The type of the None singleton.

var METHOD_SHIFT

The type of the None singleton.

var METHOD_MASK

The type of the None singleton.

var DICT_SHIFT

The type of the None singleton.

var DICT_MASK

The type of the None singleton.

class CryptMethod (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class CryptMethod(enum.IntEnum):
    """
    Integer identifiers for encryption schemes, numbered consecutively from zero. The names
    suggest one scheme per RAR generation, with CRYPT_NONE standing for "not encrypted".
    """
    CRYPT_NONE = 0
    CRYPT_RAR13 = 1
    CRYPT_RAR15 = 2
    CRYPT_RAR20 = 3
    CRYPT_RAR30 = 4
    CRYPT_RAR50 = 5

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var CRYPT_NONE

The type of the None singleton.

var CRYPT_RAR13

The type of the None singleton.

var CRYPT_RAR15

The type of the None singleton.

var CRYPT_RAR20

The type of the None singleton.

var CRYPT_RAR30

The type of the None singleton.

var CRYPT_RAR50

The type of the None singleton.

class HostSystem (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class HostSystem(enum.IntEnum):
    """
    Host operating system codes. Two numbering schemes share this enum: the HOST5_* names and
    the HOST_* names (presumably RAR5 versus older formats - confirm against the parser).

    Because the values 0 and 1 occur twice, Python treats the later names as aliases of the
    earlier ones: HostSystem.HOST_MSDOS is HostSystem.HOST5_WINDOWS and HostSystem.HOST_OS2 is
    HostSystem.HOST5_UNIX. Looking up HostSystem(0) or HostSystem(1) therefore yields the
    HOST5_* member, and iterating the enum skips the two aliases.
    """
    HOST5_WINDOWS = 0
    HOST5_UNIX = 1
    # Alias of HOST5_WINDOWS (same value).
    HOST_MSDOS = 0
    # Alias of HOST5_UNIX (same value).
    HOST_OS2 = 1
    HOST_WIN32 = 2
    HOST_UNIX = 3
    HOST_MACOS = 4
    HOST_BEOS = 5

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var HOST5_WINDOWS

The type of the None singleton.

var HOST5_UNIX

The type of the None singleton.

var HOST_MSDOS

The type of the None singleton.

var HOST_OS2

The type of the None singleton.

var HOST_WIN32

The type of the None singleton.

var HOST_UNIX

The type of the None singleton.

var HOST_MACOS

The type of the None singleton.

var HOST_BEOS

The type of the None singleton.

class HostSystemType (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class HostSystemType(enum.IntEnum):
    """
    Coarse host system classification with three values: Windows, Unix, or unknown.
    """
    HSYS_WINDOWS = 0
    HSYS_UNIX = 1
    HSYS_UNKNOWN = 2

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var HSYS_WINDOWS

The type of the None singleton.

var HSYS_UNIX

The type of the None singleton.

var HSYS_UNKNOWN

The type of the None singleton.

class FileSystemRedirect (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class FileSystemRedirect(enum.IntEnum):
    """
    Integer identifiers for redirection kinds, numbered consecutively from zero. The names
    suggest symlinks, junctions, hard links and file copies, with FSREDIR_NONE standing for
    an ordinary entry.
    """
    FSREDIR_NONE = 0
    FSREDIR_UNIXSYMLINK = 1
    FSREDIR_WINSYMLINK = 2
    FSREDIR_JUNCTION = 3
    FSREDIR_HARDLINK = 4
    FSREDIR_FILECOPY = 5

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var FSREDIR_NONE

The type of the None singleton.

var FSREDIR_UNIXSYMLINK

The type of the None singleton.

var FSREDIR_WINSYMLINK

The type of the None singleton.

var FSREDIR_JUNCTION

The type of the None singleton.

var FSREDIR_HARDLINK

The type of the None singleton.

var FSREDIR_FILECOPY

The type of the None singleton.

class HashType (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git
class HashType(enum.IntEnum):
    """
    Integer identifiers for checksum kinds, numbered consecutively from zero. The names
    suggest a RAR 1.4 checksum, CRC32 and BLAKE2, with HASH_NONE standing for "no hash".
    """
    HASH_NONE = 0
    HASH_RAR14 = 1
    HASH_CRC32 = 2
    HASH_BLAKE2 = 3

Ancestors

  • enum.IntEnum
  • builtins.int
  • enum.ReprEnum
  • enum.Enum

Class variables

var HASH_NONE

The type of the None singleton.

var HASH_RAR14

The type of the None singleton.

var HASH_CRC32

The type of the None singleton.

var HASH_BLAKE2

The type of the None singleton.

class RarMainHeader (flags=0, is_volume=False, is_solid=False, is_locked=False, is_protected=False, is_encrypted=False, first_volume=False, new_numbering=False, comment_in_header=False, vol_number=0)

RarMainHeader(flags, is_volume, is_solid, is_locked, is_protected, is_encrypted, first_volume, new_numbering, comment_in_header, vol_number)

Expand source code Browse git
class RarMainHeader(NamedTuple):
    """
    Immutable record of archive-level properties. Every field has a default, so calling
    RarMainHeader() without arguments yields flags == 0, all booleans False and volume 0.
    """
    # Integer flag word; presumably the raw header flags that the booleans below were decoded
    # from (the decoding is not part of this class).
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Volume number; 0 unless set by the caller.
    vol_number: int = 0

Ancestors

  • builtins.tuple

Instance variables

var flags

Alias for field number 0

Expand source code Browse git
class RarMainHeader(NamedTuple):
    """
    Immutable record of archive-level properties. Every field has a default, so calling
    RarMainHeader() without arguments yields flags == 0, all booleans False and volume 0.
    """
    # Integer flag word; presumably the raw header flags that the booleans below were decoded
    # from (the decoding is not part of this class).
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Volume number; 0 unless set by the caller.
    vol_number: int = 0
var is_volume

Alias for field number 1

Expand source code Browse git
class RarMainHeader(NamedTuple):
    """
    Immutable record of archive-level properties. Every field has a default, so calling
    RarMainHeader() without arguments yields flags == 0, all booleans False and volume 0.
    """
    # Integer flag word; presumably the raw header flags that the booleans below were decoded
    # from (the decoding is not part of this class).
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Volume number; 0 unless set by the caller.
    vol_number: int = 0
var is_solid

Alias for field number 2

Expand source code Browse git
class RarMainHeader(NamedTuple):
    """
    Immutable record of archive-level properties. Every field has a default, so calling
    RarMainHeader() without arguments yields flags == 0, all booleans False and volume 0.
    """
    # Integer flag word; presumably the raw header flags that the booleans below were decoded
    # from (the decoding is not part of this class).
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Volume number; 0 unless set by the caller.
    vol_number: int = 0
var is_locked

Alias for field number 3

Expand source code Browse git
class RarMainHeader(NamedTuple):
    """
    Immutable record of archive-level properties. Every field has a default, so calling
    RarMainHeader() without arguments yields flags == 0, all booleans False and volume 0.
    """
    # Integer flag word; presumably the raw header flags that the booleans below were decoded
    # from (the decoding is not part of this class).
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Volume number; 0 unless set by the caller.
    vol_number: int = 0
var is_protected

Alias for field number 4

Expand source code Browse git
class RarMainHeader(NamedTuple):
    """
    Immutable record of archive-level properties. Every field has a default, so calling
    RarMainHeader() without arguments yields flags == 0, all booleans False and volume 0.
    """
    # Integer flag word; presumably the raw header flags that the booleans below were decoded
    # from (the decoding is not part of this class).
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Volume number; 0 unless set by the caller.
    vol_number: int = 0
var is_encrypted

Alias for field number 5

Expand source code Browse git
class RarMainHeader(NamedTuple):
    """
    Immutable record of archive-level properties. Every field has a default, so calling
    RarMainHeader() without arguments yields flags == 0, all booleans False and volume 0.
    """
    # Integer flag word; presumably the raw header flags that the booleans below were decoded
    # from (the decoding is not part of this class).
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Volume number; 0 unless set by the caller.
    vol_number: int = 0
var first_volume

Alias for field number 6

Expand source code Browse git
class RarMainHeader(NamedTuple):
    """
    Immutable record of archive-level properties. Every field has a default, so calling
    RarMainHeader() without arguments yields flags == 0, all booleans False and volume 0.
    """
    # Integer flag word; presumably the raw header flags that the booleans below were decoded
    # from (the decoding is not part of this class).
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Volume number; 0 unless set by the caller.
    vol_number: int = 0
var new_numbering

Alias for field number 7

Expand source code Browse git
class RarMainHeader(NamedTuple):
    """
    Immutable record of archive-level properties. Every field has a default, so calling
    RarMainHeader() without arguments yields flags == 0, all booleans False and volume 0.
    """
    # Integer flag word; presumably the raw header flags that the booleans below were decoded
    # from (the decoding is not part of this class).
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Volume number; 0 unless set by the caller.
    vol_number: int = 0
var comment_in_header

Alias for field number 8

Expand source code Browse git
class RarMainHeader(NamedTuple):
    """
    Immutable record of archive-level properties. Every field has a default, so calling
    RarMainHeader() without arguments yields flags == 0, all booleans False and volume 0.
    """
    # Integer flag word; presumably the raw header flags that the booleans below were decoded
    # from (the decoding is not part of this class).
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Volume number; 0 unless set by the caller.
    vol_number: int = 0
var vol_number

Alias for field number 9

Expand source code Browse git
class RarMainHeader(NamedTuple):
    """
    Immutable record of archive-level properties. Every field has a default, so calling
    RarMainHeader() without arguments yields flags == 0, all booleans False and volume 0.
    """
    # Integer flag word; presumably the raw header flags that the booleans below were decoded
    # from (the decoding is not part of this class).
    flags: int = 0
    is_volume: bool = False
    is_solid: bool = False
    is_locked: bool = False
    is_protected: bool = False
    is_encrypted: bool = False
    first_volume: bool = False
    new_numbering: bool = False
    comment_in_header: bool = False
    # Volume number; 0 unless set by the caller.
    vol_number: int = 0
class RarEndArchiveHeader (next_volume, data_crc, vol_number)

RarEndArchiveHeader(next_volume, data_crc, vol_number)

Expand source code Browse git
class RarEndArchiveHeader(NamedTuple):
    """
    Immutable record with three required fields (none has a default).
    """
    # By its name, signals that a further volume follows; set by code outside this class.
    next_volume: bool
    # Integer or None; presumably None when the header stores no CRC - TODO confirm.
    data_crc: int | None
    # Integer or None; presumably None when the header stores no volume number - TODO confirm.
    vol_number: int | None

Ancestors

  • builtins.tuple

Instance variables

var next_volume

Alias for field number 0

Expand source code Browse git
class RarEndArchiveHeader(NamedTuple):
    """
    Immutable record with three required fields (none has a default).
    """
    # By its name, signals that a further volume follows; set by code outside this class.
    next_volume: bool
    # Integer or None; presumably None when the header stores no CRC - TODO confirm.
    data_crc: int | None
    # Integer or None; presumably None when the header stores no volume number - TODO confirm.
    vol_number: int | None
var data_crc

Alias for field number 1

Expand source code Browse git
class RarEndArchiveHeader(NamedTuple):
    """
    Immutable record with three required fields (none has a default).
    """
    # By its name, signals that a further volume follows; set by code outside this class.
    next_volume: bool
    # Integer or None; presumably None when the header stores no CRC - TODO confirm.
    data_crc: int | None
    # Integer or None; presumably None when the header stores no volume number - TODO confirm.
    vol_number: int | None
var vol_number

Alias for field number 2

Expand source code Browse git
class RarEndArchiveHeader(NamedTuple):
    """
    Immutable record with three required fields (none has a default).
    """
    # By its name, signals that a further volume follows; set by code outside this class.
    next_volume: bool
    # Integer or None; presumably None when the header stores no CRC - TODO confirm.
    data_crc: int | None
    # Integer or None; presumably None when the header stores no volume number - TODO confirm.
    vol_number: int | None
class RarFileEntry (name='', size=0, packed_size=0, date=None, ctime=None, atime=None, method=0, is_dir=False, is_encrypted=False, crc32=0, hash_type=0, hash_digest=b'', host_os=0, hs_type=2, unp_ver=0, win_size=0, solid=False, split_before=False, split_after=False, crypt_method=0, salt=b'', init_v=b'', lg2_count=0, psw_check=b'', use_psw_check=False, hash_key=b'', use_hash_key=False, redir_type=0, redir_name='', is_service=False, header_type=2, header_flags=0, file_flags=0, unknown_unp_size=False)

Metadata for a single file or service entry in a RAR archive.

Expand source code Browse git
@dataclass(repr=False)
class RarFileEntry:
    """
    Mutable record holding the metadata of one file or service entry of a RAR archive.

    All fields have defaults, so RarFileEntry() is a valid empty entry. The generated
    dataclass repr is disabled in favour of the compact __repr__ defined below.
    """
    # Name, sizes and timestamps.
    name: str = ''
    size: int = 0
    packed_size: int = 0
    date: datetime | None = None
    ctime: datetime | None = None
    atime: datetime | None = None

    # Storage method, entry kind and integrity data.
    method: int = 0
    is_dir: bool = False
    is_encrypted: bool = False
    crc32: int = 0
    hash_type: int = HashType.HASH_NONE
    hash_digest: buf = b''

    # Host system and unpacking parameters.
    host_os: int = 0
    hs_type: int = HostSystemType.HSYS_UNKNOWN
    unp_ver: int = 0
    win_size: int = 0
    solid: bool = False
    split_before: bool = False
    split_after: bool = False

    # Encryption parameters.
    crypt_method: int = CryptMethod.CRYPT_NONE
    salt: buf = b''
    init_v: buf = b''
    lg2_count: int = 0
    psw_check: buf = b''
    use_psw_check: bool = False
    hash_key: buf = b''
    use_hash_key: bool = False

    # Redirection and header bookkeeping.
    redir_type: int = FileSystemRedirect.FSREDIR_NONE
    redir_name: str = ''
    is_service: bool = False
    header_type: int = HeaderType.HEAD_FILE
    header_flags: int = 0
    file_flags: int = 0

    # Underscore-prefixed fields; presumably the location of the packed data, maintained by
    # the archive reader - confirm against the caller.
    _volume_index: int = 0
    _data_offset: int = 0
    _data_size: int = 0

    unknown_unp_size: bool = False

    def __repr__(self):
        # Short form: entry kind, name, and an encryption marker when applicable.
        if self.is_dir:
            kind = 'dir'
        else:
            kind = 'file'
        if self.is_encrypted:
            enc = ' [encrypted]'
        else:
            enc = ''
        return F'<RarFileEntry:{kind}:{self.name}{enc}>'

Instance variables

var name

The type of the None singleton.

var size

The type of the None singleton.

var packed_size

The type of the None singleton.

var date

The type of the None singleton.

var ctime

The type of the None singleton.

var atime

The type of the None singleton.

var method

The type of the None singleton.

var is_dir

The type of the None singleton.

var is_encrypted

The type of the None singleton.

var crc32

The type of the None singleton.

var hash_type

The type of the None singleton.

var hash_digest

The type of the None singleton.

var host_os

The type of the None singleton.

var hs_type

The type of the None singleton.

var unp_ver

The type of the None singleton.

var win_size

The type of the None singleton.

var solid

The type of the None singleton.

var split_before

The type of the None singleton.

var split_after

The type of the None singleton.

var crypt_method

The type of the None singleton.

var salt

The type of the None singleton.

var init_v

The type of the None singleton.

var lg2_count

The type of the None singleton.

var psw_check

The type of the None singleton.

var use_psw_check

The type of the None singleton.

var hash_key

The type of the None singleton.

var use_hash_key

The type of the None singleton.

var redir_type

The type of the None singleton.

var redir_name

The type of the None singleton.

var is_service

The type of the None singleton.

var header_type

The type of the None singleton.

var header_flags

The type of the None singleton.

var file_flags

The type of the None singleton.

var unknown_unp_size

The type of the None singleton.

class RarCryptHeader (lg2_count, salt, use_psw_check, psw_check, header_iv=b'')

RarCryptHeader(lg2_count, salt, use_psw_check, psw_check, header_iv)

Expand source code Browse git
class RarCryptHeader(NamedTuple):
    """
    Immutable record of encryption parameters. The first four fields are required; header_iv
    defaults to an empty byte string.
    """
    # Integer; by its name the base-2 logarithm of an iteration count - TODO confirm.
    lg2_count: int
    salt: buf
    # Whether psw_check is meaningful, judging by the field names.
    use_psw_check: bool
    psw_check: buf
    header_iv: buf = b''

Ancestors

  • builtins.tuple

Instance variables

var lg2_count

Alias for field number 0

Expand source code Browse git
class RarCryptHeader(NamedTuple):
    """
    Immutable record of encryption parameters. The first four fields are required; header_iv
    defaults to an empty byte string.
    """
    # Integer; by its name the base-2 logarithm of an iteration count - TODO confirm.
    lg2_count: int
    salt: buf
    # Whether psw_check is meaningful, judging by the field names.
    use_psw_check: bool
    psw_check: buf
    header_iv: buf = b''
var salt

Alias for field number 1

Expand source code Browse git
class RarCryptHeader(NamedTuple):
    """
    Immutable record of encryption parameters. The first four fields are required; header_iv
    defaults to an empty byte string.
    """
    # Integer; by its name the base-2 logarithm of an iteration count - TODO confirm.
    lg2_count: int
    salt: buf
    # Whether psw_check is meaningful, judging by the field names.
    use_psw_check: bool
    psw_check: buf
    header_iv: buf = b''
var use_psw_check

Alias for field number 2

Expand source code Browse git
class RarCryptHeader(NamedTuple):
    """
    Immutable record of encryption parameters. The first four fields are required; header_iv
    defaults to an empty byte string.
    """
    # Integer; by its name the base-2 logarithm of an iteration count - TODO confirm.
    lg2_count: int
    salt: buf
    # Whether psw_check is meaningful, judging by the field names.
    use_psw_check: bool
    psw_check: buf
    header_iv: buf = b''
var psw_check

Alias for field number 3

Expand source code Browse git
class RarCryptHeader(NamedTuple):
    """
    Immutable record of encryption parameters. The first four fields are required; header_iv
    defaults to an empty byte string.
    """
    # Integer; by its name the base-2 logarithm of an iteration count - TODO confirm.
    lg2_count: int
    salt: buf
    # Whether psw_check is meaningful, judging by the field names.
    use_psw_check: bool
    psw_check: buf
    header_iv: buf = b''
var header_iv

Alias for field number 4

Expand source code Browse git
class RarCryptHeader(NamedTuple):
    """
    Immutable record of encryption parameters. The first four fields are required; header_iv
    defaults to an empty byte string.
    """
    # Integer; by its name the base-2 logarithm of an iteration count - TODO confirm.
    lg2_count: int
    salt: buf
    # Whether psw_check is meaningful, judging by the field names.
    use_psw_check: bool
    psw_check: buf
    header_iv: buf = b''
class RawHeaderReader (data)

Simple binary reader for header data.

Expand source code Browse git
class RawHeaderReader:
    """
    Simple binary reader for header data.

    The input is wrapped in a memoryview and a read cursor is kept in `pos`. The fixed-width
    readers return 0 and leave the cursor untouched when too few bytes remain, so a short read
    cannot be told apart from a stored zero by the return value alone; use `remaining` when
    that distinction matters.
    """

    def __init__(self, data: bytes | bytearray | memoryview):
        self.data = memoryview(data)
        self.pos = 0

    def get1(self) -> int:
        """
        Read one byte and advance the cursor; return 0 if no byte is left.
        """
        if self.pos < len(self.data):
            v = self.data[self.pos]
            self.pos += 1
            return v
        return 0

    def get2(self) -> int:
        """
        Read a 16-bit little-endian integer; return 0 if fewer than two bytes are left.
        """
        if self.pos + 1 < len(self.data):
            v = self.data[self.pos] | (self.data[self.pos + 1] << 8)
            self.pos += 2
            return v
        return 0

    def get4(self) -> int:
        """
        Read a 32-bit little-endian unsigned integer; return 0 if fewer than four bytes are left.
        """
        if self.pos + 3 < len(self.data):
            v, = struct.unpack_from('<I', self.data, self.pos)
            self.pos += 4
            return v
        return 0

    def get8(self) -> int:
        """
        Read a 64-bit little-endian unsigned integer; return 0 if fewer than eight bytes are left.
        """
        if self.pos + 7 < len(self.data):
            v, = struct.unpack_from('<Q', self.data, self.pos)
            self.pos += 8
            return v
        return 0

    def getv(self) -> int:
        """
        Read a RAR5 variable-length integer.

        Every byte contributes its low seven bits, least significant group first, and a byte
        with a clear high bit ends the value. At most ten bytes are consumed. The result is
        reduced to 64 bits, so an encoding whose tenth byte carries more than one payload bit
        cannot yield a value wider than what `get8` can return. If the data ends, or ten bytes
        pass, before a terminating byte is seen, 0 is returned and the cursor stays where
        reading stopped.
        """
        result = 0
        shift = 0
        while self.pos < len(self.data) and shift < 64:
            b = self.data[self.pos]
            self.pos += 1
            result += (b & 0x7F) << shift
            if not (b & 0x80):
                # Python integers do not wrap; emulate unsigned 64-bit arithmetic explicitly.
                return result & 0xFFFFFFFFFFFFFFFF
            shift += 7
        return 0

    def getv_size(self, pos: int | None = None) -> int:
        """
        Return byte count of vint at given position.

        The cursor position is used when `pos` is None. The cursor is not moved. Returns 0 if
        no terminating byte (high bit clear) is found before the end of the data.
        """
        p = pos if pos is not None else self.pos
        for i in range(p, len(self.data)):
            if not (self.data[i] & 0x80):
                return i - p + 1
        return 0

    def getb(self, size: int) -> memoryview:
        """
        Return a view of up to `size` bytes starting at the cursor and advance past them. The
        view is shorter than requested when the data ends first; no bytes are copied.
        """
        end = min(self.pos + size, len(self.data))
        result = self.data[self.pos:end]
        self.pos = end
        return result

    def remaining(self) -> int:
        """
        Return the number of unread bytes, never less than zero.
        """
        return max(0, len(self.data) - self.pos)

    def set_pos(self, pos: int):
        """
        Move the cursor to `pos`. The value is stored as given, without bounds checking.
        """
        self.pos = pos

    def get_pos(self) -> int:
        """
        Return the current cursor position.
        """
        return self.pos

Methods

def get1(self)
Expand source code Browse git
def get1(self) -> int:
    """
    Read one byte and advance the cursor; return 0 if no byte is left.
    """
    offset = self.pos
    if offset >= len(self.data):
        return 0
    value = self.data[offset]
    self.pos = offset + 1
    return value
def get2(self)
Expand source code Browse git
def get2(self) -> int:
    """
    Read a 16-bit little-endian integer; return 0 if fewer than two bytes are left.
    """
    start = self.pos
    if start + 1 >= len(self.data):
        return 0
    low = self.data[start]
    high = self.data[start + 1]
    self.pos = start + 2
    return low | (high << 8)
def get4(self)
Expand source code Browse git
def get4(self) -> int:
    """
    Read a 32-bit little-endian unsigned integer; return 0 if fewer than four bytes are left.
    """
    start = self.pos
    if start + 3 >= len(self.data):
        return 0
    value = struct.unpack_from('<I', self.data, start)[0]
    self.pos = start + 4
    return value
def get8(self)
Expand source code Browse git
def get8(self) -> int:
    """
    Read a 64-bit little-endian unsigned integer; return 0 if fewer than eight bytes are left.
    """
    start = self.pos
    if start + 7 >= len(self.data):
        return 0
    value = struct.unpack_from('<Q', self.data, start)[0]
    self.pos = start + 8
    return value
def getv(self)

Read a RAR5 variable-length integer.

Expand source code Browse git
def getv(self) -> int:
    """
    Read a RAR5 variable-length integer.

    Every byte contributes its low seven bits, least significant group first, and a byte
    with a clear high bit ends the value. At most ten bytes are consumed. The result is
    reduced to 64 bits, so an encoding whose tenth byte carries more than one payload bit
    cannot yield a value wider than 64 bits. If the data ends, or ten bytes pass, before a
    terminating byte is seen, 0 is returned and the cursor stays where reading stopped.
    """
    result = 0
    shift = 0
    while self.pos < len(self.data) and shift < 64:
        b = self.data[self.pos]
        self.pos += 1
        result += (b & 0x7F) << shift
        if not (b & 0x80):
            # Python integers do not wrap; emulate unsigned 64-bit arithmetic explicitly.
            return result & 0xFFFFFFFFFFFFFFFF
        shift += 7
    return 0
def getv_size(self, pos=None)

Return byte count of vint at given position.

Expand source code Browse git
def getv_size(self, pos: int | None = None) -> int:
    """
    Return the number of bytes occupied by the vint at `pos`, or at the cursor when `pos` is
    None. The cursor is not moved. Returns 0 when no terminating byte (high bit clear) is
    found before the end of the data.
    """
    start = self.pos if pos is None else pos
    limit = len(self.data)
    index = start
    while index < limit:
        if (self.data[index] & 0x80) == 0:
            return index - start + 1
        index += 1
    return 0
def getb(self, size)
Expand source code Browse git
def getb(self, size: int) -> memoryview:
    """
    Return a view of up to `size` bytes starting at the cursor and advance past them. The
    view is shorter than requested when the data ends first; no bytes are copied.
    """
    start = self.pos
    limit = len(self.data)
    stop = start + size
    if stop > limit:
        stop = limit
    chunk = self.data[start:stop]
    self.pos = stop
    return chunk
def remaining(self)
Expand source code Browse git
def remaining(self) -> int:
    """
    Return the number of unread bytes, never less than zero.
    """
    unread = len(self.data) - self.pos
    return unread if unread > 0 else 0
def set_pos(self, pos)
Expand source code Browse git
def set_pos(self, pos: int):
    """
    Move the cursor to `pos`. The value is stored as given, without bounds checking.
    """
    self.pos = pos
def get_pos(self)
Expand source code Browse git
def get_pos(self) -> int:
    """
    Return the current cursor position.
    """
    return self.pos