Module refinery.units.formats.java.jvdasm

Java disassembler. The main logic is implemented in JvOpCode.

Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Java disassembler. The main logic is implemented in `refinery.lib.java.JvOpCode`.
"""
import re
import io
import collections

from refinery.units.formats import PathExtractorUnit, UnpackResult

from refinery.lib.java import (
    opc,
    JvClassFile,
    JvClassMember,
    JvCode,
    JvString,
    JvClassProperty,
    JvBaseType,
    JvTypePath,
)


def _parse_descriptor(
    descriptor: str,
    color_reset: str,
    color_space: str,
    color_types: str,
    color_array: str,
):
    def parse_type_list(args: str):
        while args:
            suffix = ''
            while args.startswith('['):
                args = args[1:]
                suffix += '[]'
            code, args = args[0], args[1:]
            if code == 'L':
                spec, _, args = args.partition(';')
                *ns, t = spec.split('/')
                ns = '.'.join([F'{color_space}{part}{color_reset}' for part in ns])
                spec = F'{ns}.{color_types}{t}{color_reset}'
            else:
                spec = {
                    'Z': 'boolean',
                    'B': 'byte',
                    'S': 'short',
                    'I': 'int',
                    'J': 'long',
                    'F': 'float',
                    'D': 'double',
                    'C': 'char',
                    'V': 'void',
                }[code]
                spec = F'{color_types}{spec}{color_reset}'
            yield F'{spec}{color_array}{suffix}{color_reset}'

    args, retval = re.match(R'^\((.*?)\)(.*?)$', descriptor).groups()
    retval, = parse_type_list(retval)
    return retval, tuple(parse_type_list(args))


class jvdasm(PathExtractorUnit):
    """
    Disassembles the JVM bytecode instructions of methods of classes defined in Java class
    files. The unit is implemented as a path extractor and each path name corresponds to the
    name of one method defined in the class file.
    """
    # Length of the longest opcode name; used as the width of the mnemonic column.
    _OPC_STRLEN = max(len(op.name) for op in opc)

    def _hex(self, bytestring, sep=''):
        """
        Format every item of `bytestring` as two lowercase hex digits, joined by `sep`.
        """
        return sep.join(F'{x:02x}' for x in bytestring)

    def __init__(
        self, *paths,
        gray: PathExtractorUnit.Arg.Switch('-g', help='Disable colored output.') = False,
        **keywords
    ):
        super().__init__(*paths, gray=gray, **keywords)

    def unpack(self, data):
        """
        Yield one `UnpackResult` for each method in the class file `data` that has a `Code`
        attribute. The payload is the textual disassembly listing of the method, encoded
        with `self.codec`. Methods without a `Code` attribute are skipped after a warning is
        logged. When several methods map to the same path, `[k]` is appended to each of
        these paths, where `k` counts the methods with that path in order of appearance.
        """
        def _name(method: JvClassMember):
            name = method.name
            if name == '<init>':
                # Constructors are displayed under the last component of the class path.
                _, _, name = str(jc.this).rpartition('/')
            elif m := re.fullmatch('<(.*?)>', name):
                # NOTE(review): m[0] is the entire match, so the angle brackets are kept and
                # the capture group is unused; confirm whether m[1] was intended.
                name = F'.{m[0]}'
            return name

        def _path(method: JvClassMember):
            return F'{jc.this!s}/{_name(method)}'

        try:
            if self.args.gray or not self.isatty:
                # Reuse the fallback below when color is disabled or output is no terminal.
                raise ImportError
            import colorama
        except ImportError:
            class _FG():
                # Stand-in for colorama.Fore where every color is the empty string.
                def __getattr__(self, _):
                    return ''
            FG = _FG()
            RS = ''
        else:
            FG = colorama.Fore
            RS = colorama.Style.RESET_ALL

        # The palette is assigned after the try statement rather than in a finally clause:
        # a finally clause also runs when an unexpected exception is propagating, in which
        # case FG and RS are unbound and a NameError would mask the original error.
        c_none = RS
        c_space = FG.LIGHTCYAN_EX
        c_types = FG.LIGHTCYAN_EX
        c_member = FG.LIGHTYELLOW_EX
        c_kwd = FG.LIGHTYELLOW_EX
        c_const = FG.LIGHTRED_EX
        c_string = FG.LIGHTRED_EX
        c_address = FG.LIGHTBLACK_EX
        c_label = RS

        def _color(arg, offset):
            # Format one instruction operand. `offset` is the address of the instruction;
            # `labels` is looked up in the enclosing scope at call time, i.e. it is the
            # label set of the method that is currently being listed.
            if isinstance(arg, (str, JvString)):
                color = c_string
            elif isinstance(arg, (JvClassProperty, JvTypePath)):
                ns, dd, prop = str(arg).partition('::')
                if not dd:
                    return repr(arg)
                ns = ns.split('.')
                ns = '.'.join(F'{c_space}{p}{c_none}' for p in ns)
                return F'{ns}{dd}{c_member}{prop}{c_none}'
            elif isinstance(arg, int) and arg + offset in labels:
                # An integer that points at a known label when taken relative to the
                # instruction is displayed as the absolute target address.
                return F'{c_label}0x{arg + offset:08X}{c_none}'
            elif isinstance(arg, (bool, int, float)):
                color = c_const
            elif isinstance(arg, JvBaseType):
                color = c_kwd
            else:
                return repr(arg)
            return F'{color}{arg!r}{c_none}'

        jc = JvClassFile(data)
        tab = ' '
        namespace = '.'.join(str(jc.this).split('/'))
        opcw = self._OPC_STRLEN
        # Number of methods per path, and the next free index for paths that are not unique.
        path_counter = collections.defaultdict(int)
        path_index = collections.defaultdict(int)

        for method in jc.methods:
            path_counter[_path(method)] += 1
        for method in jc.methods:
            for attribute in method.attributes:
                if attribute.name == 'Code': break
            else:
                self.log_warn(F'no code found for method: {method.name}')
                continue
            code: JvCode = attribute.parse(JvCode)
            with io.StringIO() as display:
                rv, args = _parse_descriptor(method.descriptor, c_none, c_space, c_types, c_kwd)
                args = ', '.join(args)
                print(
                    F'{c_types}{rv}{c_none} {c_space}{namespace}{c_none}'
                    F'::{c_member}{_name(method)}{c_none}({args})', file=display)
                offset = 0
                labels = set()
                addresses = set()

                # First pass: record the start address of every instruction and collect the
                # targets of switch tables and of goto / goto_w instructions.
                for op in code.disassembly:
                    addresses.add(offset)
                    if op.table:
                        labels.update(offset + jmp for jmp in op.table.values())
                    elif op.code in (opc.goto, opc.goto_w):
                        labels.update(offset + arg for arg in op.arguments if isinstance(arg, int))
                    offset += len(op.raw)

                offset = 0
                # Discard targets that do not coincide with the start of an instruction.
                labels = labels & addresses

                # Second pass: emit one line per instruction, plus one per switch table entry.
                for op in code.disassembly:
                    if offset in labels:
                        label = F'{c_label}{offset:08X}{c_none}:'
                    else:
                        label = F'{c_address}{offset:08X}{c_none}:'
                    addr = offset
                    olen = len(op.raw)
                    offset += olen
                    if op.table is None:
                        args = ', '.join(_color(a, addr) for a in op.arguments)
                    else:
                        # Width in bytes of one table entry in the raw instruction: 4 for
                        # tableswitch, 8 for any other instruction that carries a table
                        # (presumably only lookupswitch).
                        ow = 4 if op.code is opc.tableswitch else 8
                        # The default target is stored under the key None and is not a table
                        # entry. The remaining entries are indexed on their own so that the
                        # raw byte slices do not depend on where the None key is located in
                        # the iteration order of the table.
                        cases = [(key, jmp) for key, jmp in op.table.items() if key is not None]
                        # Everything in front of the entries is shown on the instruction line.
                        olen = olen - len(cases) * ow
                        args = F'___default => {c_label}{op.table[None] + addr:#010x}{c_none}'
                        jmps = []
                        for k, (key, jmp) in enumerate(cases):
                            raw = self._hex(op.raw[olen + k * ow: olen + k * ow + ow], ' ')
                            jmps.append(
                                F'{label}{tab}'
                                F'{raw!s:<{opcw + 15}} '
                                F'{c_const}{key:#010x}{c_none} => '
                                F'{c_label}{jmp + addr:#010x}{c_none}')
                        args = '\n'.join((args, *jmps))
                    opch = self._hex(op.raw[:olen], ' ')
                    if len(opch) > 14:
                        # The hex dump does not fit into its column: continue on a new line
                        # so that the mnemonic remains aligned.
                        opch += F'\n{label}{tab}{tab:<15}'
                    print(
                        F'{label}{tab}'
                        F'{opch:<15}'
                        F'{c_kwd}{op.code!r:<{opcw}}{c_none} {args}', file=display)
                path = _path(method)
                if path_counter[path] > 1:
                    k = path_index[path]
                    path_index[path] = k + 1
                    path = F'{path}[{k}]'
                yield UnpackResult(path, display.getvalue().encode(self.codec))

    @classmethod
    def handles(cls, data):
        """
        Return whether `data` begins with the four bytes CA FE BA BE.
        """
        return data[:4] == B'\xCA\xFE\xBA\xBE'

Classes

class jvdasm (*paths, gray=False, path=b'path', regex=False, exact=False, fuzzy=0, drop_path=False, join_path=False, list=False)

Disassembles the JVM bytecode instructions of methods of classes defined in Java class files. The unit is implemented as a path extractor and each path name corresponds to the name of one method defined in the class file.

Expand source code Browse git
class jvdasm(PathExtractorUnit):
    """
    Disassembles the JVM bytecode instructions of methods of classes defined in Java class
    files. The unit is implemented as a path extractor and each path name corresponds to the
    name of one method defined in the class file.
    """
    # Length of the longest opcode name; used as the width of the mnemonic column.
    _OPC_STRLEN = max(len(op.name) for op in opc)

    def _hex(self, bytestring, sep=''):
        """
        Format every item of `bytestring` as two lowercase hex digits, joined by `sep`.
        """
        return sep.join(F'{x:02x}' for x in bytestring)

    def __init__(
        self, *paths,
        gray: PathExtractorUnit.Arg.Switch('-g', help='Disable colored output.') = False,
        **keywords
    ):
        super().__init__(*paths, gray=gray, **keywords)

    def unpack(self, data):
        """
        Yield one `UnpackResult` for each method in the class file `data` that has a `Code`
        attribute. The payload is the textual disassembly listing of the method, encoded
        with `self.codec`. Methods without a `Code` attribute are skipped after a warning is
        logged. When several methods map to the same path, `[k]` is appended to each of
        these paths, where `k` counts the methods with that path in order of appearance.
        """
        def _name(method: JvClassMember):
            name = method.name
            if name == '<init>':
                # Constructors are displayed under the last component of the class path.
                _, _, name = str(jc.this).rpartition('/')
            elif m := re.fullmatch('<(.*?)>', name):
                # NOTE(review): m[0] is the entire match, so the angle brackets are kept and
                # the capture group is unused; confirm whether m[1] was intended.
                name = F'.{m[0]}'
            return name

        def _path(method: JvClassMember):
            return F'{jc.this!s}/{_name(method)}'

        try:
            if self.args.gray or not self.isatty:
                # Reuse the fallback below when color is disabled or output is no terminal.
                raise ImportError
            import colorama
        except ImportError:
            class _FG():
                # Stand-in for colorama.Fore where every color is the empty string.
                def __getattr__(self, _):
                    return ''
            FG = _FG()
            RS = ''
        else:
            FG = colorama.Fore
            RS = colorama.Style.RESET_ALL

        # The palette is assigned after the try statement rather than in a finally clause:
        # a finally clause also runs when an unexpected exception is propagating, in which
        # case FG and RS are unbound and a NameError would mask the original error.
        c_none = RS
        c_space = FG.LIGHTCYAN_EX
        c_types = FG.LIGHTCYAN_EX
        c_member = FG.LIGHTYELLOW_EX
        c_kwd = FG.LIGHTYELLOW_EX
        c_const = FG.LIGHTRED_EX
        c_string = FG.LIGHTRED_EX
        c_address = FG.LIGHTBLACK_EX
        c_label = RS

        def _color(arg, offset):
            # Format one instruction operand. `offset` is the address of the instruction;
            # `labels` is looked up in the enclosing scope at call time, i.e. it is the
            # label set of the method that is currently being listed.
            if isinstance(arg, (str, JvString)):
                color = c_string
            elif isinstance(arg, (JvClassProperty, JvTypePath)):
                ns, dd, prop = str(arg).partition('::')
                if not dd:
                    return repr(arg)
                ns = ns.split('.')
                ns = '.'.join(F'{c_space}{p}{c_none}' for p in ns)
                return F'{ns}{dd}{c_member}{prop}{c_none}'
            elif isinstance(arg, int) and arg + offset in labels:
                # An integer that points at a known label when taken relative to the
                # instruction is displayed as the absolute target address.
                return F'{c_label}0x{arg + offset:08X}{c_none}'
            elif isinstance(arg, (bool, int, float)):
                color = c_const
            elif isinstance(arg, JvBaseType):
                color = c_kwd
            else:
                return repr(arg)
            return F'{color}{arg!r}{c_none}'

        jc = JvClassFile(data)
        tab = ' '
        namespace = '.'.join(str(jc.this).split('/'))
        opcw = self._OPC_STRLEN
        # Number of methods per path, and the next free index for paths that are not unique.
        path_counter = collections.defaultdict(int)
        path_index = collections.defaultdict(int)

        for method in jc.methods:
            path_counter[_path(method)] += 1
        for method in jc.methods:
            for attribute in method.attributes:
                if attribute.name == 'Code': break
            else:
                self.log_warn(F'no code found for method: {method.name}')
                continue
            code: JvCode = attribute.parse(JvCode)
            with io.StringIO() as display:
                rv, args = _parse_descriptor(method.descriptor, c_none, c_space, c_types, c_kwd)
                args = ', '.join(args)
                print(
                    F'{c_types}{rv}{c_none} {c_space}{namespace}{c_none}'
                    F'::{c_member}{_name(method)}{c_none}({args})', file=display)
                offset = 0
                labels = set()
                addresses = set()

                # First pass: record the start address of every instruction and collect the
                # targets of switch tables and of goto / goto_w instructions.
                for op in code.disassembly:
                    addresses.add(offset)
                    if op.table:
                        labels.update(offset + jmp for jmp in op.table.values())
                    elif op.code in (opc.goto, opc.goto_w):
                        labels.update(offset + arg for arg in op.arguments if isinstance(arg, int))
                    offset += len(op.raw)

                offset = 0
                # Discard targets that do not coincide with the start of an instruction.
                labels = labels & addresses

                # Second pass: emit one line per instruction, plus one per switch table entry.
                for op in code.disassembly:
                    if offset in labels:
                        label = F'{c_label}{offset:08X}{c_none}:'
                    else:
                        label = F'{c_address}{offset:08X}{c_none}:'
                    addr = offset
                    olen = len(op.raw)
                    offset += olen
                    if op.table is None:
                        args = ', '.join(_color(a, addr) for a in op.arguments)
                    else:
                        # Width in bytes of one table entry in the raw instruction: 4 for
                        # tableswitch, 8 for any other instruction that carries a table
                        # (presumably only lookupswitch).
                        ow = 4 if op.code is opc.tableswitch else 8
                        # The default target is stored under the key None and is not a table
                        # entry. The remaining entries are indexed on their own so that the
                        # raw byte slices do not depend on where the None key is located in
                        # the iteration order of the table.
                        cases = [(key, jmp) for key, jmp in op.table.items() if key is not None]
                        # Everything in front of the entries is shown on the instruction line.
                        olen = olen - len(cases) * ow
                        args = F'___default => {c_label}{op.table[None] + addr:#010x}{c_none}'
                        jmps = []
                        for k, (key, jmp) in enumerate(cases):
                            raw = self._hex(op.raw[olen + k * ow: olen + k * ow + ow], ' ')
                            jmps.append(
                                F'{label}{tab}'
                                F'{raw!s:<{opcw + 15}} '
                                F'{c_const}{key:#010x}{c_none} => '
                                F'{c_label}{jmp + addr:#010x}{c_none}')
                        args = '\n'.join((args, *jmps))
                    opch = self._hex(op.raw[:olen], ' ')
                    if len(opch) > 14:
                        # The hex dump does not fit into its column: continue on a new line
                        # so that the mnemonic remains aligned.
                        opch += F'\n{label}{tab}{tab:<15}'
                    print(
                        F'{label}{tab}'
                        F'{opch:<15}'
                        F'{c_kwd}{op.code!r:<{opcw}}{c_none} {args}', file=display)
                path = _path(method)
                if path_counter[path] > 1:
                    k = path_index[path]
                    path_index[path] = k + 1
                    path = F'{path}[{k}]'
                yield UnpackResult(path, display.getvalue().encode(self.codec))

    @classmethod
    def handles(cls, data):
        """
        Return whether `data` begins with the four bytes CA FE BA BE.
        """
        return data[:4] == B'\xCA\xFE\xBA\xBE'

Ancestors

Class variables

var required_dependencies
var optional_dependencies

Methods

def unpack(self, data)
Expand source code Browse git
def unpack(self, data):
    """
    Yield one `UnpackResult` for each method in the class file `data` that has a `Code`
    attribute. The payload is the textual disassembly listing of the method, encoded
    with `self.codec`. Methods without a `Code` attribute are skipped after a warning is
    logged. When several methods map to the same path, `[k]` is appended to each of
    these paths, where `k` counts the methods with that path in order of appearance.
    """
    def _name(method: JvClassMember):
        name = method.name
        if name == '<init>':
            # Constructors are displayed under the last component of the class path.
            _, _, name = str(jc.this).rpartition('/')
        elif m := re.fullmatch('<(.*?)>', name):
            # NOTE(review): m[0] is the entire match, so the angle brackets are kept and
            # the capture group is unused; confirm whether m[1] was intended.
            name = F'.{m[0]}'
        return name

    def _path(method: JvClassMember):
        return F'{jc.this!s}/{_name(method)}'

    try:
        if self.args.gray or not self.isatty:
            # Reuse the fallback below when color is disabled or output is no terminal.
            raise ImportError
        import colorama
    except ImportError:
        class _FG():
            # Stand-in for colorama.Fore where every color is the empty string.
            def __getattr__(self, _):
                return ''
        FG = _FG()
        RS = ''
    else:
        FG = colorama.Fore
        RS = colorama.Style.RESET_ALL

    # The palette is assigned after the try statement rather than in a finally clause:
    # a finally clause also runs when an unexpected exception is propagating, in which
    # case FG and RS are unbound and a NameError would mask the original error.
    c_none = RS
    c_space = FG.LIGHTCYAN_EX
    c_types = FG.LIGHTCYAN_EX
    c_member = FG.LIGHTYELLOW_EX
    c_kwd = FG.LIGHTYELLOW_EX
    c_const = FG.LIGHTRED_EX
    c_string = FG.LIGHTRED_EX
    c_address = FG.LIGHTBLACK_EX
    c_label = RS

    def _color(arg, offset):
        # Format one instruction operand. `offset` is the address of the instruction;
        # `labels` is looked up in the enclosing scope at call time, i.e. it is the
        # label set of the method that is currently being listed.
        if isinstance(arg, (str, JvString)):
            color = c_string
        elif isinstance(arg, (JvClassProperty, JvTypePath)):
            ns, dd, prop = str(arg).partition('::')
            if not dd:
                return repr(arg)
            ns = ns.split('.')
            ns = '.'.join(F'{c_space}{p}{c_none}' for p in ns)
            return F'{ns}{dd}{c_member}{prop}{c_none}'
        elif isinstance(arg, int) and arg + offset in labels:
            # An integer that points at a known label when taken relative to the
            # instruction is displayed as the absolute target address.
            return F'{c_label}0x{arg + offset:08X}{c_none}'
        elif isinstance(arg, (bool, int, float)):
            color = c_const
        elif isinstance(arg, JvBaseType):
            color = c_kwd
        else:
            return repr(arg)
        return F'{color}{arg!r}{c_none}'

    jc = JvClassFile(data)
    tab = ' '
    namespace = '.'.join(str(jc.this).split('/'))
    opcw = self._OPC_STRLEN
    # Number of methods per path, and the next free index for paths that are not unique.
    path_counter = collections.defaultdict(int)
    path_index = collections.defaultdict(int)

    for method in jc.methods:
        path_counter[_path(method)] += 1
    for method in jc.methods:
        for attribute in method.attributes:
            if attribute.name == 'Code': break
        else:
            self.log_warn(F'no code found for method: {method.name}')
            continue
        code: JvCode = attribute.parse(JvCode)
        with io.StringIO() as display:
            rv, args = _parse_descriptor(method.descriptor, c_none, c_space, c_types, c_kwd)
            args = ', '.join(args)
            print(
                F'{c_types}{rv}{c_none} {c_space}{namespace}{c_none}'
                F'::{c_member}{_name(method)}{c_none}({args})', file=display)
            offset = 0
            labels = set()
            addresses = set()

            # First pass: record the start address of every instruction and collect the
            # targets of switch tables and of goto / goto_w instructions.
            for op in code.disassembly:
                addresses.add(offset)
                if op.table:
                    labels.update(offset + jmp for jmp in op.table.values())
                elif op.code in (opc.goto, opc.goto_w):
                    labels.update(offset + arg for arg in op.arguments if isinstance(arg, int))
                offset += len(op.raw)

            offset = 0
            # Discard targets that do not coincide with the start of an instruction.
            labels = labels & addresses

            # Second pass: emit one line per instruction, plus one per switch table entry.
            for op in code.disassembly:
                if offset in labels:
                    label = F'{c_label}{offset:08X}{c_none}:'
                else:
                    label = F'{c_address}{offset:08X}{c_none}:'
                addr = offset
                olen = len(op.raw)
                offset += olen
                if op.table is None:
                    args = ', '.join(_color(a, addr) for a in op.arguments)
                else:
                    # Width in bytes of one table entry in the raw instruction: 4 for
                    # tableswitch, 8 for any other instruction that carries a table
                    # (presumably only lookupswitch).
                    ow = 4 if op.code is opc.tableswitch else 8
                    # The default target is stored under the key None and is not a table
                    # entry. The remaining entries are indexed on their own so that the
                    # raw byte slices do not depend on where the None key is located in
                    # the iteration order of the table.
                    cases = [(key, jmp) for key, jmp in op.table.items() if key is not None]
                    # Everything in front of the entries is shown on the instruction line.
                    olen = olen - len(cases) * ow
                    args = F'___default => {c_label}{op.table[None] + addr:#010x}{c_none}'
                    jmps = []
                    for k, (key, jmp) in enumerate(cases):
                        raw = self._hex(op.raw[olen + k * ow: olen + k * ow + ow], ' ')
                        jmps.append(
                            F'{label}{tab}'
                            F'{raw!s:<{opcw + 15}} '
                            F'{c_const}{key:#010x}{c_none} => '
                            F'{c_label}{jmp + addr:#010x}{c_none}')
                    args = '\n'.join((args, *jmps))
                opch = self._hex(op.raw[:olen], ' ')
                if len(opch) > 14:
                    # The hex dump does not fit into its column: continue on a new line
                    # so that the mnemonic remains aligned.
                    opch += F'\n{label}{tab}{tab:<15}'
                print(
                    F'{label}{tab}'
                    F'{opch:<15}'
                    F'{c_kwd}{op.code!r:<{opcw}}{c_none} {args}', file=display)
            path = _path(method)
            if path_counter[path] > 1:
                k = path_index[path]
                path_index[path] = k + 1
                path = F'{path}[{k}]'
            yield UnpackResult(path, display.getvalue().encode(self.codec))

Inherited members