Module `refinery.lib.thirdparty.xlm.wrappers`

Expand source code Browse git

from __future__ import annotations

import re

from fnmatch import fnmatch
from io import BytesIO
from typing import Any
from zipfile import ZipFile

from refinery.lib.thirdparty.xlm.model import (
    Boundsheet,
    Cell,
    RowAttribute,
    XlApplicationInternational,
)

_XL_INTERNATIONAL_DEFAULTS = {
    XlApplicationInternational.xlLeftBracket : '[',
    XlApplicationInternational.xlListSeparator: ',',
    XlApplicationInternational.xlRightBracket : ']',
}

_XLSM_COLOR_TABLE: list[tuple[int, int, int, int]] = [
    (  0,   0,   0,  1), (255, 255, 255,  2), (255,   0,   0,  3), (  0, 255,   0,  4),
    (  0,   0, 255,  5), (255, 255,   0,  6), (255,   0, 255,  7), (  0, 255, 255,  8),
    (128,   0,   0,  9), (  0, 128,   0, 10), (  0,   0, 128, 11), (128, 128,   0, 12),
    (128,   0, 128, 13), (  0, 128, 128, 14), (192, 192, 192, 15), (128, 128, 128, 16),
    (153, 153, 255, 17), (153,  51, 102, 18), (255, 255, 204, 19), (204, 255, 255, 20),
    (102,   0, 102, 21), (255, 128, 128, 22), (  0, 102, 204, 23), (204, 204, 255, 24),
    (  0,   0, 128, 25), (255,   0, 255, 26), (255, 255,   0, 27), (  0, 255, 255, 28),
    (128,   0, 128, 29), (128,   0,   0, 30), (  0, 128, 128, 31), (  0,   0, 255, 32),
    (  0, 204, 255, 33), (204, 255, 255, 34), (204, 255, 204, 35), (255, 255, 153, 36),
    (153, 204, 255, 37), (255, 153, 204, 38), (204, 153, 255, 39), (255, 204, 153, 40),
    ( 51, 102, 255, 41), ( 51, 204, 204, 42), (153, 204,   0, 43), (255, 204,   0, 44),
    (255, 153,   0, 45), (255, 102,   0, 46), (102, 102, 153, 47), (150, 150, 150, 48),
    (  0,  51, 102, 49), ( 51, 153, 102, 50), (  0,  51,   0, 51), ( 51,  51,   0, 52),
    (153,  51,   0, 53), (153,  51, 102, 54), ( 51,  51, 153, 55), ( 51,  51,  51, 56),
]

CellInfoResult = tuple[Any, bool, bool]


class XLSWrapper:
    def __init__(self, data: bytes | bytearray):
        import xlrd2
        self._xlrd2 = xlrd2
        self.xls_workbook = xlrd2.open_workbook(file_contents=bytes(data), formatting_info=True)
        self._workbook_name = 'workbook.xls'
        self._macrosheets: dict[str, Boundsheet] | None = None
        self._worksheets: dict[str, Boundsheet] | None = None
        self._defined_names: dict[str, Any] | None = None
        control_chars = ''.join(map(chr, range(0, 32)))
        control_chars += ''.join(map(chr, range(127, 160)))
        control_chars += '\ufefe\uffff\ufeff\ufffe\uffef\ufff0\ufff1\ufff6\ufefd\udddd\ufffd'
        self._control_char_re = re.compile(f'[{re.escape(control_chars)}]')

    oNUM = 2
    oSTRG = 1
    oREF = -1
    oARR = 6

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def get_defined_names(self) -> dict[str, Any]:
        if self._defined_names is None:
            self._defined_names = {}
            for name_obj, cells in self.xls_workbook.name_map.items():
                name = name_obj.lower()
                index = 1 if len(cells) > 1 else 0
                filtered_name = name.lower()
                if name != filtered_name:
                    if filtered_name in self._defined_names:
                        filtered_name = filtered_name + str(index)
                    if cells[0].result is not None:
                        self._defined_names[filtered_name] = cells[0].result.text

                if name in self._defined_names:
                    name = name + str(index)
                if cells[0].result is not None:
                    cell_location = cells[0].result.text
                    if cells[0].result.kind == self.oNUM:
                        self._defined_names[name] = cells[0].result.value
                    elif cells[0].result.kind == self.oSTRG:
                        self._defined_names[name] = cells[0].result.text
                    elif cells[0].result.kind == self.oARR:
                        self._defined_names[name] = cells[0].result.value
                    elif cells[0].result.kind == self.oREF:
                        if '$' in cell_location:
                            self._defined_names[name] = cells[0].result.text
                        else:
                            curr_cell = cells[0].result
                            if 'auto_open' in name:
                                coords = curr_cell.value[0].coords
                                r = int(coords[3])
                                c = int(coords[5])
                                sheet_name = curr_cell.text.split('!')[0].replace("'", '')
                                cell_ref = f'${Cell.convert_to_column_name(c)}${r}'
                                self._defined_names[name] = f'{sheet_name}!{cell_ref}'
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: Any = []
        name = name.lower().replace('[', '')
        if full_match:
            if name in self.get_defined_names():
                result = self._defined_names[name]  # type: ignore
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name):
                    result.append((defined_name, cell_address))
            if isinstance(result, list) and len(result) == 0:
                for defined_name, cell_address in self.get_defined_names().items():
                    lastidx = 0
                    match = True
                    for c in name:
                        idx = defined_name.find(c, lastidx)
                        if idx == -1:
                            match = False
                            break
                        lastidx = idx
                    if match:
                        result.append((defined_name, cell_address))
        return result

    def _load_cells(self, boundsheet: Boundsheet, xls_sheet: Any) -> None:
        try:
            for xls_cell in xls_sheet.get_used_cells():
                cell = Cell()
                cell.sheet = boundsheet
                if xls_cell.formula is not None and len(xls_cell.formula) > 0:
                    cell.formula = f'={xls_cell.formula}'
                cell.value = xls_cell.value
                cell.row = xls_cell.row + 1
                cell.column = Cell.convert_to_column_name(xls_cell.column + 1)
                if cell.value is not None or cell.formula is not None:
                    boundsheet.add_cell(cell)
        except Exception:
            pass

    def get_macrosheets(self) -> dict[str, Boundsheet]:
        if self._macrosheets is None:
            import xlrd2
            self._macrosheets = {}
            for sheet in self.xls_workbook.sheets():
                if sheet.boundsheet_type == xlrd2.biffh.XL_MACROSHEET:
                    macrosheet = Boundsheet(sheet.name, 'Macrosheet')
                    self._load_cells(macrosheet, sheet)
                    self._macrosheets[sheet.name] = macrosheet
        return self._macrosheets

    def get_worksheets(self) -> dict[str, Boundsheet]:
        if self._worksheets is None:
            import xlrd2
            self._worksheets = {}
            for sheet in self.xls_workbook.sheets():
                if sheet.boundsheet_type == xlrd2.biffh.XL_WORKSHEET:
                    worksheet = Boundsheet(sheet.name, 'Worksheet')
                    self._load_cells(worksheet, sheet)
                    self._worksheets[sheet.name] = worksheet
        return self._worksheets

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        sheet = self.xls_workbook.sheet_by_name(sheet_name)
        irow = int(row) - 1
        column = Cell.convert_to_column_index(col) - 1
        tid = int(float(info_type_id))
        data: Any = None
        not_exist = False
        not_implemented = False

        if tid == 5:
            data = sheet.cell(irow, column).value
        elif tid == 17:
            if irow in sheet.rowinfo_map:
                data = sheet.rowinfo_map[irow].height
            else:
                data = sheet.default_row_height
            data = round(Cell.convert_twip_to_point(data) * 4) / 4
        else:
            if (irow, column) in sheet.used_cells:
                cell = sheet.cell(irow, column)
                if cell.xf_index is not None and cell.xf_index < len(self.xls_workbook.xf_list):
                    fmt = self.xls_workbook.xf_list[cell.xf_index]
                    font = self.xls_workbook.font_list[fmt.font_index]
                else:
                    normal_style = self.xls_workbook.style_name_map['Normal'][1]
                    fmt = self.xls_workbook.xf_list[normal_style]
                    font = self.xls_workbook.font_list[fmt.font_index]
            else:
                normal_style = self.xls_workbook.style_name_map['Normal'][1]
                fmt = self.xls_workbook.xf_list[normal_style]
                font = self.xls_workbook.font_list[fmt.font_index]

            if tid == 8:
                data = fmt.alignment.hor_align + 1
            elif tid == 19:
                data = Cell.convert_twip_to_point(font.height)
            elif tid == 24:
                data = font.colour_index - 7 if font.colour_index > 7 else font.colour_index
            elif tid == 38:
                data = fmt.background.pattern_colour_index - 7 if font.colour_index > 7 else font.colour_index
            elif tid == 50:
                data = fmt.alignment.vert_align + 1
            else:
                not_implemented = True

        return data, not_exist, not_implemented


class XLSBWrapper:
    def __init__(self, data: bytes | bytearray):
        from pyxlsb2 import open_workbook
        self._xlsb_workbook = open_workbook(BytesIO(data))
        self._workbook_name = 'workbook.xlsb'
        self._macrosheets: dict[str, Boundsheet] | None = None
        self._worksheets: dict[str, Boundsheet] | None = None
        self._defined_names: dict[str, Any] | None = None

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def get_defined_names(self) -> dict[str, Any]:
        if self._defined_names is None:
            names: dict[str, Any] = {}
            for key, val in self._xlsb_workbook.defined_names.items():
                names[key.lower()] = key.lower(), val.formula
            self._defined_names = names
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: list[Any] = []
        if full_match:
            if name.lower() in self.get_defined_names():
                result.append(self.get_defined_names()[name.lower()])
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name.lower()):
                    result.append(cell_address)
        return result

    def _load_cells(self, boundsheet: Boundsheet) -> None:
        from pyxlsb2.formula import Formula
        row_cnt = 0
        with self._xlsb_workbook.get_sheet_by_name(boundsheet.name) as sheet:
            for row in sheet:
                if row_cnt > 1048576:
                    break
                row_cnt += 1
                column_cnt = 0
                for cell in row:
                    if column_cnt > 16384:
                        break
                    tmp_cell = Cell()
                    tmp_cell.row = cell.row_num + 1
                    tmp_cell.column = Cell.convert_to_column_name(cell.col + 1)
                    tmp_cell.value = cell.value
                    tmp_cell.sheet = boundsheet
                    formula_str = Formula.parse(cell.formula)
                    if formula_str._tokens:
                        try:
                            tmp_cell.formula = f'={formula_str.stringify(self._xlsb_workbook)}'
                        except NotImplementedError:
                            pass
                        except Exception:
                            pass
                    if tmp_cell.value is not None or tmp_cell.formula is not None:
                        boundsheet.cells[tmp_cell.get_local_address()] = tmp_cell
                    column_cnt += 1

    def get_macrosheets(self) -> dict[str, Boundsheet]:
        if self._macrosheets is None:
            self._macrosheets = {}
            for xlsb_sheet in self._xlsb_workbook.sheets:
                if xlsb_sheet.type == 'macrosheet':
                    macrosheet = Boundsheet(xlsb_sheet.name, 'macrosheet')
                    self._load_cells(macrosheet)
                    self._macrosheets[macrosheet.name] = macrosheet
        return self._macrosheets

    def get_worksheets(self) -> dict[str, Boundsheet]:
        if self._worksheets is None:
            self._worksheets = {}
            for xlsb_sheet in self._xlsb_workbook.sheets:
                if xlsb_sheet.type == 'worksheet':
                    worksheet = Boundsheet(xlsb_sheet.name, 'worksheet')
                    self._load_cells(worksheet)
                    self._worksheets[worksheet.name] = worksheet
        return self._worksheets

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        return None, False, True


_OOXML_NS = {
    's' : 'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
    'r' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
    'xm': 'http://schemas.microsoft.com/office/excel/2006/main',
    'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
    'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',
    'pr': 'http://schemas.openxmlformats.org/package/2006/relationships',
}

_MACRO_REL_TYPES = {
    'http://schemas.microsoft.com/office/2006/relationships/xlMacrosheet',
    'http://schemas.microsoft.com/office/2006/relationships/xlIntlMacrosheet',
}
_WORKSHEET_REL_TYPE = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'


def _etfind(root: Any, path: str, ns: dict[str, str] = _OOXML_NS) -> Any:
    return root.find(path, ns)


def _etfindall(root: Any, path: str, ns: dict[str, str] = _OOXML_NS) -> list[Any]:
    return root.findall(path, ns)


class XLSMWrapper:
    def __init__(self, data: bytes | bytearray):
        self._data = BytesIO(data)
        self._workbook_name = 'workbook.xlsm'
        self._content_types: Any = None
        self._workbook_xml: Any = None
        self._workbook_rels: dict[str, tuple[str, str]] | None = None
        self._defined_names: dict[str, str] | None = None
        self._macrosheets: dict[str, Boundsheet] | None = None
        self._worksheets: dict[str, Boundsheet] | None = None
        self._shared_strings: list[str] | None = None
        self._style_xml: Any = None
        self._color_map: dict[tuple[int, int, int], int] | None = None

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def _get_zip_files(self, patterns: list[str] | None = None) -> dict[str, bytes]:
        result: dict[str, bytes] = {}
        if not patterns:
            patterns = ['*']
        with ZipFile(self._data) as zf:
            for name in zf.namelist():
                for pat in patterns:
                    if name == pat or fnmatch(name, pat):
                        result[name] = zf.read(name)
            if not result:
                for name in zf.namelist():
                    for pat in patterns:
                        alt_pat = pat.replace('/', '\\')
                        if name == alt_pat or fnmatch(name, alt_pat):
                            result[name.replace('\\', '/')] = zf.read(name)
        return result

    def _parse_xml(self, path: str, ignore_pattern: str | None = None) -> Any:
        try:
            from defusedxml.ElementTree import fromstring
        except ImportError:
            from xml.etree.ElementTree import fromstring
        if path.startswith('/'):
            path = path[1:]
        files = self._get_zip_files([path])
        if len(files) != 1:
            return None
        content = next(iter(files.values())).decode('utf-8')
        if ignore_pattern:
            content = re.sub(ignore_pattern, '', content)
        return fromstring(content)

    def _get_workbook_path(self) -> tuple[str, str, str]:
        wb_path = 'xl/workbook.xml'
        ct = self._get_content_types()
        if ct is not None:
            for override in _etfindall(ct, './/ct:Override', _OOXML_NS) + _etfindall(ct, './/{%s}Override' % _OOXML_NS['ct']):
                ctype = override.get('ContentType', '')
                if 'sheet.main+xml' in ctype:
                    wb_path = override.get('PartName', wb_path).lstrip('/')
                    break
        wb_path = wb_path.lstrip('/')
        if '/' in wb_path:
            base_dir = wb_path[:wb_path.index('/')]
            name = wb_path[wb_path.index('/') + 1:]
        else:
            base_dir = ''
            name = wb_path
        return wb_path, base_dir, name

    def _get_content_types(self) -> Any:
        if self._content_types is None:
            self._content_types = self._parse_xml('[Content_Types].xml')
        return self._content_types

    def _get_workbook(self) -> Any:
        if self._workbook_xml is None:
            wb_path, _, _ = self._get_workbook_path()
            self._workbook_xml = self._parse_xml(wb_path)
        return self._workbook_xml

    def _get_rels(self) -> dict[str, tuple[str, str]]:
        if self._workbook_rels is None:
            self._workbook_rels = {}
            _, base_dir, name = self._get_workbook_path()
            rels_path = f'{base_dir}/_rels/{name}.rels'
            rels_xml = self._parse_xml(rels_path)
            if rels_xml is not None:
                ns = _OOXML_NS['pr']
                for rel in list(rels_xml):
                    tag = rel.tag
                    if tag == f'{{{ns}}}Relationship' or tag == 'Relationship':
                        rid = rel.get('Id', '')
                        target = rel.get('Target', '')
                        rtype = rel.get('Type', '')
                        self._workbook_rels[rid] = (target, rtype)
        return self._workbook_rels

    def _get_sheet_info(self, rid: str) -> tuple[str | None, str | None]:
        rels = self._get_rels()
        if rid not in rels:
            return None, None
        target, rtype = rels[rid]
        if rtype in _MACRO_REL_TYPES:
            return 'Macrosheet', target
        elif rtype == _WORKSHEET_REL_TYPE:
            return 'Worksheet', target
        return 'Unknown', target

    def get_defined_names(self) -> dict[str, str]:
        if self._defined_names is None:
            self._defined_names = {}
            wb = self._get_workbook()
            if wb is None:
                return self._defined_names
            sn = _OOXML_NS['s']
            dn_container = _etfind(wb, f'{{{sn}}}definedNames')
            if dn_container is None:
                return self._defined_names
            for dn in list(dn_container):
                name_attr = dn.get('name', '')
                name_key = name_attr.replace('_xlnm.', '').lower()
                cdata = dn.text or ''
                self._defined_names[name_key] = cdata
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: list[Any] = []
        name = name.lower()
        if full_match:
            if name in self.get_defined_names():
                result = self._defined_names[name]  # type: ignore
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name):
                    result.append((defined_name, cell_address))
        return result

    def _get_shared_strings(self) -> list[str] | None:
        if self._shared_strings is None:
            _, base_dir, _ = self._get_workbook_path()
            ss_xml = self._parse_xml(f'{base_dir}/sharedStrings.xml')
            if ss_xml is not None:
                sn = _OOXML_NS['s']
                self._shared_strings = []
                for si in _etfindall(ss_xml, f'{{{sn}}}si'):
                    t_elem = si.find(f'{{{sn}}}t')
                    if t_elem is not None and t_elem.text is not None:
                        self._shared_strings.append(t_elem.text)
                    else:
                        r_elems = si.findall(f'{{{sn}}}r')
                        if r_elems:
                            t_in_r = r_elems[0].find(f'{{{sn}}}t')
                            if t_in_r is not None and t_in_r.text is not None:
                                self._shared_strings.append(t_in_r.text)
                            else:
                                self._shared_strings.append('')
                        else:
                            self._shared_strings.append('')
        return self._shared_strings

    def _get_sheet_infos(self, types: list[str]) -> list[dict[str, Any]]:
        result: list[dict[str, Any]] = []
        wb = self._get_workbook()
        if wb is None:
            return result
        sn = _OOXML_NS['s']
        sheets_el = _etfind(wb, f'{{{sn}}}sheets')
        if sheets_el is None:
            return result
        _, base_dir, _ = self._get_workbook_path()
        seen: set[str] = set()
        for sheet_el in list(sheets_el):
            rid = sheet_el.get(f'{{{_OOXML_NS["r"]}}}id', '')
            name = sheet_el.get('name', '')
            sheet_type, rel_path = self._get_sheet_info(rid)
            if rel_path is not None and sheet_type in types and name not in seen:
                path = f'{base_dir}/{rel_path}'
                sheet = Boundsheet(name, sheet_type)
                sheet_xml = self._parse_xml(path, ignore_pattern=r'<c[^>]+/>')
                result.append({'sheet': sheet, 'sheet_path': path, 'sheet_xml': sheet_xml})
                seen.add(name)
        return result

    def _load_macro_cells(
        self, sheet: Boundsheet, sheet_xml: Any, macrosheet_names: list[str],
    ) -> None:
        strings = self._get_shared_strings()
        sn = _OOXML_NS['s']
        xm_ns = _OOXML_NS['xm']
        root = sheet_xml
        sheet_data = (
            _etfind(root, f'{{{xm_ns}}}sheetData')
            or _etfind(root, f'{{{sn}}}sheetData')
        )
        if sheet_data is None:
            return
        for row_el in list(sheet_data):
            row_r = row_el.get('r', '')
            row_attribs: dict[RowAttribute, str] = {}
            ht = row_el.get('ht')
            if ht is not None:
                row_attribs[RowAttribute.Height] = ht
            spans = row_el.get('spans')
            if spans is not None:
                row_attribs[RowAttribute.Spans] = spans
            if row_attribs:
                sheet.row_attributes[row_r] = row_attribs  # type: ignore
            for cell_el in list(row_el):
                tag = cell_el.tag
                if not (tag.endswith('}c') or tag == 'c'):
                    continue
                formula_text: str | None = None
                f_el = cell_el.find(f'{{{sn}}}f') or cell_el.find(f'{{{xm_ns}}}f')
                if f_el is not None:
                    if f_el.get('bx') == '1':
                        text = f_el.text or ''
                        if text:
                            eq_pos = text.find('=')
                            if eq_pos > 0:
                                formula_text = f'=SET.NAME("{text[:eq_pos]}",{text[eq_pos + 1:]})'
                    else:
                        if f_el.text:
                            formula_text = f'={f_el.text}'
                if formula_text:
                    for ms_name in macrosheet_names:
                        if f'{ms_name.lower()}!' in formula_text.lower():
                            formula_text = re.sub(
                                f'{re.escape(ms_name)}!',
                                f"'{ms_name}'!",
                                formula_text,
                                flags=re.IGNORECASE,
                            )
                value_text: str | None = None
                is_string = cell_el.get('t') == 's'
                cached_str = cell_el.get('t') == 'str'
                v_el = cell_el.find(f'{{{sn}}}v') or cell_el.find(f'{{{xm_ns}}}v')
                if v_el is not None and v_el.text is not None:
                    value_text = v_el.text
                    if is_string and strings is not None:
                        try:
                            value_text = strings[int(value_text)]
                        except (ValueError, IndexError):
                            pass
                location = cell_el.get('r', '')
                if formula_text or value_text:
                    cell = Cell()
                    _, cell.column, row_str = Cell.parse_cell_addr(location)
                    if row_str is not None:
                        cell.row = int(row_str)
                    cell.sheet = sheet
                    if not cached_str:
                        cell.formula = formula_text
                    cell.value = value_text
                    sheet.cells[location] = cell
                    for attr_name in cell_el.attrib:
                        if attr_name != 'r':
                            cell.attributes[attr_name] = cell_el.get(attr_name)

    def _load_worksheet_cells(self, sheet: Boundsheet, sheet_xml: Any) -> None:
        strings = self._get_shared_strings()
        sn = _OOXML_NS['s']
        root = sheet_xml
        sheet_data = _etfind(root, f'{{{sn}}}sheetData')
        if sheet_data is None:
            return
        for row_el in list(sheet_data):
            row_r = row_el.get('r', '')
            row_attribs: dict[RowAttribute, str] = {}
            ht = row_el.get('ht')
            if ht is not None:
                row_attribs[RowAttribute.Height] = ht
            spans = row_el.get('spans')
            if spans is not None:
                row_attribs[RowAttribute.Spans] = spans
            if row_attribs:
                sheet.row_attributes[row_r] = row_attribs  # type: ignore
            for cell_el in list(row_el):
                tag = cell_el.tag
                if not (tag.endswith('}c') or tag == 'c'):
                    continue
                formula_text: str | None = None
                f_el = cell_el.find(f'{{{sn}}}f')
                if f_el is not None and f_el.text:
                    formula_text = f'={f_el.text}'
                value_text: str | None = None
                is_string = cell_el.get('t') == 's'
                v_el = cell_el.find(f'{{{sn}}}v')
                if v_el is not None and v_el.text is not None:
                    value_text = v_el.text
                    if is_string and strings is not None:
                        try:
                            value_text = strings[int(value_text)]
                        except (ValueError, IndexError):
                            pass
                location = cell_el.get('r', '')
                cell = Cell()
                _, cell.column, row_str = Cell.parse_cell_addr(location)
                if row_str is not None:
                    cell.row = int(row_str)
                cell.sheet = sheet
                cell.formula = formula_text
                cell.value = value_text
                sheet.cells[location] = cell
                for attr_name in cell_el.attrib:
                    if attr_name != 'r':
                        cell.attributes[attr_name] = cell_el.get(attr_name)

    def get_macrosheets(self) -> dict[str, Boundsheet]:
        if self._macrosheets is None:
            self._macrosheets = {}
            infos = self._get_sheet_infos(['Macrosheet'])
            macrosheet_names = [info['sheet'].name for info in infos]
            sn = _OOXML_NS['s']
            xm_ns = _OOXML_NS['xm']
            for info in infos:
                if info['sheet_xml'] is not None:
                    self._load_macro_cells(info['sheet'], info['sheet_xml'], macrosheet_names)
                    root = info['sheet_xml']
                    fmt_pr = (
                        _etfind(root, f'{{{xm_ns}}}sheetFormatPr')
                        or _etfind(root, f'{{{sn}}}sheetFormatPr')
                    )
                    if fmt_pr is not None:
                        info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
                self._macrosheets[info['sheet'].name] = info['sheet']
        return self._macrosheets

    def get_worksheets(self) -> dict[str, Boundsheet]:
        if self._worksheets is None:
            self._worksheets = {}
            infos = self._get_sheet_infos(['Worksheet'])
            sn = _OOXML_NS['s']
            for info in infos:
                if info['sheet_xml'] is not None:
                    self._load_worksheet_cells(info['sheet'], info['sheet_xml'])
                    root = info['sheet_xml']
                    fmt_pr = _etfind(root, f'{{{sn}}}sheetFormatPr')
                    if fmt_pr is not None:
                        info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
                self._worksheets[info['sheet'].name] = info['sheet']
        return self._worksheets

    def _get_style(self) -> Any:
        if self._style_xml is None:
            _, base_dir, _ = self._get_workbook_path()
            rels = self._get_rels()
            style_target = None
            for _, (target, rtype) in rels.items():
                if 'styles' in rtype:
                    style_target = target
                    break
            if style_target:
                self._style_xml = self._parse_xml(f'{base_dir}/{style_target}')
        return self._style_xml

    def _get_color_index(self, rgba_str: str) -> int | None:
        r = int(rgba_str[2:4], 16)
        g = int(rgba_str[4:6], 16)
        b = int(rgba_str[6:8], 16)
        if self._color_map is None:
            self._color_map = {}
            for cr, cg, cb, idx in _XLSM_COLOR_TABLE:
                if (cr, cg, cb) not in self._color_map:
                    self._color_map[(cr, cg, cb)] = idx
        return self._color_map.get((r, g, b))

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        data: Any = None
        not_exist = True
        not_implemented = False

        ms = self.get_macrosheets()
        if sheet_name not in ms:
            return data, not_exist, not_implemented
        sheet = ms[sheet_name]
        cell_addr = f'{col}{row}'
        tid = int(float(info_type_id))
        sn = _OOXML_NS['s']

        if tid == 17:
            style_xml = self._get_style()
            if row in sheet.row_attributes and RowAttribute.Height in sheet.row_attributes.get(row, {}):
                not_exist = False
                data = sheet.row_attributes[row][RowAttribute.Height]
            elif sheet.default_height is not None:
                data = sheet.default_height
            if data is not None:
                data = round(float(data) * 4) / 4
        else:
            style_xml = self._get_style()
            if style_xml is None:
                return data, not_exist, True
            not_exist = False
            cell_format = None
            font = None
            xfs = _etfind(style_xml, f'{{{sn}}}cellXfs')
            fonts_el = _etfind(style_xml, f'{{{sn}}}fonts')
            styles_el = _etfind(style_xml, f'{{{sn}}}cellStyles')
            style_xfs = _etfind(style_xml, f'{{{sn}}}cellStyleXfs')

            if cell_addr in sheet.cells:
                cell = sheet.cells[cell_addr]
                if 's' in cell.attributes and xfs is not None:
                    index = int(cell.attributes['s'])
                    xf_list = _etfindall(xfs, f'{{{sn}}}xf')
                    if index < len(xf_list):
                        cell_format = xf_list[index]
                        font_id_str = cell_format.get('fontId')
                        if font_id_str is not None and fonts_el is not None:
                            font_index = int(font_id_str)
                            font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                            if font_index < len(font_list):
                                font = font_list[font_index]

            if cell_format is None and styles_el is not None and style_xfs is not None:
                for cs in _etfindall(styles_el, f'{{{sn}}}cellStyle'):
                    if cs.get('name') == 'Normal':
                        xf_id = int(cs.get('xfId', '0'))
                        xf_list = _etfindall(style_xfs, f'{{{sn}}}xf')
                        if xf_id < len(xf_list):
                            cell_format = xf_list[xf_id]
                            font_id_str = cell_format.get('fontId')
                            if font_id_str is not None and fonts_el is not None:
                                font_index = int(font_id_str)
                                font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                                if font_index < len(font_list):
                                    font = font_list[font_index]
                        break

            if tid == 8:
                if cell_format is not None:
                    align = _etfind(cell_format, f'{{{sn}}}alignment')
                    if align is not None:
                        h_map = {
                            'general': 1, 'left': 2, 'center': 3, 'right': 4,
                            'fill': 5, 'justify': 6, 'centercontinuous': 7, 'distributed': 8,
                        }
                        data = h_map.get(align.get('horizontal', 'general').lower(), 1)
                    else:
                        data = 1
            elif tid == 19:
                if font is not None:
                    sz = _etfind(font, f'{{{sn}}}sz')
                    if sz is not None:
                        data = float(sz.get('val', '11'))
            elif tid == 24:
                if font is not None:
                    color_el = _etfind(font, f'{{{sn}}}color')
                    if color_el is not None:
                        rgb = color_el.get('rgb')
                        if rgb:
                            data = self._get_color_index(rgb)
                        else:
                            data = 1
                    else:
                        data = 1
            elif tid == 38:
                if cell_format is not None:
                    fill_id_str = cell_format.get('fillId')
                    if fill_id_str is not None:
                        fills_el = _etfind(style_xml, f'{{{sn}}}fills')
                        if fills_el is not None:
                            fill_list = _etfindall(fills_el, f'{{{sn}}}fill')
                            fill_id = int(fill_id_str)
                            if fill_id < len(fill_list):
                                pf = _etfind(fill_list[fill_id], f'{{{sn}}}patternFill')
                                if pf is not None:
                                    fg = _etfind(pf, f'{{{sn}}}fgColor')
                                    if fg is not None:
                                        rgb = fg.get('rgb')
                                        if rgb:
                                            data = self._get_color_index(rgb)
                                        else:
                                            data = 0
                                    else:
                                        data = 0
            elif tid == 50:
                v_map = {'top': 1, 'center': 2, 'bottom': 3, 'justify': 4, 'distributed': 5}
                if cell_format is not None:
                    align = _etfind(cell_format, f'{{{sn}}}alignment')
                    if align is not None:
                        data = v_map.get(align.get('vertical', 'bottom').lower(), 3)
                    else:
                        data = 3
                else:
                    data = 3
            else:
                not_implemented = True

        return data, not_exist, not_implemented

Classes

class XLSWrapper (data)

Expand source code Browse git

class XLSWrapper:
    def __init__(self, data: bytes | bytearray):
        import xlrd2
        self._xlrd2 = xlrd2
        self.xls_workbook = xlrd2.open_workbook(file_contents=bytes(data), formatting_info=True)
        self._workbook_name = 'workbook.xls'
        self._macrosheets: dict[str, Boundsheet] | None = None
        self._worksheets: dict[str, Boundsheet] | None = None
        self._defined_names: dict[str, Any] | None = None
        control_chars = ''.join(map(chr, range(0, 32)))
        control_chars += ''.join(map(chr, range(127, 160)))
        control_chars += '\ufefe\uffff\ufeff\ufffe\uffef\ufff0\ufff1\ufff6\ufefd\udddd\ufffd'
        self._control_char_re = re.compile(f'[{re.escape(control_chars)}]')

    oNUM = 2
    oSTRG = 1
    oREF = -1
    oARR = 6

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def get_defined_names(self) -> dict[str, Any]:
        if self._defined_names is None:
            self._defined_names = {}
            for name_obj, cells in self.xls_workbook.name_map.items():
                name = name_obj.lower()
                index = 1 if len(cells) > 1 else 0
                filtered_name = name.lower()
                if name != filtered_name:
                    if filtered_name in self._defined_names:
                        filtered_name = filtered_name + str(index)
                    if cells[0].result is not None:
                        self._defined_names[filtered_name] = cells[0].result.text

                if name in self._defined_names:
                    name = name + str(index)
                if cells[0].result is not None:
                    cell_location = cells[0].result.text
                    if cells[0].result.kind == self.oNUM:
                        self._defined_names[name] = cells[0].result.value
                    elif cells[0].result.kind == self.oSTRG:
                        self._defined_names[name] = cells[0].result.text
                    elif cells[0].result.kind == self.oARR:
                        self._defined_names[name] = cells[0].result.value
                    elif cells[0].result.kind == self.oREF:
                        if '$' in cell_location:
                            self._defined_names[name] = cells[0].result.text
                        else:
                            curr_cell = cells[0].result
                            if 'auto_open' in name:
                                coords = curr_cell.value[0].coords
                                r = int(coords[3])
                                c = int(coords[5])
                                sheet_name = curr_cell.text.split('!')[0].replace("'", '')
                                cell_ref = f'${Cell.convert_to_column_name(c)}${r}'
                                self._defined_names[name] = f'{sheet_name}!{cell_ref}'
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: Any = []
        name = name.lower().replace('[', '')
        if full_match:
            if name in self.get_defined_names():
                result = self._defined_names[name]  # type: ignore
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name):
                    result.append((defined_name, cell_address))
            if isinstance(result, list) and len(result) == 0:
                for defined_name, cell_address in self.get_defined_names().items():
                    lastidx = 0
                    match = True
                    for c in name:
                        idx = defined_name.find(c, lastidx)
                        if idx == -1:
                            match = False
                            break
                        lastidx = idx
                    if match:
                        result.append((defined_name, cell_address))
        return result

    def _load_cells(self, boundsheet: Boundsheet, xls_sheet: Any) -> None:
        try:
            for xls_cell in xls_sheet.get_used_cells():
                cell = Cell()
                cell.sheet = boundsheet
                if xls_cell.formula is not None and len(xls_cell.formula) > 0:
                    cell.formula = f'={xls_cell.formula}'
                cell.value = xls_cell.value
                cell.row = xls_cell.row + 1
                cell.column = Cell.convert_to_column_name(xls_cell.column + 1)
                if cell.value is not None or cell.formula is not None:
                    boundsheet.add_cell(cell)
        except Exception:
            pass

    def get_macrosheets(self) -> dict[str, Boundsheet]:
        if self._macrosheets is None:
            import xlrd2
            self._macrosheets = {}
            for sheet in self.xls_workbook.sheets():
                if sheet.boundsheet_type == xlrd2.biffh.XL_MACROSHEET:
                    macrosheet = Boundsheet(sheet.name, 'Macrosheet')
                    self._load_cells(macrosheet, sheet)
                    self._macrosheets[sheet.name] = macrosheet
        return self._macrosheets

    def get_worksheets(self) -> dict[str, Boundsheet]:
        if self._worksheets is None:
            import xlrd2
            self._worksheets = {}
            for sheet in self.xls_workbook.sheets():
                if sheet.boundsheet_type == xlrd2.biffh.XL_WORKSHEET:
                    worksheet = Boundsheet(sheet.name, 'Worksheet')
                    self._load_cells(worksheet, sheet)
                    self._worksheets[sheet.name] = worksheet
        return self._worksheets

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        sheet = self.xls_workbook.sheet_by_name(sheet_name)
        irow = int(row) - 1
        column = Cell.convert_to_column_index(col) - 1
        tid = int(float(info_type_id))
        data: Any = None
        not_exist = False
        not_implemented = False

        if tid == 5:
            data = sheet.cell(irow, column).value
        elif tid == 17:
            if irow in sheet.rowinfo_map:
                data = sheet.rowinfo_map[irow].height
            else:
                data = sheet.default_row_height
            data = round(Cell.convert_twip_to_point(data) * 4) / 4
        else:
            if (irow, column) in sheet.used_cells:
                cell = sheet.cell(irow, column)
                if cell.xf_index is not None and cell.xf_index < len(self.xls_workbook.xf_list):
                    fmt = self.xls_workbook.xf_list[cell.xf_index]
                    font = self.xls_workbook.font_list[fmt.font_index]
                else:
                    normal_style = self.xls_workbook.style_name_map['Normal'][1]
                    fmt = self.xls_workbook.xf_list[normal_style]
                    font = self.xls_workbook.font_list[fmt.font_index]
            else:
                normal_style = self.xls_workbook.style_name_map['Normal'][1]
                fmt = self.xls_workbook.xf_list[normal_style]
                font = self.xls_workbook.font_list[fmt.font_index]

            if tid == 8:
                data = fmt.alignment.hor_align + 1
            elif tid == 19:
                data = Cell.convert_twip_to_point(font.height)
            elif tid == 24:
                data = font.colour_index - 7 if font.colour_index > 7 else font.colour_index
            elif tid == 38:
                data = fmt.background.pattern_colour_index - 7 if font.colour_index > 7 else font.colour_index
            elif tid == 50:
                data = fmt.alignment.vert_align + 1
            else:
                not_implemented = True

        return data, not_exist, not_implemented

Class variables

var oNUM: The type of the None singleton.
var oSTRG: The type of the None singleton.
var oREF: The type of the None singleton.
var oARR: The type of the None singleton.

Methods

def get_xl_international_char(self, flag_name)

Expand source code Browse git

def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
    return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

def get_workbook_name(self)

Expand source code Browse git

def get_workbook_name(self) -> str:
    return self._workbook_name

def get_defined_names(self)

Expand source code Browse git

def get_defined_names(self) -> dict[str, Any]:
    if self._defined_names is None:
        self._defined_names = {}
        for name_obj, cells in self.xls_workbook.name_map.items():
            name = name_obj.lower()
            index = 1 if len(cells) > 1 else 0
            filtered_name = name.lower()
            if name != filtered_name:
                if filtered_name in self._defined_names:
                    filtered_name = filtered_name + str(index)
                if cells[0].result is not None:
                    self._defined_names[filtered_name] = cells[0].result.text

            if name in self._defined_names:
                name = name + str(index)
            if cells[0].result is not None:
                cell_location = cells[0].result.text
                if cells[0].result.kind == self.oNUM:
                    self._defined_names[name] = cells[0].result.value
                elif cells[0].result.kind == self.oSTRG:
                    self._defined_names[name] = cells[0].result.text
                elif cells[0].result.kind == self.oARR:
                    self._defined_names[name] = cells[0].result.value
                elif cells[0].result.kind == self.oREF:
                    if '$' in cell_location:
                        self._defined_names[name] = cells[0].result.text
                    else:
                        curr_cell = cells[0].result
                        if 'auto_open' in name:
                            coords = curr_cell.value[0].coords
                            r = int(coords[3])
                            c = int(coords[5])
                            sheet_name = curr_cell.text.split('!')[0].replace("'", '')
                            cell_ref = f'${Cell.convert_to_column_name(c)}${r}'
                            self._defined_names[name] = f'{sheet_name}!{cell_ref}'
    return self._defined_names

def get_defined_name(self, name, full_match=True)

Expand source code Browse git

def get_defined_name(self, name: str, full_match: bool = True) -> Any:
    result: Any = []
    name = name.lower().replace('[', '')
    if full_match:
        if name in self.get_defined_names():
            result = self._defined_names[name]  # type: ignore
    else:
        for defined_name, cell_address in self.get_defined_names().items():
            if defined_name.startswith(name):
                result.append((defined_name, cell_address))
        if isinstance(result, list) and len(result) == 0:
            for defined_name, cell_address in self.get_defined_names().items():
                lastidx = 0
                match = True
                for c in name:
                    idx = defined_name.find(c, lastidx)
                    if idx == -1:
                        match = False
                        break
                    lastidx = idx
                if match:
                    result.append((defined_name, cell_address))
    return result

def get_macrosheets(self)

Expand source code Browse git

def get_macrosheets(self) -> dict[str, Boundsheet]:
    if self._macrosheets is None:
        import xlrd2
        self._macrosheets = {}
        for sheet in self.xls_workbook.sheets():
            if sheet.boundsheet_type == xlrd2.biffh.XL_MACROSHEET:
                macrosheet = Boundsheet(sheet.name, 'Macrosheet')
                self._load_cells(macrosheet, sheet)
                self._macrosheets[sheet.name] = macrosheet
    return self._macrosheets

def get_worksheets(self)

Expand source code Browse git

def get_worksheets(self) -> dict[str, Boundsheet]:
    if self._worksheets is None:
        import xlrd2
        self._worksheets = {}
        for sheet in self.xls_workbook.sheets():
            if sheet.boundsheet_type == xlrd2.biffh.XL_WORKSHEET:
                worksheet = Boundsheet(sheet.name, 'Worksheet')
                self._load_cells(worksheet, sheet)
                self._worksheets[sheet.name] = worksheet
    return self._worksheets

def get_cell_info(self, sheet_name, col, row, info_type_id)

Expand source code Browse git

def get_cell_info(
    self, sheet_name: str, col: str, row: str, info_type_id: str,
) -> CellInfoResult:
    sheet = self.xls_workbook.sheet_by_name(sheet_name)
    irow = int(row) - 1
    column = Cell.convert_to_column_index(col) - 1
    tid = int(float(info_type_id))
    data: Any = None
    not_exist = False
    not_implemented = False

    if tid == 5:
        data = sheet.cell(irow, column).value
    elif tid == 17:
        if irow in sheet.rowinfo_map:
            data = sheet.rowinfo_map[irow].height
        else:
            data = sheet.default_row_height
        data = round(Cell.convert_twip_to_point(data) * 4) / 4
    else:
        if (irow, column) in sheet.used_cells:
            cell = sheet.cell(irow, column)
            if cell.xf_index is not None and cell.xf_index < len(self.xls_workbook.xf_list):
                fmt = self.xls_workbook.xf_list[cell.xf_index]
                font = self.xls_workbook.font_list[fmt.font_index]
            else:
                normal_style = self.xls_workbook.style_name_map['Normal'][1]
                fmt = self.xls_workbook.xf_list[normal_style]
                font = self.xls_workbook.font_list[fmt.font_index]
        else:
            normal_style = self.xls_workbook.style_name_map['Normal'][1]
            fmt = self.xls_workbook.xf_list[normal_style]
            font = self.xls_workbook.font_list[fmt.font_index]

        if tid == 8:
            data = fmt.alignment.hor_align + 1
        elif tid == 19:
            data = Cell.convert_twip_to_point(font.height)
        elif tid == 24:
            data = font.colour_index - 7 if font.colour_index > 7 else font.colour_index
        elif tid == 38:
            data = fmt.background.pattern_colour_index - 7 if font.colour_index > 7 else font.colour_index
        elif tid == 50:
            data = fmt.alignment.vert_align + 1
        else:
            not_implemented = True

    return data, not_exist, not_implemented

class XLSBWrapper (data)

Expand source code Browse git

class XLSBWrapper:
    def __init__(self, data: bytes | bytearray):
        from pyxlsb2 import open_workbook
        self._xlsb_workbook = open_workbook(BytesIO(data))
        self._workbook_name = 'workbook.xlsb'
        self._macrosheets: dict[str, Boundsheet] | None = None
        self._worksheets: dict[str, Boundsheet] | None = None
        self._defined_names: dict[str, Any] | None = None

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def get_defined_names(self) -> dict[str, Any]:
        if self._defined_names is None:
            names: dict[str, Any] = {}
            for key, val in self._xlsb_workbook.defined_names.items():
                names[key.lower()] = key.lower(), val.formula
            self._defined_names = names
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: list[Any] = []
        if full_match:
            if name.lower() in self.get_defined_names():
                result.append(self.get_defined_names()[name.lower()])
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name.lower()):
                    result.append(cell_address)
        return result

    def _load_cells(self, boundsheet: Boundsheet) -> None:
        from pyxlsb2.formula import Formula
        row_cnt = 0
        with self._xlsb_workbook.get_sheet_by_name(boundsheet.name) as sheet:
            for row in sheet:
                if row_cnt > 1048576:
                    break
                row_cnt += 1
                column_cnt = 0
                for cell in row:
                    if column_cnt > 16384:
                        break
                    tmp_cell = Cell()
                    tmp_cell.row = cell.row_num + 1
                    tmp_cell.column = Cell.convert_to_column_name(cell.col + 1)
                    tmp_cell.value = cell.value
                    tmp_cell.sheet = boundsheet
                    formula_str = Formula.parse(cell.formula)
                    if formula_str._tokens:
                        try:
                            tmp_cell.formula = f'={formula_str.stringify(self._xlsb_workbook)}'
                        except NotImplementedError:
                            pass
                        except Exception:
                            pass
                    if tmp_cell.value is not None or tmp_cell.formula is not None:
                        boundsheet.cells[tmp_cell.get_local_address()] = tmp_cell
                    column_cnt += 1

    def get_macrosheets(self) -> dict[str, Boundsheet]:
        if self._macrosheets is None:
            self._macrosheets = {}
            for xlsb_sheet in self._xlsb_workbook.sheets:
                if xlsb_sheet.type == 'macrosheet':
                    macrosheet = Boundsheet(xlsb_sheet.name, 'macrosheet')
                    self._load_cells(macrosheet)
                    self._macrosheets[macrosheet.name] = macrosheet
        return self._macrosheets

    def get_worksheets(self) -> dict[str, Boundsheet]:
        if self._worksheets is None:
            self._worksheets = {}
            for xlsb_sheet in self._xlsb_workbook.sheets:
                if xlsb_sheet.type == 'worksheet':
                    worksheet = Boundsheet(xlsb_sheet.name, 'worksheet')
                    self._load_cells(worksheet)
                    self._worksheets[worksheet.name] = worksheet
        return self._worksheets

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        return None, False, True

Methods

def get_xl_international_char(self, flag_name)

Expand source code Browse git

def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
    return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

def get_workbook_name(self)

Expand source code Browse git

def get_workbook_name(self) -> str:
    return self._workbook_name

def get_defined_names(self)

Expand source code Browse git

def get_defined_names(self) -> dict[str, Any]:
    if self._defined_names is None:
        names: dict[str, Any] = {}
        for key, val in self._xlsb_workbook.defined_names.items():
            names[key.lower()] = key.lower(), val.formula
        self._defined_names = names
    return self._defined_names

def get_defined_name(self, name, full_match=True)

Expand source code Browse git

def get_defined_name(self, name: str, full_match: bool = True) -> Any:
    result: list[Any] = []
    if full_match:
        if name.lower() in self.get_defined_names():
            result.append(self.get_defined_names()[name.lower()])
    else:
        for defined_name, cell_address in self.get_defined_names().items():
            if defined_name.startswith(name.lower()):
                result.append(cell_address)
    return result

def get_macrosheets(self)

Expand source code Browse git

def get_macrosheets(self) -> dict[str, Boundsheet]:
    if self._macrosheets is None:
        self._macrosheets = {}
        for xlsb_sheet in self._xlsb_workbook.sheets:
            if xlsb_sheet.type == 'macrosheet':
                macrosheet = Boundsheet(xlsb_sheet.name, 'macrosheet')
                self._load_cells(macrosheet)
                self._macrosheets[macrosheet.name] = macrosheet
    return self._macrosheets

def get_worksheets(self)

Expand source code Browse git

def get_worksheets(self) -> dict[str, Boundsheet]:
    if self._worksheets is None:
        self._worksheets = {}
        for xlsb_sheet in self._xlsb_workbook.sheets:
            if xlsb_sheet.type == 'worksheet':
                worksheet = Boundsheet(xlsb_sheet.name, 'worksheet')
                self._load_cells(worksheet)
                self._worksheets[worksheet.name] = worksheet
    return self._worksheets

def get_cell_info(self, sheet_name, col, row, info_type_id)

Expand source code Browse git

def get_cell_info(
    self, sheet_name: str, col: str, row: str, info_type_id: str,
) -> CellInfoResult:
    return None, False, True

class XLSMWrapper (data)

Expand source code Browse git

class XLSMWrapper:
    def __init__(self, data: bytes | bytearray):
        self._data = BytesIO(data)
        self._workbook_name = 'workbook.xlsm'
        self._content_types: Any = None
        self._workbook_xml: Any = None
        self._workbook_rels: dict[str, tuple[str, str]] | None = None
        self._defined_names: dict[str, str] | None = None
        self._macrosheets: dict[str, Boundsheet] | None = None
        self._worksheets: dict[str, Boundsheet] | None = None
        self._shared_strings: list[str] | None = None
        self._style_xml: Any = None
        self._color_map: dict[tuple[int, int, int], int] | None = None

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def _get_zip_files(self, patterns: list[str] | None = None) -> dict[str, bytes]:
        result: dict[str, bytes] = {}
        if not patterns:
            patterns = ['*']
        with ZipFile(self._data) as zf:
            for name in zf.namelist():
                for pat in patterns:
                    if name == pat or fnmatch(name, pat):
                        result[name] = zf.read(name)
            if not result:
                for name in zf.namelist():
                    for pat in patterns:
                        alt_pat = pat.replace('/', '\\')
                        if name == alt_pat or fnmatch(name, alt_pat):
                            result[name.replace('\\', '/')] = zf.read(name)
        return result

    def _parse_xml(self, path: str, ignore_pattern: str | None = None) -> Any:
        try:
            from defusedxml.ElementTree import fromstring
        except ImportError:
            from xml.etree.ElementTree import fromstring
        if path.startswith('/'):
            path = path[1:]
        files = self._get_zip_files([path])
        if len(files) != 1:
            return None
        content = next(iter(files.values())).decode('utf-8')
        if ignore_pattern:
            content = re.sub(ignore_pattern, '', content)
        return fromstring(content)

    def _get_workbook_path(self) -> tuple[str, str, str]:
        wb_path = 'xl/workbook.xml'
        ct = self._get_content_types()
        if ct is not None:
            for override in _etfindall(ct, './/ct:Override', _OOXML_NS) + _etfindall(ct, './/{%s}Override' % _OOXML_NS['ct']):
                ctype = override.get('ContentType', '')
                if 'sheet.main+xml' in ctype:
                    wb_path = override.get('PartName', wb_path).lstrip('/')
                    break
        wb_path = wb_path.lstrip('/')
        if '/' in wb_path:
            base_dir = wb_path[:wb_path.index('/')]
            name = wb_path[wb_path.index('/') + 1:]
        else:
            base_dir = ''
            name = wb_path
        return wb_path, base_dir, name

    def _get_content_types(self) -> Any:
        if self._content_types is None:
            self._content_types = self._parse_xml('[Content_Types].xml')
        return self._content_types

    def _get_workbook(self) -> Any:
        if self._workbook_xml is None:
            wb_path, _, _ = self._get_workbook_path()
            self._workbook_xml = self._parse_xml(wb_path)
        return self._workbook_xml

    def _get_rels(self) -> dict[str, tuple[str, str]]:
        if self._workbook_rels is None:
            self._workbook_rels = {}
            _, base_dir, name = self._get_workbook_path()
            rels_path = f'{base_dir}/_rels/{name}.rels'
            rels_xml = self._parse_xml(rels_path)
            if rels_xml is not None:
                ns = _OOXML_NS['pr']
                for rel in list(rels_xml):
                    tag = rel.tag
                    if tag == f'{{{ns}}}Relationship' or tag == 'Relationship':
                        rid = rel.get('Id', '')
                        target = rel.get('Target', '')
                        rtype = rel.get('Type', '')
                        self._workbook_rels[rid] = (target, rtype)
        return self._workbook_rels

    def _get_sheet_info(self, rid: str) -> tuple[str | None, str | None]:
        rels = self._get_rels()
        if rid not in rels:
            return None, None
        target, rtype = rels[rid]
        if rtype in _MACRO_REL_TYPES:
            return 'Macrosheet', target
        elif rtype == _WORKSHEET_REL_TYPE:
            return 'Worksheet', target
        return 'Unknown', target

    def get_defined_names(self) -> dict[str, str]:
        if self._defined_names is None:
            self._defined_names = {}
            wb = self._get_workbook()
            if wb is None:
                return self._defined_names
            sn = _OOXML_NS['s']
            dn_container = _etfind(wb, f'{{{sn}}}definedNames')
            if dn_container is None:
                return self._defined_names
            for dn in list(dn_container):
                name_attr = dn.get('name', '')
                name_key = name_attr.replace('_xlnm.', '').lower()
                cdata = dn.text or ''
                self._defined_names[name_key] = cdata
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: list[Any] = []
        name = name.lower()
        if full_match:
            if name in self.get_defined_names():
                result = self._defined_names[name]  # type: ignore
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name):
                    result.append((defined_name, cell_address))
        return result

    def _get_shared_strings(self) -> list[str] | None:
        if self._shared_strings is None:
            _, base_dir, _ = self._get_workbook_path()
            ss_xml = self._parse_xml(f'{base_dir}/sharedStrings.xml')
            if ss_xml is not None:
                sn = _OOXML_NS['s']
                self._shared_strings = []
                for si in _etfindall(ss_xml, f'{{{sn}}}si'):
                    t_elem = si.find(f'{{{sn}}}t')
                    if t_elem is not None and t_elem.text is not None:
                        self._shared_strings.append(t_elem.text)
                    else:
                        r_elems = si.findall(f'{{{sn}}}r')
                        if r_elems:
                            t_in_r = r_elems[0].find(f'{{{sn}}}t')
                            if t_in_r is not None and t_in_r.text is not None:
                                self._shared_strings.append(t_in_r.text)
                            else:
                                self._shared_strings.append('')
                        else:
                            self._shared_strings.append('')
        return self._shared_strings

    def _get_sheet_infos(self, types: list[str]) -> list[dict[str, Any]]:
        result: list[dict[str, Any]] = []
        wb = self._get_workbook()
        if wb is None:
            return result
        sn = _OOXML_NS['s']
        sheets_el = _etfind(wb, f'{{{sn}}}sheets')
        if sheets_el is None:
            return result
        _, base_dir, _ = self._get_workbook_path()
        seen: set[str] = set()
        for sheet_el in list(sheets_el):
            rid = sheet_el.get(f'{{{_OOXML_NS["r"]}}}id', '')
            name = sheet_el.get('name', '')
            sheet_type, rel_path = self._get_sheet_info(rid)
            if rel_path is not None and sheet_type in types and name not in seen:
                path = f'{base_dir}/{rel_path}'
                sheet = Boundsheet(name, sheet_type)
                sheet_xml = self._parse_xml(path, ignore_pattern=r'<c[^>]+/>')
                result.append({'sheet': sheet, 'sheet_path': path, 'sheet_xml': sheet_xml})
                seen.add(name)
        return result

    def _load_macro_cells(
        self, sheet: Boundsheet, sheet_xml: Any, macrosheet_names: list[str],
    ) -> None:
        strings = self._get_shared_strings()
        sn = _OOXML_NS['s']
        xm_ns = _OOXML_NS['xm']
        root = sheet_xml
        sheet_data = (
            _etfind(root, f'{{{xm_ns}}}sheetData')
            or _etfind(root, f'{{{sn}}}sheetData')
        )
        if sheet_data is None:
            return
        for row_el in list(sheet_data):
            row_r = row_el.get('r', '')
            row_attribs: dict[RowAttribute, str] = {}
            ht = row_el.get('ht')
            if ht is not None:
                row_attribs[RowAttribute.Height] = ht
            spans = row_el.get('spans')
            if spans is not None:
                row_attribs[RowAttribute.Spans] = spans
            if row_attribs:
                sheet.row_attributes[row_r] = row_attribs  # type: ignore
            for cell_el in list(row_el):
                tag = cell_el.tag
                if not (tag.endswith('}c') or tag == 'c'):
                    continue
                formula_text: str | None = None
                f_el = cell_el.find(f'{{{sn}}}f') or cell_el.find(f'{{{xm_ns}}}f')
                if f_el is not None:
                    if f_el.get('bx') == '1':
                        text = f_el.text or ''
                        if text:
                            eq_pos = text.find('=')
                            if eq_pos > 0:
                                formula_text = f'=SET.NAME("{text[:eq_pos]}",{text[eq_pos + 1:]})'
                    else:
                        if f_el.text:
                            formula_text = f'={f_el.text}'
                if formula_text:
                    for ms_name in macrosheet_names:
                        if f'{ms_name.lower()}!' in formula_text.lower():
                            formula_text = re.sub(
                                f'{re.escape(ms_name)}!',
                                f"'{ms_name}'!",
                                formula_text,
                                flags=re.IGNORECASE,
                            )
                value_text: str | None = None
                is_string = cell_el.get('t') == 's'
                cached_str = cell_el.get('t') == 'str'
                v_el = cell_el.find(f'{{{sn}}}v') or cell_el.find(f'{{{xm_ns}}}v')
                if v_el is not None and v_el.text is not None:
                    value_text = v_el.text
                    if is_string and strings is not None:
                        try:
                            value_text = strings[int(value_text)]
                        except (ValueError, IndexError):
                            pass
                location = cell_el.get('r', '')
                if formula_text or value_text:
                    cell = Cell()
                    _, cell.column, row_str = Cell.parse_cell_addr(location)
                    if row_str is not None:
                        cell.row = int(row_str)
                    cell.sheet = sheet
                    if not cached_str:
                        cell.formula = formula_text
                    cell.value = value_text
                    sheet.cells[location] = cell
                    for attr_name in cell_el.attrib:
                        if attr_name != 'r':
                            cell.attributes[attr_name] = cell_el.get(attr_name)

    def _load_worksheet_cells(self, sheet: Boundsheet, sheet_xml: Any) -> None:
        strings = self._get_shared_strings()
        sn = _OOXML_NS['s']
        root = sheet_xml
        sheet_data = _etfind(root, f'{{{sn}}}sheetData')
        if sheet_data is None:
            return
        for row_el in list(sheet_data):
            row_r = row_el.get('r', '')
            row_attribs: dict[RowAttribute, str] = {}
            ht = row_el.get('ht')
            if ht is not None:
                row_attribs[RowAttribute.Height] = ht
            spans = row_el.get('spans')
            if spans is not None:
                row_attribs[RowAttribute.Spans] = spans
            if row_attribs:
                sheet.row_attributes[row_r] = row_attribs  # type: ignore
            for cell_el in list(row_el):
                tag = cell_el.tag
                if not (tag.endswith('}c') or tag == 'c'):
                    continue
                formula_text: str | None = None
                f_el = cell_el.find(f'{{{sn}}}f')
                if f_el is not None and f_el.text:
                    formula_text = f'={f_el.text}'
                value_text: str | None = None
                is_string = cell_el.get('t') == 's'
                v_el = cell_el.find(f'{{{sn}}}v')
                if v_el is not None and v_el.text is not None:
                    value_text = v_el.text
                    if is_string and strings is not None:
                        try:
                            value_text = strings[int(value_text)]
                        except (ValueError, IndexError):
                            pass
                location = cell_el.get('r', '')
                cell = Cell()
                _, cell.column, row_str = Cell.parse_cell_addr(location)
                if row_str is not None:
                    cell.row = int(row_str)
                cell.sheet = sheet
                cell.formula = formula_text
                cell.value = value_text
                sheet.cells[location] = cell
                for attr_name in cell_el.attrib:
                    if attr_name != 'r':
                        cell.attributes[attr_name] = cell_el.get(attr_name)

    def get_macrosheets(self) -> dict[str, Boundsheet]:
        if self._macrosheets is None:
            self._macrosheets = {}
            infos = self._get_sheet_infos(['Macrosheet'])
            macrosheet_names = [info['sheet'].name for info in infos]
            sn = _OOXML_NS['s']
            xm_ns = _OOXML_NS['xm']
            for info in infos:
                if info['sheet_xml'] is not None:
                    self._load_macro_cells(info['sheet'], info['sheet_xml'], macrosheet_names)
                    root = info['sheet_xml']
                    fmt_pr = (
                        _etfind(root, f'{{{xm_ns}}}sheetFormatPr')
                        or _etfind(root, f'{{{sn}}}sheetFormatPr')
                    )
                    if fmt_pr is not None:
                        info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
                self._macrosheets[info['sheet'].name] = info['sheet']
        return self._macrosheets

    def get_worksheets(self) -> dict[str, Boundsheet]:
        if self._worksheets is None:
            self._worksheets = {}
            infos = self._get_sheet_infos(['Worksheet'])
            sn = _OOXML_NS['s']
            for info in infos:
                if info['sheet_xml'] is not None:
                    self._load_worksheet_cells(info['sheet'], info['sheet_xml'])
                    root = info['sheet_xml']
                    fmt_pr = _etfind(root, f'{{{sn}}}sheetFormatPr')
                    if fmt_pr is not None:
                        info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
                self._worksheets[info['sheet'].name] = info['sheet']
        return self._worksheets

    def _get_style(self) -> Any:
        if self._style_xml is None:
            _, base_dir, _ = self._get_workbook_path()
            rels = self._get_rels()
            style_target = None
            for _, (target, rtype) in rels.items():
                if 'styles' in rtype:
                    style_target = target
                    break
            if style_target:
                self._style_xml = self._parse_xml(f'{base_dir}/{style_target}')
        return self._style_xml

    def _get_color_index(self, rgba_str: str) -> int | None:
        r = int(rgba_str[2:4], 16)
        g = int(rgba_str[4:6], 16)
        b = int(rgba_str[6:8], 16)
        if self._color_map is None:
            self._color_map = {}
            for cr, cg, cb, idx in _XLSM_COLOR_TABLE:
                if (cr, cg, cb) not in self._color_map:
                    self._color_map[(cr, cg, cb)] = idx
        return self._color_map.get((r, g, b))

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        data: Any = None
        not_exist = True
        not_implemented = False

        ms = self.get_macrosheets()
        if sheet_name not in ms:
            return data, not_exist, not_implemented
        sheet = ms[sheet_name]
        cell_addr = f'{col}{row}'
        tid = int(float(info_type_id))
        sn = _OOXML_NS['s']

        if tid == 17:
            style_xml = self._get_style()
            if row in sheet.row_attributes and RowAttribute.Height in sheet.row_attributes.get(row, {}):
                not_exist = False
                data = sheet.row_attributes[row][RowAttribute.Height]
            elif sheet.default_height is not None:
                data = sheet.default_height
            if data is not None:
                data = round(float(data) * 4) / 4
        else:
            style_xml = self._get_style()
            if style_xml is None:
                return data, not_exist, True
            not_exist = False
            cell_format = None
            font = None
            xfs = _etfind(style_xml, f'{{{sn}}}cellXfs')
            fonts_el = _etfind(style_xml, f'{{{sn}}}fonts')
            styles_el = _etfind(style_xml, f'{{{sn}}}cellStyles')
            style_xfs = _etfind(style_xml, f'{{{sn}}}cellStyleXfs')

            if cell_addr in sheet.cells:
                cell = sheet.cells[cell_addr]
                if 's' in cell.attributes and xfs is not None:
                    index = int(cell.attributes['s'])
                    xf_list = _etfindall(xfs, f'{{{sn}}}xf')
                    if index < len(xf_list):
                        cell_format = xf_list[index]
                        font_id_str = cell_format.get('fontId')
                        if font_id_str is not None and fonts_el is not None:
                            font_index = int(font_id_str)
                            font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                            if font_index < len(font_list):
                                font = font_list[font_index]

            if cell_format is None and styles_el is not None and style_xfs is not None:
                for cs in _etfindall(styles_el, f'{{{sn}}}cellStyle'):
                    if cs.get('name') == 'Normal':
                        xf_id = int(cs.get('xfId', '0'))
                        xf_list = _etfindall(style_xfs, f'{{{sn}}}xf')
                        if xf_id < len(xf_list):
                            cell_format = xf_list[xf_id]
                            font_id_str = cell_format.get('fontId')
                            if font_id_str is not None and fonts_el is not None:
                                font_index = int(font_id_str)
                                font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                                if font_index < len(font_list):
                                    font = font_list[font_index]
                        break

            if tid == 8:
                if cell_format is not None:
                    align = _etfind(cell_format, f'{{{sn}}}alignment')
                    if align is not None:
                        h_map = {
                            'general': 1, 'left': 2, 'center': 3, 'right': 4,
                            'fill': 5, 'justify': 6, 'centercontinuous': 7, 'distributed': 8,
                        }
                        data = h_map.get(align.get('horizontal', 'general').lower(), 1)
                    else:
                        data = 1
            elif tid == 19:
                if font is not None:
                    sz = _etfind(font, f'{{{sn}}}sz')
                    if sz is not None:
                        data = float(sz.get('val', '11'))
            elif tid == 24:
                if font is not None:
                    color_el = _etfind(font, f'{{{sn}}}color')
                    if color_el is not None:
                        rgb = color_el.get('rgb')
                        if rgb:
                            data = self._get_color_index(rgb)
                        else:
                            data = 1
                    else:
                        data = 1
            elif tid == 38:
                if cell_format is not None:
                    fill_id_str = cell_format.get('fillId')
                    if fill_id_str is not None:
                        fills_el = _etfind(style_xml, f'{{{sn}}}fills')
                        if fills_el is not None:
                            fill_list = _etfindall(fills_el, f'{{{sn}}}fill')
                            fill_id = int(fill_id_str)
                            if fill_id < len(fill_list):
                                pf = _etfind(fill_list[fill_id], f'{{{sn}}}patternFill')
                                if pf is not None:
                                    fg = _etfind(pf, f'{{{sn}}}fgColor')
                                    if fg is not None:
                                        rgb = fg.get('rgb')
                                        if rgb:
                                            data = self._get_color_index(rgb)
                                        else:
                                            data = 0
                                    else:
                                        data = 0
            elif tid == 50:
                v_map = {'top': 1, 'center': 2, 'bottom': 3, 'justify': 4, 'distributed': 5}
                if cell_format is not None:
                    align = _etfind(cell_format, f'{{{sn}}}alignment')
                    if align is not None:
                        data = v_map.get(align.get('vertical', 'bottom').lower(), 3)
                    else:
                        data = 3
                else:
                    data = 3
            else:
                not_implemented = True

        return data, not_exist, not_implemented

Methods

def get_xl_international_char(self, flag_name)

Expand source code Browse git

def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
    return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

def get_workbook_name(self)

Expand source code Browse git

def get_workbook_name(self) -> str:
    return self._workbook_name

def get_defined_names(self)

Expand source code Browse git

def get_defined_names(self) -> dict[str, str]:
    if self._defined_names is None:
        self._defined_names = {}
        wb = self._get_workbook()
        if wb is None:
            return self._defined_names
        sn = _OOXML_NS['s']
        dn_container = _etfind(wb, f'{{{sn}}}definedNames')
        if dn_container is None:
            return self._defined_names
        for dn in list(dn_container):
            name_attr = dn.get('name', '')
            name_key = name_attr.replace('_xlnm.', '').lower()
            cdata = dn.text or ''
            self._defined_names[name_key] = cdata
    return self._defined_names

def get_defined_name(self, name, full_match=True)

Expand source code Browse git

def get_defined_name(self, name: str, full_match: bool = True) -> Any:
    result: list[Any] = []
    name = name.lower()
    if full_match:
        if name in self.get_defined_names():
            result = self._defined_names[name]  # type: ignore
    else:
        for defined_name, cell_address in self.get_defined_names().items():
            if defined_name.startswith(name):
                result.append((defined_name, cell_address))
    return result

def get_macrosheets(self)

Expand source code Browse git

def get_macrosheets(self) -> dict[str, Boundsheet]:
    if self._macrosheets is None:
        self._macrosheets = {}
        infos = self._get_sheet_infos(['Macrosheet'])
        macrosheet_names = [info['sheet'].name for info in infos]
        sn = _OOXML_NS['s']
        xm_ns = _OOXML_NS['xm']
        for info in infos:
            if info['sheet_xml'] is not None:
                self._load_macro_cells(info['sheet'], info['sheet_xml'], macrosheet_names)
                root = info['sheet_xml']
                fmt_pr = (
                    _etfind(root, f'{{{xm_ns}}}sheetFormatPr')
                    or _etfind(root, f'{{{sn}}}sheetFormatPr')
                )
                if fmt_pr is not None:
                    info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
            self._macrosheets[info['sheet'].name] = info['sheet']
    return self._macrosheets

def get_worksheets(self)

Expand source code Browse git

def get_worksheets(self) -> dict[str, Boundsheet]:
    if self._worksheets is None:
        self._worksheets = {}
        infos = self._get_sheet_infos(['Worksheet'])
        sn = _OOXML_NS['s']
        for info in infos:
            if info['sheet_xml'] is not None:
                self._load_worksheet_cells(info['sheet'], info['sheet_xml'])
                root = info['sheet_xml']
                fmt_pr = _etfind(root, f'{{{sn}}}sheetFormatPr')
                if fmt_pr is not None:
                    info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
            self._worksheets[info['sheet'].name] = info['sheet']
    return self._worksheets

def get_cell_info(self, sheet_name, col, row, info_type_id)

Expand source code Browse git

def get_cell_info(
    self, sheet_name: str, col: str, row: str, info_type_id: str,
) -> CellInfoResult:
    data: Any = None
    not_exist = True
    not_implemented = False

    ms = self.get_macrosheets()
    if sheet_name not in ms:
        return data, not_exist, not_implemented
    sheet = ms[sheet_name]
    cell_addr = f'{col}{row}'
    tid = int(float(info_type_id))
    sn = _OOXML_NS['s']

    if tid == 17:
        style_xml = self._get_style()
        if row in sheet.row_attributes and RowAttribute.Height in sheet.row_attributes.get(row, {}):
            not_exist = False
            data = sheet.row_attributes[row][RowAttribute.Height]
        elif sheet.default_height is not None:
            data = sheet.default_height
        if data is not None:
            data = round(float(data) * 4) / 4
    else:
        style_xml = self._get_style()
        if style_xml is None:
            return data, not_exist, True
        not_exist = False
        cell_format = None
        font = None
        xfs = _etfind(style_xml, f'{{{sn}}}cellXfs')
        fonts_el = _etfind(style_xml, f'{{{sn}}}fonts')
        styles_el = _etfind(style_xml, f'{{{sn}}}cellStyles')
        style_xfs = _etfind(style_xml, f'{{{sn}}}cellStyleXfs')

        if cell_addr in sheet.cells:
            cell = sheet.cells[cell_addr]
            if 's' in cell.attributes and xfs is not None:
                index = int(cell.attributes['s'])
                xf_list = _etfindall(xfs, f'{{{sn}}}xf')
                if index < len(xf_list):
                    cell_format = xf_list[index]
                    font_id_str = cell_format.get('fontId')
                    if font_id_str is not None and fonts_el is not None:
                        font_index = int(font_id_str)
                        font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                        if font_index < len(font_list):
                            font = font_list[font_index]

        if cell_format is None and styles_el is not None and style_xfs is not None:
            for cs in _etfindall(styles_el, f'{{{sn}}}cellStyle'):
                if cs.get('name') == 'Normal':
                    xf_id = int(cs.get('xfId', '0'))
                    xf_list = _etfindall(style_xfs, f'{{{sn}}}xf')
                    if xf_id < len(xf_list):
                        cell_format = xf_list[xf_id]
                        font_id_str = cell_format.get('fontId')
                        if font_id_str is not None and fonts_el is not None:
                            font_index = int(font_id_str)
                            font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                            if font_index < len(font_list):
                                font = font_list[font_index]
                    break

        if tid == 8:
            if cell_format is not None:
                align = _etfind(cell_format, f'{{{sn}}}alignment')
                if align is not None:
                    h_map = {
                        'general': 1, 'left': 2, 'center': 3, 'right': 4,
                        'fill': 5, 'justify': 6, 'centercontinuous': 7, 'distributed': 8,
                    }
                    data = h_map.get(align.get('horizontal', 'general').lower(), 1)
                else:
                    data = 1
        elif tid == 19:
            if font is not None:
                sz = _etfind(font, f'{{{sn}}}sz')
                if sz is not None:
                    data = float(sz.get('val', '11'))
        elif tid == 24:
            if font is not None:
                color_el = _etfind(font, f'{{{sn}}}color')
                if color_el is not None:
                    rgb = color_el.get('rgb')
                    if rgb:
                        data = self._get_color_index(rgb)
                    else:
                        data = 1
                else:
                    data = 1
        elif tid == 38:
            if cell_format is not None:
                fill_id_str = cell_format.get('fillId')
                if fill_id_str is not None:
                    fills_el = _etfind(style_xml, f'{{{sn}}}fills')
                    if fills_el is not None:
                        fill_list = _etfindall(fills_el, f'{{{sn}}}fill')
                        fill_id = int(fill_id_str)
                        if fill_id < len(fill_list):
                            pf = _etfind(fill_list[fill_id], f'{{{sn}}}patternFill')
                            if pf is not None:
                                fg = _etfind(pf, f'{{{sn}}}fgColor')
                                if fg is not None:
                                    rgb = fg.get('rgb')
                                    if rgb:
                                        data = self._get_color_index(rgb)
                                    else:
                                        data = 0
                                else:
                                    data = 0
        elif tid == 50:
            v_map = {'top': 1, 'center': 2, 'bottom': 3, 'justify': 4, 'distributed': 5}
            if cell_format is not None:
                align = _etfind(cell_format, f'{{{sn}}}alignment')
                if align is not None:
                    data = v_map.get(align.get('vertical', 'bottom').lower(), 3)
                else:
                    data = 3
            else:
                data = 3
        else:
            not_implemented = True

    return data, not_exist, not_implemented