Module refinery.lib.thirdparty.xlm.wrappers

Expand source code Browse git
from __future__ import annotations

import re

from fnmatch import fnmatch
from io import BytesIO
from typing import Any, Dict, List, Tuple
from zipfile import ZipFile

from refinery.lib.thirdparty.xlm.model import (
    Boundsheet,
    Cell,
    RowAttribute,
    XlApplicationInternational,
)

_XL_INTERNATIONAL_DEFAULTS = {
    XlApplicationInternational.xlLeftBracket : '[',
    XlApplicationInternational.xlListSeparator: ',',
    XlApplicationInternational.xlRightBracket : ']',
}

_XLSM_COLOR_TABLE: List[Tuple[int, int, int, int]] = [
    (  0,   0,   0,  1), (255, 255, 255,  2), (255,   0,   0,  3), (  0, 255,   0,  4),
    (  0,   0, 255,  5), (255, 255,   0,  6), (255,   0, 255,  7), (  0, 255, 255,  8),
    (128,   0,   0,  9), (  0, 128,   0, 10), (  0,   0, 128, 11), (128, 128,   0, 12),
    (128,   0, 128, 13), (  0, 128, 128, 14), (192, 192, 192, 15), (128, 128, 128, 16),
    (153, 153, 255, 17), (153,  51, 102, 18), (255, 255, 204, 19), (204, 255, 255, 20),
    (102,   0, 102, 21), (255, 128, 128, 22), (  0, 102, 204, 23), (204, 204, 255, 24),
    (  0,   0, 128, 25), (255,   0, 255, 26), (255, 255,   0, 27), (  0, 255, 255, 28),
    (128,   0, 128, 29), (128,   0,   0, 30), (  0, 128, 128, 31), (  0,   0, 255, 32),
    (  0, 204, 255, 33), (204, 255, 255, 34), (204, 255, 204, 35), (255, 255, 153, 36),
    (153, 204, 255, 37), (255, 153, 204, 38), (204, 153, 255, 39), (255, 204, 153, 40),
    ( 51, 102, 255, 41), ( 51, 204, 204, 42), (153, 204,   0, 43), (255, 204,   0, 44),
    (255, 153,   0, 45), (255, 102,   0, 46), (102, 102, 153, 47), (150, 150, 150, 48),
    (  0,  51, 102, 49), ( 51, 153, 102, 50), (  0,  51,   0, 51), ( 51,  51,   0, 52),
    (153,  51,   0, 53), (153,  51, 102, 54), ( 51,  51, 153, 55), ( 51,  51,  51, 56),
]

CellInfoResult = Tuple[Any, bool, bool]


class XLSWrapper:
    def __init__(self, data: bytes | bytearray):
        import xlrd2
        self._xlrd2 = xlrd2
        self.xls_workbook = xlrd2.open_workbook(file_contents=bytes(data), formatting_info=True)
        self._workbook_name = 'workbook.xls'
        self._macrosheets: Dict[str, Boundsheet] | None = None
        self._worksheets: Dict[str, Boundsheet] | None = None
        self._defined_names: Dict[str, Any] | None = None
        control_chars = ''.join(map(chr, range(0, 32)))
        control_chars += ''.join(map(chr, range(127, 160)))
        control_chars += '\ufefe\uffff\ufeff\ufffe\uffef\ufff0\ufff1\ufff6\ufefd\udddd\ufffd'
        self._control_char_re = re.compile(f'[{re.escape(control_chars)}]')

    oNUM = 2
    oSTRG = 1
    oREF = -1
    oARR = 6

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def get_defined_names(self) -> Dict[str, Any]:
        if self._defined_names is None:
            self._defined_names = {}
            for name_obj, cells in self.xls_workbook.name_map.items():
                name = name_obj.lower()
                index = 1 if len(cells) > 1 else 0
                filtered_name = name.lower()
                if name != filtered_name:
                    if filtered_name in self._defined_names:
                        filtered_name = filtered_name + str(index)
                    if cells[0].result is not None:
                        self._defined_names[filtered_name] = cells[0].result.text

                if name in self._defined_names:
                    name = name + str(index)
                if cells[0].result is not None:
                    cell_location = cells[0].result.text
                    if cells[0].result.kind == self.oNUM:
                        self._defined_names[name] = cells[0].result.value
                    elif cells[0].result.kind == self.oSTRG:
                        self._defined_names[name] = cells[0].result.text
                    elif cells[0].result.kind == self.oARR:
                        self._defined_names[name] = cells[0].result.value
                    elif cells[0].result.kind == self.oREF:
                        if '$' in cell_location:
                            self._defined_names[name] = cells[0].result.text
                        else:
                            curr_cell = cells[0].result
                            if 'auto_open' in name:
                                coords = curr_cell.value[0].coords
                                r = int(coords[3])
                                c = int(coords[5])
                                sheet_name = curr_cell.text.split('!')[0].replace("'", '')
                                cell_ref = f'${Cell.convert_to_column_name(c)}${r}'
                                self._defined_names[name] = f'{sheet_name}!{cell_ref}'
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: Any = []
        name = name.lower().replace('[', '')
        if full_match:
            if name in self.get_defined_names():
                result = self._defined_names[name]  # type: ignore
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name):
                    result.append((defined_name, cell_address))
            if isinstance(result, list) and len(result) == 0:
                for defined_name, cell_address in self.get_defined_names().items():
                    lastidx = 0
                    match = True
                    for c in name:
                        idx = defined_name.find(c, lastidx)
                        if idx == -1:
                            match = False
                            break
                        lastidx = idx
                    if match:
                        result.append((defined_name, cell_address))
        return result

    def _load_cells(self, boundsheet: Boundsheet, xls_sheet: Any) -> None:
        try:
            for xls_cell in xls_sheet.get_used_cells():
                cell = Cell()
                cell.sheet = boundsheet
                if xls_cell.formula is not None and len(xls_cell.formula) > 0:
                    cell.formula = f'={xls_cell.formula}'
                cell.value = xls_cell.value
                cell.row = xls_cell.row + 1
                cell.column = Cell.convert_to_column_name(xls_cell.column + 1)
                if cell.value is not None or cell.formula is not None:
                    boundsheet.add_cell(cell)
        except Exception:
            pass

    def get_macrosheets(self) -> Dict[str, Boundsheet]:
        if self._macrosheets is None:
            import xlrd2
            self._macrosheets = {}
            for sheet in self.xls_workbook.sheets():
                if sheet.boundsheet_type == xlrd2.biffh.XL_MACROSHEET:
                    macrosheet = Boundsheet(sheet.name, 'Macrosheet')
                    self._load_cells(macrosheet, sheet)
                    self._macrosheets[sheet.name] = macrosheet
        return self._macrosheets

    def get_worksheets(self) -> Dict[str, Boundsheet]:
        if self._worksheets is None:
            import xlrd2
            self._worksheets = {}
            for sheet in self.xls_workbook.sheets():
                if sheet.boundsheet_type == xlrd2.biffh.XL_WORKSHEET:
                    worksheet = Boundsheet(sheet.name, 'Worksheet')
                    self._load_cells(worksheet, sheet)
                    self._worksheets[sheet.name] = worksheet
        return self._worksheets

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        sheet = self.xls_workbook.sheet_by_name(sheet_name)
        irow = int(row) - 1
        column = Cell.convert_to_column_index(col) - 1
        tid = int(float(info_type_id))
        data: Any = None
        not_exist = False
        not_implemented = False

        if tid == 5:
            data = sheet.cell(irow, column).value
        elif tid == 17:
            if irow in sheet.rowinfo_map:
                data = sheet.rowinfo_map[irow].height
            else:
                data = sheet.default_row_height
            data = round(Cell.convert_twip_to_point(data) * 4) / 4
        else:
            if (irow, column) in sheet.used_cells:
                cell = sheet.cell(irow, column)
                if cell.xf_index is not None and cell.xf_index < len(self.xls_workbook.xf_list):
                    fmt = self.xls_workbook.xf_list[cell.xf_index]
                    font = self.xls_workbook.font_list[fmt.font_index]
                else:
                    normal_style = self.xls_workbook.style_name_map['Normal'][1]
                    fmt = self.xls_workbook.xf_list[normal_style]
                    font = self.xls_workbook.font_list[fmt.font_index]
            else:
                normal_style = self.xls_workbook.style_name_map['Normal'][1]
                fmt = self.xls_workbook.xf_list[normal_style]
                font = self.xls_workbook.font_list[fmt.font_index]

            if tid == 8:
                data = fmt.alignment.hor_align + 1
            elif tid == 19:
                data = Cell.convert_twip_to_point(font.height)
            elif tid == 24:
                data = font.colour_index - 7 if font.colour_index > 7 else font.colour_index
            elif tid == 38:
                data = fmt.background.pattern_colour_index - 7 if font.colour_index > 7 else font.colour_index
            elif tid == 50:
                data = fmt.alignment.vert_align + 1
            else:
                not_implemented = True

        return data, not_exist, not_implemented


class XLSBWrapper:
    def __init__(self, data: bytes | bytearray):
        from pyxlsb2 import open_workbook
        self._xlsb_workbook = open_workbook(BytesIO(data))
        self._workbook_name = 'workbook.xlsb'
        self._macrosheets: Dict[str, Boundsheet] | None = None
        self._worksheets: Dict[str, Boundsheet] | None = None
        self._defined_names: Dict[str, Any] | None = None

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def get_defined_names(self) -> Dict[str, Any]:
        if self._defined_names is None:
            names: Dict[str, Any] = {}
            for key, val in self._xlsb_workbook.defined_names.items():
                names[key.lower()] = key.lower(), val.formula
            self._defined_names = names
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: list[Any] = []
        if full_match:
            if name.lower() in self.get_defined_names():
                result.append(self.get_defined_names()[name.lower()])
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name.lower()):
                    result.append(cell_address)
        return result

    def _load_cells(self, boundsheet: Boundsheet) -> None:
        from pyxlsb2.formula import Formula
        row_cnt = 0
        with self._xlsb_workbook.get_sheet_by_name(boundsheet.name) as sheet:
            for row in sheet:
                if row_cnt > 1048576:
                    break
                row_cnt += 1
                column_cnt = 0
                for cell in row:
                    if column_cnt > 16384:
                        break
                    tmp_cell = Cell()
                    tmp_cell.row = cell.row_num + 1
                    tmp_cell.column = Cell.convert_to_column_name(cell.col + 1)
                    tmp_cell.value = cell.value
                    tmp_cell.sheet = boundsheet
                    formula_str = Formula.parse(cell.formula)
                    if formula_str._tokens:
                        try:
                            tmp_cell.formula = f'={formula_str.stringify(self._xlsb_workbook)}'
                        except NotImplementedError:
                            pass
                        except Exception:
                            pass
                    if tmp_cell.value is not None or tmp_cell.formula is not None:
                        boundsheet.cells[tmp_cell.get_local_address()] = tmp_cell
                    column_cnt += 1

    def get_macrosheets(self) -> Dict[str, Boundsheet]:
        if self._macrosheets is None:
            self._macrosheets = {}
            for xlsb_sheet in self._xlsb_workbook.sheets:
                if xlsb_sheet.type == 'macrosheet':
                    macrosheet = Boundsheet(xlsb_sheet.name, 'macrosheet')
                    self._load_cells(macrosheet)
                    self._macrosheets[macrosheet.name] = macrosheet
        return self._macrosheets

    def get_worksheets(self) -> Dict[str, Boundsheet]:
        if self._worksheets is None:
            self._worksheets = {}
            for xlsb_sheet in self._xlsb_workbook.sheets:
                if xlsb_sheet.type == 'worksheet':
                    worksheet = Boundsheet(xlsb_sheet.name, 'worksheet')
                    self._load_cells(worksheet)
                    self._worksheets[worksheet.name] = worksheet
        return self._worksheets

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        return None, False, True


_OOXML_NS = {
    's' : 'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
    'r' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
    'xm': 'http://schemas.microsoft.com/office/excel/2006/main',
    'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
    'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',
    'pr': 'http://schemas.openxmlformats.org/package/2006/relationships',
}

_MACRO_REL_TYPES = {
    'http://schemas.microsoft.com/office/2006/relationships/xlMacrosheet',
    'http://schemas.microsoft.com/office/2006/relationships/xlIntlMacrosheet',
}
_WORKSHEET_REL_TYPE = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'


def _etfind(root: Any, path: str, ns: dict[str, str] = _OOXML_NS) -> Any:
    return root.find(path, ns)


def _etfindall(root: Any, path: str, ns: dict[str, str] = _OOXML_NS) -> list[Any]:
    return root.findall(path, ns)


class XLSMWrapper:
    def __init__(self, data: bytes | bytearray):
        self._data = BytesIO(data)
        self._workbook_name = 'workbook.xlsm'
        self._content_types: Any = None
        self._workbook_xml: Any = None
        self._workbook_rels: Dict[str, Tuple[str, str]] | None = None
        self._defined_names: Dict[str, str] | None = None
        self._macrosheets: Dict[str, Boundsheet] | None = None
        self._worksheets: Dict[str, Boundsheet] | None = None
        self._shared_strings: List[str] | None = None
        self._style_xml: Any = None
        self._color_map: Dict[Tuple[int, int, int], int] | None = None

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def _get_zip_files(self, patterns: List[str] | None = None) -> Dict[str, bytes]:
        result: Dict[str, bytes] = {}
        if not patterns:
            patterns = ['*']
        with ZipFile(self._data) as zf:
            for name in zf.namelist():
                for pat in patterns:
                    if name == pat or fnmatch(name, pat):
                        result[name] = zf.read(name)
            if not result:
                for name in zf.namelist():
                    for pat in patterns:
                        alt_pat = pat.replace('/', '\\')
                        if name == alt_pat or fnmatch(name, alt_pat):
                            result[name.replace('\\', '/')] = zf.read(name)
        return result

    def _parse_xml(self, path: str, ignore_pattern: str | None = None) -> Any:
        try:
            from defusedxml.ElementTree import fromstring
        except ImportError:
            from xml.etree.ElementTree import fromstring
        if path.startswith('/'):
            path = path[1:]
        files = self._get_zip_files([path])
        if len(files) != 1:
            return None
        content = next(iter(files.values())).decode('utf-8')
        if ignore_pattern:
            content = re.sub(ignore_pattern, '', content)
        return fromstring(content)

    def _get_workbook_path(self) -> Tuple[str, str, str]:
        wb_path = 'xl/workbook.xml'
        ct = self._get_content_types()
        if ct is not None:
            for override in _etfindall(ct, './/ct:Override', _OOXML_NS) + _etfindall(ct, './/{%s}Override' % _OOXML_NS['ct']):
                ctype = override.get('ContentType', '')
                if 'sheet.main+xml' in ctype:
                    wb_path = override.get('PartName', wb_path).lstrip('/')
                    break
        wb_path = wb_path.lstrip('/')
        if '/' in wb_path:
            base_dir = wb_path[:wb_path.index('/')]
            name = wb_path[wb_path.index('/') + 1:]
        else:
            base_dir = ''
            name = wb_path
        return wb_path, base_dir, name

    def _get_content_types(self) -> Any:
        if self._content_types is None:
            self._content_types = self._parse_xml('[Content_Types].xml')
        return self._content_types

    def _get_workbook(self) -> Any:
        if self._workbook_xml is None:
            wb_path, _, _ = self._get_workbook_path()
            self._workbook_xml = self._parse_xml(wb_path)
        return self._workbook_xml

    def _get_rels(self) -> Dict[str, Tuple[str, str]]:
        if self._workbook_rels is None:
            self._workbook_rels = {}
            _, base_dir, name = self._get_workbook_path()
            rels_path = f'{base_dir}/_rels/{name}.rels'
            rels_xml = self._parse_xml(rels_path)
            if rels_xml is not None:
                ns = _OOXML_NS['pr']
                for rel in list(rels_xml):
                    tag = rel.tag
                    if tag == f'{{{ns}}}Relationship' or tag == 'Relationship':
                        rid = rel.get('Id', '')
                        target = rel.get('Target', '')
                        rtype = rel.get('Type', '')
                        self._workbook_rels[rid] = (target, rtype)
        return self._workbook_rels

    def _get_sheet_info(self, rid: str) -> Tuple[str | None, str | None]:
        rels = self._get_rels()
        if rid not in rels:
            return None, None
        target, rtype = rels[rid]
        if rtype in _MACRO_REL_TYPES:
            return 'Macrosheet', target
        elif rtype == _WORKSHEET_REL_TYPE:
            return 'Worksheet', target
        return 'Unknown', target

    def get_defined_names(self) -> Dict[str, str]:
        if self._defined_names is None:
            self._defined_names = {}
            wb = self._get_workbook()
            if wb is None:
                return self._defined_names
            sn = _OOXML_NS['s']
            dn_container = _etfind(wb, f'{{{sn}}}definedNames')
            if dn_container is None:
                return self._defined_names
            for dn in list(dn_container):
                name_attr = dn.get('name', '')
                name_key = name_attr.replace('_xlnm.', '').lower()
                cdata = dn.text or ''
                self._defined_names[name_key] = cdata
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: list[Any] = []
        name = name.lower()
        if full_match:
            if name in self.get_defined_names():
                result = self._defined_names[name]  # type: ignore
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name):
                    result.append((defined_name, cell_address))
        return result

    def _get_shared_strings(self) -> List[str] | None:
        if self._shared_strings is None:
            _, base_dir, _ = self._get_workbook_path()
            ss_xml = self._parse_xml(f'{base_dir}/sharedStrings.xml')
            if ss_xml is not None:
                sn = _OOXML_NS['s']
                self._shared_strings = []
                for si in _etfindall(ss_xml, f'{{{sn}}}si'):
                    t_elem = si.find(f'{{{sn}}}t')
                    if t_elem is not None and t_elem.text is not None:
                        self._shared_strings.append(t_elem.text)
                    else:
                        r_elems = si.findall(f'{{{sn}}}r')
                        if r_elems:
                            t_in_r = r_elems[0].find(f'{{{sn}}}t')
                            if t_in_r is not None and t_in_r.text is not None:
                                self._shared_strings.append(t_in_r.text)
                            else:
                                self._shared_strings.append('')
                        else:
                            self._shared_strings.append('')
        return self._shared_strings

    def _get_sheet_infos(self, types: List[str]) -> List[Dict[str, Any]]:
        result: list[Dict[str, Any]] = []
        wb = self._get_workbook()
        if wb is None:
            return result
        sn = _OOXML_NS['s']
        sheets_el = _etfind(wb, f'{{{sn}}}sheets')
        if sheets_el is None:
            return result
        _, base_dir, _ = self._get_workbook_path()
        seen: set[str] = set()
        for sheet_el in list(sheets_el):
            rid = sheet_el.get(f'{{{_OOXML_NS["r"]}}}id', '')
            name = sheet_el.get('name', '')
            sheet_type, rel_path = self._get_sheet_info(rid)
            if rel_path is not None and sheet_type in types and name not in seen:
                path = f'{base_dir}/{rel_path}'
                sheet = Boundsheet(name, sheet_type)
                sheet_xml = self._parse_xml(path, ignore_pattern=r'<c[^>]+/>')
                result.append({'sheet': sheet, 'sheet_path': path, 'sheet_xml': sheet_xml})
                seen.add(name)
        return result

    def _load_macro_cells(
        self, sheet: Boundsheet, sheet_xml: Any, macrosheet_names: List[str],
    ) -> None:
        strings = self._get_shared_strings()
        sn = _OOXML_NS['s']
        xm_ns = _OOXML_NS['xm']
        root = sheet_xml
        sheet_data = (
            _etfind(root, f'{{{xm_ns}}}sheetData')
            or _etfind(root, f'{{{sn}}}sheetData')
        )
        if sheet_data is None:
            return
        for row_el in list(sheet_data):
            row_r = row_el.get('r', '')
            row_attribs: Dict[RowAttribute, str] = {}
            ht = row_el.get('ht')
            if ht is not None:
                row_attribs[RowAttribute.Height] = ht
            spans = row_el.get('spans')
            if spans is not None:
                row_attribs[RowAttribute.Spans] = spans
            if row_attribs:
                sheet.row_attributes[row_r] = row_attribs  # type: ignore
            for cell_el in list(row_el):
                tag = cell_el.tag
                if not (tag.endswith('}c') or tag == 'c'):
                    continue
                formula_text: str | None = None
                f_el = cell_el.find(f'{{{sn}}}f') or cell_el.find(f'{{{xm_ns}}}f')
                if f_el is not None:
                    if f_el.get('bx') == '1':
                        text = f_el.text or ''
                        if text:
                            eq_pos = text.find('=')
                            if eq_pos > 0:
                                formula_text = f'=SET.NAME("{text[:eq_pos]}",{text[eq_pos + 1:]})'
                    else:
                        if f_el.text:
                            formula_text = f'={f_el.text}'
                if formula_text:
                    for ms_name in macrosheet_names:
                        if f'{ms_name.lower()}!' in formula_text.lower():
                            formula_text = re.sub(
                                f'{re.escape(ms_name)}!',
                                f"'{ms_name}'!",
                                formula_text,
                                flags=re.IGNORECASE,
                            )
                value_text: str | None = None
                is_string = cell_el.get('t') == 's'
                cached_str = cell_el.get('t') == 'str'
                v_el = cell_el.find(f'{{{sn}}}v') or cell_el.find(f'{{{xm_ns}}}v')
                if v_el is not None and v_el.text is not None:
                    value_text = v_el.text
                    if is_string and strings is not None:
                        try:
                            value_text = strings[int(value_text)]
                        except (ValueError, IndexError):
                            pass
                location = cell_el.get('r', '')
                if formula_text or value_text:
                    cell = Cell()
                    _, cell.column, row_str = Cell.parse_cell_addr(location)
                    if row_str is not None:
                        cell.row = int(row_str)
                    cell.sheet = sheet
                    if not cached_str:
                        cell.formula = formula_text
                    cell.value = value_text
                    sheet.cells[location] = cell
                    for attr_name in cell_el.attrib:
                        if attr_name != 'r':
                            cell.attributes[attr_name] = cell_el.get(attr_name)

    def _load_worksheet_cells(self, sheet: Boundsheet, sheet_xml: Any) -> None:
        strings = self._get_shared_strings()
        sn = _OOXML_NS['s']
        root = sheet_xml
        sheet_data = _etfind(root, f'{{{sn}}}sheetData')
        if sheet_data is None:
            return
        for row_el in list(sheet_data):
            row_r = row_el.get('r', '')
            row_attribs: Dict[RowAttribute, str] = {}
            ht = row_el.get('ht')
            if ht is not None:
                row_attribs[RowAttribute.Height] = ht
            spans = row_el.get('spans')
            if spans is not None:
                row_attribs[RowAttribute.Spans] = spans
            if row_attribs:
                sheet.row_attributes[row_r] = row_attribs  # type: ignore
            for cell_el in list(row_el):
                tag = cell_el.tag
                if not (tag.endswith('}c') or tag == 'c'):
                    continue
                formula_text: str | None = None
                f_el = cell_el.find(f'{{{sn}}}f')
                if f_el is not None and f_el.text:
                    formula_text = f'={f_el.text}'
                value_text: str | None = None
                is_string = cell_el.get('t') == 's'
                v_el = cell_el.find(f'{{{sn}}}v')
                if v_el is not None and v_el.text is not None:
                    value_text = v_el.text
                    if is_string and strings is not None:
                        try:
                            value_text = strings[int(value_text)]
                        except (ValueError, IndexError):
                            pass
                location = cell_el.get('r', '')
                cell = Cell()
                _, cell.column, row_str = Cell.parse_cell_addr(location)
                if row_str is not None:
                    cell.row = int(row_str)
                cell.sheet = sheet
                cell.formula = formula_text
                cell.value = value_text
                sheet.cells[location] = cell
                for attr_name in cell_el.attrib:
                    if attr_name != 'r':
                        cell.attributes[attr_name] = cell_el.get(attr_name)

    def get_macrosheets(self) -> Dict[str, Boundsheet]:
        if self._macrosheets is None:
            self._macrosheets = {}
            infos = self._get_sheet_infos(['Macrosheet'])
            macrosheet_names = [info['sheet'].name for info in infos]
            sn = _OOXML_NS['s']
            xm_ns = _OOXML_NS['xm']
            for info in infos:
                if info['sheet_xml'] is not None:
                    self._load_macro_cells(info['sheet'], info['sheet_xml'], macrosheet_names)
                    root = info['sheet_xml']
                    fmt_pr = (
                        _etfind(root, f'{{{xm_ns}}}sheetFormatPr')
                        or _etfind(root, f'{{{sn}}}sheetFormatPr')
                    )
                    if fmt_pr is not None:
                        info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
                self._macrosheets[info['sheet'].name] = info['sheet']
        return self._macrosheets

    def get_worksheets(self) -> Dict[str, Boundsheet]:
        if self._worksheets is None:
            self._worksheets = {}
            infos = self._get_sheet_infos(['Worksheet'])
            sn = _OOXML_NS['s']
            for info in infos:
                if info['sheet_xml'] is not None:
                    self._load_worksheet_cells(info['sheet'], info['sheet_xml'])
                    root = info['sheet_xml']
                    fmt_pr = _etfind(root, f'{{{sn}}}sheetFormatPr')
                    if fmt_pr is not None:
                        info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
                self._worksheets[info['sheet'].name] = info['sheet']
        return self._worksheets

    def _get_style(self) -> Any:
        if self._style_xml is None:
            _, base_dir, _ = self._get_workbook_path()
            rels = self._get_rels()
            style_target = None
            for _, (target, rtype) in rels.items():
                if 'styles' in rtype:
                    style_target = target
                    break
            if style_target:
                self._style_xml = self._parse_xml(f'{base_dir}/{style_target}')
        return self._style_xml

    def _get_color_index(self, rgba_str: str) -> int | None:
        r = int(rgba_str[2:4], 16)
        g = int(rgba_str[4:6], 16)
        b = int(rgba_str[6:8], 16)
        if self._color_map is None:
            self._color_map = {}
            for cr, cg, cb, idx in _XLSM_COLOR_TABLE:
                if (cr, cg, cb) not in self._color_map:
                    self._color_map[(cr, cg, cb)] = idx
        return self._color_map.get((r, g, b))

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        data: Any = None
        not_exist = True
        not_implemented = False

        ms = self.get_macrosheets()
        if sheet_name not in ms:
            return data, not_exist, not_implemented
        sheet = ms[sheet_name]
        cell_addr = f'{col}{row}'
        tid = int(float(info_type_id))
        sn = _OOXML_NS['s']

        if tid == 17:
            style_xml = self._get_style()
            if row in sheet.row_attributes and RowAttribute.Height in sheet.row_attributes.get(row, {}):
                not_exist = False
                data = sheet.row_attributes[row][RowAttribute.Height]
            elif sheet.default_height is not None:
                data = sheet.default_height
            if data is not None:
                data = round(float(data) * 4) / 4
        else:
            style_xml = self._get_style()
            if style_xml is None:
                return data, not_exist, True
            not_exist = False
            cell_format = None
            font = None
            xfs = _etfind(style_xml, f'{{{sn}}}cellXfs')
            fonts_el = _etfind(style_xml, f'{{{sn}}}fonts')
            styles_el = _etfind(style_xml, f'{{{sn}}}cellStyles')
            style_xfs = _etfind(style_xml, f'{{{sn}}}cellStyleXfs')

            if cell_addr in sheet.cells:
                cell = sheet.cells[cell_addr]
                if 's' in cell.attributes and xfs is not None:
                    index = int(cell.attributes['s'])
                    xf_list = _etfindall(xfs, f'{{{sn}}}xf')
                    if index < len(xf_list):
                        cell_format = xf_list[index]
                        font_id_str = cell_format.get('fontId')
                        if font_id_str is not None and fonts_el is not None:
                            font_index = int(font_id_str)
                            font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                            if font_index < len(font_list):
                                font = font_list[font_index]

            if cell_format is None and styles_el is not None and style_xfs is not None:
                for cs in _etfindall(styles_el, f'{{{sn}}}cellStyle'):
                    if cs.get('name') == 'Normal':
                        xf_id = int(cs.get('xfId', '0'))
                        xf_list = _etfindall(style_xfs, f'{{{sn}}}xf')
                        if xf_id < len(xf_list):
                            cell_format = xf_list[xf_id]
                            font_id_str = cell_format.get('fontId')
                            if font_id_str is not None and fonts_el is not None:
                                font_index = int(font_id_str)
                                font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                                if font_index < len(font_list):
                                    font = font_list[font_index]
                        break

            if tid == 8:
                if cell_format is not None:
                    align = _etfind(cell_format, f'{{{sn}}}alignment')
                    if align is not None:
                        h_map = {
                            'general': 1, 'left': 2, 'center': 3, 'right': 4,
                            'fill': 5, 'justify': 6, 'centercontinuous': 7, 'distributed': 8,
                        }
                        data = h_map.get(align.get('horizontal', 'general').lower(), 1)
                    else:
                        data = 1
            elif tid == 19:
                if font is not None:
                    sz = _etfind(font, f'{{{sn}}}sz')
                    if sz is not None:
                        data = float(sz.get('val', '11'))
            elif tid == 24:
                if font is not None:
                    color_el = _etfind(font, f'{{{sn}}}color')
                    if color_el is not None:
                        rgb = color_el.get('rgb')
                        if rgb:
                            data = self._get_color_index(rgb)
                        else:
                            data = 1
                    else:
                        data = 1
            elif tid == 38:
                if cell_format is not None:
                    fill_id_str = cell_format.get('fillId')
                    if fill_id_str is not None:
                        fills_el = _etfind(style_xml, f'{{{sn}}}fills')
                        if fills_el is not None:
                            fill_list = _etfindall(fills_el, f'{{{sn}}}fill')
                            fill_id = int(fill_id_str)
                            if fill_id < len(fill_list):
                                pf = _etfind(fill_list[fill_id], f'{{{sn}}}patternFill')
                                if pf is not None:
                                    fg = _etfind(pf, f'{{{sn}}}fgColor')
                                    if fg is not None:
                                        rgb = fg.get('rgb')
                                        if rgb:
                                            data = self._get_color_index(rgb)
                                        else:
                                            data = 0
                                    else:
                                        data = 0
            elif tid == 50:
                v_map = {'top': 1, 'center': 2, 'bottom': 3, 'justify': 4, 'distributed': 5}
                if cell_format is not None:
                    align = _etfind(cell_format, f'{{{sn}}}alignment')
                    if align is not None:
                        data = v_map.get(align.get('vertical', 'bottom').lower(), 3)
                    else:
                        data = 3
                else:
                    data = 3
            else:
                not_implemented = True

        return data, not_exist, not_implemented

Classes

class XLSWrapper (data)
Expand source code Browse git
class XLSWrapper:
    def __init__(self, data: bytes | bytearray):
        import xlrd2
        self._xlrd2 = xlrd2
        self.xls_workbook = xlrd2.open_workbook(file_contents=bytes(data), formatting_info=True)
        self._workbook_name = 'workbook.xls'
        self._macrosheets: Dict[str, Boundsheet] | None = None
        self._worksheets: Dict[str, Boundsheet] | None = None
        self._defined_names: Dict[str, Any] | None = None
        control_chars = ''.join(map(chr, range(0, 32)))
        control_chars += ''.join(map(chr, range(127, 160)))
        control_chars += '\ufefe\uffff\ufeff\ufffe\uffef\ufff0\ufff1\ufff6\ufefd\udddd\ufffd'
        self._control_char_re = re.compile(f'[{re.escape(control_chars)}]')

    oNUM = 2
    oSTRG = 1
    oREF = -1
    oARR = 6

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def get_defined_names(self) -> Dict[str, Any]:
        if self._defined_names is None:
            self._defined_names = {}
            for name_obj, cells in self.xls_workbook.name_map.items():
                name = name_obj.lower()
                index = 1 if len(cells) > 1 else 0
                filtered_name = name.lower()
                if name != filtered_name:
                    if filtered_name in self._defined_names:
                        filtered_name = filtered_name + str(index)
                    if cells[0].result is not None:
                        self._defined_names[filtered_name] = cells[0].result.text

                if name in self._defined_names:
                    name = name + str(index)
                if cells[0].result is not None:
                    cell_location = cells[0].result.text
                    if cells[0].result.kind == self.oNUM:
                        self._defined_names[name] = cells[0].result.value
                    elif cells[0].result.kind == self.oSTRG:
                        self._defined_names[name] = cells[0].result.text
                    elif cells[0].result.kind == self.oARR:
                        self._defined_names[name] = cells[0].result.value
                    elif cells[0].result.kind == self.oREF:
                        if '$' in cell_location:
                            self._defined_names[name] = cells[0].result.text
                        else:
                            curr_cell = cells[0].result
                            if 'auto_open' in name:
                                coords = curr_cell.value[0].coords
                                r = int(coords[3])
                                c = int(coords[5])
                                sheet_name = curr_cell.text.split('!')[0].replace("'", '')
                                cell_ref = f'${Cell.convert_to_column_name(c)}${r}'
                                self._defined_names[name] = f'{sheet_name}!{cell_ref}'
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: Any = []
        name = name.lower().replace('[', '')
        if full_match:
            if name in self.get_defined_names():
                result = self._defined_names[name]  # type: ignore
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name):
                    result.append((defined_name, cell_address))
            if isinstance(result, list) and len(result) == 0:
                for defined_name, cell_address in self.get_defined_names().items():
                    lastidx = 0
                    match = True
                    for c in name:
                        idx = defined_name.find(c, lastidx)
                        if idx == -1:
                            match = False
                            break
                        lastidx = idx
                    if match:
                        result.append((defined_name, cell_address))
        return result

    def _load_cells(self, boundsheet: Boundsheet, xls_sheet: Any) -> None:
        try:
            for xls_cell in xls_sheet.get_used_cells():
                cell = Cell()
                cell.sheet = boundsheet
                if xls_cell.formula is not None and len(xls_cell.formula) > 0:
                    cell.formula = f'={xls_cell.formula}'
                cell.value = xls_cell.value
                cell.row = xls_cell.row + 1
                cell.column = Cell.convert_to_column_name(xls_cell.column + 1)
                if cell.value is not None or cell.formula is not None:
                    boundsheet.add_cell(cell)
        except Exception:
            pass

    def get_macrosheets(self) -> Dict[str, Boundsheet]:
        if self._macrosheets is None:
            import xlrd2
            self._macrosheets = {}
            for sheet in self.xls_workbook.sheets():
                if sheet.boundsheet_type == xlrd2.biffh.XL_MACROSHEET:
                    macrosheet = Boundsheet(sheet.name, 'Macrosheet')
                    self._load_cells(macrosheet, sheet)
                    self._macrosheets[sheet.name] = macrosheet
        return self._macrosheets

    def get_worksheets(self) -> Dict[str, Boundsheet]:
        if self._worksheets is None:
            import xlrd2
            self._worksheets = {}
            for sheet in self.xls_workbook.sheets():
                if sheet.boundsheet_type == xlrd2.biffh.XL_WORKSHEET:
                    worksheet = Boundsheet(sheet.name, 'Worksheet')
                    self._load_cells(worksheet, sheet)
                    self._worksheets[sheet.name] = worksheet
        return self._worksheets

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        sheet = self.xls_workbook.sheet_by_name(sheet_name)
        irow = int(row) - 1
        column = Cell.convert_to_column_index(col) - 1
        tid = int(float(info_type_id))
        data: Any = None
        not_exist = False
        not_implemented = False

        if tid == 5:
            data = sheet.cell(irow, column).value
        elif tid == 17:
            if irow in sheet.rowinfo_map:
                data = sheet.rowinfo_map[irow].height
            else:
                data = sheet.default_row_height
            data = round(Cell.convert_twip_to_point(data) * 4) / 4
        else:
            if (irow, column) in sheet.used_cells:
                cell = sheet.cell(irow, column)
                if cell.xf_index is not None and cell.xf_index < len(self.xls_workbook.xf_list):
                    fmt = self.xls_workbook.xf_list[cell.xf_index]
                    font = self.xls_workbook.font_list[fmt.font_index]
                else:
                    normal_style = self.xls_workbook.style_name_map['Normal'][1]
                    fmt = self.xls_workbook.xf_list[normal_style]
                    font = self.xls_workbook.font_list[fmt.font_index]
            else:
                normal_style = self.xls_workbook.style_name_map['Normal'][1]
                fmt = self.xls_workbook.xf_list[normal_style]
                font = self.xls_workbook.font_list[fmt.font_index]

            if tid == 8:
                data = fmt.alignment.hor_align + 1
            elif tid == 19:
                data = Cell.convert_twip_to_point(font.height)
            elif tid == 24:
                data = font.colour_index - 7 if font.colour_index > 7 else font.colour_index
            elif tid == 38:
                data = fmt.background.pattern_colour_index - 7 if font.colour_index > 7 else font.colour_index
            elif tid == 50:
                data = fmt.alignment.vert_align + 1
            else:
                not_implemented = True

        return data, not_exist, not_implemented

Class variables

var oNUM

The type of the None singleton.

var oSTRG

The type of the None singleton.

var oREF

The type of the None singleton.

var oARR

The type of the None singleton.

Methods

def get_xl_international_char(self, flag_name)
Expand source code Browse git
def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
    return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)
def get_workbook_name(self)
Expand source code Browse git
def get_workbook_name(self) -> str:
    return self._workbook_name
def get_defined_names(self)
Expand source code Browse git
def get_defined_names(self) -> Dict[str, Any]:
    if self._defined_names is None:
        self._defined_names = {}
        for name_obj, cells in self.xls_workbook.name_map.items():
            name = name_obj.lower()
            index = 1 if len(cells) > 1 else 0
            filtered_name = name.lower()
            if name != filtered_name:
                if filtered_name in self._defined_names:
                    filtered_name = filtered_name + str(index)
                if cells[0].result is not None:
                    self._defined_names[filtered_name] = cells[0].result.text

            if name in self._defined_names:
                name = name + str(index)
            if cells[0].result is not None:
                cell_location = cells[0].result.text
                if cells[0].result.kind == self.oNUM:
                    self._defined_names[name] = cells[0].result.value
                elif cells[0].result.kind == self.oSTRG:
                    self._defined_names[name] = cells[0].result.text
                elif cells[0].result.kind == self.oARR:
                    self._defined_names[name] = cells[0].result.value
                elif cells[0].result.kind == self.oREF:
                    if '$' in cell_location:
                        self._defined_names[name] = cells[0].result.text
                    else:
                        curr_cell = cells[0].result
                        if 'auto_open' in name:
                            coords = curr_cell.value[0].coords
                            r = int(coords[3])
                            c = int(coords[5])
                            sheet_name = curr_cell.text.split('!')[0].replace("'", '')
                            cell_ref = f'${Cell.convert_to_column_name(c)}${r}'
                            self._defined_names[name] = f'{sheet_name}!{cell_ref}'
    return self._defined_names
def get_defined_name(self, name, full_match=True)
Expand source code Browse git
def get_defined_name(self, name: str, full_match: bool = True) -> Any:
    result: Any = []
    name = name.lower().replace('[', '')
    if full_match:
        if name in self.get_defined_names():
            result = self._defined_names[name]  # type: ignore
    else:
        for defined_name, cell_address in self.get_defined_names().items():
            if defined_name.startswith(name):
                result.append((defined_name, cell_address))
        if isinstance(result, list) and len(result) == 0:
            for defined_name, cell_address in self.get_defined_names().items():
                lastidx = 0
                match = True
                for c in name:
                    idx = defined_name.find(c, lastidx)
                    if idx == -1:
                        match = False
                        break
                    lastidx = idx
                if match:
                    result.append((defined_name, cell_address))
    return result
def get_macrosheets(self)
Expand source code Browse git
def get_macrosheets(self) -> Dict[str, Boundsheet]:
    if self._macrosheets is None:
        import xlrd2
        self._macrosheets = {}
        for sheet in self.xls_workbook.sheets():
            if sheet.boundsheet_type == xlrd2.biffh.XL_MACROSHEET:
                macrosheet = Boundsheet(sheet.name, 'Macrosheet')
                self._load_cells(macrosheet, sheet)
                self._macrosheets[sheet.name] = macrosheet
    return self._macrosheets
def get_worksheets(self)
Expand source code Browse git
def get_worksheets(self) -> Dict[str, Boundsheet]:
    if self._worksheets is None:
        import xlrd2
        self._worksheets = {}
        for sheet in self.xls_workbook.sheets():
            if sheet.boundsheet_type == xlrd2.biffh.XL_WORKSHEET:
                worksheet = Boundsheet(sheet.name, 'Worksheet')
                self._load_cells(worksheet, sheet)
                self._worksheets[sheet.name] = worksheet
    return self._worksheets
def get_cell_info(self, sheet_name, col, row, info_type_id)
Expand source code Browse git
def get_cell_info(
    self, sheet_name: str, col: str, row: str, info_type_id: str,
) -> CellInfoResult:
    sheet = self.xls_workbook.sheet_by_name(sheet_name)
    irow = int(row) - 1
    column = Cell.convert_to_column_index(col) - 1
    tid = int(float(info_type_id))
    data: Any = None
    not_exist = False
    not_implemented = False

    if tid == 5:
        data = sheet.cell(irow, column).value
    elif tid == 17:
        if irow in sheet.rowinfo_map:
            data = sheet.rowinfo_map[irow].height
        else:
            data = sheet.default_row_height
        data = round(Cell.convert_twip_to_point(data) * 4) / 4
    else:
        if (irow, column) in sheet.used_cells:
            cell = sheet.cell(irow, column)
            if cell.xf_index is not None and cell.xf_index < len(self.xls_workbook.xf_list):
                fmt = self.xls_workbook.xf_list[cell.xf_index]
                font = self.xls_workbook.font_list[fmt.font_index]
            else:
                normal_style = self.xls_workbook.style_name_map['Normal'][1]
                fmt = self.xls_workbook.xf_list[normal_style]
                font = self.xls_workbook.font_list[fmt.font_index]
        else:
            normal_style = self.xls_workbook.style_name_map['Normal'][1]
            fmt = self.xls_workbook.xf_list[normal_style]
            font = self.xls_workbook.font_list[fmt.font_index]

        if tid == 8:
            data = fmt.alignment.hor_align + 1
        elif tid == 19:
            data = Cell.convert_twip_to_point(font.height)
        elif tid == 24:
            data = font.colour_index - 7 if font.colour_index > 7 else font.colour_index
        elif tid == 38:
            data = fmt.background.pattern_colour_index - 7 if font.colour_index > 7 else font.colour_index
        elif tid == 50:
            data = fmt.alignment.vert_align + 1
        else:
            not_implemented = True

    return data, not_exist, not_implemented
class XLSBWrapper (data)
Expand source code Browse git
class XLSBWrapper:
    def __init__(self, data: bytes | bytearray):
        from pyxlsb2 import open_workbook
        self._xlsb_workbook = open_workbook(BytesIO(data))
        self._workbook_name = 'workbook.xlsb'
        self._macrosheets: Dict[str, Boundsheet] | None = None
        self._worksheets: Dict[str, Boundsheet] | None = None
        self._defined_names: Dict[str, Any] | None = None

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def get_defined_names(self) -> Dict[str, Any]:
        if self._defined_names is None:
            names: Dict[str, Any] = {}
            for key, val in self._xlsb_workbook.defined_names.items():
                names[key.lower()] = key.lower(), val.formula
            self._defined_names = names
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: list[Any] = []
        if full_match:
            if name.lower() in self.get_defined_names():
                result.append(self.get_defined_names()[name.lower()])
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name.lower()):
                    result.append(cell_address)
        return result

    def _load_cells(self, boundsheet: Boundsheet) -> None:
        from pyxlsb2.formula import Formula
        row_cnt = 0
        with self._xlsb_workbook.get_sheet_by_name(boundsheet.name) as sheet:
            for row in sheet:
                if row_cnt > 1048576:
                    break
                row_cnt += 1
                column_cnt = 0
                for cell in row:
                    if column_cnt > 16384:
                        break
                    tmp_cell = Cell()
                    tmp_cell.row = cell.row_num + 1
                    tmp_cell.column = Cell.convert_to_column_name(cell.col + 1)
                    tmp_cell.value = cell.value
                    tmp_cell.sheet = boundsheet
                    formula_str = Formula.parse(cell.formula)
                    if formula_str._tokens:
                        try:
                            tmp_cell.formula = f'={formula_str.stringify(self._xlsb_workbook)}'
                        except NotImplementedError:
                            pass
                        except Exception:
                            pass
                    if tmp_cell.value is not None or tmp_cell.formula is not None:
                        boundsheet.cells[tmp_cell.get_local_address()] = tmp_cell
                    column_cnt += 1

    def get_macrosheets(self) -> Dict[str, Boundsheet]:
        if self._macrosheets is None:
            self._macrosheets = {}
            for xlsb_sheet in self._xlsb_workbook.sheets:
                if xlsb_sheet.type == 'macrosheet':
                    macrosheet = Boundsheet(xlsb_sheet.name, 'macrosheet')
                    self._load_cells(macrosheet)
                    self._macrosheets[macrosheet.name] = macrosheet
        return self._macrosheets

    def get_worksheets(self) -> Dict[str, Boundsheet]:
        if self._worksheets is None:
            self._worksheets = {}
            for xlsb_sheet in self._xlsb_workbook.sheets:
                if xlsb_sheet.type == 'worksheet':
                    worksheet = Boundsheet(xlsb_sheet.name, 'worksheet')
                    self._load_cells(worksheet)
                    self._worksheets[worksheet.name] = worksheet
        return self._worksheets

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        return None, False, True

Methods

def get_xl_international_char(self, flag_name)
Expand source code Browse git
def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
    return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)
def get_workbook_name(self)
Expand source code Browse git
def get_workbook_name(self) -> str:
    return self._workbook_name
def get_defined_names(self)
Expand source code Browse git
def get_defined_names(self) -> Dict[str, Any]:
    if self._defined_names is None:
        names: Dict[str, Any] = {}
        for key, val in self._xlsb_workbook.defined_names.items():
            names[key.lower()] = key.lower(), val.formula
        self._defined_names = names
    return self._defined_names
def get_defined_name(self, name, full_match=True)
Expand source code Browse git
def get_defined_name(self, name: str, full_match: bool = True) -> Any:
    result: list[Any] = []
    if full_match:
        if name.lower() in self.get_defined_names():
            result.append(self.get_defined_names()[name.lower()])
    else:
        for defined_name, cell_address in self.get_defined_names().items():
            if defined_name.startswith(name.lower()):
                result.append(cell_address)
    return result
def get_macrosheets(self)
Expand source code Browse git
def get_macrosheets(self) -> Dict[str, Boundsheet]:
    if self._macrosheets is None:
        self._macrosheets = {}
        for xlsb_sheet in self._xlsb_workbook.sheets:
            if xlsb_sheet.type == 'macrosheet':
                macrosheet = Boundsheet(xlsb_sheet.name, 'macrosheet')
                self._load_cells(macrosheet)
                self._macrosheets[macrosheet.name] = macrosheet
    return self._macrosheets
def get_worksheets(self)
Expand source code Browse git
def get_worksheets(self) -> Dict[str, Boundsheet]:
    if self._worksheets is None:
        self._worksheets = {}
        for xlsb_sheet in self._xlsb_workbook.sheets:
            if xlsb_sheet.type == 'worksheet':
                worksheet = Boundsheet(xlsb_sheet.name, 'worksheet')
                self._load_cells(worksheet)
                self._worksheets[worksheet.name] = worksheet
    return self._worksheets
def get_cell_info(self, sheet_name, col, row, info_type_id)
Expand source code Browse git
def get_cell_info(
    self, sheet_name: str, col: str, row: str, info_type_id: str,
) -> CellInfoResult:
    return None, False, True
class XLSMWrapper (data)
Expand source code Browse git
class XLSMWrapper:
    def __init__(self, data: bytes | bytearray):
        self._data = BytesIO(data)
        self._workbook_name = 'workbook.xlsm'
        self._content_types: Any = None
        self._workbook_xml: Any = None
        self._workbook_rels: Dict[str, Tuple[str, str]] | None = None
        self._defined_names: Dict[str, str] | None = None
        self._macrosheets: Dict[str, Boundsheet] | None = None
        self._worksheets: Dict[str, Boundsheet] | None = None
        self._shared_strings: List[str] | None = None
        self._style_xml: Any = None
        self._color_map: Dict[Tuple[int, int, int], int] | None = None

    def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
        return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)

    def get_workbook_name(self) -> str:
        return self._workbook_name

    def _get_zip_files(self, patterns: List[str] | None = None) -> Dict[str, bytes]:
        result: Dict[str, bytes] = {}
        if not patterns:
            patterns = ['*']
        with ZipFile(self._data) as zf:
            for name in zf.namelist():
                for pat in patterns:
                    if name == pat or fnmatch(name, pat):
                        result[name] = zf.read(name)
            if not result:
                for name in zf.namelist():
                    for pat in patterns:
                        alt_pat = pat.replace('/', '\\')
                        if name == alt_pat or fnmatch(name, alt_pat):
                            result[name.replace('\\', '/')] = zf.read(name)
        return result

    def _parse_xml(self, path: str, ignore_pattern: str | None = None) -> Any:
        try:
            from defusedxml.ElementTree import fromstring
        except ImportError:
            from xml.etree.ElementTree import fromstring
        if path.startswith('/'):
            path = path[1:]
        files = self._get_zip_files([path])
        if len(files) != 1:
            return None
        content = next(iter(files.values())).decode('utf-8')
        if ignore_pattern:
            content = re.sub(ignore_pattern, '', content)
        return fromstring(content)

    def _get_workbook_path(self) -> Tuple[str, str, str]:
        wb_path = 'xl/workbook.xml'
        ct = self._get_content_types()
        if ct is not None:
            for override in _etfindall(ct, './/ct:Override', _OOXML_NS) + _etfindall(ct, './/{%s}Override' % _OOXML_NS['ct']):
                ctype = override.get('ContentType', '')
                if 'sheet.main+xml' in ctype:
                    wb_path = override.get('PartName', wb_path).lstrip('/')
                    break
        wb_path = wb_path.lstrip('/')
        if '/' in wb_path:
            base_dir = wb_path[:wb_path.index('/')]
            name = wb_path[wb_path.index('/') + 1:]
        else:
            base_dir = ''
            name = wb_path
        return wb_path, base_dir, name

    def _get_content_types(self) -> Any:
        if self._content_types is None:
            self._content_types = self._parse_xml('[Content_Types].xml')
        return self._content_types

    def _get_workbook(self) -> Any:
        if self._workbook_xml is None:
            wb_path, _, _ = self._get_workbook_path()
            self._workbook_xml = self._parse_xml(wb_path)
        return self._workbook_xml

    def _get_rels(self) -> Dict[str, Tuple[str, str]]:
        if self._workbook_rels is None:
            self._workbook_rels = {}
            _, base_dir, name = self._get_workbook_path()
            rels_path = f'{base_dir}/_rels/{name}.rels'
            rels_xml = self._parse_xml(rels_path)
            if rels_xml is not None:
                ns = _OOXML_NS['pr']
                for rel in list(rels_xml):
                    tag = rel.tag
                    if tag == f'{{{ns}}}Relationship' or tag == 'Relationship':
                        rid = rel.get('Id', '')
                        target = rel.get('Target', '')
                        rtype = rel.get('Type', '')
                        self._workbook_rels[rid] = (target, rtype)
        return self._workbook_rels

    def _get_sheet_info(self, rid: str) -> Tuple[str | None, str | None]:
        rels = self._get_rels()
        if rid not in rels:
            return None, None
        target, rtype = rels[rid]
        if rtype in _MACRO_REL_TYPES:
            return 'Macrosheet', target
        elif rtype == _WORKSHEET_REL_TYPE:
            return 'Worksheet', target
        return 'Unknown', target

    def get_defined_names(self) -> Dict[str, str]:
        if self._defined_names is None:
            self._defined_names = {}
            wb = self._get_workbook()
            if wb is None:
                return self._defined_names
            sn = _OOXML_NS['s']
            dn_container = _etfind(wb, f'{{{sn}}}definedNames')
            if dn_container is None:
                return self._defined_names
            for dn in list(dn_container):
                name_attr = dn.get('name', '')
                name_key = name_attr.replace('_xlnm.', '').lower()
                cdata = dn.text or ''
                self._defined_names[name_key] = cdata
        return self._defined_names

    def get_defined_name(self, name: str, full_match: bool = True) -> Any:
        result: list[Any] = []
        name = name.lower()
        if full_match:
            if name in self.get_defined_names():
                result = self._defined_names[name]  # type: ignore
        else:
            for defined_name, cell_address in self.get_defined_names().items():
                if defined_name.startswith(name):
                    result.append((defined_name, cell_address))
        return result

    def _get_shared_strings(self) -> List[str] | None:
        if self._shared_strings is None:
            _, base_dir, _ = self._get_workbook_path()
            ss_xml = self._parse_xml(f'{base_dir}/sharedStrings.xml')
            if ss_xml is not None:
                sn = _OOXML_NS['s']
                self._shared_strings = []
                for si in _etfindall(ss_xml, f'{{{sn}}}si'):
                    t_elem = si.find(f'{{{sn}}}t')
                    if t_elem is not None and t_elem.text is not None:
                        self._shared_strings.append(t_elem.text)
                    else:
                        r_elems = si.findall(f'{{{sn}}}r')
                        if r_elems:
                            t_in_r = r_elems[0].find(f'{{{sn}}}t')
                            if t_in_r is not None and t_in_r.text is not None:
                                self._shared_strings.append(t_in_r.text)
                            else:
                                self._shared_strings.append('')
                        else:
                            self._shared_strings.append('')
        return self._shared_strings

    def _get_sheet_infos(self, types: List[str]) -> List[Dict[str, Any]]:
        result: list[Dict[str, Any]] = []
        wb = self._get_workbook()
        if wb is None:
            return result
        sn = _OOXML_NS['s']
        sheets_el = _etfind(wb, f'{{{sn}}}sheets')
        if sheets_el is None:
            return result
        _, base_dir, _ = self._get_workbook_path()
        seen: set[str] = set()
        for sheet_el in list(sheets_el):
            rid = sheet_el.get(f'{{{_OOXML_NS["r"]}}}id', '')
            name = sheet_el.get('name', '')
            sheet_type, rel_path = self._get_sheet_info(rid)
            if rel_path is not None and sheet_type in types and name not in seen:
                path = f'{base_dir}/{rel_path}'
                sheet = Boundsheet(name, sheet_type)
                sheet_xml = self._parse_xml(path, ignore_pattern=r'<c[^>]+/>')
                result.append({'sheet': sheet, 'sheet_path': path, 'sheet_xml': sheet_xml})
                seen.add(name)
        return result

    def _load_macro_cells(
        self, sheet: Boundsheet, sheet_xml: Any, macrosheet_names: List[str],
    ) -> None:
        strings = self._get_shared_strings()
        sn = _OOXML_NS['s']
        xm_ns = _OOXML_NS['xm']
        root = sheet_xml
        sheet_data = (
            _etfind(root, f'{{{xm_ns}}}sheetData')
            or _etfind(root, f'{{{sn}}}sheetData')
        )
        if sheet_data is None:
            return
        for row_el in list(sheet_data):
            row_r = row_el.get('r', '')
            row_attribs: Dict[RowAttribute, str] = {}
            ht = row_el.get('ht')
            if ht is not None:
                row_attribs[RowAttribute.Height] = ht
            spans = row_el.get('spans')
            if spans is not None:
                row_attribs[RowAttribute.Spans] = spans
            if row_attribs:
                sheet.row_attributes[row_r] = row_attribs  # type: ignore
            for cell_el in list(row_el):
                tag = cell_el.tag
                if not (tag.endswith('}c') or tag == 'c'):
                    continue
                formula_text: str | None = None
                f_el = cell_el.find(f'{{{sn}}}f') or cell_el.find(f'{{{xm_ns}}}f')
                if f_el is not None:
                    if f_el.get('bx') == '1':
                        text = f_el.text or ''
                        if text:
                            eq_pos = text.find('=')
                            if eq_pos > 0:
                                formula_text = f'=SET.NAME("{text[:eq_pos]}",{text[eq_pos + 1:]})'
                    else:
                        if f_el.text:
                            formula_text = f'={f_el.text}'
                if formula_text:
                    for ms_name in macrosheet_names:
                        if f'{ms_name.lower()}!' in formula_text.lower():
                            formula_text = re.sub(
                                f'{re.escape(ms_name)}!',
                                f"'{ms_name}'!",
                                formula_text,
                                flags=re.IGNORECASE,
                            )
                value_text: str | None = None
                is_string = cell_el.get('t') == 's'
                cached_str = cell_el.get('t') == 'str'
                v_el = cell_el.find(f'{{{sn}}}v') or cell_el.find(f'{{{xm_ns}}}v')
                if v_el is not None and v_el.text is not None:
                    value_text = v_el.text
                    if is_string and strings is not None:
                        try:
                            value_text = strings[int(value_text)]
                        except (ValueError, IndexError):
                            pass
                location = cell_el.get('r', '')
                if formula_text or value_text:
                    cell = Cell()
                    _, cell.column, row_str = Cell.parse_cell_addr(location)
                    if row_str is not None:
                        cell.row = int(row_str)
                    cell.sheet = sheet
                    if not cached_str:
                        cell.formula = formula_text
                    cell.value = value_text
                    sheet.cells[location] = cell
                    for attr_name in cell_el.attrib:
                        if attr_name != 'r':
                            cell.attributes[attr_name] = cell_el.get(attr_name)

    def _load_worksheet_cells(self, sheet: Boundsheet, sheet_xml: Any) -> None:
        strings = self._get_shared_strings()
        sn = _OOXML_NS['s']
        root = sheet_xml
        sheet_data = _etfind(root, f'{{{sn}}}sheetData')
        if sheet_data is None:
            return
        for row_el in list(sheet_data):
            row_r = row_el.get('r', '')
            row_attribs: Dict[RowAttribute, str] = {}
            ht = row_el.get('ht')
            if ht is not None:
                row_attribs[RowAttribute.Height] = ht
            spans = row_el.get('spans')
            if spans is not None:
                row_attribs[RowAttribute.Spans] = spans
            if row_attribs:
                sheet.row_attributes[row_r] = row_attribs  # type: ignore
            for cell_el in list(row_el):
                tag = cell_el.tag
                if not (tag.endswith('}c') or tag == 'c'):
                    continue
                formula_text: str | None = None
                f_el = cell_el.find(f'{{{sn}}}f')
                if f_el is not None and f_el.text:
                    formula_text = f'={f_el.text}'
                value_text: str | None = None
                is_string = cell_el.get('t') == 's'
                v_el = cell_el.find(f'{{{sn}}}v')
                if v_el is not None and v_el.text is not None:
                    value_text = v_el.text
                    if is_string and strings is not None:
                        try:
                            value_text = strings[int(value_text)]
                        except (ValueError, IndexError):
                            pass
                location = cell_el.get('r', '')
                cell = Cell()
                _, cell.column, row_str = Cell.parse_cell_addr(location)
                if row_str is not None:
                    cell.row = int(row_str)
                cell.sheet = sheet
                cell.formula = formula_text
                cell.value = value_text
                sheet.cells[location] = cell
                for attr_name in cell_el.attrib:
                    if attr_name != 'r':
                        cell.attributes[attr_name] = cell_el.get(attr_name)

    def get_macrosheets(self) -> Dict[str, Boundsheet]:
        if self._macrosheets is None:
            self._macrosheets = {}
            infos = self._get_sheet_infos(['Macrosheet'])
            macrosheet_names = [info['sheet'].name for info in infos]
            sn = _OOXML_NS['s']
            xm_ns = _OOXML_NS['xm']
            for info in infos:
                if info['sheet_xml'] is not None:
                    self._load_macro_cells(info['sheet'], info['sheet_xml'], macrosheet_names)
                    root = info['sheet_xml']
                    fmt_pr = (
                        _etfind(root, f'{{{xm_ns}}}sheetFormatPr')
                        or _etfind(root, f'{{{sn}}}sheetFormatPr')
                    )
                    if fmt_pr is not None:
                        info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
                self._macrosheets[info['sheet'].name] = info['sheet']
        return self._macrosheets

    def get_worksheets(self) -> Dict[str, Boundsheet]:
        if self._worksheets is None:
            self._worksheets = {}
            infos = self._get_sheet_infos(['Worksheet'])
            sn = _OOXML_NS['s']
            for info in infos:
                if info['sheet_xml'] is not None:
                    self._load_worksheet_cells(info['sheet'], info['sheet_xml'])
                    root = info['sheet_xml']
                    fmt_pr = _etfind(root, f'{{{sn}}}sheetFormatPr')
                    if fmt_pr is not None:
                        info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
                self._worksheets[info['sheet'].name] = info['sheet']
        return self._worksheets

    def _get_style(self) -> Any:
        if self._style_xml is None:
            _, base_dir, _ = self._get_workbook_path()
            rels = self._get_rels()
            style_target = None
            for _, (target, rtype) in rels.items():
                if 'styles' in rtype:
                    style_target = target
                    break
            if style_target:
                self._style_xml = self._parse_xml(f'{base_dir}/{style_target}')
        return self._style_xml

    def _get_color_index(self, rgba_str: str) -> int | None:
        r = int(rgba_str[2:4], 16)
        g = int(rgba_str[4:6], 16)
        b = int(rgba_str[6:8], 16)
        if self._color_map is None:
            self._color_map = {}
            for cr, cg, cb, idx in _XLSM_COLOR_TABLE:
                if (cr, cg, cb) not in self._color_map:
                    self._color_map[(cr, cg, cb)] = idx
        return self._color_map.get((r, g, b))

    def get_cell_info(
        self, sheet_name: str, col: str, row: str, info_type_id: str,
    ) -> CellInfoResult:
        data: Any = None
        not_exist = True
        not_implemented = False

        ms = self.get_macrosheets()
        if sheet_name not in ms:
            return data, not_exist, not_implemented
        sheet = ms[sheet_name]
        cell_addr = f'{col}{row}'
        tid = int(float(info_type_id))
        sn = _OOXML_NS['s']

        if tid == 17:
            style_xml = self._get_style()
            if row in sheet.row_attributes and RowAttribute.Height in sheet.row_attributes.get(row, {}):
                not_exist = False
                data = sheet.row_attributes[row][RowAttribute.Height]
            elif sheet.default_height is not None:
                data = sheet.default_height
            if data is not None:
                data = round(float(data) * 4) / 4
        else:
            style_xml = self._get_style()
            if style_xml is None:
                return data, not_exist, True
            not_exist = False
            cell_format = None
            font = None
            xfs = _etfind(style_xml, f'{{{sn}}}cellXfs')
            fonts_el = _etfind(style_xml, f'{{{sn}}}fonts')
            styles_el = _etfind(style_xml, f'{{{sn}}}cellStyles')
            style_xfs = _etfind(style_xml, f'{{{sn}}}cellStyleXfs')

            if cell_addr in sheet.cells:
                cell = sheet.cells[cell_addr]
                if 's' in cell.attributes and xfs is not None:
                    index = int(cell.attributes['s'])
                    xf_list = _etfindall(xfs, f'{{{sn}}}xf')
                    if index < len(xf_list):
                        cell_format = xf_list[index]
                        font_id_str = cell_format.get('fontId')
                        if font_id_str is not None and fonts_el is not None:
                            font_index = int(font_id_str)
                            font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                            if font_index < len(font_list):
                                font = font_list[font_index]

            if cell_format is None and styles_el is not None and style_xfs is not None:
                for cs in _etfindall(styles_el, f'{{{sn}}}cellStyle'):
                    if cs.get('name') == 'Normal':
                        xf_id = int(cs.get('xfId', '0'))
                        xf_list = _etfindall(style_xfs, f'{{{sn}}}xf')
                        if xf_id < len(xf_list):
                            cell_format = xf_list[xf_id]
                            font_id_str = cell_format.get('fontId')
                            if font_id_str is not None and fonts_el is not None:
                                font_index = int(font_id_str)
                                font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                                if font_index < len(font_list):
                                    font = font_list[font_index]
                        break

            if tid == 8:
                if cell_format is not None:
                    align = _etfind(cell_format, f'{{{sn}}}alignment')
                    if align is not None:
                        h_map = {
                            'general': 1, 'left': 2, 'center': 3, 'right': 4,
                            'fill': 5, 'justify': 6, 'centercontinuous': 7, 'distributed': 8,
                        }
                        data = h_map.get(align.get('horizontal', 'general').lower(), 1)
                    else:
                        data = 1
            elif tid == 19:
                if font is not None:
                    sz = _etfind(font, f'{{{sn}}}sz')
                    if sz is not None:
                        data = float(sz.get('val', '11'))
            elif tid == 24:
                if font is not None:
                    color_el = _etfind(font, f'{{{sn}}}color')
                    if color_el is not None:
                        rgb = color_el.get('rgb')
                        if rgb:
                            data = self._get_color_index(rgb)
                        else:
                            data = 1
                    else:
                        data = 1
            elif tid == 38:
                if cell_format is not None:
                    fill_id_str = cell_format.get('fillId')
                    if fill_id_str is not None:
                        fills_el = _etfind(style_xml, f'{{{sn}}}fills')
                        if fills_el is not None:
                            fill_list = _etfindall(fills_el, f'{{{sn}}}fill')
                            fill_id = int(fill_id_str)
                            if fill_id < len(fill_list):
                                pf = _etfind(fill_list[fill_id], f'{{{sn}}}patternFill')
                                if pf is not None:
                                    fg = _etfind(pf, f'{{{sn}}}fgColor')
                                    if fg is not None:
                                        rgb = fg.get('rgb')
                                        if rgb:
                                            data = self._get_color_index(rgb)
                                        else:
                                            data = 0
                                    else:
                                        data = 0
            elif tid == 50:
                v_map = {'top': 1, 'center': 2, 'bottom': 3, 'justify': 4, 'distributed': 5}
                if cell_format is not None:
                    align = _etfind(cell_format, f'{{{sn}}}alignment')
                    if align is not None:
                        data = v_map.get(align.get('vertical', 'bottom').lower(), 3)
                    else:
                        data = 3
                else:
                    data = 3
            else:
                not_implemented = True

        return data, not_exist, not_implemented

Methods

def get_xl_international_char(self, flag_name)
Expand source code Browse git
def get_xl_international_char(self, flag_name: XlApplicationInternational) -> str | None:
    return _XL_INTERNATIONAL_DEFAULTS.get(flag_name)
def get_workbook_name(self)
Expand source code Browse git
def get_workbook_name(self) -> str:
    return self._workbook_name
def get_defined_names(self)
Expand source code Browse git
def get_defined_names(self) -> Dict[str, str]:
    if self._defined_names is None:
        self._defined_names = {}
        wb = self._get_workbook()
        if wb is None:
            return self._defined_names
        sn = _OOXML_NS['s']
        dn_container = _etfind(wb, f'{{{sn}}}definedNames')
        if dn_container is None:
            return self._defined_names
        for dn in list(dn_container):
            name_attr = dn.get('name', '')
            name_key = name_attr.replace('_xlnm.', '').lower()
            cdata = dn.text or ''
            self._defined_names[name_key] = cdata
    return self._defined_names
def get_defined_name(self, name, full_match=True)
Expand source code Browse git
def get_defined_name(self, name: str, full_match: bool = True) -> Any:
    result: list[Any] = []
    name = name.lower()
    if full_match:
        if name in self.get_defined_names():
            result = self._defined_names[name]  # type: ignore
    else:
        for defined_name, cell_address in self.get_defined_names().items():
            if defined_name.startswith(name):
                result.append((defined_name, cell_address))
    return result
def get_macrosheets(self)
Expand source code Browse git
def get_macrosheets(self) -> Dict[str, Boundsheet]:
    if self._macrosheets is None:
        self._macrosheets = {}
        infos = self._get_sheet_infos(['Macrosheet'])
        macrosheet_names = [info['sheet'].name for info in infos]
        sn = _OOXML_NS['s']
        xm_ns = _OOXML_NS['xm']
        for info in infos:
            if info['sheet_xml'] is not None:
                self._load_macro_cells(info['sheet'], info['sheet_xml'], macrosheet_names)
                root = info['sheet_xml']
                fmt_pr = (
                    _etfind(root, f'{{{xm_ns}}}sheetFormatPr')
                    or _etfind(root, f'{{{sn}}}sheetFormatPr')
                )
                if fmt_pr is not None:
                    info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
            self._macrosheets[info['sheet'].name] = info['sheet']
    return self._macrosheets
def get_worksheets(self)
Expand source code Browse git
def get_worksheets(self) -> Dict[str, Boundsheet]:
    if self._worksheets is None:
        self._worksheets = {}
        infos = self._get_sheet_infos(['Worksheet'])
        sn = _OOXML_NS['s']
        for info in infos:
            if info['sheet_xml'] is not None:
                self._load_worksheet_cells(info['sheet'], info['sheet_xml'])
                root = info['sheet_xml']
                fmt_pr = _etfind(root, f'{{{sn}}}sheetFormatPr')
                if fmt_pr is not None:
                    info['sheet'].default_height = fmt_pr.get('defaultRowHeight')
            self._worksheets[info['sheet'].name] = info['sheet']
    return self._worksheets
def get_cell_info(self, sheet_name, col, row, info_type_id)
Expand source code Browse git
def get_cell_info(
    self, sheet_name: str, col: str, row: str, info_type_id: str,
) -> CellInfoResult:
    data: Any = None
    not_exist = True
    not_implemented = False

    ms = self.get_macrosheets()
    if sheet_name not in ms:
        return data, not_exist, not_implemented
    sheet = ms[sheet_name]
    cell_addr = f'{col}{row}'
    tid = int(float(info_type_id))
    sn = _OOXML_NS['s']

    if tid == 17:
        style_xml = self._get_style()
        if row in sheet.row_attributes and RowAttribute.Height in sheet.row_attributes.get(row, {}):
            not_exist = False
            data = sheet.row_attributes[row][RowAttribute.Height]
        elif sheet.default_height is not None:
            data = sheet.default_height
        if data is not None:
            data = round(float(data) * 4) / 4
    else:
        style_xml = self._get_style()
        if style_xml is None:
            return data, not_exist, True
        not_exist = False
        cell_format = None
        font = None
        xfs = _etfind(style_xml, f'{{{sn}}}cellXfs')
        fonts_el = _etfind(style_xml, f'{{{sn}}}fonts')
        styles_el = _etfind(style_xml, f'{{{sn}}}cellStyles')
        style_xfs = _etfind(style_xml, f'{{{sn}}}cellStyleXfs')

        if cell_addr in sheet.cells:
            cell = sheet.cells[cell_addr]
            if 's' in cell.attributes and xfs is not None:
                index = int(cell.attributes['s'])
                xf_list = _etfindall(xfs, f'{{{sn}}}xf')
                if index < len(xf_list):
                    cell_format = xf_list[index]
                    font_id_str = cell_format.get('fontId')
                    if font_id_str is not None and fonts_el is not None:
                        font_index = int(font_id_str)
                        font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                        if font_index < len(font_list):
                            font = font_list[font_index]

        if cell_format is None and styles_el is not None and style_xfs is not None:
            for cs in _etfindall(styles_el, f'{{{sn}}}cellStyle'):
                if cs.get('name') == 'Normal':
                    xf_id = int(cs.get('xfId', '0'))
                    xf_list = _etfindall(style_xfs, f'{{{sn}}}xf')
                    if xf_id < len(xf_list):
                        cell_format = xf_list[xf_id]
                        font_id_str = cell_format.get('fontId')
                        if font_id_str is not None and fonts_el is not None:
                            font_index = int(font_id_str)
                            font_list = _etfindall(fonts_el, f'{{{sn}}}font')
                            if font_index < len(font_list):
                                font = font_list[font_index]
                    break

        if tid == 8:
            if cell_format is not None:
                align = _etfind(cell_format, f'{{{sn}}}alignment')
                if align is not None:
                    h_map = {
                        'general': 1, 'left': 2, 'center': 3, 'right': 4,
                        'fill': 5, 'justify': 6, 'centercontinuous': 7, 'distributed': 8,
                    }
                    data = h_map.get(align.get('horizontal', 'general').lower(), 1)
                else:
                    data = 1
        elif tid == 19:
            if font is not None:
                sz = _etfind(font, f'{{{sn}}}sz')
                if sz is not None:
                    data = float(sz.get('val', '11'))
        elif tid == 24:
            if font is not None:
                color_el = _etfind(font, f'{{{sn}}}color')
                if color_el is not None:
                    rgb = color_el.get('rgb')
                    if rgb:
                        data = self._get_color_index(rgb)
                    else:
                        data = 1
                else:
                    data = 1
        elif tid == 38:
            if cell_format is not None:
                fill_id_str = cell_format.get('fillId')
                if fill_id_str is not None:
                    fills_el = _etfind(style_xml, f'{{{sn}}}fills')
                    if fills_el is not None:
                        fill_list = _etfindall(fills_el, f'{{{sn}}}fill')
                        fill_id = int(fill_id_str)
                        if fill_id < len(fill_list):
                            pf = _etfind(fill_list[fill_id], f'{{{sn}}}patternFill')
                            if pf is not None:
                                fg = _etfind(pf, f'{{{sn}}}fgColor')
                                if fg is not None:
                                    rgb = fg.get('rgb')
                                    if rgb:
                                        data = self._get_color_index(rgb)
                                    else:
                                        data = 0
                                else:
                                    data = 0
        elif tid == 50:
            v_map = {'top': 1, 'center': 2, 'bottom': 3, 'justify': 4, 'distributed': 5}
            if cell_format is not None:
                align = _etfind(cell_format, f'{{{sn}}}alignment')
                if align is not None:
                    data = v_map.get(align.get('vertical', 'bottom').lower(), 3)
                else:
                    data = 3
            else:
                data = 3
        else:
            not_implemented = True

    return data, not_exist, not_implemented