Module refinery.lib.mime

File type related functions.

Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
File type related functions.
"""
import functools

from refinery.lib.magic import magic, magicparse


class NoMagicAvailable(ModuleNotFoundError):
    """
    Raised by `FileMagicInfo` when the imported `magic` object is falsy, i.e. no
    magic backend can be used for file type detection.
    """


FileTypeMap = {
    'application/x-setupscript': 'ini',
    'applicaiton/x-bytecode.python': 'pyc',
    'application/acad': 'dwg',
    'application/arj': 'arj',
    'application/book': 'book',
    'application/dos-exe': 'exe',
    'application/drafting': 'drw',
    'application/dxf': 'dxf',
    'application/ecmascript': 'js',
    'application/epub+zip': 'epub',
    'application/excel': 'xls',
    'application/exe': 'exe',
    'application/gnutar': 'tgz',
    'application/gzip': 'gz',
    'application/hlp': 'hlp',
    'application/inf': 'inf',
    'application/java-archive': 'jar',
    'application/java-byte-code': 'class',
    'application/java': 'class',
    'application/javascript': 'js',
    'application/json': 'json',
    'application/ld+json': 'jsonld',
    'application/lha': 'lha',
    'application/lzx': 'lzx',
    'application/mac-binary': 'bin',
    'application/mac-compactpro': 'cpt',
    'application/macbinary': 'bin',
    'application/mime': 'aps',
    'application/msdos-windows': 'exe',
    'application/mspowerpoint': 'ppt',
    'application/msword': 'doc',
    'application/octet-stream': 'bin',
    'application/ogg': 'ogg',
    'application/pdf': 'pdf',
    'application/plain': 'text',
    'application/postscript': 'ps',
    'application/powerpoint': 'ppt',
    'application/rtf': 'rtf',
    'application/vnd.amazon.ebook': 'azw',
    'application/vnd.apple.installer+xml': 'mpkg',
    'application/vnd.hp-pcl': 'pcl',
    'application/vnd.lotus-1-2-3': '123',
    'application/vnd.microsoft.portable-executable': 'exe',
    'application/vnd.mozilla.xul+xml': 'xul',
    'application/vnd.ms-cab-compressed': 'cab',
    'application/vnd.ms-excel': 'xls',
    'application/vnd.ms-fontobject': 'eot',
    'application/vnd.ms-outlook': 'eml',
    'application/vnd.ms-powerpoint': 'ppt',
    'application/vnd.oasis.opendocument.presentation': 'odp',
    'application/vnd.oasis.opendocument.spreadsheet': 'ods',
    'application/vnd.oasis.opendocument.text': 'odt',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
    'application/vnd.rar': 'rar',
    'application/vnd.rn-realmedia': 'rm',
    'application/vnd.visio': 'vsd',
    'application/vocaltec-media-desc': 'vmd',
    'application/x-7z-compressed': '7z',
    'application/x-abiword': 'abw',
    'application/x-binary': 'bin',
    'application/x-bsh': 'sh',
    'application/x-bzip': 'bz',
    'application/x-bzip2': 'bz2',
    'application/x-cdlink': 'vcd',
    'application/x-compactpro': 'cpt',
    'application/x-compress': 'z',
    'application/x-compressed': 'gz',
    'application/x-cpt': 'cpt',
    'application/x-csh': 'csh',
    'application/x-dosexec': 'exe',
    'application/x-dvi': 'dvi',
    'application/x-elf-executable': 'elf',
    'application/x-excel': 'xls',
    'application/x-exe': 'exe',
    'application/x-executable': 'elf',
    'application/x-freearc': 'arc',
    'application/x-gzip': 'gz',
    'application/x-helpfile': 'hlp',
    'application/x-httpd-php': 'php',
    'application/x-java-class': 'class',
    'application/x-java-commerce': 'jcm',
    'application/x-javascript': 'js',
    'application/x-latex': 'latex',
    'application/x-lha': 'lha',
    'application/x-lisp': 'lsp',
    'application/x-lzh': 'lzh',
    'application/x-lzx': 'lzx',
    'application/x-macbinary': 'bin',
    'application/x-midi': 'mid',
    'application/x-mplayer2': 'asx',
    'application/x-msdos-program': 'exe',
    'application/x-msdownload': 'exe',
    'application/x-msexcel': 'xls',
    'application/x-msi': 'msi',
    'application/x-mspowerpoint': 'ppt',
    'application/x-navi-animation': 'ani',
    'application/x-pcl': 'pcl',
    'application/x-pointplus': 'css',
    'application/x-rtf': 'rtf',
    'application/x-sh': 'sh',
    'application/x-shar': 'sh',
    'application/x-shockwave-flash': 'swf',
    'application/x-tar': 'tar',
    'application/x-tcl': 'tcl',
    'application/x-troff-man': 'man',
    'application/x-troff-msvideo': 'avi',
    'application/x-visio': 'vsd',
    'application/x-vrml': 'vrml',
    'application/x-winexe': 'exe',
    'application/x-winhelp': 'hlp',
    'application/x-zip-compressed': 'zip',
    'application/x-zoo': 'zoo',
    'application/xhtml+xml': 'xhtml',
    'application/xml': 'xml',
    'application/zip': 'zip',
    'audio/aac': 'aac',
    'audio/aiff': 'aiff',
    'audio/basic': 'au',
    'audio/midi': 'mid',
    'audio/mod': 'mod',
    'audio/mpeg': 'mpg',
    'audio/mpeg3': 'mp3',
    'audio/ogg': 'ogg',
    'audio/opus': 'opus',
    'audio/wav': 'wav',
    'audio/webm': 'webm',
    'audio/x-aiff': 'aiff',
    'audio/x-au': 'au',
    'audio/x-jam': 'jam',
    'audio/x-mid': 'mid',
    'audio/x-midi': 'mid',
    'audio/x-mod': 'mod',
    'audio/x-mpeg-3': 'mp3',
    'audio/x-mpeg': 'mp2',
    'audio/x-mpequrl': 'm3u',
    'audio/x-pn-realaudio-plugin': 'ra',
    'audio/x-pn-realaudio': 'rm',
    'audio/x-realaudio': 'ra',
    'audio/x-wav': 'wav',
    'audio/xm': 'xm',
    'font/otf': 'otf',
    'font/ttf': 'ttf',
    'font/woff': 'woff',
    'font/woff2': 'woff2',
    'image/bmp': 'bmp',
    'image/gif': 'gif',
    'image/jpeg': 'jpg',
    'image/pict': 'pic',
    'image/pjpeg': 'jpg',
    'image/png': 'png',
    'image/svg+xml': 'svg',
    'image/tiff': 'tif',
    'image/vnd.dwg': 'dwg',
    'image/vnd.microsoft.icon': 'ico',
    'image/webp': 'webp',
    'image/x-3ds': '3ds',
    'image/x-dwg': 'dwg',
    'image/x-icon': 'ico',
    'image/x-jg': 'art',
    'image/x-jps': 'jps',
    'image/x-pcx': 'pcx',
    'image/x-pict': 'pct',
    'image/x-quicktime': 'qtif',
    'image/x-tiff': 'tif',
    'image/x-windows-bmp': 'bmp',
    'image/x-xpixmap': 'pm',
    'model/vrml': 'vrml',
    'multipart/x-gzip': 'gz',
    'multipart/x-zip': 'zip',
    'music/crescendo': 'mid',
    'text/asp': 'asp',
    'text/calendar': 'ics',
    'text/css': 'css',
    'text/csv': 'csv',
    'text/ecmascript': 'js',
    'text/html': 'html',
    'text/javascript': 'js',
    'text/pascal': 'pas',
    'text/plain': 'txt',
    'text/richtext': 'rtf',
    'text/sgml': 'sgml',
    'text/uri-list': 'uri',
    'text/webviewhtml': 'htt',
    'text/x-asm': 'asm',
    'text/x-c': 'c',
    'text/x-component': 'htc',
    'text/x-h': 'h',
    'text/x-java-source': 'java',
    'text/x-script.lisp': 'lsp',
    'text/x-script.perl-module': 'pm',
    'text/x-script.perl': 'pl',
    'text/x-script.phyton': 'py',
    'text/x-script.sh': 'sh',
    'text/x-script.tcl': 'tcl',
    'text/x-scriptzsh': 'zsh',
    'text/x-server-parsed-html': 'shtml',
    'text/x-sgml': 'sgml',
    'text/xml': 'xml',
    'video/3gpp': '3gp',
    'video/3gpp2': '3g2',
    'video/avi': 'avi',
    'video/dl': 'dl',
    'video/mp2t': 'ts',
    'video/mp4': 'mp4',
    'video/mpeg': 'mpeg',
    'video/msvideo': 'avi',
    'video/ogg': 'ogg',
    'video/quicktime': 'mov',
    'video/vnd.rn-realvideo': 'rv',
    'video/webm': 'webm',
    'video/x-dl': 'dl',
    'video/x-dv': 'dif',
    'video/x-mpeg': 'mp4',
    'video/x-mpeq2a': 'mp2',
    'video/x-ms-asf-plugin': 'asx',
    'video/x-ms-asf': 'asf',
    'video/x-msvideo': 'avi',
    'video/x-sgi-movie': 'movie',
    'vms/exe': 'exe',
    'windows/metafile': 'wmf',
    'x-conference/x-cooltalk': 'ice',
    'x-music/x-midi': 'mid',
    'x-world/x-3dmf': '3dmf',
    'x-world/x-vrml': 'vrml',
}


def file_extension(mime, default='bin'):
    """
    Look up the file extension registered for the given MIME type in `FileTypeMap`.
    The lookup is an exact match on the key; when the MIME type is not present,
    the value of `default` is returned instead.
    """
    try:
        return FileTypeMap[mime]
    except KeyError:
        return default


class FileMagicInfo:
    """
    Determines MIME type, description, and a suggested file extension for a buffer.

    The MIME type and the description are both obtained from `magicparse`. Any parameters that
    follow a semicolon in the MIME type are dropped and the remainder is lower-cased before it
    is looked up in `FileTypeMap`; `default` is the extension used when the lookup fails.

    Adjustments applied to the looked-up extension:

    - A description equal to `Microsoft OOXML` forces the extension `docx`.
    - For `exe`, a description containing `(DLL)` yields `dll` and one containing `(native)`
      yields `sys`.
    - For `gz`, `gzip`, and `bz2`, and only when `decompress` is true, a prefix of the data is
      decompressed and classified recursively; on success, the extension takes the form
      `inner.outer`, e.g. `tar.gz`.

    Raises `NoMagicAvailable` when the imported `magic` object is falsy.
    """
    extension: str
    description: str
    mime: str

    # Stop decompressing once at least this many bytes of output have been collected.
    _GZIP_PEEK_MAXIMUM = 1024
    # The inner type is only determined if strictly more than this many bytes were decompressed.
    _GZIP_PEEK_MINIMUM = 64
    # Number of compressed input bytes fed to the decompressor per iteration.
    _GZIP_DC_CHUNK_LEN = 16

    def __init__(self, data, default='bin', decompress=True):
        if not magic:
            raise NoMagicAvailable
        if not isinstance(data, bytes):
            data = bytes(data)
        mime = magicparse(data, mime=True)
        self.mime = mime.split(';')[0].lower()
        self.description = magicparse(data)
        extension = FileTypeMap.get(self.mime, default)
        if self.description == 'Microsoft OOXML':
            extension = 'docx'
        if extension == 'exe':
            if '(DLL)' in self.description:
                extension = 'dll'
            elif '(native)' in self.description:
                extension = 'sys'
        elif extension in ('gz', 'gzip', 'bz2') and decompress:
            buffer = self._peek_decompressed(data, extension)
            if len(buffer) > self._GZIP_PEEK_MINIMUM:
                try:
                    inner = FileMagicInfo(buffer, default).extension
                except Exception:
                    # Best effort only: keep the plain archive extension if the
                    # decompressed prefix cannot be classified.
                    pass
                else:
                    extension = F'{inner}.{extension}'
        self.extension = extension

    def _peek_decompressed(self, data, extension):
        """
        Decompress a prefix of `data` and return it as a bytearray. A bz2 decompressor is used
        when `extension` is `bz2`, a zlib decompressor otherwise. Decompression stops at the
        first error, at the end of the compressed stream, at the end of the input, or as soon as
        at least `_GZIP_PEEK_MAXIMUM` bytes of output are available.
        """
        if extension == 'bz2':
            import bz2
            dc = bz2.BZ2Decompressor()
        else:
            import zlib
            # wbits=0x10 (16) makes zlib expect gzip framing rather than a zlib header.
            dc = zlib.decompressobj(0x10)
        mv = memoryview(data)
        cursor = 0
        buffer = bytearray()
        while cursor < len(mv):
            end = cursor + self._GZIP_DC_CHUNK_LEN
            try:
                buffer.extend(dc.decompress(mv[cursor:end]))
            except Exception:
                # Corrupt or truncated stream: work with whatever was recovered so far.
                break
            cursor = end
            # Once the stream has ended, no further output can be produced; without this check
            # the loop would keep feeding the remaining input in tiny chunks for no benefit.
            if dc.eof or len(buffer) >= self._GZIP_PEEK_MAXIMUM:
                break
        return buffer


@functools.lru_cache(maxsize=None)
def get_cached_file_magic_info(data):
    """
    Return a `FileMagicInfo` for the given data, constructed with default options. Results are
    memoized in an unbounded `functools.lru_cache`, so `data` has to be hashable and every
    distinct input remains referenced by the cache.
    """
    info = FileMagicInfo(data)
    return info

Functions

def file_extension(mime, default='bin')
Expand source code Browse git
def file_extension(mime, default='bin'):
    """
    Look up the file extension registered for the given MIME type in `FileTypeMap`.
    The lookup is an exact match on the key; when the MIME type is not present,
    the value of `default` is returned instead.
    """
    try:
        return FileTypeMap[mime]
    except KeyError:
        return default
def get_cached_file_magic_info(data)
Expand source code Browse git
@functools.lru_cache(maxsize=None)
def get_cached_file_magic_info(data):
    """
    Return a `FileMagicInfo` for the given data, constructed with default options. Results are
    memoized in an unbounded `functools.lru_cache`, so `data` has to be hashable and every
    distinct input remains referenced by the cache.
    """
    info = FileMagicInfo(data)
    return info

Classes

class NoMagicAvailable (*args, **kwargs)

Module not found.

Expand source code Browse git
class NoMagicAvailable(ModuleNotFoundError):
    """
    Raised by `FileMagicInfo` when the imported `magic` object is falsy, i.e. no
    magic backend can be used for file type detection.
    """

Ancestors

  • builtins.ModuleNotFoundError
  • builtins.ImportError
  • builtins.Exception
  • builtins.BaseException
class FileMagicInfo (data, default='bin', decompress=True)
Expand source code Browse git
class FileMagicInfo:
    """
    Determines MIME type, description, and a suggested file extension for a buffer.

    The MIME type and the description are both obtained from `magicparse`. Any parameters that
    follow a semicolon in the MIME type are dropped and the remainder is lower-cased before it
    is looked up in `FileTypeMap`; `default` is the extension used when the lookup fails.

    Adjustments applied to the looked-up extension:

    - A description equal to `Microsoft OOXML` forces the extension `docx`.
    - For `exe`, a description containing `(DLL)` yields `dll` and one containing `(native)`
      yields `sys`.
    - For `gz`, `gzip`, and `bz2`, and only when `decompress` is true, a prefix of the data is
      decompressed and classified recursively; on success, the extension takes the form
      `inner.outer`, e.g. `tar.gz`.

    Raises `NoMagicAvailable` when the imported `magic` object is falsy.
    """
    extension: str
    description: str
    mime: str

    # Stop decompressing once at least this many bytes of output have been collected.
    _GZIP_PEEK_MAXIMUM = 1024
    # The inner type is only determined if strictly more than this many bytes were decompressed.
    _GZIP_PEEK_MINIMUM = 64
    # Number of compressed input bytes fed to the decompressor per iteration.
    _GZIP_DC_CHUNK_LEN = 16

    def __init__(self, data, default='bin', decompress=True):
        if not magic:
            raise NoMagicAvailable
        if not isinstance(data, bytes):
            data = bytes(data)
        mime = magicparse(data, mime=True)
        self.mime = mime.split(';')[0].lower()
        self.description = magicparse(data)
        extension = FileTypeMap.get(self.mime, default)
        if self.description == 'Microsoft OOXML':
            extension = 'docx'
        if extension == 'exe':
            if '(DLL)' in self.description:
                extension = 'dll'
            elif '(native)' in self.description:
                extension = 'sys'
        elif extension in ('gz', 'gzip', 'bz2') and decompress:
            buffer = self._peek_decompressed(data, extension)
            if len(buffer) > self._GZIP_PEEK_MINIMUM:
                try:
                    inner = FileMagicInfo(buffer, default).extension
                except Exception:
                    # Best effort only: keep the plain archive extension if the
                    # decompressed prefix cannot be classified.
                    pass
                else:
                    extension = F'{inner}.{extension}'
        self.extension = extension

    def _peek_decompressed(self, data, extension):
        """
        Decompress a prefix of `data` and return it as a bytearray. A bz2 decompressor is used
        when `extension` is `bz2`, a zlib decompressor otherwise. Decompression stops at the
        first error, at the end of the compressed stream, at the end of the input, or as soon as
        at least `_GZIP_PEEK_MAXIMUM` bytes of output are available.
        """
        if extension == 'bz2':
            import bz2
            dc = bz2.BZ2Decompressor()
        else:
            import zlib
            # wbits=0x10 (16) makes zlib expect gzip framing rather than a zlib header.
            dc = zlib.decompressobj(0x10)
        mv = memoryview(data)
        cursor = 0
        buffer = bytearray()
        while cursor < len(mv):
            end = cursor + self._GZIP_DC_CHUNK_LEN
            try:
                buffer.extend(dc.decompress(mv[cursor:end]))
            except Exception:
                # Corrupt or truncated stream: work with whatever was recovered so far.
                break
            cursor = end
            # Once the stream has ended, no further output can be produced; without this check
            # the loop would keep feeding the remaining input in tiny chunks for no benefit.
            if dc.eof or len(buffer) >= self._GZIP_PEEK_MAXIMUM:
                break
        return buffer

Class variables

var extension
var description
var mime