Module refinery.units.misc.datefix

Expand source code Browse git
from __future__ import annotations

import re

from datetime import datetime, timedelta

from refinery.units import Arg, Unit
from refinery.lib.decorators import linewise
from refinery.lib.tools import date_from_timestamp


_DATETIME_PATTERNS = {
    '%m/%d/%Y',
    '%a %b %d %Y %H:%M:%S',
    '%Y:%m:%d %H:%M:%S',
}
for comma in (',', ''):
    _DATETIME_PATTERNS.add(F'%m/%d/%Y{comma} %H:%M:%S')
    for month_name in ('%B', '%b'):
        for suffix in ('st', 'nd', 'rd', 'th', ''):
            _DATETIME_PATTERNS.add(F'{month_name} %d{suffix} %Y{comma} %H:%M:%S')
            _DATETIME_PATTERNS.add(F'{month_name} %d{comma} %Y')
            for day_name in ('%a', '%A'):
                # Wed, 20 Aug 2025 00:56:59
                _DATETIME_PATTERNS.add(F'{day_name}{comma} %d{suffix} {month_name} %Y %H:%M:%S')
                # Wed Mar 31 00:00:00 UTC 2027
                _DATETIME_PATTERNS.add(F'{day_name}{comma} {month_name} %d{suffix} %Y %H:%M:%S')
                _DATETIME_PATTERNS.add(F'{day_name}{comma} {month_name} %d{suffix} %H:%M:%S %Y')

for timesep in ('T', ' '):
    for millisecs in ('Z%f', '.%f', ''):
        _DATETIME_PATTERNS.add(F'%Y-%m-%d{timesep}%H:%M:%S{millisecs}')

_DATETIME_PATTERNS = sorted(_DATETIME_PATTERNS)
_TIMEZONE_PATTERN = R'''(?x)(?:
    (?:\(?(?:GMT|UTC)\)?)?
    (?P<info>
        (?P<p> [+-] )
        (?P<h> \d\d ):? (?![-T]|\s\d)
        (?P<m> \d\d )?
        (?:\s\([A-Z]{2,6}\))?
    )|
    (?P<name>\(?(?:GMT|UTC)\)?)
)
'''


class datefix(Unit):
    """
    Parses all kinds of date formats and unifies them into the same format.
    """

    def __init__(
        self,
        format: Arg(help='Specify the output format as a strftime-like string, using ISO by default.') = '%Y-%m-%d %H:%M:%S',
        dos: Arg('-d', help='Parse timestamps in DOS rather than Unix format.') = False
    ):
        super().__init__(format=format, dos=dos)

    @staticmethod
    def dostime(stamp: int) -> datetime:
        """
        Parses a given DOS timestamp into a datetime object.
        """
        d, t = stamp >> 16, stamp & 0xFFFF
        s = (t & 0x1F) << 1

        return datetime(
            year   = ((d & 0xFE00) >> 0x9) + 1980,  # noqa
            month  = ((d & 0x01E0) >> 0x5),         # noqa
            day    = ((d & 0x001F) >> 0x0),         # noqa
            hour   = ((t & 0xF800) >> 0xB),         # noqa
            minute = ((t & 0x07E0) >> 0x5),         # noqa
            second = 59 if s == 60 else s,          # noqa
        )

    def _format(self, dt: datetime) -> str:
        return dt.strftime(self.args.format)

    def _extract_timezone(self, data: str):
        def extract(match: re.Match[str]):
            nonlocal zone
            if zone is not None:
                raise ValueError
            h = int(h) if (h := match['h']) else 0
            m = int(m) if (m := match['m']) else 0
            zone = timedelta(hours=h, minutes=m)
            if match['p'] == '-':
                zone = -zone
            return ''
        zone = None
        data = re.sub(_TIMEZONE_PATTERN, extract, data)
        data = re.sub('\\s{2,}', ' ', data).strip()
        return data, zone

    @linewise
    def process(self, data: str) -> str:
        data = data.strip()

        # replace colons (i.e. for exiftool dates: 2017:01:01)
        if len(data) > 10 and data[4] == ':' and data[7] == ':':
            data = F'{data[0:4]}-{data[5:7]}-{data[8:]}'

        # strips Z at end (i.e. 20171022055144Z)
        if data.endswith('Z'):
            data = data[:-1]

        if data.startswith('0x'):
            try:
                data = str(int(data, 16))
            except Exception:
                pass

        # parses timestamps and dates without much format
        if data.isdigit():
            time_stamp = int(data)
            if len(data) > 14:
                raise Exception('cannot parse all-numeric string as date: %s' % data)
            elif len(data) == 14:
                # i.e. 20111020193727
                return self._format(datetime.strptime(data, '%Y%m%d%H%M%S'))
            elif len(data) == 13:
                # i.e. 1458016535000
                time_stamp //= 1000
                data = data[:-3]
            if self.args.dos:
                return self._format(self.dostime(time_stamp))
            else:
                return self._format(date_from_timestamp(time_stamp))

        try:
            data, time_delta = self._extract_timezone(data)
        except ValueError:
            return data

        for f in _DATETIME_PATTERNS:
            try:
                dt = datetime.strptime(data, f)
            except ValueError:
                continue
            return self._format(dt if time_delta is None else dt - time_delta)

        return data

Classes

class datefix (format='%Y-%m-%d %H:%M:%S', dos=False)

Parses all kinds of date formats and unifies them into the same format.

Expand source code Browse git
class datefix(Unit):
    """
    Parses all kinds of date formats and unifies them into the same format.
    """

    def __init__(
        self,
        format: Arg(help='Specify the output format as a strftime-like string, using ISO by default.') = '%Y-%m-%d %H:%M:%S',
        dos: Arg('-d', help='Parse timestamps in DOS rather than Unix format.') = False
    ):
        super().__init__(format=format, dos=dos)

    @staticmethod
    def dostime(stamp: int) -> datetime:
        """
        Parses a given DOS timestamp into a datetime object.
        """
        d, t = stamp >> 16, stamp & 0xFFFF
        s = (t & 0x1F) << 1

        return datetime(
            year   = ((d & 0xFE00) >> 0x9) + 1980,  # noqa
            month  = ((d & 0x01E0) >> 0x5),         # noqa
            day    = ((d & 0x001F) >> 0x0),         # noqa
            hour   = ((t & 0xF800) >> 0xB),         # noqa
            minute = ((t & 0x07E0) >> 0x5),         # noqa
            second = 59 if s == 60 else s,          # noqa
        )

    def _format(self, dt: datetime) -> str:
        return dt.strftime(self.args.format)

    def _extract_timezone(self, data: str):
        def extract(match: re.Match[str]):
            nonlocal zone
            if zone is not None:
                raise ValueError
            h = int(h) if (h := match['h']) else 0
            m = int(m) if (m := match['m']) else 0
            zone = timedelta(hours=h, minutes=m)
            if match['p'] == '-':
                zone = -zone
            return ''
        zone = None
        data = re.sub(_TIMEZONE_PATTERN, extract, data)
        data = re.sub('\\s{2,}', ' ', data).strip()
        return data, zone

    @linewise
    def process(self, data: str) -> str:
        data = data.strip()

        # replace colons (i.e. for exiftool dates: 2017:01:01)
        if len(data) > 10 and data[4] == ':' and data[7] == ':':
            data = F'{data[0:4]}-{data[5:7]}-{data[8:]}'

        # strips Z at end (i.e. 20171022055144Z)
        if data.endswith('Z'):
            data = data[:-1]

        if data.startswith('0x'):
            try:
                data = str(int(data, 16))
            except Exception:
                pass

        # parses timestamps and dates without much format
        if data.isdigit():
            time_stamp = int(data)
            if len(data) > 14:
                raise Exception('cannot parse all-numeric string as date: %s' % data)
            elif len(data) == 14:
                # i.e. 20111020193727
                return self._format(datetime.strptime(data, '%Y%m%d%H%M%S'))
            elif len(data) == 13:
                # i.e. 1458016535000
                time_stamp //= 1000
                data = data[:-3]
            if self.args.dos:
                return self._format(self.dostime(time_stamp))
            else:
                return self._format(date_from_timestamp(time_stamp))

        try:
            data, time_delta = self._extract_timezone(data)
        except ValueError:
            return data

        for f in _DATETIME_PATTERNS:
            try:
                dt = datetime.strptime(data, f)
            except ValueError:
                continue
            return self._format(dt if time_delta is None else dt - time_delta)

        return data

Ancestors

Subclasses

Class variables

var required_dependencies
var optional_dependencies
var console
var reverse

Static methods

def dostime(stamp)

Parses a given DOS timestamp into a datetime object.

Expand source code Browse git
@staticmethod
def dostime(stamp: int) -> datetime:
    """
    Parses a given DOS timestamp into a datetime object.
    """
    d, t = stamp >> 16, stamp & 0xFFFF
    s = (t & 0x1F) << 1

    return datetime(
        year   = ((d & 0xFE00) >> 0x9) + 1980,  # noqa
        month  = ((d & 0x01E0) >> 0x5),         # noqa
        day    = ((d & 0x001F) >> 0x0),         # noqa
        hour   = ((t & 0xF800) >> 0xB),         # noqa
        minute = ((t & 0x07E0) >> 0x5),         # noqa
        second = 59 if s == 60 else s,          # noqa
    )

Inherited members