Module refinery.units.encoding.url
Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
from urllib.parse import unquote_to_bytes
from refinery.units import Arg, Unit
class url(Unit):
"""
Decodes and encodes URL-encoding, which preserves only alphanumeric characters and the
following symbols: `_`, `.`, `-`, `~`, `\\`, `/`. Every other character is escaped by
hex-encoding it and prefixing it with a percent symbol.
"""
def __init__(
self,
plus: Arg.Switch('-p', help='also replace plus signs by spaces') = False,
hex : Arg.Switch('-x', help='hex encode every character in reverse mode') = False
):
super().__init__(plus=plus, hex=hex)
def process(self, data):
if self.args.plus:
data = data.replace(B'+', B' ')
data = unquote_to_bytes(bytes(data))
data = re.sub(
B'%[uU]([0-9a-fA-F]{4})',
lambda m: int(m[1], 16).to_bytes(2, 'little'),
data)
return data
def reverse(self, data):
if self.args.hex:
result = bytearray(len(data) * 3)
offset = 0
for byte in data:
result[offset + 0] = 0x25
offset += 1
result[offset:offset + 2] = B'%02X' % byte
offset += 2
return result
elif self.args.plus:
def replace(m):
c = m[0][0]
return b'+' if c == 0x20 else B'%%%02X' % c
else:
def replace(m):
return B'%%%02X' % m[0][0]
return re.sub(B'[^a-zA-Z0-9_.-~\\/]', replace, data)
Classes
class url (plus=False, hex=False)
-
Decodes and encodes URL-encoding, which preserves only alphanumeric characters and the following symbols:
_
,.
,-
,~
,\
,/
. Every other character is escaped by hex-encoding it and prefixing it with a percent symbol.Expand source code Browse git
class url(Unit): """ Decodes and encodes URL-encoding, which preserves only alphanumeric characters and the following symbols: `_`, `.`, `-`, `~`, `\\`, `/`. Every other character is escaped by hex-encoding it and prefixing it with a percent symbol. """ def __init__( self, plus: Arg.Switch('-p', help='also replace plus signs by spaces') = False, hex : Arg.Switch('-x', help='hex encode every character in reverse mode') = False ): super().__init__(plus=plus, hex=hex) def process(self, data): if self.args.plus: data = data.replace(B'+', B' ') data = unquote_to_bytes(bytes(data)) data = re.sub( B'%[uU]([0-9a-fA-F]{4})', lambda m: int(m[1], 16).to_bytes(2, 'little'), data) return data def reverse(self, data): if self.args.hex: result = bytearray(len(data) * 3) offset = 0 for byte in data: result[offset + 0] = 0x25 offset += 1 result[offset:offset + 2] = B'%02X' % byte offset += 2 return result elif self.args.plus: def replace(m): c = m[0][0] return b'+' if c == 0x20 else B'%%%02X' % c else: def replace(m): return B'%%%02X' % m[0][0] return re.sub(B'[^a-zA-Z0-9_.-~\\/]', replace, data)
Ancestors
Class variables
var required_dependencies
var optional_dependencies
Inherited members