# 1. Paste this into https://replit.com/languages/python3 and hit the run-button
# 2. Scroll to the bottom to customize the input text and the GZIP switch
from math import log
from gzip import compress, decompress
# Alphabet of output symbols. Every enabled "MAP += ..." line appends one
# group of characters, so alphabets can be swapped or combined by
# (un)commenting lines.
# NOTE: range() excludes its stop value, so the last code point written in
# each range below is not part of MAP. Changing the ranges or their order
# shifts the indices and makes previously encoded text undecodable.
MAP = []
# Some Emoticons
# MAP += [chr(x) for x in range(0x1F600, 0x1F64F)]
# Geometrics
# MAP += [chr(x) for x in range(0x1F780, 0x1F7FF)]
# CJK (Chinese characters)
ranges = [
    range(0x4E00, 0x9FFF),
    range(0x3400, 0x4DBF),
    range(0x20000, 0x2A6DF),
    range(0x2A700, 0x2B73F),
    range(0x2B740, 0x2B81F),
    range(0x2B820, 0x2CEAF),
    range(0x2CEB0, 0x2EBEF),
    range(0x30000, 0x3134F),
]
MAP += [chr(x) for y in ranges for x in y]
# Bits carried by one symbol: the largest b with 2**b <= len(MAP), so every
# possible b-bit chunk value is a valid index into MAP. Computed with exact
# integer arithmetic; the former int(log(len(MAP)+1, 2)) yielded one bit too
# many when len(MAP) was one short of a power of two.
BASE = len(MAP).bit_length() - 1
def encode(input, gzip=False):
    """Pack text into a string of MAP symbols, BASE bits per symbol.

    The text is converted to bytes, the bytes are written out as one long
    bit string (8 bits each), and that bit string is cut into BASE-bit
    chunks. Each chunk, read as a binary number, selects one character of
    MAP. The last chunk is right-padded with zero bits.

    Args:
        input: Text to encode.
        gzip: If true, the encoded (UTF-8) bytes of the text are
            gzip-compressed first, so any character is supported. If false,
            each character is stored as one byte equal to its code point.

    Returns:
        The encoded string; empty when the byte payload is empty.

    Raises:
        ValueError: If gzip is false and the text contains a character whose
            code point does not fit into 8 bits. Such a character used to
            produce more than 8 bits and silently misalign every following
            byte, making the result undecodable.
    """
    if gzip:
        payload = compress(input.encode())
    else:
        try:
            # latin-1 maps code points 0-255 onto the identical byte value
            # and rejects anything wider.
            payload = input.encode("latin-1")
        except UnicodeEncodeError as err:
            raise ValueError(
                "character {!r} at position {} does not fit into 8 bits; "
                "use gzip=True for such text".format(input[err.start], err.start)
            ) from err
    bits = "".join(format(byte, "08b") for byte in payload)
    chunks = [bits[i:i + BASE] for i in range(0, len(bits), BASE)]
    # ljust pads only the final, possibly short, chunk with zero bits.
    return "".join(MAP[int(chunk.ljust(BASE, "0"), 2)] for chunk in chunks)
def decode(input, gzip=False):
    """Reverse encode(): turn a string of MAP symbols back into text.

    Every symbol is replaced by its index in MAP, written as a BASE-bit
    binary number. The concatenated bits are regrouped into 8-bit values; a
    short final group is right-padded with zero bits.

    Args:
        input: String consisting of characters from MAP.
        gzip: Must match the value that was passed to encode(). If true, the
            8-bit values are gzip-decompressed and decoded as text; otherwise
            each value is taken as the code point of one character.

    Returns:
        The decoded text. Without gzip, trailing NUL characters are removed
        because they cannot be told apart from the zero bits encode() adds
        as padding; NUL characters elsewhere in the text are preserved.

    Raises:
        ValueError: If a character of the input is not contained in MAP.
    """
    # One lookup pass serves both modes. MAP.index is a linear scan, so it is
    # not repeated per mode.
    indices = [MAP.index(symbol) for symbol in input]
    # No symbol is skipped here: dropping zero-valued symbols would remove
    # BASE bits from the stream and shift all following bytes whenever BASE
    # is not a multiple of 8.
    bits = "".join(bin(index)[2:].zfill(BASE) for index in indices)
    groups = [bits[i:i + 8] for i in range(0, len(bits), 8)]
    values = [int(group.ljust(8, "0"), 2) for group in groups]
    if gzip:
        # Zero bytes produced by the padding follow the gzip stream; the
        # original implementation relied on decompress() accepting them too.
        return decompress(bytes(values)).decode()
    return "".join(map(chr, values)).rstrip("\x00")
# Demo: round-trip a sample text and show each stage with its length.
# GZIP selects the compressed mode for both encode() and decode().
GZIP = True
input = 'This is a longer text encoded in CJK characters. This way, messages of more than 150 European characters can be sent through Twitter. Is\'t that great!? To add more characters in this demo: äÖüéß@%\n[-space for more chars-]\nWith gzip enabled, even >8-bit Unicode characters are supported: € (EUR)'


def _show(title, text):
    """Print a titled text on its own line, followed by its character count."""
    print(title, "\n" + text, "\n(", len(text), "chars )")


_show("INPUT:", input)
encoded = encode(input, GZIP)
_show("ENCODED:", encoded)
decoded = decode(encoded, GZIP)
_show("DECODED:", decoded)