from math import log
from gzip import compress, decompress
# Output alphabet: the position of a character in MAP is the numeric value
# that character stands for in an encoded message.
# NOTE: the order and extent of the ranges below are part of the message
# format. Python ranges exclude their end value, so the last code point of
# each range is not in MAP; changing that would shift every later index and
# make previously encoded messages undecodable.
MAP = []
# CJK (Chinese characters)
ranges = [
    range(0x4E00, 0x9FFF),
    range(0x3400, 0x4DBF),
    range(0x20000, 0x2A6DF),
    range(0x2A700, 0x2B73F),
    range(0x2B740, 0x2B81F),
    range(0x2B820, 0x2CEAF),
    range(0x2CEB0, 0x2EBEF),
    range(0x30000, 0x3134F),
]
MAP += [chr(x) for y in ranges for x in y]
# Some Emoticons
#MAP += [chr(x) for x in range(0x1F600, 0x1F64F)]
# Number of payload bits carried by one output character: the largest value
# with 2**BASE <= len(MAP), so that every BASE-bit number is a valid index
# into MAP. Computed with exact integer arithmetic; a floating-point log2 of
# len(MAP) + 1 can round up and would overshoot when len(MAP) == 2**k - 1.
BASE = len(MAP).bit_length() - 1
# Mode flag handed to encode()/decode() by this file's self-test and
# command-line entry point.
GZIP = True
def encode(input, gzip=False):
    """Pack a text message into characters taken from MAP.

    The message is turned into bytes, the bytes are written out as one long
    bit string, and every group of BASE bits is replaced by the MAP entry at
    that index.

    Args:
        input: The text to encode.
        gzip: If true, the text is UTF-8 encoded and gzip-compressed first,
            so any Unicode text is accepted. If false, each character is
            stored as a single byte holding its code point, so only code
            points 0-255 are representable.

    Returns:
        The encoded message as a string of MAP characters ("" for empty
        input in plain mode).

    Raises:
        UnicodeEncodeError: (a ValueError) in plain mode when ``input``
            contains a character above U+00FF. Such a character needs more
            than 8 bits and would otherwise silently misalign the bit stream
            and produce an undecodable message.
    """
    if gzip:
        payload = compress(input.encode())
    else:
        # Latin-1 maps code points 0-255 to the identical byte value and
        # rejects everything else.
        payload = input.encode("latin-1")

    bits = "".join(format(byte, "08b") for byte in payload)
    # Right-pad with zero bits so the stream splits into whole BASE-bit groups.
    bits += "0" * (-len(bits) % BASE)

    # Every BASE-bit value is expected to be a valid index, i.e.
    # 2**BASE <= len(MAP).
    return "".join(
        MAP[int(bits[start:start + BASE], 2)]
        for start in range(0, len(bits), BASE)
    )
# Lazily built reverse table for decode(): MAP character -> value it encodes.
_SYMBOL_INDEX = {}


def _symbol_index():
    """Return the cached character -> value table for the usable part of MAP."""
    if not _SYMBOL_INDEX:
        # Only the first 2**BASE entries can be produced from BASE bits.
        for value, char in enumerate(MAP[:1 << BASE]):
            # setdefault keeps the first occurrence, matching list.index().
            _SYMBOL_INDEX.setdefault(char, value)
    return _SYMBOL_INDEX


def decode(input, gzip=False):
    """Turn a message made of MAP characters back into text.

    Each character is replaced by its BASE-bit value, the bits are joined and
    re-split into bytes, and the bytes are interpreted according to ``gzip``.

    Args:
        input: The encoded message (characters from MAP).
        gzip: Must match the flag used when encoding. If true, the bytes are
            gzip-decompressed and decoded as UTF-8. If false, each byte is
            taken as the code point of one character.

    Returns:
        The decoded text. In plain mode every NUL character is removed: this
        is how the zero padding added by the encoder is stripped, and it also
        means NULs inside the original text are not preserved.

    Raises:
        ValueError: If ``input`` contains a character that does not stand for
            a BASE-bit value (not in MAP, or beyond its first 2**BASE
            entries).
        OSError, EOFError, zlib.error, UnicodeDecodeError: Propagated from
            gzip decompression / UTF-8 decoding when ``gzip`` is true and the
            payload is not valid.
    """
    index = _symbol_index()
    try:
        symbols = [index[char] for char in input]
    except KeyError as error:
        raise ValueError(
            "%r is not part of the encoding alphabet" % (error.args[0],)
        ) from None

    # Fixed-width groups keep the bit stream aligned; zero-valued symbols are
    # kept here and only dropped later, as NUL bytes, in plain mode.
    bits = "".join(format(symbol, "b").zfill(BASE) for symbol in symbols)
    # A trailing group shorter than 8 bits is right-padded with zeros.
    octets = [
        int(bits[start:start + 8].ljust(8, "0"), 2)
        for start in range(0, len(bits), 8)
    ]

    if gzip:
        # Trailing zero bytes left over from padding are tolerated by gzip.
        return decompress(bytes(octets)).decode()
    return "".join(chr(octet) for octet in octets if octet)
def test():
    """Self-check: sample messages must survive an encode/decode round trip.

    Raises:
        AssertionError: If a decoded message differs from the original text.
    """
    text = ( 'This is a longer text encoded in CJK characters. This way,'
             ' messages of more than 150 European characters can be sent through'
             ' Twitter. Is\'t that great!? To add more characters in this demo:'
             ' äÖüéß@%'
             '\n[-space for more chars-]'
             '\nWith gzip enabled, even >8-bit Unicode characters are supported:'
             ' € (EUR)' )
    # gzip mode: the full sample, including the character above U+00FF.
    assert text == decode(encode(text, True), True)
    # Plain mode stores one byte per character and strips NULs on decoding,
    # so it is checked on the part of the sample it can represent.
    plain_text = "".join(char for char in text if 0 < ord(char) < 256)
    assert plain_text == decode(encode(plain_text, False), False)
if __name__ == "__main__":
    # Script entry point: run the self-test, then decode one message read
    # from standard input using the module-wide GZIP mode.
    import sys
    import zlib

    test()
    # Pasted messages often carry stray leading/trailing whitespace, which is
    # not part of the encoding alphabet and would make decoding fail.
    text = input("Reading message from input: ").strip()
    print("INPUT:", "\n"+text, "\n(", len(text), "chars )")
    try:
        decoded = decode(text, GZIP)
    except (ValueError, OSError, EOFError, zlib.error) as error:
        # Unknown characters, a damaged gzip stream or invalid UTF-8: report
        # the problem on stderr and exit with a failure status instead of a
        # traceback.
        sys.exit("Could not decode the message: %s" % (error,))
    print("DECODED:", "\n"+decoded, "\n(", len(decoded), "chars )")