--[=[
Lexical scanner for creating a sequence of tokens from Lua source code.
This is a heavily modified and Roblox-optimized version of
the original Penlight Lexer module:
https://github.com/stevedonovan/Penlight
Authors:
stevedonovan <https://github.com/stevedonovan> ----------- Original Penlight lexer author
ryanjmulder <https://github.com/ryanjmulder> ------------- Penlight lexer contributor
mpeterv <https://github.com/mpeterv> --------------------- Penlight lexer contributor
Tieske <https://github.com/Tieske> ----------------------- Penlight lexer contributor
boatbomber <https://github.com/boatbomber> --------------- Roblox port, added builtin token,
added patterns for incomplete syntax, bug fixes,
behavior changes, token optimization, thread optimization
Added lexer.navigator() for non-sequential reads
Sleitnick <https://github.com/Sleitnick> ----------------- Roblox optimizations
howmanysmall <https://github.com/howmanysmall> ----------- Lua + Roblox optimizations
List of possible tokens:
- iden
- keyword
- builtin
- string
- number
- comment
- operator
--]=]
-- Module table; `scan` and `navigator` are attached to it further down.
local lexer = {}
-- Whitespace/control-character helpers. Prefix anchors a match at the scan position and
-- Suffix swallows what follows, so every token text includes its surrounding whitespace;
-- Cleaner is used to strip that whitespace again when the bare word is needed.
local Prefix, Suffix, Cleaner = "^[%c%s]*", "[%c%s]*", "[%c%s]+"
-- One byte from the first class followed by one or more bytes in \x80-\xBF
-- (the UTF-8 continuation-byte range).
local UNICODE = "[%z\x01-\x7F\xC2-\xF4][\x80-\xBF]+"
-- Hexadecimal literal: "0x" followed by hex digits.
local NUMBER_A = "0x[%da-fA-F]+"
-- Decimal literal with a mandatory exponent part (e/E, optional sign, digits).
local NUMBER_B = "%d+%.?%d*[eE][%+%-]?%d+"
-- Loose decimal literal: digits, an optional '.' or '_', then any run of digits, '_', 'e' or 'E'.
local NUMBER_C = "%d+[%._]?[%d_eE]*"
-- Any run of operator/punctuation characters; a run is lexed as ONE token (e.g. "()." or "..").
local OPERATORS = "[:;<>/~%*%(%)%-={},%.#%^%+%%]+"
local BRACKETS = "[%[%]]+" -- needs to be separate pattern from other operators or it'll mess up multiline strings
-- Identifier: a letter or underscore followed by alphanumerics/underscores.
local IDEN = "[%a_][%w_]*"
local STRING_EMPTY = "(['\"])%1" -- Empty string: a quote immediately followed by the same quote
local STRING_PLAIN = "(['\"])[^\n]-([^\\]%1)" -- Single-line string closed by an unescaped matching quote. TODO: Handle escaping escapes
local STRING_INCOMP_A = "(['\"]).-\n" -- Unterminated string that runs up to (and includes) the next newline
local STRING_INCOMP_B = "(['\"])[^\n]*" -- Unterminated string with no newline after it
local STRING_MULTI = "%[(=*)%[.-%]%1%]" -- Long-bracket string; %1 forces the same number of '=' on both ends
local STRING_MULTI_INCOMP = "%[=*%[.-.*" -- Unterminated long-bracket string: consumes the rest of the input
local COMMENT_MULTI = "%-%-%[(=*)%[.-%]%1%]" -- Completed long-bracket comment
local COMMENT_MULTI_INCOMP = "%-%-%[=*%[.-.*" -- Unterminated long-bracket comment: consumes the rest of the input
local COMMENT_PLAIN = "%-%-.-\n" -- Single-line comment up to (and including) the newline
local COMMENT_INCOMP = "%-%-.*" -- Single-line comment with no newline after it
-- Disabled pattern, kept for reference:
-- local TYPED_VAR = ":%s*([%w%?%| \t]+%s*)" --Typed variable, parameter, function
-- Language definitions, keyed by language name. Each entry is read below through its
-- `keyword`, `builtin` and `libraries` fields.
-- NOTE(review): this fetches a Lua chunk over HTTP and executes it with loadstring while the
-- module is loading, so the module trusts whatever that URL serves and fails to load if the
-- request or loadstring is unavailable. The table literal at the very end of this file has
-- the shape expected here (Luau / JavaScript / Plain) — presumably a local copy of the
-- remote snippet; confirm and consider using it directly.
local langs = loadstring(game:GetService("HttpService"):GetAsync("https://glot.io/snippets/gbeg3zkwkv/raw/dio.lua"))()
local language = langs.Luau
-- Filling up language.libraries.Enum table
-- (one entry per value returned by Enum:GetEnums(), keyed by its tostring() form)
local enumLibraryTable = language.libraries.Enum
for _, enum in ipairs(Enum:GetEnums()) do
enumLibraryTable[tostring(enum)] = true --TODO: Remove tostring from here once there is a better way to get the name of an Enum
end
-- Currently active language and its lookup tables. These are upvalues of lexer.scan and are
-- reassigned by the language-switching function returned at the end of the highlighter module.
local lang = language
local lua_keyword = lang.keyword
local lua_builtin = lang.builtin
local lua_libraries = lang.libraries
-- Ordered list of { pattern, tokenType } pairs. lexer.scan tries them top to bottom at the
-- current position and uses the first one that matches, so more specific patterns must come
-- before more general ones (e.g. completed strings/comments before their incomplete forms).
-- The "var" type is an intermediate label that lexer.scan refines into keyword/builtin/iden.
local lua_matches = {
-- Identifiers
{ Prefix .. IDEN .. Suffix, "var" },
-- Numbers
{ Prefix .. NUMBER_A .. Suffix, "number" },
{ Prefix .. NUMBER_B .. Suffix, "number" },
{ Prefix .. NUMBER_C .. Suffix, "number" },
-- Strings
{ Prefix .. STRING_EMPTY .. Suffix, "string" },
{ Prefix .. STRING_PLAIN .. Suffix, "string" },
{ Prefix .. STRING_INCOMP_A .. Suffix, "string" },
{ Prefix .. STRING_INCOMP_B .. Suffix, "string" },
{ Prefix .. STRING_MULTI .. Suffix, "string" },
{ Prefix .. STRING_MULTI_INCOMP .. Suffix, "string" },
-- Comments
{ Prefix .. COMMENT_MULTI .. Suffix, "comment" },
{ Prefix .. COMMENT_MULTI_INCOMP .. Suffix, "comment" },
{ Prefix .. COMMENT_PLAIN .. Suffix, "comment" },
{ Prefix .. COMMENT_INCOMP .. Suffix, "comment" },
-- Operators
{ Prefix .. OPERATORS .. Suffix, "operator" },
{ Prefix .. BRACKETS .. Suffix, "operator" },
-- Unicode
{ Prefix .. UNICODE .. Suffix, "iden" },
-- Unknown: fallback that consumes exactly one character so the scanner always advances
{ "^.", "iden" },
}
--- Create a plain token iterator from a string.
-- Every call of the returned iterator produces `tokenType, tokenText` for the next token,
-- or nothing once the whole string has been consumed. `tokenText` includes any whitespace
-- surrounding the token, so concatenating all token texts reproduces the input.
-- `lexer.finished` is reset to false here and set to true once the last token was produced.
-- @tparam string s a string.
-- @treturn function iterator returning (tokenType, tokenText)
function lexer.scan(s: string)
	lexer.finished = false

	local index = 1
	local sz = #s
	-- Text of the three most recent tokens (p1 is the latest) and the type of the latest one;
	-- they provide the context needed to recognise `library.member` accesses.
	local p1, p2, p3, pT = "", "", "", ""

	-- True when `text` ends (ignoring trailing whitespace) with exactly ONE dot, i.e. a field
	-- access such as "." or ")." — but not the ".." / "..." operators. Operator runs are lexed
	-- as a single token, so a plain "ends with a dot" test would also fire after "..", and
	-- the identifier following a concatenation would lose its keyword/builtin classification.
	local function endsWithIndexDot(text: string): boolean
		return string.match(text, "(%.+)[%s%c]*$") == "."
	end

	return function()
		if index > sz then
			return
		end

		for _, m in ipairs(lua_matches) do
			local i1, i2 = string.find(s, m[1], index)
			if i1 then
				local tok = string.sub(s, i1, i2)
				index = i2 + 1
				lexer.finished = index > sz

				local t2 = m[2]
				if t2 == "var" then
					-- Since we merge spaces into the tok, we need to remove them
					-- in order to check the actual word it contains
					local cleanTok = string.gsub(tok, Cleaner, "")

					if lua_keyword[cleanTok] then
						t2 = "keyword"
					elseif lua_builtin[cleanTok] then
						t2 = "builtin"
					else
						t2 = "iden"
					end

					if pT ~= "comment" and endsWithIndexDot(p1) then
						-- The previous token was a field access, so this word is a member name
						local parent = string.gsub(p2, Cleaner, "")
						local lib = lua_libraries[parent]
						if lib and lib[cleanTok] and not endsWithIndexDot(p3) then
							-- Known member of a builtin library that is not itself
							-- being indexed from something else: treat as a builtin
							t2 = "builtin"
						else
							-- Member of a non-builtin: never a keyword/builtin
							t2 = "iden"
						end
					end
				end

				-- Record last 3 tokens for the indexing context check
				p3 = p2
				p2 = p1
				p1 = tok
				pT = t2
				return t2, tok
			end
		end
	end
end
--- Create a navigator: a token reader over `lexer.scan` that caches every token it has
-- produced so callers can step forward (`Next`) and look around (`Peek`) without rescanning.
-- Call `nav:SetSource(source)` before reading. `Next` and `Peek` return `tokenType, tokenText`,
-- or nothing when the requested token does not exist.
-- @treturn table the navigator object
function lexer.navigator()
	local nav = {
		Source = "",
		TokenCache = table.create(50),
		_RealIndex = 0, -- number of tokens scanned (and cached) so far
		_UserIndex = 0, -- index of the token most recently returned by Next
		_ScanThread = nil, -- coroutine that drives lexer.scan; created by SetSource
	}

	--- Drop all references held by the navigator. It must not be used afterwards.
	function nav:Destroy()
		self.Source = nil
		self._RealIndex = nil
		self._UserIndex = nil
		self.TokenCache = nil
		self._ScanThread = nil
	end

	--- Start reading a new source string from the beginning, discarding cached tokens.
	function nav:SetSource(SourceString)
		self.Source = SourceString
		self._RealIndex = 0
		self._UserIndex = 0
		table.clear(self.TokenCache)

		self._ScanThread = coroutine.create(function()
			for Token, Src in lexer.scan(self.Source) do
				self._RealIndex += 1
				self.TokenCache[self._RealIndex] = { Token, Src }
				coroutine.yield(Token, Src)
			end
		end)
	end

	-- Scans exactly one more token. Returns `tokenType, tokenText`, or nothing when no scan
	-- thread exists (SetSource not called yet), the thread has finished, or the scan errored.
	local function scanOne()
		local thread = nav._ScanThread
		if thread == nil or coroutine.status(thread) == "dead" then
			return
		end

		local success, token, src = coroutine.resume(thread)
		if success and token then
			return token, src
		end
		-- Either the scan just completed (resume returned no values) or it raised an
		-- error (token would then hold the error message, which is not a token).
		return
	end

	--- Advance to the next token and return it.
	function nav.Next()
		nav._UserIndex += 1

		if nav._RealIndex >= nav._UserIndex then
			-- Already scanned, return cached
			return table.unpack(nav.TokenCache[nav._UserIndex])
		end

		return scanOne()
	end

	--- Return the token `PeekAmount` positions away from the current one without moving.
	-- Negative amounts look backwards; positions before the first token or past the last
	-- token return nothing.
	function nav.Peek(PeekAmount)
		local GoalIndex = nav._UserIndex + PeekAmount

		if nav._RealIndex >= GoalIndex then
			if GoalIndex > 0 then
				-- Already scanned, return cached
				return table.unpack(nav.TokenCache[GoalIndex])
			end
			-- Invalid peek
			return
		end

		-- Scan forward until the goal token is reached. Stop as soon as the scanner has
		-- nothing more to give: resuming the finished coroutine again would hand back
		-- "cannot resume dead coroutine", which must never be returned as a token.
		local token, src
		for _ = 1, GoalIndex - nav._RealIndex do
			token, src = scanOne()
			if not token then
				return
			end
		end
		return token, src
	end

	return nav
end
-- Alias used by the highlighter code below.
local Lexer = lexer
-- Token type (plus "background") -> Color3; filled in by updateColors.
local TokenColors = {}
-- Token type -> string.format template that wraps text in a colored <font> tag; filled in by updateColors.
local TokenFormats = {}
-- Highlighted text object -> array of its per-line overlay labels.
local ActiveLabels = {}
-- Highlighted text object -> the sanitized source it was last highlighted with.
local LastText = {}
-- Highlighted text object -> cleanup function returned by highlight.
local Cleanups = {}
-- Characters with special meaning in rich text markup, mapped to their escape sequences.
local RICH_TEXT_ESCAPES = {
	["&"] = "&amp;",
	["<"] = "&lt;",
	[">"] = "&gt;",
	["\""] = "&quot;",
	["'"] = "&apos;",
}

--- Escape a string so it can be embedded in rich text without being parsed as markup.
-- All five special characters are replaced in a single pass, so an '&' that belongs to a
-- freshly inserted escape sequence is never escaped a second time.
-- @tparam string s raw text
-- @treturn string the escaped text
local function SanitizeRichText(s: string): string
	-- Keep only the first gsub result (the string), not the substitution count
	local escaped = string.gsub(s, "[&<>\"']", RICH_TEXT_ESCAPES)
	return escaped
end
--- Replace every tab character in `s` with the fixed replacement text.
-- Returns the results of string.gsub unchanged (new string, then replacement count).
local function SanitizeTabs(s: string): string
	local tabCharacter, replacement = "\t", " "
	return string.gsub(s, tabCharacter, replacement)
end
--- Remove runs of control characters (bytes 0-8 and 11-31) from `s`.
-- Tab (9) and newline (10) are deliberately not in the set and are left in place.
-- Returns the results of string.gsub unchanged (new string, then replacement count).
local function SanitizeControl(s: string): string
	local controlRun = "[\0\1\2\3\4\5\6\7\8\11\12\13\14\15\16\17\18\19\20\21\22\23\24\25\26\27\28\29\30\31]+"
	return string.gsub(s, controlRun, "")
end
--- Syntax-highlight a text object by overlaying one rich-text label per source line.
-- The text object itself is switched to plain text; the colored text is rendered by
-- TextBox children kept in a "SyntaxHighlights" folder under it.
-- @param textObject the object to highlight (its Text, TextBounds, Font, TextSize and
--        AbsoluteSize properties are read, so it is presumably a TextLabel/TextBox — confirm)
-- @param src source to display; defaults to textObject.Text
-- @param forceUpdate when true, re-highlight even if the source is unchanged
-- @return the cleanup function for this text object, or nothing when the call was skipped
--         because the source has not changed
local function highlight(textObject: Instance, src: string?, forceUpdate: boolean?)
-- Strip control characters first, then replace tabs
src = SanitizeTabs(SanitizeControl(src or textObject.Text))
-- Nothing to do when the same source was already highlighted
if forceUpdate ~= true and LastText[textObject] == src then
return
end
LastText[textObject] = src
-- Configure the underlying text object as the plain-text base layer
textObject.RichText = false
textObject.Text = src
textObject.TextXAlignment = Enum.TextXAlignment.Left
textObject.TextYAlignment = Enum.TextYAlignment.Top
textObject.BackgroundColor3 = TokenColors.background
textObject.TextColor3 = TokenColors.iden
-- NOTE(review): presumably half-transparent because the overlay labels draw the visible text — confirm
textObject.TextTransparency = 0.5
-- Reuse the existing container folder if there is one
local lineFolder = textObject:FindFirstChild("SyntaxHighlights")
if not lineFolder then
lineFolder = Instance.new("Folder")
lineFolder.Name = "SyntaxHighlights"
end
-- Detach the folder while labels are being updated; it is re-parented at the end
lineFolder.Parent = nil
-- Line count = number of newlines + 1; each line gets an equal share of the text height
local _, numLines = string.gsub(src, "\n", "")
numLines += 1
local textHeight = textObject.TextBounds.Y/numLines
local lineLabels = ActiveLabels[textObject]
if not lineLabels then
-- No existing lineLabels, create all new
lineLabels = table.create(numLines)
for i = 1, numLines do
local lineLabel = Instance.new("TextBox")
lineLabel.Name = "Line_" .. i
lineLabel.RichText = true
lineLabel.BackgroundTransparency = 1
lineLabel.TextXAlignment = Enum.TextXAlignment.Left
lineLabel.TextYAlignment = Enum.TextYAlignment.Top
lineLabel.TextColor3 = TokenColors.iden
lineLabel.Font = textObject.Font
lineLabel.TextSize = textObject.TextSize
lineLabel.Size = UDim2.new(1, 0, 0, math.ceil(textHeight))
lineLabel.Position = UDim2.fromScale(0, textHeight * (i - 1) / textObject.AbsoluteSize.Y)
lineLabel.Text = ""
lineLabel.Parent = lineFolder
lineLabels[i] = lineLabel
end
else
-- Reuse existing labels: create any that are missing, and blank/resize all of them
-- (labels beyond numLines are kept but left with empty text)
for i=1, math.max(numLines, #lineLabels) do
local label = lineLabels[i]
if not label then
label = Instance.new("TextBox")
label.Name = "Line_" .. i
label.RichText = true
label.BackgroundTransparency = 1
label.TextXAlignment = Enum.TextXAlignment.Left
label.TextYAlignment = Enum.TextYAlignment.Top
label.TextColor3 = TokenColors.iden
label.Font = textObject.Font
label.Parent = lineFolder
lineLabels[i] = label
end
label.Text = ""
label.TextSize = textObject.TextSize
label.Size = UDim2.new(1, 0, 0, math.ceil(textHeight))
label.Position = UDim2.fromScale(0, textHeight * (i - 1) / textObject.AbsoluteSize.Y)
end
end
-- richText collects the pieces of the line currently being built; index is the number of
-- pieces in it and lineNumber the label the pieces will be written to
local richText, index, lineNumber = {}, 0, 1
for token, content in Lexer.scan(src) do
-- Unknown token types fall back to the identifier color
local Color = TokenColors[token] or TokenColors.iden
-- A token may span several lines (its text includes surrounding whitespace/newlines)
local lines = string.split(SanitizeRichText(content), "\n")
for l, line in ipairs(lines) do
if l > 1 then
-- Set line
lineLabels[lineNumber].Text = table.concat(richText)
-- Move to next line
lineNumber += 1
index = 0
table.clear(richText)
end
index += 1
-- Identifier-colored text needs no <font> wrapper because the labels already use that color.
-- NOTE(review): "[%S%C]" matches any character that is non-space OR non-control, which
-- includes an ordinary space; if the intent was "line has a visible character",
-- "[^%s%c]" may have been meant — confirm.
if Color ~= TokenColors.iden and string.find(line, "[%S%C]") then
richText[index] = string.format(TokenFormats[token], line)
else
richText[index] = line
end
end
end
-- Set final line
lineLabels[lineNumber].Text = table.concat(richText)
ActiveLabels[textObject] = lineLabels
-- Register a cleanup function the first time this text object is highlighted
local cleanup = Cleanups[textObject]
if not cleanup then
local connection
-- Destroys the line labels, forgets all per-object state and disconnects the listener
cleanup = function()
for _, label in ipairs(lineLabels) do
label:Destroy()
end
table.clear(lineLabels)
ActiveLabels[textObject] = nil
LastText[textObject] = nil
Cleanups[textObject] = nil
if connection then
connection:Disconnect()
end
end
Cleanups[textObject] = cleanup
-- Clean up automatically once the text object no longer has a parent
connection = textObject.AncestryChanged:Connect(function()
if textObject.Parent then
return
end
cleanup()
end)
end
lineFolder.Parent = textObject
return cleanup
end
-- Optional color overrides accepted by updateColors. Every field is optional; a missing
-- field falls back to the default chosen in updateColors. Apart from `background`, the
-- field names are the token types produced by the lexer.
export type HighlighterColors = {
background: Color3?,
iden: Color3?,
keyword: Color3?,
builtin: Color3?,
string: Color3?,
number: Color3?,
comment: Color3?,
operator: Color3?
}
-- Converts a color channel in the 0..1 range to an integer in 0..255.
-- The value is rounded rather than truncated (a channel such as 47/255 can come back
-- marginally below 47 after the multiplication) and clamped so that out-of-range
-- components still produce exactly two hex digits.
local function channelToByte(channel: number): number
	return math.max(0, math.min(255, math.floor(channel * 255 + 0.5)))
end

--- Set the highlighter palette and re-highlight every active text object with it.
-- Rebuilds TokenColors and the matching `<font color="#rrggbb">%s</font>` templates in
-- TokenFormats, then forces a re-highlight of each object tracked in ActiveLabels.
-- @param colors optional overrides; any field left out uses the built-in default
local function updateColors(colors: HighlighterColors?)
	-- Setup color data
	TokenColors.background = (colors and colors.background) or Color3.fromRGB(47, 47, 47)
	TokenColors.iden = (colors and colors.iden) or Color3.fromRGB(234, 234, 234)
	TokenColors.keyword = (colors and colors.keyword) or Color3.fromRGB(215, 174, 255)
	TokenColors.builtin = (colors and colors.builtin) or Color3.fromRGB(131, 206, 255)
	TokenColors.string = (colors and colors.string) or Color3.fromRGB(196, 255, 193)
	TokenColors.number = (colors and colors.number) or Color3.fromRGB(255, 125, 125)
	TokenColors.comment = (colors and colors.comment) or Color3.fromRGB(140, 140, 155)
	TokenColors.operator = (colors and colors.operator) or Color3.fromRGB(255, 239, 148)

	for key, color in pairs(TokenColors) do
		TokenFormats[key] = '<font color="#'
			.. string.format(
				"%.2x%.2x%.2x",
				channelToByte(color.R),
				channelToByte(color.G),
				channelToByte(color.B)
			)
			.. '">%s</font>'
	end

	-- Rehighlight existing labels using latest colors
	for label, lineLabels in pairs(ActiveLabels) do
		for _, lineLabel in ipairs(lineLabels) do
			lineLabel.TextColor3 = TokenColors.iden
		end
		highlight(label, label.Text, true)
	end
end
-- Install the default palette. A failure here must not prevent the module from loading,
-- but it is reported instead of being silently discarded.
local defaultColorsOk, defaultColorsError = pcall(updateColors)
if not defaultColorsOk then
	warn("Highlighter: failed to set up default colors: " .. tostring(defaultColorsError))
end

-- The module returns two values:
--   1. the highlighter object (`UpdateColors`, `Highlight`, and callable as
--      `highlighter(textObject, src)`),
--   2. a function that switches the active language by name (a key of `langs`).
return setmetatable({
	UpdateColors = updateColors,
	Highlight = highlight
}, {
	__call = function(_, textObject: Instance, src: string?)
		return highlight(textObject, src)
	end
}), function(name)
	-- Validate before touching any state so an unknown name cannot leave the lexer with
	-- `lang` cleared while the keyword/builtin/library tables still belong to the old language.
	local selected = langs[name]
	if selected == nil then
		error(string.format("unknown highlighter language %q", tostring(name)), 2)
	end

	lang = selected
	lua_keyword = selected.keyword
	lua_builtin = selected.builtin
	lua_libraries = selected.libraries
end
return {
-- Luau language definition. Presumably this is the table the lexer loads as `langs.Luau`
-- (it has the `keyword` / `builtin` / `libraries` fields the lexer reads) — confirm.
Luau = {
-- Words classified as "keyword" tokens
keyword = {
["and"] = true,
["break"] = true,
["continue"] = true,
["do"] = true,
["else"] = true,
["elseif"] = true,
["end"] = true,
["export"] = true,
["false"] = true,
["for"] = true,
["function"] = true,
["if"] = true,
["in"] = true,
["local"] = true,
["nil"] = true,
["not"] = true,
["or"] = true,
["repeat"] = true,
["return"] = true,
["self"] = true,
["then"] = true,
["true"] = true,
["type"] = true,
["typeof"] = true,
["until"] = true,
["while"] = true,
},
-- Global names classified as "builtin" tokens
builtin = {
-- Luau Functions
["assert"] = true,
["error"] = true,
["getfenv"] = true,
["getmetatable"] = true,
["ipairs"] = true,
["loadstring"] = true,
["newproxy"] = true,
["next"] = true,
["pairs"] = true,
["pcall"] = true,
["print"] = true,
["rawequal"] = true,
["rawget"] = true,
["rawset"] = true,
["select"] = true,
["setfenv"] = true,
["setmetatable"] = true,
["tonumber"] = true,
["tostring"] = true,
["unpack"] = true,
["xpcall"] = true,
-- Luau Functions (Deprecated)
["collectgarbage"] = true,
-- Luau Variables
["_G"] = true,
["_VERSION"] = true,
-- Luau Tables
["bit32"] = true,
["coroutine"] = true,
["debug"] = true,
["math"] = true,
["os"] = true,
["string"] = true,
["table"] = true,
["utf8"] = true,
-- Roblox Functions
["DebuggerManager"] = true,
["delay"] = true,
["gcinfo"] = true,
["PluginManager"] = true,
["require"] = true,
["settings"] = true,
["spawn"] = true,
["tick"] = true,
["time"] = true,
["UserSettings"] = true,
["wait"] = true,
["warn"] = true,
-- Roblox Functions (Deprecated)
["Delay"] = true,
["ElapsedTime"] = true,
["elapsedTime"] = true,
["printidentity"] = true,
["Spawn"] = true,
["Stats"] = true,
["stats"] = true,
["Version"] = true,
["version"] = true,
["Wait"] = true,
["ypcall"] = true,
-- Roblox Variables
["File"] = true,
["game"] = true,
["plugin"] = true,
["script"] = true,
["shared"] = true,
["workspace"] = true,
-- Roblox Variables (Deprecated)
["Game"] = true,
["Workspace"] = true,
-- Roblox Tables
["Axes"] = true,
["BrickColor"] = true,
["CatalogSearchParams"] = true,
["CFrame"] = true,
["Color3"] = true,
["ColorSequence"] = true,
["ColorSequenceKeypoint"] = true,
["DateTime"] = true,
["DockWidgetPluginGuiInfo"] = true,
["Enum"] = true,
["Faces"] = true,
["FloatCurveKey"] = true,
["Font"] = true,
["Instance"] = true,
["NumberRange"] = true,
["NumberSequence"] = true,
["NumberSequenceKeypoint"] = true,
["OverlapParams"] = true,
["PathWaypoint"] = true,
["PhysicalProperties"] = true,
["Random"] = true,
["Ray"] = true,
["RaycastParams"] = true,
["Rect"] = true,
["Region3"] = true,
["Region3int16"] = true,
["RotationCurveKey"] = true,
["task"] = true,
["TweenInfo"] = true,
["UDim"] = true,
["UDim2"] = true,
["Vector2"] = true,
["Vector2int16"] = true,
["Vector3"] = true,
["Vector3int16"] = true,
},
-- Library name -> set of member names. When an identifier follows `<library>.`, the lexer
-- looks it up here and classifies it as "builtin" if it is listed.
libraries = {
-- Luau Libraries
bit32 = {
arshift = true,
band = true,
bnot = true,
bor = true,
btest = true,
bxor = true,
countlz = true,
countrz = true,
extract = true,
lrotate = true,
lshift = true,
replace = true,
rrotate = true,
rshift = true,
},
coroutine = {
close = true,
create = true,
isyieldable = true,
resume = true,
running = true,
status = true,
wrap = true,
yield = true,
},
debug = {
dumpheap = true,
info = true,
loadmodule = true,
profilebegin = true,
profileend = true,
resetmemorycategory = true,
setmemorycategory = true,
traceback = true,
},
math = {
abs = true,
acos = true,
asin = true,
atan2 = true,
atan = true,
ceil = true,
clamp = true,
cos = true,
cosh = true,
deg = true,
exp = true,
floor = true,
fmod = true,
frexp = true,
ldexp = true,
log10 = true,
log = true,
max = true,
min = true,
modf = true,
noise = true,
pow = true,
rad = true,
random = true,
randomseed = true,
round = true,
sign = true,
sin = true,
sinh = true,
sqrt = true,
tan = true,
tanh = true,
huge = true,
pi = true,
},
os = {
clock = true,
date = true,
difftime = true,
time = true,
},
string = {
byte = true,
char = true,
find = true,
format = true,
gmatch = true,
gsub = true,
len = true,
lower = true,
match = true,
pack = true,
packsize = true,
rep = true,
reverse = true,
split = true,
sub = true,
unpack = true,
upper = true,
},
table = {
clear = true,
clone = true,
concat = true,
create = true,
find = true,
foreach = true,
foreachi = true,
freeze = true,
getn = true,
insert = true,
isfrozen = true,
maxn = true,
move = true,
pack = true,
remove = true,
sort = true,
unpack = true,
},
utf8 = {
char = true,
codepoint = true,
codes = true,
graphemes = true,
len = true,
nfcnormalize = true,
nfdnormalize = true,
offset = true,
charpattern = true,
},
-- Roblox Libraries
Axes = {
new = true,
},
BrickColor = {
Black = true,
Blue = true,
DarkGray = true,
Gray = true,
Green = true,
new = true,
New = true,
palette = true,
Random = true,
random = true,
Red = true,
White = true,
Yellow = true,
},
CatalogSearchParams = {
new = true,
},
CFrame = {
Angles = true,
fromAxisAngle = true,
fromEulerAngles = true,
fromEulerAnglesXYZ = true,
fromEulerAnglesYXZ = true,
fromMatrix = true,
fromOrientation = true,
lookAt = true,
new = true,
identity = true,
},
Color3 = {
fromHex = true,
fromHSV = true,
fromRGB = true,
new = true,
toHSV = true,
},
ColorSequence = {
new = true,
},
ColorSequenceKeypoint = {
new = true,
},
DateTime = {
fromIsoDate = true,
fromLocalTime = true,
fromUniversalTime = true,
fromUnixTimestamp = true,
fromUnixTimestampMillis = true,
now = true,
},
DockWidgetPluginGuiInfo = {
new = true,
},
-- Left empty here; the lexer module adds one entry per Roblox enum when it loads
Enum = {},
Faces = {
new = true,
},
FloatCurveKey = {
new = true,
},
Font = {
fromEnum = true,
new = true,
},
Instance = {
new = true,
},
NumberRange = {
new = true,
},
NumberSequence = {
new = true,
},
NumberSequenceKeypoint = {
new = true,
},
OverlapParams = {
new = true,
},
PathWaypoint = {
new = true,
},
PhysicalProperties = {
new = true,
},
Random = {
new = true,
},
Ray = {
new = true,
},
RaycastParams = {
new = true,
},
Rect = {
new = true,
},
Region3 = {
new = true,
},
Region3int16 = {
new = true,
},
RotationCurveKey = {
new = true,
},
task = {
cancel = true,
defer = true,
delay = true,
desynchronize = true,
spawn = true,
synchronize = true,
wait = true,
},
TweenInfo = {
new = true,
},
UDim = {
new = true,
},
UDim2 = {
fromOffset = true,
fromScale = true,
new = true,
},
Vector2 = {
new = true,
one = true,
xAxis = true,
yAxis = true,
zero = true,
},
Vector2int16 = {
new = true,
},
Vector3 = {
fromAxis = true,
FromAxis = true,
fromNormalId = true,
FromNormalId = true,
new = true,
one = true,
xAxis = true,
yAxis = true,
zAxis = true,
zero = true,
},
Vector3int16 = {
new = true,
},
},
},
-- JavaScript language definition: same three-field shape as the other entries.
-- It defines no libraries, so no `library.member` name is promoted to "builtin".
JavaScript = {
-- Words classified as "keyword" tokens
keyword = {
["true"] = true,
["false"] = true,
["break"] = true,
["case"] = true,
["catch"] = true,
["continue"] = true,
["debugger"] = true,
["default"] = true,
["delete"] = true,
["do"] = true,
["else"] = true,
["finally"] = true,
["for"] = true,
["function"] = true,
["if"] = true,
["in"] = true,
["instanceof"] = true,
["new"] = true,
["return"] = true,
["switch"] = true,
["this"] = true,
["throw"] = true,
["try"] = true,
["typeof"] = true,
["var"] = true,
["void"] = true,
["while"] = true,
["with"] = true,
["NaN"] = true,
["Infinity"] = true,
["undefined"] = true,
["import"] = true,
},
-- Global function names classified as "builtin" tokens
builtin = {
["eval"] = true,
["isFinite"] = true,
["parseFloat"] = true,
["parseInt"] = true,
["encodeURI"] = true,
["encodeURIComponent"] = true,
["decodeURI"] = true,
["decodeURIComponent"] = true,
},
libraries = {},
},
-- Plain text: all three tables are empty, so no word is classified as a keyword or builtin.
Plain = {
keyword = {},
builtin = {},
libraries = {},
}
}