A missed optimization in Lua

Yesterday [1], I made brief mention of optimizing some Lua [2] code, and said it was for another post.

This is said post.

The code in question (not identical, but this exhibits the same problem):

>
```
require "ddt".tron() -- trace the execution
local lpeg = require "lpeg"
local Cs = lpeg.Cs
local C = lpeg.C
local R = lpeg.R
local P = lpeg.P
function canonical(header)
local function Pi(text)
local pattern = P(true)
for c in text:gmatch(".") do
pattern = pattern * (P(c:lower()) + P(c:upper()))
end
return pattern / text
end
local ALPHA = R("AZ","az")
local id = Pi "ID" -- exceptions to Camel-Case
+ Pi "MIME"
+ Pi "CSeq"
local word = (C(ALPHA) * C(ALPHA^0))
/ function(i,r)
return i:upper() .. r:lower()
end
local other = C((P(1) - ALPHA)^1)
local hdr = Cs((id + word + other)^1)
return hdr:match(header)
end
print(canonical "return-from")
print(canonical "message-id")
print(canonical "mime-version")
print(canonical "cseq")
```

The code in question transforms a header name like return-from to the canonical form Return-From; it'll also transform ReTuRn-FRom into the canonical form. The code is used to match headers in Internet based messages like email, HTTP (HyperText Transport Protocol) or SIP (Session Initiation Protocol) (as the header names need to match case-insensitively—I'll leave how it works to you, the reader (here are some hints [3]) since what the code does isn't germane to this discussion). Now, when you trace the execution, you'll notice something:

>
```
@./ddt.lua 187: end
@code.lua 2: local lpeg = require "lpeg"
@code.lua 4: local Cs = lpeg.Cs
@code.lua 5: local C = lpeg.C
@code.lua 6: local R = lpeg.R
@code.lua 7: local P = lpeg.P
@code.lua 34: end
@code.lua 9: function canonical(header)
@code.lua 36: print(canonical "return-from")
@code.lua 18: end
@code.lua 20: local ALPHA = R("AZ","az")
@code.lua 22: local id = Pi "ID" -- exceptions to Camel-Case
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 23: + Pi "MIME"
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 24: + Pi "CSeq"
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 26: local word = (C(ALPHA) * C(ALPHA^0))
@code.lua 29: end
@code.lua 30: local other = C((P(1) - ALPHA)^1)
@code.lua 31: local hdr = Cs((id + word + other)^1)
@code.lua 33: return hdr:match(header)
@code.lua 28: return i:upper() .. r:lower()
@code.lua 28: return i:upper() .. r:lower()
@code.lua 37: print(canonical "message-id")
@code.lua 18: end
@code.lua 20: local ALPHA = R("AZ","az")
@code.lua 22: local id = Pi "ID" -- exceptions to Camel-Case
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 23: + Pi "MIME"
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 24: + Pi "CSeq"
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 26: local word = (C(ALPHA) * C(ALPHA^0))
@code.lua 29: end
@code.lua 30: local other = C((P(1) - ALPHA)^1)
@code.lua 31: local hdr = Cs((id + word + other)^1)
@code.lua 33: return hdr:match(header)
@code.lua 28: return i:upper() .. r:lower()
@code.lua 38: print(canonical "mime-version")
@code.lua 18: end
@code.lua 20: local ALPHA = R("AZ","az")
@code.lua 22: local id = Pi "ID" -- exceptions to Camel-Case
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 23: + Pi "MIME"
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 24: + Pi "CSeq"
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 26: local word = (C(ALPHA) * C(ALPHA^0))
@code.lua 29: end
@code.lua 30: local other = C((P(1) - ALPHA)^1)
@code.lua 31: local hdr = Cs((id + word + other)^1)
@code.lua 33: return hdr:match(header)
@code.lua 28: return i:upper() .. r:lower()
@code.lua 39: print(canonical "cseq")
@code.lua 18: end
@code.lua 20: local ALPHA = R("AZ","az")
@code.lua 22: local id = Pi "ID" -- exceptions to Camel-Case
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 23: + Pi "MIME"
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 24: + Pi "CSeq"
@code.lua 12: local pattern = P(true)
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@code.lua 13: for c in text:gmatch(".") do
@code.lua 17: return pattern / text
@code.lua 26: local word = (C(ALPHA) * C(ALPHA^0))
@code.lua 29: end
@code.lua 30: local other = C((P(1) - ALPHA)^1)
@code.lua 31: local hdr = Cs((id + word + other)^1)
@code.lua 33: return hdr:match(header)
```

There's quite a bit of code executed. That's because the Lua compiler isn't sufficiently smart [4] to notice that most of the code in canonical() never changes—it's independent of the passed in parameter and thus, it could be compiled once. And it's this behavior that I noticed the other day. It's an easy fix (just lift the invarient code out of the function body) and the results are about a third the processing:

>
```
@./ddt.lua 187: end
@c3.lua 2: local lpeg = require "lpeg"
@c3.lua 4: local Cs = lpeg.Cs
@c3.lua 5: local C = lpeg.C
@c3.lua 6: local R = lpeg.R
@c3.lua 7: local P = lpeg.P
@c3.lua 18: end
@c3.lua 20: local ALPHA = R("AZ","az")
@c3.lua 22: local id = Pi "ID" -- exceptions to Camel-Case
@c3.lua 12: local pattern = P(true)
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 17: return pattern / text
@c3.lua 23: + Pi "MIME"
@c3.lua 12: local pattern = P(true)
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 17: return pattern / text
@c3.lua 24: + Pi "CSeq"
@c3.lua 12: local pattern = P(true)
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 14: pattern = pattern * (P(c:lower()) + P(c:upper()))
@c3.lua 13: for c in text:gmatch(".") do
@c3.lua 17: return pattern / text
@c3.lua 26: local word = (C(ALPHA) * C(ALPHA^0))
@c3.lua 29: end
@c3.lua 30: local other = C((P(1) - ALPHA)^1)
@c3.lua 31: local hdr = Cs((id + word + other)^1)
@c3.lua 35: end
@c3.lua 33: function canonical(header)
@c3.lua 35: end
@c3.lua 38: print(canonical "return-from")
@c3.lua 34: return hdr:match(header)
@c3.lua 28: return i:upper() .. r:lower()
@c3.lua 28: return i:upper() .. r:lower()
@c3.lua 39: print(canonical "message-id")
@c3.lua 34: return hdr:match(header)
@c3.lua 28: return i:upper() .. r:lower()
@c3.lua 40: print(canonical "mime-version")
@c3.lua 34: return hdr:match(header)
@c3.lua 28: return i:upper() .. r:lower()
@c3.lua 41: print(canonical "cseq")
@c3.lua 34: return hdr:match(header)
```

I also recorded the execution with LuaJIT (Lua Just-In-Time) [5] (it's faster because it compiles Lua into native code) and it, too, is not sufficiently smart to lift the constant code out of the function. It may be that detecting this is hard for a compiler to do, or that such transformations might not be considered safe (due to possible side effects).

In any case, I found it a bit surprising (although in retrospect, it shouldn't have been).

[1] /boston/2015/02/12.1

[2] http://www.lua.org/

[3] http://www.inf.puc-rio.br/~roberto/lpeg/

[4] http://c2.com/cgi/wiki?SufficientlySmartCompiler

[5] http://luajit.org/

Gemini Mention this post

Contact the author