A direct translation from
Douglas Crockford's JavaScript parser into Haskell keeping as close as possible to the same structure and naming. Plus tokeniser and pretty-printer.
There's also a
side-by-side comparison with the original JavaScript.
1
2 module TopDownParserState where
3
4 -- to stop a collision with record field id (records are kind of odd in Haskell)
5 import Prelude hiding (id, error, lookup)
6 import qualified Prelude
7
8 -- unlike Javascript there's no inbuilt data map support
9 import Data.Map (Map, (!), lookup, insert, member)
10 import qualified Data.Map as Map
11
12 -- support stateful function style
13 import Control.Monad.State
14
15 import Tokeniser
16
17 type Parser a = State Env a
18
19 -- Something which isn't obvious from the original variable names is
20 -- that 'tokens' is input yet 'token' belongs to the completely
21 -- different output type.
22 data Env = Env { scope :: Scope,
23 symbol_table :: SymbolTable,
24 token :: Symbol,
25 tokens :: [Token] }
26
27 itself = return
28
29 -- This could be a simple list of scopes but I'll try to keep closely to
30 -- the structure of the Javascript code
31 data Scope = Scope { def :: SymbolTable,
32 parent :: Scope }
33 | TopScope
34
35 type SymbolTable = Map Value Symbol
36
37 define n@Symbol {value = value} = do
38 this <- gets scope
39 let t = def this ! value
40 when (member value $ def this) $
41 error n $ if reserved t
42 then "Already reserved."
43 else "Already defined."
44 let n' = n { reserved = False,
45 nudf = itself,
46 -- why redefine led here?
47 ledf = \this _ -> error this "Undefined operator.",
48 std = Nothing,
49 lbp = 0,
50 skope = this }
51 env <- get
52 put env { scope = this {
53 def = insert value n' $ def this
54 } }
55 return n'
56
57 find env@Env {scope = Scope {def = def, parent = e}} n =
58 case lookup n def of
59 Just t -> t
60 _ -> find env { scope = e} n
61
62 find Env { symbol_table = st } n =
63 case lookup n st of
64 Just t -> t
65 _ -> st ! "(name)"
66
67 pop = do
68 env <- get
69 put env { scope = parent $ scope env }
70
71 reserve n@Symbol {arity = Name, reserved = False, value = value} = do
72 this <- gets scope
73 let t = def this ! value
74 when (member value $ def this) $ do
75 when (reserved t) $
76 return ()
77 when (arity t == Name) $
78 error n "Unreserved is already defined."
79 env <- get
80 put env { scope = this {
81 def = insert value n { reserved = True } $ def this
82 } }
83
84 reserve _ = return ()
85
86 new_scope = do
87 s <- gets scope
88 let s' = Scope { def = Map.empty,
89 parent = s }
90 env <- get
91 put env { scope = s' }
92 return s'
93
94 advanceIf requiredId = do
95 token <- gets token
96 when (id token /= requiredId) $
97 error token $ "Expected '" ++ requiredId ++ "'."
98 advance
99
100 advance = do
101 this <- get
102 let (t, ts) = case tokens this of
103 []
104 -> (symbol_table this ! "(end)", [])
105 t@(Token a v):tokens'
106 -> let (o, a') = case a of
107 NameType
108 -> (find this $ v, Name)
109 OperatorType
110 -> case lookup v $ symbol_table this of
111 Just t' -> (t', Operator)
112 _ -> error t "Unknown operator."
113 NumberType
114 -> (symbol_table this ! "(literal)", Literal)
115 StringType
116 -> (symbol_table this ! "(literal)", Literal)
117 -- the next case can't happen and ghc throws a warning
118 -- _ -> error t "Unexpected token."
119 in (o { value = v, arity = a' }, tokens')
120
121 put this { token = t, tokens = ts }
122 return t
123
124 expression rbp = do
125 t <- gets token
126 advance
127 left <- nud t
128 let walkRight left = do
129 t <- gets token
130 if rbp < lbp t then do
131 advance
132 left <- led t left
133 walkRight left
134 else return left
135 walkRight left
136
137 type NudFun = This -> Parser Symbol
138 type LedFun = This -> Symbol -> Parser Symbol
139
140 statement = do
141 n <- gets token
142 case n of
143 Symbol { std = Just std } -> do
144 advance
145 reserve n
146 std n
147 otherwise -> do
148 v <- expression 0
149 when (not (isAssignment v) && id v /= "(") $
150 error v "Bad expression statement."
151 advanceIf ";"
152 return [v]
153
154 type StdFun = This -> Parser [Symbol]
155
156 -- For this function and all like it we don't change the return type
157 -- but instead make the pretty printer treat empty lists as null and
158 -- single element lists as the element. To simplify the structure
159 -- of the Symbol data structure we also apply the equivilent
160 -- transformation there which means that single element lists appear
161 -- in many places where the Javascript uses just the element.
162 -- Because we apply this transformation uniformly there are cases
163 -- where our output is slightly different from the original.
164 statements = do
165 token <- gets token
166 if id token == "}" || id token == "(end)"
167 then return []
168 else do
169 s <- statement
170 ss <- statements
171 return $ s ++ ss
172
173 block = do
174 t <- gets token
175 advanceIf "{"
176 case std t of
177 Just s -> s t
178
179 data Symbol = Symbol { id :: Id,
180 arity :: Arity,
181 value :: Value,
182 lbp :: BindingPower,
183 reserved, isAssignment :: Bool,
184 nudf :: NudFun,
185 ledf :: LedFun,
186 std :: Maybe StdFun,
187 skope :: Scope,
188 key :: Maybe Value,
189 first, second, third :: [Symbol] }
190
191 data Arity = Name | Operator | Literal | Unary | Binary | Ternary
192 | Statement | This
193 | Function { name :: Maybe Value }
194 deriving (Eq, Show)
195
196 original_symbol = Symbol {
197 nudf = \this -> error this "Undefined.",
198 ledf = \this _ -> error this "Missing operator.",
199 std = Nothing,
200 first = [], second = [], third = [],
201 id = undefined, arity = undefined, value = undefined, lbp = undefined,
202 isAssignment = False, skope = undefined, reserved = False,
203 key = Nothing
204 }
205
206 -- helper functions to access nudf/ledf with correct "object"
207 nud s = nudf s s
208 led s = ledf s s
209
210 symbol0 id = symbol1 id NilT
211 symbol1 = flip symbol 0
212
213 -- rather than make Symbol mutable this binds the function during
214 -- the symbol creation
215 -- symbol :: Id -> BindingPower -> SymbolType -> State SymbolTable Symbol
216 symbol id bp typ = do
217 st <- get
218 let s' = bind typ $
219 case lookup id st of
220 Just s -> if bp >= lbp s
221 then s { lbp = bp }
222 else s
223 _ -> original_symbol { id = id,
224 value = id,
225 lbp = bp }
226 put $ insert id s' st
227 return s'
228 where
229 bind (Nud f) s = s { nudf = f }
230 bind (Led f) s = s { ledf = f }
231 bind (Std f) s = s { std = Just f }
232 bind _ s = s
233
234 data SymbolType = NilT
235 | Nud NudFun
236 | Led LedFun
237 | Std StdFun
238
239 -- this constant doesn't use the value because that would require a
240 -- datatype for all the kinds of javascript types it could be set to
241 constant0 s v = constant s v $ \this -> do
242 reserve this
243 symbol_table <- gets symbol_table
244 return this { value = value $ symbol_table ! id this,
245 arity = Literal }
246
247 constant s _ f = symbol1 s $ Nud f
248
249 -- infix is a keyword
250 inphix0 s bp = inphix s bp $ \this left -> do
251 right <- expression bp
252 return this { first = [left],
253 second = [right],
254 arity = Binary }
255
256 inphix s bp f = symbol s bp $ Led f
257
258 inphixr0 s bp = inphixr s bp $ \this left -> do
259 right <- expression $ bp-1
260 return this { first = [left],
261 second = [right],
262 arity = Binary }
263
264 -- infixr is a keyword
265 inphixr = inphix
266
267 assignment s = inphixr s 10 $ \this left -> do
268 when (id left /= "." && id left /= "[" && arity left /= Name) $
269 error left "Bad lvalue."
270 right <- expression 9
271 return this { first = [left],
272 second = [right],
273 arity = Binary,
274 isAssignment = True }
275
276 -- prefix isn't a keyword but to named to match inphix and inphixr
277 prephix0 s = prephix s $ \this -> do
278 reserve this
279 expr <- expression 70
280 return this { first = [expr],
281 arity = Unary }
282
283 prephix s f = symbol1 s $ Nud f
284
285 stmt s f = symbol1 s $ Std f
286
287 initial_symbol_table = execState ist Map.empty
288 where
289 ist = do
290
291 symbol0 "(end)"
292 symbol0 "(name)"
293
294 symbol0 ":"
295 symbol0 ";"
296 symbol0 ")"
297 symbol0 "]"
298 symbol0 "}"
299 symbol0 ","
300 symbol0 "else"
301
302 constant0 "true" True
303 constant0 "false" False
304 constant0 "null" undefined
305 constant0 "pi" 3.141592653589793
306 constant0 "Object" Map.empty
307 constant0 "Array" []
308
309 symbol1 "(literal)" $ Nud itself
310
311 symbol1 "this" $ Nud $ \this -> do
312 reserve this
313 return this { arity = This }
314
315 assignment "="
316 assignment "+="
317 assignment "-="
318
319 inphix "?" 20 $ \this left -> do
320 whenTrue <- expression 0
321 advanceIf ":"
322 whenFalse <- expression 0
323 return this { first = [left],
324 second = [whenTrue],
325 third = [whenFalse],
326 arity = Ternary }
327
328 inphixr0 "&&" 30
329 inphixr0 "||" 30
330
331 inphixr0 "===" 40
332 inphixr0 "!==" 40
333 inphixr0 "<" 40
334 inphixr0 "<=" 40
335 inphixr0 ">" 40
336 inphixr0 ">=" 40
337
338 inphix0 "+" 50
339 inphix0 "-" 50
340
341 inphix0 "*" 60
342 inphix0 "/" 60
343
344 inphix "." 80 $ \this left -> do
345 token <- gets token
346 when (arity token /= Name) $
347 error token "Expected a property name."
348 -- Even though the Javascript updates the token it is then
349 -- immediately discaded by 'advance' so we won't bother
350 advance
351 return this { first = [left],
352 second = [token { arity = Literal }],
353 arity = Binary }
354
355 inphix "[" 80 $ \this left -> do
356 s <- expression 0
357 advanceIf "]"
358 return this { first = [left],
359 second = [s],
360 arity = Binary }
361
362 inphix "(" 80 $ \this left -> do
363 t <- gets token
364 a <- if id t /= ")" then
365 let vars = do
366 e <- expression 0
367 token <- gets token
368 if id token /= ","
369 then return [e]
370 else do
371 advanceIf ","
372 v <- vars
373 return $ e:v
374 in vars
375 else return []
376 -- can't use a before it's been populated
377 let this' = if id left == "." || id left == "["
378 then this { first = first left,
379 second = second left,
380 third = a,
381 arity = Ternary }
382 else if (arity left /= Unary || id left /= "function") &&
383 arity left /= Name && id left /= "(" &&
384 id left /= "&&" && id left /= "||" && id left /= "?"
385 then error left "Expected a variable name."
386 else this { first = [left],
387 second = a,
388 arity = Binary }
389 advanceIf ")"
390 return this'
391
392 prephix0 "!"
393 prephix0 "-"
394 prephix0 "typeof"
395
396 prephix "(" $ \this -> do
397 e <- expression 0
398 advanceIf ")"
399 return e
400
401 prephix "function" $ \this -> do
402 new_scope
403 t <- gets token
404 n <- if arity t == Name then do
405 define t
406 advance
407 return $ Just $ value t
408 else return Nothing
409 t <- advanceIf "("
410 a <- if id t /= ")" then
411 let params = do
412 t <- gets token
413 when (arity t /= Name) $
414 error t "Expected a parameter name."
415 define t
416 token <- advance
417 if id token /= ","
418 then return [t]
419 else do
420 advanceIf ","
421 p <- params
422 return $ t:p
423 in params
424 else return []
425 advanceIf ")"
426 advanceIf "{"
427 s <- statements
428 advanceIf "}"
429 pop
430 return this { first = a,
431 second = s,
432 arity = Function { name = n } }
433
434 prephix "[" $ \this -> do
435 t <- gets token
436 a <- if id t /= "]" then
437 let entries = do
438 v <- expression 0
439 token <- gets token
440 if id token /= ","
441 then return [v]
442 else do
443 advanceIf ","
444 e <- entries
445 return $ v:e
446 in entries
447 else return []
448 advanceIf "]"
449 return this { first = a,
450 arity = Unary }
451
452 prephix "{" $ \this -> do
453 t <- gets token
454 a <- if id t /= "}" then
455 let entries = do
456 n <- gets token
457 when (arity n /= Name && arity n /= Literal) $
458 error n "Bad property name."
459 advance
460 advanceIf ":"
461 v <- expression 0
462 let v' = v { key = Just $ value n }
463 token <- gets token
464 if id token /= ","
465 then return [v']
466 else do
467 advanceIf ","
468 e <- entries
469 return $ v':e
470 in entries
471 else return []
472 advanceIf "}"
473 return this { first = a,
474 arity = Unary }
475
476 stmt "{" $ \this -> do
477 new_scope
478 a <- statements
479 advanceIf "}"
480 pop
481 return a
482
483 stmt "var" $ \this -> do
484 let vars = do
485 n <- gets token
486 when (arity n /= Name) $
487 error n "Expected a new variable name."
488 define n
489 t <- advance
490 a <- if id t == "=" then do
491 advanceIf "="
492 s <- expression 0
493 let t' = t { first = [n],
494 second = [s],
495 arity = Binary,
496 isAssignment = True }
497 return [t']
498 else return []
499 t <- gets token
500 if id t /= ","
501 then return a
502 else do
503 advanceIf ","
504 v <- vars
505 return $ a++v
506
507 a <- vars
508 advanceIf ";"
509 return a
510
511 stmt "if" $ \this -> do
512 advanceIf "("
513 test <- expression 0
514 advanceIf ")"
515 body <- block
516 token <- gets token
517 els <- if id token == "else" then do
518 reserve token
519 token <- advanceIf "else"
520 if id token == "if" then statement else block
521 else return []
522 return [this { first = [test],
523 second = body,
524 third = els,
525 arity = Statement }]
526
527 stmt "return" $ \this -> do
528 t <- gets token
529 first <- if id t /= ";" then do
530 e <- expression 0
531 return [e]
532 else return []
533 t <- advanceIf ";"
534 when (id t /= "}") $
535 error t "Unreachable statement."
536 return [this { first = first,
537 arity = Statement }]
538
539 stmt "break" $ \this -> do
540 t <- advanceIf ";"
541 when (id t /= "}") $
542 error t "Unreachable statement."
543 return [this { arity = Statement }]
544
545 stmt "while" $ \this -> do
546 advanceIf "("
547 f <- expression 0
548 advanceIf ")"
549 s <- block
550 return [this { first = [f],
551 second = s,
552 arity = Statement }]
553
554 parse source =
555 evalState ( do
556 new_scope
557 advance
558 s <- statements
559 advanceIf "(end)"
560 return s
561 ) Env { tokens = tokenise source,
562 scope = TopScope,
563 token = original_symbol { id = "(start)" },
564 symbol_table = initial_symbol_table }
565
566 type Value = String
567 type Id = String
568 type BindingPower = Int
569 type This = Symbol
570
571 error t msg = Prelude.error $ msg ++ " " ++ show t
572
573 instance Show Symbol where
574 show Symbol { value = value, arity = arity } =
575 "{value: " ++ show value ++ " " ++ show arity ++ "}"
1
2 module Tokeniser where
3
4 import Text.Read (lex)
5 import Data.Char (isAlpha, isNumber)
6
7 data Token = Token TokenType String
8 deriving Show
9 data TokenType = NameType | StringType | NumberType | OperatorType
10 deriving Show
11
12 tokenise = tokens . head . lex
13 where
14 tokens (t, "") = [token t]
15 tokens (t, s) = token t : tokenise s
16
17 token t@(c:_) = Token tokenType text
18 where
19 tokenType | isAlpha c = NameType
20 | isNumber c = NumberType
21 | '"' == c = StringType
22 | otherwise = OperatorType
23
24 text | c == '"' = drop 1 $ take (length t-1) t
25 | otherwise = t
26
27 token _ = Token OperatorType "(end)"
1
2 module PrettyPrint where
3
4 import Text.PrettyPrint
5
6 import TopDownParserState
7
8 pp = ppList ""
9
10 ppList l [] = empty
11 ppList l (s:[]) = ppSymbol l s
12 ppList l s = bracket l $
13 vcat $ map (ppSymbol "") s
14
15 ppSymbol l Symbol {key = k, value = v, arity = a,
16 first = f, second = s, third = t } =
17 brace l $
18 ppMaybe "key: " k $$
19 ppValue v $$
20 ppArity a $$
21 ppList "first: " f $$
22 ppList "second: " s $$
23 ppList "third: " t
24
25 ppMaybe l (Just k) = text l <> textOf k
26 ppMaybe l Nothing = empty
27
28 ppValue v = text "value: " <> textOf v
29
30 ppArity a@Function {name = n} =
31 text "arity: Function" $$
32 ppMaybe "name: " n
33 ppArity a = text "arity: " <> textOf a
34
35 textOf :: Show a => a -> Doc
36 textOf = text . show
37
38 bracket l s = (text l <> lbrack) $+$ indent s $$ rbrack
39 brace l s = (text l <> lbrace) $+$ indent s $$ rbrace
40
41 indent = nest 4