minor grammar fixes/improvements, better error messages
This commit is contained in:
parent
9fa8f2fcaa
commit
816a299d2f
20
src/lexer.ml
20
src/lexer.ml
@ -15,7 +15,8 @@ let achar =
|
||||
[%sedlex.regexp? Sub (any, (ctl | '"' | '\\' | '\'' | '{' | '}' | wsp))]
|
||||
|
||||
let newline =
|
||||
[%sedlex.regexp? Star wsp, '\n', Star wsp, Opt ('#', Sub (any, '\n'))]
|
||||
[%sedlex.regexp?
|
||||
Plus (Star wsp, '\n', Star wsp, Opt ('#', Star (Sub (any, '\n'))))]
|
||||
|
||||
let esc_pair = [%sedlex.regexp? '\\', vchar]
|
||||
|
||||
@ -76,14 +77,17 @@ let string_of_dqword s =
|
||||
|
||||
exception Error of string
|
||||
|
||||
let error msg = raise @@ Error msg
|
||||
|
||||
let rec token buf =
|
||||
match%sedlex buf with
|
||||
(* 1 *)
|
||||
| "{" -> LBRACE
|
||||
| "}" -> RBRACE
|
||||
| "{", newline -> LBRACE
|
||||
| newline, "}" -> RBRACE
|
||||
(* other *)
|
||||
| wsp -> token buf
|
||||
| Plus newline -> NEWLINE
|
||||
| Opt newline, eof -> EOF
|
||||
| newline -> NEWLINE
|
||||
| atom ->
|
||||
let word = Utf8.lexeme buf in
|
||||
let word = string_of_atom word in
|
||||
@ -97,8 +101,12 @@ let rec token buf =
|
||||
let word = Utf8.lexeme buf in
|
||||
let word = String.sub word 1 (String.length word - 2) in
|
||||
WORD word
|
||||
| eof -> EOF
|
||||
| any ->
|
||||
let invalid = Utf8.lexeme buf in
|
||||
raise @@ Error invalid
|
||||
let start, _stop = Sedlexing.lexing_positions buf in
|
||||
Format.ksprintf error
|
||||
"File %s, line %i, character %i: unexpected lexeme `%s`" start.pos_fname
|
||||
start.pos_lnum
|
||||
(start.pos_cnum - start.pos_bol)
|
||||
invalid
|
||||
| _ -> assert false
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
%token<String.t> WORD
|
||||
%token LBRACE RBRACE NEWLINE EOF
|
||||
%token LBRACE
|
||||
%token RBRACE
|
||||
%token NEWLINE
|
||||
%token EOF
|
||||
|
||||
%{ open Types %}
|
||||
|
||||
@ -10,14 +13,14 @@
|
||||
let params ==
|
||||
| ~ = list(WORD); <>
|
||||
|
||||
let children ==
|
||||
| LBRACE; NEWLINE; ~ = list(directive); RBRACE; <>
|
||||
let children :=
|
||||
| LBRACE; ~ = separated_list(NEWLINE, directive); RBRACE; <>
|
||||
| { [] }
|
||||
|
||||
let directive :=
|
||||
| name = WORD; ~ = params; ~ = children; NEWLINE; {
|
||||
| name = WORD; ~ = params; ~ = children; {
|
||||
{ name; params; children }
|
||||
}
|
||||
|
||||
let config :=
|
||||
| option(NEWLINE); ~ = list(directive); EOF; <>
|
||||
| option(NEWLINE); ~ = separated_list(NEWLINE, directive); EOF; <>
|
||||
|
||||
22
src/parse.ml
22
src/parse.ml
@ -1,24 +1,36 @@
|
||||
(** Module providing functions to parse a config from various kind of inputs. *)
|
||||
|
||||
(** Pretty print a token *)
|
||||
let pp_token fmt = function
|
||||
| Menhir_parser.WORD s -> Format.fprintf fmt "WORD %s" s
|
||||
| LBRACE -> Format.pp_print_string fmt "LBRACE"
|
||||
| RBRACE -> Format.pp_print_string fmt "RBRACE"
|
||||
| NEWLINE -> Format.pp_print_string fmt "NEWLINE"
|
||||
| EOF -> Format.pp_print_string fmt "EOF"
|
||||
|
||||
(** Parse a config from a lexing buffer. *)
|
||||
let from_lexbuf =
|
||||
let parser =
|
||||
MenhirLib.Convert.Simplified.traditional2revised Menhir_parser.config
|
||||
in
|
||||
fun buf ->
|
||||
let last_token = ref None in
|
||||
let provider () =
|
||||
let tok = Lexer.token buf in
|
||||
let start, stop = Sedlexing.lexing_positions buf in
|
||||
last_token := Some tok;
|
||||
(tok, start, stop)
|
||||
in
|
||||
try Ok (parser provider) with
|
||||
| Menhir_parser.Error ->
|
||||
let start, _stop = Sedlexing.lexing_positions buf in
|
||||
Format.ksprintf Result.error
|
||||
"File %s, line %i, character %i: unexpected token" start.pos_fname
|
||||
Format.kasprintf Result.error
|
||||
"File %s, line %i, character %i: unexpected token %a" start.pos_fname
|
||||
start.pos_lnum
|
||||
(start.pos_cnum - start.pos_bol)
|
||||
| Lexer.Error msg -> Error (Format.sprintf "lexer error: %S" msg)
|
||||
(Format.pp_print_option pp_token)
|
||||
!last_token
|
||||
| Lexer.Error msg -> Error msg
|
||||
|
||||
(** Parse a config from a string. *)
|
||||
let from_string s = from_lexbuf (Sedlexing.Utf8.from_string s)
|
||||
@ -37,6 +49,8 @@ let from_channel c = from_lexbuf (Sedlexing.Utf8.from_channel c)
|
||||
(** Parse a config from a file. *)
|
||||
let from_file f =
|
||||
let chan = open_in f in
|
||||
let result = from_lexbuf (Sedlexing.Utf8.from_channel chan) in
|
||||
let lexbuf = Sedlexing.Utf8.from_channel chan in
|
||||
Sedlexing.set_filename lexbuf f;
|
||||
let result = from_lexbuf lexbuf in
|
||||
close_in chan;
|
||||
result
|
||||
|
||||
@ -25,23 +25,23 @@ test 2:
|
||||
}
|
||||
lex error 1:
|
||||
$ dune exec -- scfg lex_error.scfg
|
||||
error: lexer error: "\""
|
||||
error: File lex_error.scfg, line 1, character 2: unexpected lexeme `"`
|
||||
[1]
|
||||
parse error 1:
|
||||
$ dune exec -- scfg parse_error1.scfg
|
||||
error: File , line 1, character 4: unexpected token
|
||||
error: File parse_error1.scfg, line 1, character 2: unexpected lexeme `{`
|
||||
[1]
|
||||
parse error 2:
|
||||
$ dune exec -- scfg parse_error2.scfg
|
||||
error: File , line 2, character 0: unexpected token
|
||||
error: File parse_error2.scfg, line 2, character 0: unexpected token EOF
|
||||
[1]
|
||||
parse error 3:
|
||||
$ dune exec -- scfg parse_error3.scfg
|
||||
error: File , line 1, character 2: unexpected token
|
||||
error: File parse_error3.scfg, line 1, character 2: unexpected lexeme `}`
|
||||
[1]
|
||||
parse error 4:
|
||||
$ dune exec -- scfg parse_error4.scfg
|
||||
error: File , line 1, character 3: unexpected token
|
||||
error: File parse_error4.scfg, line 1, character 2: unexpected lexeme `{`
|
||||
[1]
|
||||
bug 1:
|
||||
$ dune exec -- scfg bug1.scfg
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user