-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathm17n_sedlexing.mli
63 lines (50 loc) · 2.62 KB
/
m17n_sedlexing.mli
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
(** The type of lexing buffers operated on by the functions of this
    interface. It is abstract; values are obtained with [create]. *)
type lexbuf
(** [encode ?normalize uchars] returns the UTF-8 encoded string
    corresponding to the character list [uchars].

    Normalization is optional: when [normalize] is given (either [`NFC]
    or [`NFD]), the text is normalized as [normalize]. *)
val encode : ?normalize:[< `NFC | `NFD] -> Uchar.t list -> string
(* The interface used by Sedlex.

   NOTE(review): this interface does not describe the four functions below.
   The per-function notes assume they follow the same contract as the
   identically named functions of Sedlex's own [Sedlexing] module --
   confirm against the implementation. *)

(* Presumably begins matching a new lexeme at the current position. *)
val start : lexbuf -> unit

(* Presumably consumes and returns the next code point as an integer;
   how end of input is signalled is not visible here -- TODO confirm. *)
val next : lexbuf -> int

(* Presumably records the current position together with the given
   integer, for later use by [backtrack] -- TODO confirm. *)
val mark : lexbuf -> int -> unit

(* Presumably rewinds to the last marked position and returns the integer
   that was recorded by [mark] -- TODO confirm. *)
val backtrack : lexbuf -> int
(** [create ?kind ?filename gen] creates a lexing buffer from generator [gen]
    that returns chunks of the input.

    Positions filled with {!fill_lexbuf} will have [filename] as their
    [pos_fname] field.

    If [kind] equals [`Batch] (the default), positions filled with
    {!fill_lexbuf} will refer to characters. If [`Toplevel], to bytes.
    Additionally, with [kind = `Toplevel], [gen] will never be called
    speculatively.

    NOTE(review): each chunk is a [(bytes * int * int)] triple, presumably
    (buffer, offset, length) -- confirm against the implementation. The
    value used when [filename] is omitted is not stated here. *)
val create : ?kind:[`Batch|`Toplevel] -> ?filename:string ->
(bytes * int * int) Gen.t -> lexbuf
(** [lexeme lexbuf] returns the current lexeme of [lexbuf] as the list of
    codepoints it consists of. *)
val lexeme : lexbuf -> Uchar.t list
(** [lexeme_char n lexbuf] is equivalent to [List.nth (lexeme lexbuf) n]:
    it returns the codepoint at index [n] (counting from zero, as
    [List.nth] does) of the current lexeme. *)
val lexeme_char : int -> lexbuf -> Uchar.t
(** [sub_lexeme (lft, rgt) lexbuf] returns the subset of the list
    of codepoints comprising the current lexeme. [lft] and [rgt]
    indicate the amount of characters to skip.

    NOTE(review): the original comment was cut off mid-word at this point.
    Presumably [lft] characters are skipped at the start of the lexeme and
    [rgt] at its end -- confirm against the implementation. *)
val sub_lexeme : int * int -> lexbuf -> Uchar.t list
(** [utf8_lexeme ?normalize lexbuf] is equivalent to
    [encode ?normalize (lexeme lexbuf)]: the current lexeme as a UTF-8
    encoded string, optionally normalized as [normalize]. *)
val utf8_lexeme : ?normalize:[< `NFC | `NFD] -> lexbuf -> string
(** [utf8_sub_lexeme ?normalize range lexbuf] is equivalent to
    [encode ?normalize (sub_lexeme range lexbuf)]: the part of the current
    lexeme selected by [range], as a UTF-8 encoded string, optionally
    normalized as [normalize]. *)
val utf8_sub_lexeme : ?normalize:[< `NFC | `NFD] -> int * int -> lexbuf -> string
(** [expand_token lexbuf f] memorizes the token start position
    in [lexbuf], calls [f ()], then replaces the token start position
    with the memorized one. Its result has the type of [f ()].

    NOTE(review): the original comment was cut off after the words
    "thus allowing you to". Presumably the purpose is to let the reported
    token also span whatever [f] lexes -- confirm against the
    implementation. *)
val expand_token : lexbuf -> (unit -> 'a) -> 'a
(** [fill_lexbuf lexbuf oldlexbuf] fills [oldlexbuf.lex_start_p] and
    [oldlexbuf.lex_curr_p] with data from [lexbuf], where [oldlexbuf] is
    a [Lexing.lexbuf].

    Whether the filled positions refer to characters or to bytes depends
    on the [kind] that was passed to {!create}. *)
val fill_lexbuf : lexbuf -> Lexing.lexbuf -> unit
(** [location lexbuf] returns the [Location.t] that corresponds to
    the current lexeme in [lexbuf]. *)
val location : lexbuf -> Location.t
(** [set_position lexbuf filename line] sets the location information for
    the current position of [lexbuf] to file [filename], line [line].
    The column is not changed. *)
val set_position : lexbuf -> string -> int -> unit
(** [unshift lexbuf] rolls [lexbuf] one character back, but not further
    than the start of the current lexeme.

    Only works for non-newline ASCII characters. *)
val unshift : lexbuf -> unit