diff --git a/.ocamlformat b/.ocamlformat new file mode 100644 index 0000000..1b64a37 --- /dev/null +++ b/.ocamlformat @@ -0,0 +1,42 @@ +version=0.21.0 +assignment-operator=end-line +break-cases=fit +break-fun-decl=wrap +break-fun-sig=wrap +break-infix=wrap +break-infix-before-func=false +break-separators=before +break-sequences=true +cases-exp-indent=2 +cases-matching-exp-indent=normal +doc-comments=before +doc-comments-padding=2 +doc-comments-tag-only=default +dock-collection-brackets=false +exp-grouping=preserve +field-space=loose +if-then-else=compact +indicate-multiline-delimiters=space +indicate-nested-or-patterns=unsafe-no +infix-precedence=indent +leading-nested-match-parens=false +let-and=sparse +let-binding-spacing=compact +let-module=compact +margin=80 +max-indent=68 +module-item-spacing=sparse +ocp-indent-compat=false +parens-ite=false +parens-tuple=always +parse-docstrings=true +sequence-blank-line=preserve-one +sequence-style=terminator +single-case=compact +space-around-arrays=true +space-around-lists=true +space-around-records=true +space-around-variants=true +type-decl=sparse +wrap-comments=false +wrap-fun-args=true diff --git a/example/random_text.ml b/example/random_text.ml index 841ac11..8391233 100644 --- a/example/random_text.ml +++ b/example/random_text.ml @@ -1,21 +1,20 @@ let _ = if Array.length Sys.argv < 2 then - failwith (Format.sprintf "usage: %s " Sys.argv.(0)) ; + failwith (Format.sprintf "usage: %s " Sys.argv.(0)); let file = Sys.argv.(1) in let chan = open_in file in let lines = ref [] in let keep_alpha s = - let to_keep = [' '; '\''; '-'] in + let to_keep = [ ' '; '\''; '-' ] in let to_keep = List.map (fun el -> Uchar.of_char el) to_keep in let b = Buffer.create 256 in let add_alpha () _ = function - | `Malformed _ -> - Uutf.Buffer.add_utf_8 b Uutf.u_rep + | `Malformed _ -> Uutf.Buffer.add_utf_8 b Uutf.u_rep | `Uchar u -> - if Uucp.Alpha.is_alphabetic u || List.mem u to_keep then - Uutf.Buffer.add_utf_8 b u + if Uucp.Alpha.is_alphabetic u || List.mem u to_keep then + Uutf.Buffer.add_utf_8 b u in - Uutf.String.fold_utf_8 add_alpha () s ; + Uutf.String.fold_utf_8 add_alpha () s; Buffer.contents b in ( try @@ -24,7 +23,7 @@ let _ = let l = keep_alpha l in lines := l :: !lines done - with End_of_file -> close_in chan ) ; + with End_of_file -> close_in chan ); let lines = List.rev !lines in let generator = Omg.init () in let generator = diff --git a/src/omg.ml b/src/omg.ml index f697f9e..351c017 100644 --- a/src/omg.ml +++ b/src/omg.ml @@ -1,104 +1,96 @@ let () = Random.self_init () type generator = - { forward_cache: (string * string, (string, int) Hashtbl.t) Hashtbl.t - ; backward_cache: (string * string, (string, int) Hashtbl.t) Hashtbl.t - ; words: (int, string) Hashtbl.t - ; last_two_words: string * string } + { forward_cache : (string * string, (string, int) Hashtbl.t) Hashtbl.t + ; backward_cache : (string * string, (string, int) Hashtbl.t) Hashtbl.t + ; words : (int, string) Hashtbl.t + ; last_two_words : string * string + } let list_random_el l = List.nth l (Random.int (List.length l)) let add_word generator w = let n = Hashtbl.length generator.words in - Hashtbl.add generator.words n w ; - {generator with last_two_words= (snd generator.last_two_words, w)} + Hashtbl.add generator.words n w; + { generator with last_two_words = (snd generator.last_two_words, w) } let get_last_two_words generator = match generator.last_two_words with - | "", "" -> - [] - | x, "" | "", x -> - [x] - | x, y -> - [x; y] + | "", "" -> [] + | x, "" | "", x -> [ x ] + | x, y -> [ x; y ] let init () = let gen = - { forward_cache= Hashtbl.create 4096 - ; backward_cache= Hashtbl.create 4096 - ; words= Hashtbl.create 4096 - ; last_two_words= ("", "") } + { forward_cache = Hashtbl.create 4096 + ; backward_cache = Hashtbl.create 4096 + ; words = Hashtbl.create 4096 + ; last_two_words = ("", "") + } in add_word gen "\n" let triples = function | w1 :: w2 :: s -> - let _, _, acc = - List.fold_left - (fun (w1, w2, acc) el -> (w2, el, (w1, w2, el) :: acc)) - (w1, w2, []) s - in - List.rev acc - | _ -> - [] + let _, _, acc = + List.fold_left + (fun (w1, w2, acc) el -> (w2, el, (w1, w2, el) :: acc)) + (w1, w2, []) s + in + List.rev acc + | _ -> [] let rec add_key cache k v = match Hashtbl.find cache k with | exception Not_found -> - Hashtbl.add cache k (Hashtbl.create 64) ; - add_key cache k v + Hashtbl.add cache k (Hashtbl.create 64); + add_key cache k v | tbl -> ( match Hashtbl.find tbl v with - | exception Not_found -> - Hashtbl.add tbl v 0 - | n -> - Hashtbl.replace tbl v (n + 1) ) + | exception Not_found -> Hashtbl.add tbl v 0 + | n -> Hashtbl.replace tbl v (n + 1) ) let feed generator msg = let splitted = String.split_on_char ' ' msg in - let splitted = splitted @ ["\n"] in + let splitted = splitted @ [ "\n" ] in let triples = triples (get_last_two_words generator @ splitted) in List.iter (fun (w1, w2, w3) -> - add_key generator.forward_cache (w1, w2) w3 ; - add_key generator.backward_cache (w3, w2) w1) - triples ; + add_key generator.forward_cache (w1, w2) w3; + add_key generator.backward_cache (w3, w2) w1 ) + triples; List.fold_left (fun generator el -> add_word generator el) generator splitted let select_seed generator seed_word backward = let dir = if backward then -1 else 1 in match seed_word with | None -> - let rec loop = function - | "\n", _ | _, "\n" -> - let seed = - if backward then - 1 + Random.int (Hashtbl.length generator.words - 1) - else Random.int (Hashtbl.length generator.words - 1) - in - loop - ( Hashtbl.find generator.words seed - , Hashtbl.find generator.words (seed + dir) ) - | seed_word, next_word -> - (seed_word, next_word) - in - loop ("\n", "\n") + let rec loop = function + | "\n", _ | _, "\n" -> + let seed = + if backward then 1 + Random.int (Hashtbl.length generator.words - 1) + else Random.int (Hashtbl.length generator.words - 1) + in + loop + ( Hashtbl.find generator.words seed + , Hashtbl.find generator.words (seed + dir) ) + | seed_word, next_word -> (seed_word, next_word) + in + loop ("\n", "\n") | Some w -> - let possible_indexes = - Hashtbl.fold - (fun k v acc -> if v = w then k :: acc else acc) - generator.words [] - in - let index = list_random_el possible_indexes + dir in - (w, Hashtbl.find generator.words index) + let possible_indexes = + Hashtbl.fold + (fun k v acc -> if v = w then k :: acc else acc) + generator.words [] + in + let index = list_random_el possible_indexes + dir in + (w, Hashtbl.find generator.words index) let generate_markov_text generator max_size seed backward = let seed_word, next_word = match seed with - | None, x | x, None -> - select_seed generator x backward - | Some x, Some y -> - (x, y) + | None, x | x, None -> select_seed generator x backward + | Some x, Some y -> (x, y) in let cache = if backward then generator.backward_cache else generator.forward_cache @@ -114,7 +106,7 @@ let generate_markov_text generator max_size seed backward = let exception Stop in ( try for _ = 0 to max_size do - gen_words := !w1 :: !gen_words ; + gen_words := !w1 :: !gen_words; let tbl = try Hashtbl.find cache (!w1, !w2) with Not_found -> raise Stop in @@ -126,25 +118,22 @@ let generate_markov_text generator max_size seed backward = Hashtbl.fold (fun k v acc -> let acc = acc + v in - if i <= acc then raise (Found k) else acc) + if i <= acc then raise (Found k) else acc ) tbl 0 with - | exception Found s -> - s - | _ -> - raise Stop + | exception Found s -> s + | _ -> raise Stop in - w1 := !w2 ; + w1 := !w2; w2 := new_word done - with Stop -> () ) ; - if !w2 <> "\n" then gen_words := !w2 :: !gen_words ; + with Stop -> () ); + if !w2 <> "\n" then gen_words := !w2 :: !gen_words; let gen_words = List.filter (fun el -> el <> "\n") !gen_words in let buff = Buffer.create 512 in ( match if backward then gen_words else List.rev gen_words with - | [] -> - () + | [] -> () | x :: s -> - Buffer.add_string buff x ; - List.iter (fun el -> Buffer.add_string buff (" " ^ el)) s ) ; + Buffer.add_string buff x; + List.iter (fun el -> Buffer.add_string buff (" " ^ el)) s ); Buffer.contents buff diff --git a/test/test.ml b/test/test.ml index 27387b4..4187c50 100644 --- a/test/test.ml +++ b/test/test.ml @@ -2,47 +2,47 @@ let source = "foo bar baz qux" let test_triples () = assert ( - Omg.triples ["foo"; "bar"; "baz"; "qux"] - = [("foo", "bar", "baz"); ("bar", "baz", "qux")] ) + Omg.triples [ "foo"; "bar"; "baz"; "qux" ] + = [ ("foo", "bar", "baz"); ("bar", "baz", "qux") ] ) let test_forward () = - let wanted = ["foo bar baz qux"; "bar baz qux"; "baz qux"] in + let wanted = [ "foo bar baz qux"; "bar baz qux"; "baz qux" ] in for _ = 1 to 1000 do let generator = Omg.init () in let generator = Omg.feed generator source in let generated = Omg.generate_markov_text generator 30 (None, None) false in assert (List.mem generated wanted) - done ; + done; let seen = List.map (fun el -> (el, ref false)) wanted in for _ = 1 to 1000 do let generator = Omg.init () in let generator = Omg.feed generator source in let generated = Omg.generate_markov_text generator 30 (None, None) false in List.iter (fun (el, seen) -> if el = generated then seen := true) seen - done ; + done; List.iter (fun (el, seen) -> - if not !seen then failwith (Format.sprintf "didn't see #%s#" el)) + if not !seen then failwith (Format.sprintf "didn't see #%s#" el) ) seen let test_backward () = - let wanted = ["foo bar baz qux"; "foo bar baz"; "foo bar"] in + let wanted = [ "foo bar baz qux"; "foo bar baz"; "foo bar" ] in for _ = 1 to 1000 do let generator = Omg.init () in let generator = Omg.feed generator source in let generated = Omg.generate_markov_text generator 30 (None, None) true in assert (List.mem generated wanted) - done ; + done; let seen = List.map (fun el -> (el, ref false)) wanted in for _ = 1 to 1000 do let generator = Omg.init () in let generator = Omg.feed generator source in let generated = Omg.generate_markov_text generator 30 (None, None) true in List.iter (fun (el, seen) -> if el = generated then seen := true) seen - done ; + done; List.iter (fun (el, seen) -> - if not !seen then failwith (Format.sprintf "didn't see #%s#" el)) + if not !seen then failwith (Format.sprintf "didn't see #%s#" el) ) seen let test_force_seed_forward () = @@ -56,7 +56,7 @@ let test_force_seed_forward () = if not (generated = wanted) then failwith (Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted) - done ; + done; for _ = 1 to 1000 do let generator = Omg.init () in let generator = Omg.feed generator source in @@ -79,7 +79,7 @@ let test_force_seed_backward () = if not (generated = wanted) then failwith (Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted) - done ; + done; for _ = 1 to 1000 do let generator = Omg.init () in let generator = Omg.feed generator source in @@ -92,14 +92,14 @@ let test_force_seed_backward () = done let () = - Format.printf "testing triples...@." ; - test_triples () ; - Format.printf "testing forward...@." ; - test_forward () ; - Format.printf "testing backward...@." ; - test_backward () ; - Format.printf "testing force seed forward...@." ; - test_force_seed_forward () ; - Format.printf "test force seed backward...@." ; - test_force_seed_backward () ; + Format.printf "testing triples...@."; + test_triples (); + Format.printf "testing forward...@."; + test_forward (); + Format.printf "testing backward...@."; + test_backward (); + Format.printf "testing force seed forward...@."; + test_force_seed_forward (); + Format.printf "test force seed backward...@."; + test_force_seed_backward (); Format.printf "Tests are OK !@."