fmt
This commit is contained in:
parent
4e32abe0b3
commit
197b14523c
42
.ocamlformat
Normal file
42
.ocamlformat
Normal file
@ -0,0 +1,42 @@
|
||||
version=0.21.0
|
||||
assignment-operator=end-line
|
||||
break-cases=fit
|
||||
break-fun-decl=wrap
|
||||
break-fun-sig=wrap
|
||||
break-infix=wrap
|
||||
break-infix-before-func=false
|
||||
break-separators=before
|
||||
break-sequences=true
|
||||
cases-exp-indent=2
|
||||
cases-matching-exp-indent=normal
|
||||
doc-comments=before
|
||||
doc-comments-padding=2
|
||||
doc-comments-tag-only=default
|
||||
dock-collection-brackets=false
|
||||
exp-grouping=preserve
|
||||
field-space=loose
|
||||
if-then-else=compact
|
||||
indicate-multiline-delimiters=space
|
||||
indicate-nested-or-patterns=unsafe-no
|
||||
infix-precedence=indent
|
||||
leading-nested-match-parens=false
|
||||
let-and=sparse
|
||||
let-binding-spacing=compact
|
||||
let-module=compact
|
||||
margin=80
|
||||
max-indent=68
|
||||
module-item-spacing=sparse
|
||||
ocp-indent-compat=false
|
||||
parens-ite=false
|
||||
parens-tuple=always
|
||||
parse-docstrings=true
|
||||
sequence-blank-line=preserve-one
|
||||
sequence-style=terminator
|
||||
single-case=compact
|
||||
space-around-arrays=true
|
||||
space-around-lists=true
|
||||
space-around-records=true
|
||||
space-around-variants=true
|
||||
type-decl=sparse
|
||||
wrap-comments=false
|
||||
wrap-fun-args=true
|
||||
@ -1,21 +1,20 @@
|
||||
let _ =
|
||||
if Array.length Sys.argv < 2 then
|
||||
failwith (Format.sprintf "usage: %s <file>" Sys.argv.(0)) ;
|
||||
failwith (Format.sprintf "usage: %s <file>" Sys.argv.(0));
|
||||
let file = Sys.argv.(1) in
|
||||
let chan = open_in file in
|
||||
let lines = ref [] in
|
||||
let keep_alpha s =
|
||||
let to_keep = [' '; '\''; '-'] in
|
||||
let to_keep = [ ' '; '\''; '-' ] in
|
||||
let to_keep = List.map (fun el -> Uchar.of_char el) to_keep in
|
||||
let b = Buffer.create 256 in
|
||||
let add_alpha () _ = function
|
||||
| `Malformed _ ->
|
||||
Uutf.Buffer.add_utf_8 b Uutf.u_rep
|
||||
| `Malformed _ -> Uutf.Buffer.add_utf_8 b Uutf.u_rep
|
||||
| `Uchar u ->
|
||||
if Uucp.Alpha.is_alphabetic u || List.mem u to_keep then
|
||||
Uutf.Buffer.add_utf_8 b u
|
||||
if Uucp.Alpha.is_alphabetic u || List.mem u to_keep then
|
||||
Uutf.Buffer.add_utf_8 b u
|
||||
in
|
||||
Uutf.String.fold_utf_8 add_alpha () s ;
|
||||
Uutf.String.fold_utf_8 add_alpha () s;
|
||||
Buffer.contents b
|
||||
in
|
||||
( try
|
||||
@ -24,7 +23,7 @@ let _ =
|
||||
let l = keep_alpha l in
|
||||
lines := l :: !lines
|
||||
done
|
||||
with End_of_file -> close_in chan ) ;
|
||||
with End_of_file -> close_in chan );
|
||||
let lines = List.rev !lines in
|
||||
let generator = Omg.init () in
|
||||
let generator =
|
||||
|
||||
133
src/omg.ml
133
src/omg.ml
@ -1,104 +1,96 @@
|
||||
let () = Random.self_init ()
|
||||
|
||||
type generator =
|
||||
{ forward_cache: (string * string, (string, int) Hashtbl.t) Hashtbl.t
|
||||
; backward_cache: (string * string, (string, int) Hashtbl.t) Hashtbl.t
|
||||
; words: (int, string) Hashtbl.t
|
||||
; last_two_words: string * string }
|
||||
{ forward_cache : (string * string, (string, int) Hashtbl.t) Hashtbl.t
|
||||
; backward_cache : (string * string, (string, int) Hashtbl.t) Hashtbl.t
|
||||
; words : (int, string) Hashtbl.t
|
||||
; last_two_words : string * string
|
||||
}
|
||||
|
||||
let list_random_el l = List.nth l (Random.int (List.length l))
|
||||
|
||||
let add_word generator w =
|
||||
let n = Hashtbl.length generator.words in
|
||||
Hashtbl.add generator.words n w ;
|
||||
{generator with last_two_words= (snd generator.last_two_words, w)}
|
||||
Hashtbl.add generator.words n w;
|
||||
{ generator with last_two_words = (snd generator.last_two_words, w) }
|
||||
|
||||
let get_last_two_words generator =
|
||||
match generator.last_two_words with
|
||||
| "", "" ->
|
||||
[]
|
||||
| x, "" | "", x ->
|
||||
[x]
|
||||
| x, y ->
|
||||
[x; y]
|
||||
| "", "" -> []
|
||||
| x, "" | "", x -> [ x ]
|
||||
| x, y -> [ x; y ]
|
||||
|
||||
let init () =
|
||||
let gen =
|
||||
{ forward_cache= Hashtbl.create 4096
|
||||
; backward_cache= Hashtbl.create 4096
|
||||
; words= Hashtbl.create 4096
|
||||
; last_two_words= ("", "") }
|
||||
{ forward_cache = Hashtbl.create 4096
|
||||
; backward_cache = Hashtbl.create 4096
|
||||
; words = Hashtbl.create 4096
|
||||
; last_two_words = ("", "")
|
||||
}
|
||||
in
|
||||
add_word gen "\n"
|
||||
|
||||
let triples = function
|
||||
| w1 :: w2 :: s ->
|
||||
let _, _, acc =
|
||||
List.fold_left
|
||||
(fun (w1, w2, acc) el -> (w2, el, (w1, w2, el) :: acc))
|
||||
(w1, w2, []) s
|
||||
in
|
||||
List.rev acc
|
||||
| _ ->
|
||||
[]
|
||||
let _, _, acc =
|
||||
List.fold_left
|
||||
(fun (w1, w2, acc) el -> (w2, el, (w1, w2, el) :: acc))
|
||||
(w1, w2, []) s
|
||||
in
|
||||
List.rev acc
|
||||
| _ -> []
|
||||
|
||||
let rec add_key cache k v =
|
||||
match Hashtbl.find cache k with
|
||||
| exception Not_found ->
|
||||
Hashtbl.add cache k (Hashtbl.create 64) ;
|
||||
add_key cache k v
|
||||
Hashtbl.add cache k (Hashtbl.create 64);
|
||||
add_key cache k v
|
||||
| tbl -> (
|
||||
match Hashtbl.find tbl v with
|
||||
| exception Not_found ->
|
||||
Hashtbl.add tbl v 0
|
||||
| n ->
|
||||
Hashtbl.replace tbl v (n + 1) )
|
||||
| exception Not_found -> Hashtbl.add tbl v 0
|
||||
| n -> Hashtbl.replace tbl v (n + 1) )
|
||||
|
||||
let feed generator msg =
|
||||
let splitted = String.split_on_char ' ' msg in
|
||||
let splitted = splitted @ ["\n"] in
|
||||
let splitted = splitted @ [ "\n" ] in
|
||||
let triples = triples (get_last_two_words generator @ splitted) in
|
||||
List.iter
|
||||
(fun (w1, w2, w3) ->
|
||||
add_key generator.forward_cache (w1, w2) w3 ;
|
||||
add_key generator.backward_cache (w3, w2) w1)
|
||||
triples ;
|
||||
add_key generator.forward_cache (w1, w2) w3;
|
||||
add_key generator.backward_cache (w3, w2) w1 )
|
||||
triples;
|
||||
List.fold_left (fun generator el -> add_word generator el) generator splitted
|
||||
|
||||
let select_seed generator seed_word backward =
|
||||
let dir = if backward then -1 else 1 in
|
||||
match seed_word with
|
||||
| None ->
|
||||
let rec loop = function
|
||||
| "\n", _ | _, "\n" ->
|
||||
let seed =
|
||||
if backward then
|
||||
1 + Random.int (Hashtbl.length generator.words - 1)
|
||||
else Random.int (Hashtbl.length generator.words - 1)
|
||||
in
|
||||
loop
|
||||
( Hashtbl.find generator.words seed
|
||||
, Hashtbl.find generator.words (seed + dir) )
|
||||
| seed_word, next_word ->
|
||||
(seed_word, next_word)
|
||||
in
|
||||
loop ("\n", "\n")
|
||||
let rec loop = function
|
||||
| "\n", _ | _, "\n" ->
|
||||
let seed =
|
||||
if backward then 1 + Random.int (Hashtbl.length generator.words - 1)
|
||||
else Random.int (Hashtbl.length generator.words - 1)
|
||||
in
|
||||
loop
|
||||
( Hashtbl.find generator.words seed
|
||||
, Hashtbl.find generator.words (seed + dir) )
|
||||
| seed_word, next_word -> (seed_word, next_word)
|
||||
in
|
||||
loop ("\n", "\n")
|
||||
| Some w ->
|
||||
let possible_indexes =
|
||||
Hashtbl.fold
|
||||
(fun k v acc -> if v = w then k :: acc else acc)
|
||||
generator.words []
|
||||
in
|
||||
let index = list_random_el possible_indexes + dir in
|
||||
(w, Hashtbl.find generator.words index)
|
||||
let possible_indexes =
|
||||
Hashtbl.fold
|
||||
(fun k v acc -> if v = w then k :: acc else acc)
|
||||
generator.words []
|
||||
in
|
||||
let index = list_random_el possible_indexes + dir in
|
||||
(w, Hashtbl.find generator.words index)
|
||||
|
||||
let generate_markov_text generator max_size seed backward =
|
||||
let seed_word, next_word =
|
||||
match seed with
|
||||
| None, x | x, None ->
|
||||
select_seed generator x backward
|
||||
| Some x, Some y ->
|
||||
(x, y)
|
||||
| None, x | x, None -> select_seed generator x backward
|
||||
| Some x, Some y -> (x, y)
|
||||
in
|
||||
let cache =
|
||||
if backward then generator.backward_cache else generator.forward_cache
|
||||
@ -114,7 +106,7 @@ let generate_markov_text generator max_size seed backward =
|
||||
let exception Stop in
|
||||
( try
|
||||
for _ = 0 to max_size do
|
||||
gen_words := !w1 :: !gen_words ;
|
||||
gen_words := !w1 :: !gen_words;
|
||||
let tbl =
|
||||
try Hashtbl.find cache (!w1, !w2) with Not_found -> raise Stop
|
||||
in
|
||||
@ -126,25 +118,22 @@ let generate_markov_text generator max_size seed backward =
|
||||
Hashtbl.fold
|
||||
(fun k v acc ->
|
||||
let acc = acc + v in
|
||||
if i <= acc then raise (Found k) else acc)
|
||||
if i <= acc then raise (Found k) else acc )
|
||||
tbl 0
|
||||
with
|
||||
| exception Found s ->
|
||||
s
|
||||
| _ ->
|
||||
raise Stop
|
||||
| exception Found s -> s
|
||||
| _ -> raise Stop
|
||||
in
|
||||
w1 := !w2 ;
|
||||
w1 := !w2;
|
||||
w2 := new_word
|
||||
done
|
||||
with Stop -> () ) ;
|
||||
if !w2 <> "\n" then gen_words := !w2 :: !gen_words ;
|
||||
with Stop -> () );
|
||||
if !w2 <> "\n" then gen_words := !w2 :: !gen_words;
|
||||
let gen_words = List.filter (fun el -> el <> "\n") !gen_words in
|
||||
let buff = Buffer.create 512 in
|
||||
( match if backward then gen_words else List.rev gen_words with
|
||||
| [] ->
|
||||
()
|
||||
| [] -> ()
|
||||
| x :: s ->
|
||||
Buffer.add_string buff x ;
|
||||
List.iter (fun el -> Buffer.add_string buff (" " ^ el)) s ) ;
|
||||
Buffer.add_string buff x;
|
||||
List.iter (fun el -> Buffer.add_string buff (" " ^ el)) s );
|
||||
Buffer.contents buff
|
||||
|
||||
44
test/test.ml
44
test/test.ml
@ -2,47 +2,47 @@ let source = "foo bar baz qux"
|
||||
|
||||
let test_triples () =
|
||||
assert (
|
||||
Omg.triples ["foo"; "bar"; "baz"; "qux"]
|
||||
= [("foo", "bar", "baz"); ("bar", "baz", "qux")] )
|
||||
Omg.triples [ "foo"; "bar"; "baz"; "qux" ]
|
||||
= [ ("foo", "bar", "baz"); ("bar", "baz", "qux") ] )
|
||||
|
||||
let test_forward () =
|
||||
let wanted = ["foo bar baz qux"; "bar baz qux"; "baz qux"] in
|
||||
let wanted = [ "foo bar baz qux"; "bar baz qux"; "baz qux" ] in
|
||||
for _ = 1 to 1000 do
|
||||
let generator = Omg.init () in
|
||||
let generator = Omg.feed generator source in
|
||||
let generated = Omg.generate_markov_text generator 30 (None, None) false in
|
||||
assert (List.mem generated wanted)
|
||||
done ;
|
||||
done;
|
||||
let seen = List.map (fun el -> (el, ref false)) wanted in
|
||||
for _ = 1 to 1000 do
|
||||
let generator = Omg.init () in
|
||||
let generator = Omg.feed generator source in
|
||||
let generated = Omg.generate_markov_text generator 30 (None, None) false in
|
||||
List.iter (fun (el, seen) -> if el = generated then seen := true) seen
|
||||
done ;
|
||||
done;
|
||||
List.iter
|
||||
(fun (el, seen) ->
|
||||
if not !seen then failwith (Format.sprintf "didn't see #%s#" el))
|
||||
if not !seen then failwith (Format.sprintf "didn't see #%s#" el) )
|
||||
seen
|
||||
|
||||
let test_backward () =
|
||||
let wanted = ["foo bar baz qux"; "foo bar baz"; "foo bar"] in
|
||||
let wanted = [ "foo bar baz qux"; "foo bar baz"; "foo bar" ] in
|
||||
for _ = 1 to 1000 do
|
||||
let generator = Omg.init () in
|
||||
let generator = Omg.feed generator source in
|
||||
let generated = Omg.generate_markov_text generator 30 (None, None) true in
|
||||
assert (List.mem generated wanted)
|
||||
done ;
|
||||
done;
|
||||
let seen = List.map (fun el -> (el, ref false)) wanted in
|
||||
for _ = 1 to 1000 do
|
||||
let generator = Omg.init () in
|
||||
let generator = Omg.feed generator source in
|
||||
let generated = Omg.generate_markov_text generator 30 (None, None) true in
|
||||
List.iter (fun (el, seen) -> if el = generated then seen := true) seen
|
||||
done ;
|
||||
done;
|
||||
List.iter
|
||||
(fun (el, seen) ->
|
||||
if not !seen then failwith (Format.sprintf "didn't see #%s#" el))
|
||||
if not !seen then failwith (Format.sprintf "didn't see #%s#" el) )
|
||||
seen
|
||||
|
||||
let test_force_seed_forward () =
|
||||
@ -56,7 +56,7 @@ let test_force_seed_forward () =
|
||||
if not (generated = wanted) then
|
||||
failwith
|
||||
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted)
|
||||
done ;
|
||||
done;
|
||||
for _ = 1 to 1000 do
|
||||
let generator = Omg.init () in
|
||||
let generator = Omg.feed generator source in
|
||||
@ -79,7 +79,7 @@ let test_force_seed_backward () =
|
||||
if not (generated = wanted) then
|
||||
failwith
|
||||
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted)
|
||||
done ;
|
||||
done;
|
||||
for _ = 1 to 1000 do
|
||||
let generator = Omg.init () in
|
||||
let generator = Omg.feed generator source in
|
||||
@ -92,14 +92,14 @@ let test_force_seed_backward () =
|
||||
done
|
||||
|
||||
let () =
|
||||
Format.printf "testing triples...@." ;
|
||||
test_triples () ;
|
||||
Format.printf "testing forward...@." ;
|
||||
test_forward () ;
|
||||
Format.printf "testing backward...@." ;
|
||||
test_backward () ;
|
||||
Format.printf "testing force seed forward...@." ;
|
||||
test_force_seed_forward () ;
|
||||
Format.printf "test force seed backward...@." ;
|
||||
test_force_seed_backward () ;
|
||||
Format.printf "testing triples...@.";
|
||||
test_triples ();
|
||||
Format.printf "testing forward...@.";
|
||||
test_forward ();
|
||||
Format.printf "testing backward...@.";
|
||||
test_backward ();
|
||||
Format.printf "testing force seed forward...@.";
|
||||
test_force_seed_forward ();
|
||||
Format.printf "test force seed backward...@.";
|
||||
test_force_seed_backward ();
|
||||
Format.printf "Tests are OK !@."
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user