zapashcanon 3 months ago
parent
commit
197b14523c
Signed by: zapashcanon GPG Key ID: 8981C3C62D1D28F1
  1. 42
      .ocamlformat
  2. 15
      example/random_text.ml
  3. 133
      src/omg.ml
  4. 44
      test/test.ml

42
.ocamlformat

@ -0,0 +1,42 @@
version=0.21.0
assignment-operator=end-line
break-cases=fit
break-fun-decl=wrap
break-fun-sig=wrap
break-infix=wrap
break-infix-before-func=false
break-separators=before
break-sequences=true
cases-exp-indent=2
cases-matching-exp-indent=normal
doc-comments=before
doc-comments-padding=2
doc-comments-tag-only=default
dock-collection-brackets=false
exp-grouping=preserve
field-space=loose
if-then-else=compact
indicate-multiline-delimiters=space
indicate-nested-or-patterns=unsafe-no
infix-precedence=indent
leading-nested-match-parens=false
let-and=sparse
let-binding-spacing=compact
let-module=compact
margin=80
max-indent=68
module-item-spacing=sparse
ocp-indent-compat=false
parens-ite=false
parens-tuple=always
parse-docstrings=true
sequence-blank-line=preserve-one
sequence-style=terminator
single-case=compact
space-around-arrays=true
space-around-lists=true
space-around-records=true
space-around-variants=true
type-decl=sparse
wrap-comments=false
wrap-fun-args=true

15
example/random_text.ml

@ -1,21 +1,20 @@
let _ =
if Array.length Sys.argv < 2 then
failwith (Format.sprintf "usage: %s <file>" Sys.argv.(0)) ;
failwith (Format.sprintf "usage: %s <file>" Sys.argv.(0));
let file = Sys.argv.(1) in
let chan = open_in file in
let lines = ref [] in
let keep_alpha s =
let to_keep = [' '; '\''; '-'] in
let to_keep = [ ' '; '\''; '-' ] in
let to_keep = List.map (fun el -> Uchar.of_char el) to_keep in
let b = Buffer.create 256 in
let add_alpha () _ = function
| `Malformed _ ->
Uutf.Buffer.add_utf_8 b Uutf.u_rep
| `Malformed _ -> Uutf.Buffer.add_utf_8 b Uutf.u_rep
| `Uchar u ->
if Uucp.Alpha.is_alphabetic u || List.mem u to_keep then
Uutf.Buffer.add_utf_8 b u
if Uucp.Alpha.is_alphabetic u || List.mem u to_keep then
Uutf.Buffer.add_utf_8 b u
in
Uutf.String.fold_utf_8 add_alpha () s ;
Uutf.String.fold_utf_8 add_alpha () s;
Buffer.contents b
in
( try
@ -24,7 +23,7 @@ let _ =
let l = keep_alpha l in
lines := l :: !lines
done
with End_of_file -> close_in chan ) ;
with End_of_file -> close_in chan );
let lines = List.rev !lines in
let generator = Omg.init () in
let generator =

133
src/omg.ml

@ -1,104 +1,96 @@
let () = Random.self_init ()
type generator =
{ forward_cache: (string * string, (string, int) Hashtbl.t) Hashtbl.t
; backward_cache: (string * string, (string, int) Hashtbl.t) Hashtbl.t
; words: (int, string) Hashtbl.t
; last_two_words: string * string }
{ forward_cache : (string * string, (string, int) Hashtbl.t) Hashtbl.t
; backward_cache : (string * string, (string, int) Hashtbl.t) Hashtbl.t
; words : (int, string) Hashtbl.t
; last_two_words : string * string
}
let list_random_el l = List.nth l (Random.int (List.length l))
let add_word generator w =
let n = Hashtbl.length generator.words in
Hashtbl.add generator.words n w ;
{generator with last_two_words= (snd generator.last_two_words, w)}
Hashtbl.add generator.words n w;
{ generator with last_two_words = (snd generator.last_two_words, w) }
let get_last_two_words generator =
match generator.last_two_words with
| "", "" ->
[]
| x, "" | "", x ->
[x]
| x, y ->
[x; y]
| "", "" -> []
| x, "" | "", x -> [ x ]
| x, y -> [ x; y ]
let init () =
let gen =
{ forward_cache= Hashtbl.create 4096
; backward_cache= Hashtbl.create 4096
; words= Hashtbl.create 4096
; last_two_words= ("", "") }
{ forward_cache = Hashtbl.create 4096
; backward_cache = Hashtbl.create 4096
; words = Hashtbl.create 4096
; last_two_words = ("", "")
}
in
add_word gen "\n"
let triples = function
| w1 :: w2 :: s ->
let _, _, acc =
List.fold_left
(fun (w1, w2, acc) el -> (w2, el, (w1, w2, el) :: acc))
(w1, w2, []) s
in
List.rev acc
| _ ->
[]
let _, _, acc =
List.fold_left
(fun (w1, w2, acc) el -> (w2, el, (w1, w2, el) :: acc))
(w1, w2, []) s
in
List.rev acc
| _ -> []
let rec add_key cache k v =
match Hashtbl.find cache k with
| exception Not_found ->
Hashtbl.add cache k (Hashtbl.create 64) ;
add_key cache k v
Hashtbl.add cache k (Hashtbl.create 64);
add_key cache k v
| tbl -> (
match Hashtbl.find tbl v with
| exception Not_found ->
Hashtbl.add tbl v 0
| n ->
Hashtbl.replace tbl v (n + 1) )
| exception Not_found -> Hashtbl.add tbl v 0
| n -> Hashtbl.replace tbl v (n + 1) )
let feed generator msg =
let splitted = String.split_on_char ' ' msg in
let splitted = splitted @ ["\n"] in
let splitted = splitted @ [ "\n" ] in
let triples = triples (get_last_two_words generator @ splitted) in
List.iter
(fun (w1, w2, w3) ->
add_key generator.forward_cache (w1, w2) w3 ;
add_key generator.backward_cache (w3, w2) w1)
triples ;
add_key generator.forward_cache (w1, w2) w3;
add_key generator.backward_cache (w3, w2) w1 )
triples;
List.fold_left (fun generator el -> add_word generator el) generator splitted
let select_seed generator seed_word backward =
let dir = if backward then -1 else 1 in
match seed_word with
| None ->
let rec loop = function
| "\n", _ | _, "\n" ->
let seed =
if backward then
1 + Random.int (Hashtbl.length generator.words - 1)
else Random.int (Hashtbl.length generator.words - 1)
in
loop
( Hashtbl.find generator.words seed
, Hashtbl.find generator.words (seed + dir) )
| seed_word, next_word ->
(seed_word, next_word)
in
loop ("\n", "\n")
let rec loop = function
| "\n", _ | _, "\n" ->
let seed =
if backward then 1 + Random.int (Hashtbl.length generator.words - 1)
else Random.int (Hashtbl.length generator.words - 1)
in
loop
( Hashtbl.find generator.words seed
, Hashtbl.find generator.words (seed + dir) )
| seed_word, next_word -> (seed_word, next_word)
in
loop ("\n", "\n")
| Some w ->
let possible_indexes =
Hashtbl.fold
(fun k v acc -> if v = w then k :: acc else acc)
generator.words []
in
let index = list_random_el possible_indexes + dir in
(w, Hashtbl.find generator.words index)
let possible_indexes =
Hashtbl.fold
(fun k v acc -> if v = w then k :: acc else acc)
generator.words []
in
let index = list_random_el possible_indexes + dir in
(w, Hashtbl.find generator.words index)
let generate_markov_text generator max_size seed backward =
let seed_word, next_word =
match seed with
| None, x | x, None ->
select_seed generator x backward
| Some x, Some y ->
(x, y)
| None, x | x, None -> select_seed generator x backward
| Some x, Some y -> (x, y)
in
let cache =
if backward then generator.backward_cache else generator.forward_cache
@ -114,7 +106,7 @@ let generate_markov_text generator max_size seed backward =
let exception Stop in
( try
for _ = 0 to max_size do
gen_words := !w1 :: !gen_words ;
gen_words := !w1 :: !gen_words;
let tbl =
try Hashtbl.find cache (!w1, !w2) with Not_found -> raise Stop
in
@ -126,25 +118,22 @@ let generate_markov_text generator max_size seed backward =
Hashtbl.fold
(fun k v acc ->
let acc = acc + v in
if i <= acc then raise (Found k) else acc)
if i <= acc then raise (Found k) else acc )
tbl 0
with
| exception Found s ->
s
| _ ->
raise Stop
| exception Found s -> s
| _ -> raise Stop
in
w1 := !w2 ;
w1 := !w2;
w2 := new_word
done
with Stop -> () ) ;
if !w2 <> "\n" then gen_words := !w2 :: !gen_words ;
with Stop -> () );
if !w2 <> "\n" then gen_words := !w2 :: !gen_words;
let gen_words = List.filter (fun el -> el <> "\n") !gen_words in
let buff = Buffer.create 512 in
( match if backward then gen_words else List.rev gen_words with
| [] ->
()
| [] -> ()
| x :: s ->
Buffer.add_string buff x ;
List.iter (fun el -> Buffer.add_string buff (" " ^ el)) s ) ;
Buffer.add_string buff x;
List.iter (fun el -> Buffer.add_string buff (" " ^ el)) s );
Buffer.contents buff

44
test/test.ml

@ -2,47 +2,47 @@ let source = "foo bar baz qux"
let test_triples () =
assert (
Omg.triples ["foo"; "bar"; "baz"; "qux"]
= [("foo", "bar", "baz"); ("bar", "baz", "qux")] )
Omg.triples [ "foo"; "bar"; "baz"; "qux" ]
= [ ("foo", "bar", "baz"); ("bar", "baz", "qux") ] )
let test_forward () =
let wanted = ["foo bar baz qux"; "bar baz qux"; "baz qux"] in
let wanted = [ "foo bar baz qux"; "bar baz qux"; "baz qux" ] in
for _ = 1 to 1000 do
let generator = Omg.init () in
let generator = Omg.feed generator source in
let generated = Omg.generate_markov_text generator 30 (None, None) false in
assert (List.mem generated wanted)
done ;
done;
let seen = List.map (fun el -> (el, ref false)) wanted in
for _ = 1 to 1000 do
let generator = Omg.init () in
let generator = Omg.feed generator source in
let generated = Omg.generate_markov_text generator 30 (None, None) false in
List.iter (fun (el, seen) -> if el = generated then seen := true) seen
done ;
done;
List.iter
(fun (el, seen) ->
if not !seen then failwith (Format.sprintf "didn't see #%s#" el))
if not !seen then failwith (Format.sprintf "didn't see #%s#" el) )
seen
let test_backward () =
let wanted = ["foo bar baz qux"; "foo bar baz"; "foo bar"] in
let wanted = [ "foo bar baz qux"; "foo bar baz"; "foo bar" ] in
for _ = 1 to 1000 do
let generator = Omg.init () in
let generator = Omg.feed generator source in
let generated = Omg.generate_markov_text generator 30 (None, None) true in
assert (List.mem generated wanted)
done ;
done;
let seen = List.map (fun el -> (el, ref false)) wanted in
for _ = 1 to 1000 do
let generator = Omg.init () in
let generator = Omg.feed generator source in
let generated = Omg.generate_markov_text generator 30 (None, None) true in
List.iter (fun (el, seen) -> if el = generated then seen := true) seen
done ;
done;
List.iter
(fun (el, seen) ->
if not !seen then failwith (Format.sprintf "didn't see #%s#" el))
if not !seen then failwith (Format.sprintf "didn't see #%s#" el) )
seen
let test_force_seed_forward () =
@ -56,7 +56,7 @@ let test_force_seed_forward () =
if not (generated = wanted) then
failwith
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted)
done ;
done;
for _ = 1 to 1000 do
let generator = Omg.init () in
let generator = Omg.feed generator source in
@ -79,7 +79,7 @@ let test_force_seed_backward () =
if not (generated = wanted) then
failwith
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted)
done ;
done;
for _ = 1 to 1000 do
let generator = Omg.init () in
let generator = Omg.feed generator source in
@ -92,14 +92,14 @@ let test_force_seed_backward () =
done
let () =
Format.printf "testing triples...@." ;
test_triples () ;
Format.printf "testing forward...@." ;
test_forward () ;
Format.printf "testing backward...@." ;
test_backward () ;
Format.printf "testing force seed forward...@." ;
test_force_seed_forward () ;
Format.printf "test force seed backward...@." ;
test_force_seed_backward () ;
Format.printf "testing triples...@.";
test_triples ();
Format.printf "testing forward...@.";
test_forward ();
Format.printf "testing backward...@.";
test_backward ();
Format.printf "testing force seed forward...@.";
test_force_seed_forward ();
Format.printf "test force seed backward...@.";
test_force_seed_backward ();
Format.printf "Tests are OK !@."

Loading…
Cancel
Save