zapashcanon
5 years ago
commit
8422f504ec
13 changed files with 441 additions and 0 deletions
@ -0,0 +1,53 @@ |
|||
image: debian/unstable |
|||
packages: |
|||
- opam |
|||
- ocaml |
|||
sources: |
|||
- https://git.zapashcanon.fr/zapashcanon/omg |
|||
environment: |
|||
name: omg |
|||
deploy: fs@zapashcanon.fr |
|||
sshopts: "-o StrictHostKeyChecking=no -q" |
|||
coverage_dst: /var/www/coverage.zapashcanon.fr |
|||
doc_dst: /var/www/doc.zapashcanon.fr |
|||
archive_dst: /var/www/fs.zapashcanon.fr/archive |
|||
secrets: |
|||
- ec1f49cd-38dc-41d9-89f4-c3b6ecd7bcad # ssh deploy key |
|||
tasks: |
|||
- setup: | |
|||
opam init -y |
|||
opam update -y |
|||
opam install -y dune bisect_ppx odoc ocamlformat |
|||
- lint-format: | |
|||
cd $name |
|||
eval "$(opam env)" |
|||
ocamlformat -p ocamlformat --enable-outside-detected-project --check $(find . -name '*.ml') |
|||
- build: | |
|||
cd $name |
|||
eval "$(opam env)" |
|||
dune build @all |
|||
- test: | |
|||
cd $name |
|||
eval "$(opam env)" |
|||
dune runtest |
|||
- deploy-doc: | |
|||
cd $name |
|||
eval "$(opam env)" |
|||
dune build @doc |
|||
ssh $sshopts $deploy "mkdir -p $doc_dst/$name/" |
|||
scp $sshopts -r _build/default/_doc/_html/* $deploy:$doc_dst/$name/ |
|||
- deploy-coverage: | |
|||
cd $name |
|||
eval "$(opam env)" |
|||
dune clean |
|||
BISECT_ENABLE=YES dune runtest --no-buffer --force > /dev/null |
|||
bisect-ppx-report -html _coverage/ "$(find . -name 'bisect*.out')" |
|||
ssh $sshopts $deploy "mkdir -p $coverage_dst/$name/" |
|||
scp $sshopts -r _coverage/* $deploy:$coverage_dst/$name/ |
|||
- archive: | |
|||
cd $name |
|||
eval "$(opam env)" |
|||
dune clean |
|||
git archive -o ${name}-dev.tar.xz HEAD |
|||
ssh $sshopts $deploy "mkdir -p $archive_dst/$name/" |
|||
scp $sshopts ${name}-dev.tar.xz $deploy:$archive_dst/$name/ |
@ -0,0 +1,4 @@ |
|||
_build/ |
|||
_coverage/ |
|||
*.merlin |
|||
*.install |
@ -0,0 +1,3 @@ |
|||
## 0.0.1 - 2019-11-20 |
|||
|
|||
First release |
@ -0,0 +1,9 @@ |
|||
The ISC License (ISC) |
|||
===================== |
|||
|
|||
Copyright © 2019, Léo Andrès |
|||
|
|||
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. |
|||
|
|||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|||
|
@ -0,0 +1,9 @@ |
|||
# omg [![builds.sr.ht status](https://builds.sr.ht/~zapashcanon/omg.svg)](https://builds.sr.ht/~zapashcanon/omg?) |
|||
|
|||
omg (OCaml Markov Gen) is an OCaml library to generate text using Markov Chains. |
|||
|
|||
## Credits |
|||
|
|||
The code is inspired by [markovgen]. |
|||
|
|||
[markovgen]: https://github.com/ProgVal/markovgen |
@ -0,0 +1,31 @@ |
|||
(lang dune 1.11) |
|||
|
|||
(name omg) |
|||
|
|||
(license ISC) |
|||
|
|||
(authors "Léo Andrès <contact@ndrs.fr>") |
|||
|
|||
(maintainers "Léo Andrès <contact@ndrs.fr>") |
|||
|
|||
(source |
|||
(uri git://git.zapashcanon.fr/zapashcanon/omg.git)) |
|||
|
|||
(bug_reports https://git.zapashcanon.fr/zapashcanon/omg/issues) |
|||
|
|||
(homepage https://git.zapashcanon.fr/zapashcanon/omg) |
|||
|
|||
(documentation https://doc.zapashcanon.fr/omg/) |
|||
|
|||
(generate_opam_files true) |
|||
|
|||
(explicit_js_mode) |
|||
|
|||
(package |
|||
(name omg) |
|||
(synopsis "Generate text using Markov chains") |
|||
(description |
|||
"omg is an OCaml library to generate random text using Markov chains. You initialize a generator, feed it with some input, then it generates random text based on the input.") |
|||
(depends |
|||
(dune |
|||
(> 1.11.0)))) |
@ -0,0 +1,3 @@ |
|||
(executable |
|||
(name random_text) |
|||
(libraries omg uutf uucp)) |
@ -0,0 +1,34 @@ |
|||
let _ = |
|||
if Array.length Sys.argv < 2 then |
|||
failwith (Format.sprintf "usage: %s <file>" Sys.argv.(0)) ; |
|||
let file = Sys.argv.(1) in |
|||
let chan = open_in file in |
|||
let lines = ref [] in |
|||
let keep_alpha s = |
|||
let to_keep = [' '; '\''; '-'] in |
|||
let to_keep = List.map (fun el -> Uchar.of_char el) to_keep in |
|||
let b = Buffer.create 256 in |
|||
let add_alpha () _ = function |
|||
| `Malformed _ -> |
|||
Uutf.Buffer.add_utf_8 b Uutf.u_rep |
|||
| `Uchar u -> |
|||
if Uucp.Alpha.is_alphabetic u || List.mem u to_keep then |
|||
Uutf.Buffer.add_utf_8 b u |
|||
in |
|||
Uutf.String.fold_utf_8 add_alpha () s ; |
|||
Buffer.contents b |
|||
in |
|||
( try |
|||
while true do |
|||
let l = String.trim (input_line chan) in |
|||
let l = keep_alpha l in |
|||
lines := l :: !lines |
|||
done |
|||
with End_of_file -> close_in chan ) ; |
|||
let lines = List.rev !lines in |
|||
let generator = Omg.init () in |
|||
List.iter (fun el -> Omg.feed generator el) lines ; |
|||
for _ = 0 to 30 do |
|||
Format.fprintf Format.std_formatter "GEN: %s@." |
|||
(Omg.generate_markov_text generator 50 (None, None) false) |
|||
done |
@ -0,0 +1,29 @@ |
|||
# This file is generated by dune, edit dune-project instead |
|||
opam-version: "2.0" |
|||
synopsis: "Generate text using Markov chains" |
|||
description: |
|||
"omg is an OCaml library to generate random text using Markov chains. You initialize a generator, feed it with some input, then it generates random text based on the input." |
|||
maintainer: ["Léo Andrès <contact@ndrs.fr>"] |
|||
authors: ["Léo Andrès <contact@ndrs.fr>"] |
|||
license: "ISC" |
|||
homepage: "https://git.zapashcanon.fr/zapashcanon/omg" |
|||
doc: "https://doc.zapashcanon.fr/omg/" |
|||
bug-reports: "https://git.zapashcanon.fr/zapashcanon/omg/issues" |
|||
depends: [ |
|||
"dune" {> "1.11.0"} |
|||
] |
|||
build: [ |
|||
["dune" "subst"] {pinned} |
|||
[ |
|||
"dune" |
|||
"build" |
|||
"-p" |
|||
name |
|||
"-j" |
|||
jobs |
|||
"@install" |
|||
"@runtest" {with-test} |
|||
"@doc" {with-doc} |
|||
] |
|||
] |
|||
dev-repo: "git://git.zapashcanon.fr/zapashcanon/omg.git" |
@ -0,0 +1,4 @@ |
|||
(library |
|||
(public_name omg) |
|||
(preprocess |
|||
(pps bisect_ppx -conditional))) |
@ -0,0 +1,154 @@ |
|||
let _ = Random.self_init () |
|||
|
|||
type generator = |
|||
{ forward_cache: (string * string, (string, int) Hashtbl.t) Hashtbl.t |
|||
; backward_cache: (string * string, (string, int) Hashtbl.t) Hashtbl.t |
|||
; words: (int, string) Hashtbl.t |
|||
; last_two_words: (string * string) ref } |
|||
|
|||
let add_word generator w = |
|||
generator.last_two_words := (snd !(generator.last_two_words), w) ; |
|||
let n = Hashtbl.length generator.words in |
|||
Hashtbl.add generator.words n w |
|||
|
|||
let get_last_two_words generator = |
|||
match !(generator.last_two_words) with |
|||
| "", "" -> |
|||
[] |
|||
| x, "" | "", x -> |
|||
[x] |
|||
| x, y -> |
|||
[x; y] |
|||
|
|||
let init () = |
|||
let gen = |
|||
{ forward_cache= Hashtbl.create 4096 |
|||
; backward_cache= Hashtbl.create 4096 |
|||
; words= Hashtbl.create 4096 |
|||
; last_two_words= ref ("", "") } |
|||
in |
|||
add_word gen "\n" ; gen |
|||
|
|||
let triples = function |
|||
| w1 :: w2 :: s -> |
|||
let _, _, acc = |
|||
List.fold_left |
|||
(fun (w1, w2, acc) el -> (w2, el, (w1, w2, el) :: acc)) |
|||
(w1, w2, []) s |
|||
in |
|||
List.rev acc |
|||
| _ -> |
|||
[] |
|||
|
|||
let rec add_key cache k v = |
|||
match Hashtbl.find cache k with |
|||
| exception Not_found -> |
|||
Hashtbl.add cache k (Hashtbl.create 64) ; |
|||
add_key cache k v |
|||
| tbl -> ( |
|||
match Hashtbl.find tbl v with |
|||
| exception Not_found -> |
|||
Hashtbl.add tbl v 0 |
|||
| n -> |
|||
Hashtbl.replace tbl v (n + 1) ) |
|||
|
|||
let feed generator msg = |
|||
let splitted = String.split_on_char ' ' msg in |
|||
let splitted = splitted @ ["\n"] in |
|||
let triples = triples (get_last_two_words generator @ splitted) in |
|||
List.iter |
|||
(fun (w1, w2, w3) -> add_key generator.forward_cache (w1, w2) w3) |
|||
triples ; |
|||
List.iter |
|||
(fun (w1, w2, w3) -> add_key generator.backward_cache (w3, w2) w1) |
|||
triples ; |
|||
List.iter (fun el -> add_word generator el) splitted |
|||
|
|||
let select_seed generator seed_word backward = |
|||
let dir = if backward then -1 else 1 in |
|||
match seed_word with |
|||
| None -> |
|||
let seed_word = ref "\n" in |
|||
let next_word = ref "\n" in |
|||
while !seed_word = "\n" || !next_word = "\n" do |
|||
let seed = 1 + Random.int (Hashtbl.length generator.words - 2) in |
|||
seed_word := Hashtbl.find generator.words seed ; |
|||
next_word := Hashtbl.find generator.words (seed + dir) |
|||
done ; |
|||
(!seed_word, !next_word) |
|||
| Some w -> |
|||
let possible_indexes = |
|||
Hashtbl.fold |
|||
(fun k v acc -> if v = w then k :: acc else acc) |
|||
generator.words [] |
|||
in |
|||
if possible_indexes = [] then failwith "select_seed" ; |
|||
let index = Random.int (List.length possible_indexes) in |
|||
let index = List.nth possible_indexes index in |
|||
(w, Hashtbl.find generator.words (index + dir)) |
|||
|
|||
let generate_markov_text generator max_size seed backward = |
|||
let seed_word, next_word = |
|||
match seed with |
|||
| None, None -> |
|||
select_seed generator None backward |
|||
| Some x, None | None, Some x -> |
|||
select_seed generator (Some x) backward |
|||
| Some x, Some y -> |
|||
(x, y) |
|||
in |
|||
let cache = |
|||
if backward then generator.backward_cache else generator.forward_cache |
|||
in |
|||
let w1, w2 = |
|||
if Random.int 3 = 0 && Hashtbl.mem cache ("\n", seed_word) then |
|||
("\n", seed_word) |
|||
else (seed_word, next_word) |
|||
in |
|||
let w1 = ref w1 in |
|||
let w2 = ref w2 in |
|||
let gen_words = ref [] in |
|||
let exception Stop in |
|||
( try |
|||
for _ = 0 to max_size do |
|||
gen_words := !w1 :: !gen_words ; |
|||
let tbl = |
|||
match Hashtbl.find cache (!w1, !w2) with |
|||
| exception Not_found -> |
|||
raise Stop |
|||
| tbl -> |
|||
tbl |
|||
in |
|||
let cache_n = Hashtbl.fold (fun _ v acc -> acc + v) tbl 0 in |
|||
let i = if cache_n = 0 then 0 else Random.int cache_n in |
|||
let exception Found of string in |
|||
let new_word = |
|||
match |
|||
Hashtbl.fold |
|||
(fun k v acc -> |
|||
let acc = acc + v in |
|||
if i <= acc then raise (Found k) else acc) |
|||
tbl 0 |
|||
with |
|||
| exception Found s -> |
|||
s |
|||
| _ -> |
|||
raise Stop |
|||
in |
|||
w1 := !w2 ; |
|||
w2 := new_word |
|||
done |
|||
with Stop -> () ) ; |
|||
if not (!w2 = "\n") then gen_words := !w2 :: !gen_words ; |
|||
let gen_words = !gen_words in |
|||
let gen_words = List.filter (fun el -> not (el = "\n")) gen_words in |
|||
let buff = Buffer.create 512 in |
|||
( match if not backward then List.rev gen_words else gen_words with |
|||
| [x] -> |
|||
Buffer.add_string buff x |
|||
| x :: s -> |
|||
Buffer.add_string buff x ; |
|||
List.iter (fun el -> Buffer.add_string buff (" " ^ el)) s |
|||
| [] -> |
|||
() ) ; |
|||
Buffer.contents buff |
@ -0,0 +1,3 @@ |
|||
(test |
|||
(name test) |
|||
(libraries omg)) |
@ -0,0 +1,105 @@ |
|||
let source = "foo bar baz qux" |
|||
|
|||
let test_triples () = |
|||
assert ( |
|||
Omg.triples ["foo"; "bar"; "baz"; "qux"] |
|||
= [("foo", "bar", "baz"); ("bar", "baz", "qux")] ) |
|||
|
|||
let test_forward () = |
|||
let wanted = ["foo bar baz qux"; "bar baz qux"; "baz qux"] in |
|||
for _ = 1 to 1000 do |
|||
let generator = Omg.init () in |
|||
Omg.feed generator source ; |
|||
let generated = Omg.generate_markov_text generator 30 (None, None) false in |
|||
assert (List.mem generated wanted) |
|||
done ; |
|||
let seen = List.map (fun el -> (el, ref false)) wanted in |
|||
for _ = 1 to 1000 do |
|||
let generator = Omg.init () in |
|||
Omg.feed generator source ; |
|||
let generated = Omg.generate_markov_text generator 30 (None, None) false in |
|||
List.iter (fun (el, seen) -> if el = generated then seen := true) seen |
|||
done ; |
|||
List.iter |
|||
(fun (el, seen) -> |
|||
if not !seen then failwith (Format.sprintf "didn't see #%s#" el)) |
|||
seen |
|||
|
|||
let test_backward () = |
|||
let wanted = ["foo bar baz qux"; "foo bar baz"; "foo bar"] in |
|||
for _ = 1 to 1000 do |
|||
let generator = Omg.init () in |
|||
Omg.feed generator source ; |
|||
let generated = Omg.generate_markov_text generator 30 (None, None) true in |
|||
assert (List.mem generated wanted) |
|||
done ; |
|||
let seen = List.map (fun el -> (el, ref false)) wanted in |
|||
for _ = 1 to 1000 do |
|||
let generator = Omg.init () in |
|||
Omg.feed generator source ; |
|||
let generated = Omg.generate_markov_text generator 30 (None, None) true in |
|||
List.iter (fun (el, seen) -> if el = generated then seen := true) seen |
|||
done ; |
|||
List.iter |
|||
(fun (el, seen) -> |
|||
if not !seen then failwith (Format.sprintf "didn't see #%s#" el)) |
|||
seen |
|||
|
|||
let test_force_seed_forward () = |
|||
let wanted = "bar baz qux" in |
|||
for _ = 1 to 1000 do |
|||
let generator = Omg.init () in |
|||
Omg.feed generator source ; |
|||
let generated = |
|||
Omg.generate_markov_text generator 30 (Some "bar", None) false |
|||
in |
|||
if not (generated = wanted) then |
|||
failwith |
|||
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted) |
|||
done ; |
|||
for _ = 1 to 1000 do |
|||
let generator = Omg.init () in |
|||
Omg.feed generator source ; |
|||
let generated = |
|||
Omg.generate_markov_text generator 30 (Some "bar", Some "baz") false |
|||
in |
|||
if not (generated = wanted) then |
|||
failwith |
|||
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted) |
|||
done |
|||
|
|||
let test_force_seed_backward () = |
|||
let wanted = "foo bar" in |
|||
for _ = 1 to 1000 do |
|||
let generator = Omg.init () in |
|||
Omg.feed generator source ; |
|||
let generated = |
|||
Omg.generate_markov_text generator 30 (Some "bar", None) true |
|||
in |
|||
if not (generated = wanted) then |
|||
failwith |
|||
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted) |
|||
done ; |
|||
for _ = 1 to 1000 do |
|||
let generator = Omg.init () in |
|||
Omg.feed generator source ; |
|||
let generated = |
|||
Omg.generate_markov_text generator 30 (Some "bar", Some "foo") true |
|||
in |
|||
if not (generated = wanted) then |
|||
failwith |
|||
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted) |
|||
done |
|||
|
|||
let _ = |
|||
Format.printf "testing triples...@." ; |
|||
test_triples () ; |
|||
Format.printf "testing forward...@." ; |
|||
test_forward () ; |
|||
Format.printf "testing backward...@." ; |
|||
test_backward () ; |
|||
Format.printf "testing force seed forward...@." ; |
|||
test_force_seed_forward () ; |
|||
Format.printf "test force seed backward...@." ; |
|||
test_force_seed_backward () ; |
|||
Format.printf "Tests are OK !@." |
Loading…
Reference in new issue