Browse Source

first commit

master
zapashcanon 5 years ago
commit
8422f504ec
Signed by: zapashcanon GPG Key ID: 8981C3C62D1D28F1
  1. 53
      .build.yml
  2. 4
      .gitignore
  3. 3
      CHANGES.md
  4. 9
      LICENSE.md
  5. 9
      README.md
  6. 31
      dune-project
  7. 3
      example/dune
  8. 34
      example/random_text.ml
  9. 29
      omg.opam
  10. 4
      src/dune
  11. 154
      src/omg.ml
  12. 3
      test/dune
  13. 105
      test/test.ml

53
.build.yml

@ -0,0 +1,53 @@
image: debian/unstable
packages:
- opam
- ocaml
sources:
- https://git.zapashcanon.fr/zapashcanon/omg
environment:
name: omg
deploy: fs@zapashcanon.fr
sshopts: "-o StrictHostKeyChecking=no -q"
coverage_dst: /var/www/coverage.zapashcanon.fr
doc_dst: /var/www/doc.zapashcanon.fr
archive_dst: /var/www/fs.zapashcanon.fr/archive
secrets:
- ec1f49cd-38dc-41d9-89f4-c3b6ecd7bcad # ssh deploy key
tasks:
- setup: |
opam init -y
opam update -y
opam install -y dune bisect_ppx odoc ocamlformat
- lint-format: |
cd $name
eval "$(opam env)"
ocamlformat -p ocamlformat --enable-outside-detected-project --check $(find . -name '*.ml')
- build: |
cd $name
eval "$(opam env)"
dune build @all
- test: |
cd $name
eval "$(opam env)"
dune runtest
- deploy-doc: |
cd $name
eval "$(opam env)"
dune build @doc
ssh $sshopts $deploy "mkdir -p $doc_dst/$name/"
scp $sshopts -r _build/default/_doc/_html/* $deploy:$doc_dst/$name/
- deploy-coverage: |
cd $name
eval "$(opam env)"
dune clean
BISECT_ENABLE=YES dune runtest --no-buffer --force > /dev/null
bisect-ppx-report -html _coverage/ "$(find . -name 'bisect*.out')"
ssh $sshopts $deploy "mkdir -p $coverage_dst/$name/"
scp $sshopts -r _coverage/* $deploy:$coverage_dst/$name/
- archive: |
cd $name
eval "$(opam env)"
dune clean
git archive -o ${name}-dev.tar.xz HEAD
ssh $sshopts $deploy "mkdir -p $archive_dst/$name/"
scp $sshopts ${name}-dev.tar.xz $deploy:$archive_dst/$name/

4
.gitignore

@ -0,0 +1,4 @@
_build/
_coverage/
*.merlin
*.install

3
CHANGES.md

@ -0,0 +1,3 @@
## 0.0.1 - 2019-11-20
First release

9
LICENSE.md

@ -0,0 +1,9 @@
The ISC License (ISC)
=====================
Copyright © 2019, Léo Andrès
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

9
README.md

@ -0,0 +1,9 @@
# omg [![builds.sr.ht status](https://builds.sr.ht/~zapashcanon/omg.svg)](https://builds.sr.ht/~zapashcanon/omg?)
omg (OCaml Markov Gen) is an OCaml library to generate text using Markov Chains.
## Credits
The code is inspired by [markovgen].
[markovgen]: https://github.com/ProgVal/markovgen

31
dune-project

@ -0,0 +1,31 @@
(lang dune 1.11)
(name omg)
(license ISC)
(authors "Léo Andrès <contact@ndrs.fr>")
(maintainers "Léo Andrès <contact@ndrs.fr>")
(source
(uri git://git.zapashcanon.fr/zapashcanon/omg.git))
(bug_reports https://git.zapashcanon.fr/zapashcanon/omg/issues)
(homepage https://git.zapashcanon.fr/zapashcanon/omg)
(documentation https://doc.zapashcanon.fr/omg/)
(generate_opam_files true)
(explicit_js_mode)
(package
(name omg)
(synopsis "Generate text using Markov chains")
(description
"omg is an OCaml library to generate random text using Markov chains. You initialize a generator, feed it with some input, then it generates random text based on the input.")
(depends
(dune
(> 1.11.0))))

3
example/dune

@ -0,0 +1,3 @@
(executable
(name random_text)
(libraries omg uutf uucp))

34
example/random_text.ml

@ -0,0 +1,34 @@
let _ =
if Array.length Sys.argv < 2 then
failwith (Format.sprintf "usage: %s <file>" Sys.argv.(0)) ;
let file = Sys.argv.(1) in
let chan = open_in file in
let lines = ref [] in
let keep_alpha s =
let to_keep = [' '; '\''; '-'] in
let to_keep = List.map (fun el -> Uchar.of_char el) to_keep in
let b = Buffer.create 256 in
let add_alpha () _ = function
| `Malformed _ ->
Uutf.Buffer.add_utf_8 b Uutf.u_rep
| `Uchar u ->
if Uucp.Alpha.is_alphabetic u || List.mem u to_keep then
Uutf.Buffer.add_utf_8 b u
in
Uutf.String.fold_utf_8 add_alpha () s ;
Buffer.contents b
in
( try
while true do
let l = String.trim (input_line chan) in
let l = keep_alpha l in
lines := l :: !lines
done
with End_of_file -> close_in chan ) ;
let lines = List.rev !lines in
let generator = Omg.init () in
List.iter (fun el -> Omg.feed generator el) lines ;
for _ = 0 to 30 do
Format.fprintf Format.std_formatter "GEN: %s@."
(Omg.generate_markov_text generator 50 (None, None) false)
done

29
omg.opam

@ -0,0 +1,29 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "Generate text using Markov chains"
description:
"omg is an OCaml library to generate random text using Markov chains. You initialize a generator, feed it with some input, then it generates random text based on the input."
maintainer: ["Léo Andrès <contact@ndrs.fr>"]
authors: ["Léo Andrès <contact@ndrs.fr>"]
license: "ISC"
homepage: "https://git.zapashcanon.fr/zapashcanon/omg"
doc: "https://doc.zapashcanon.fr/omg/"
bug-reports: "https://git.zapashcanon.fr/zapashcanon/omg/issues"
depends: [
"dune" {> "1.11.0"}
]
build: [
["dune" "subst"] {pinned}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
dev-repo: "git://git.zapashcanon.fr/zapashcanon/omg.git"

4
src/dune

@ -0,0 +1,4 @@
(library
(public_name omg)
(preprocess
(pps bisect_ppx -conditional)))

154
src/omg.ml

@ -0,0 +1,154 @@
let _ = Random.self_init ()
type generator =
{ forward_cache: (string * string, (string, int) Hashtbl.t) Hashtbl.t
; backward_cache: (string * string, (string, int) Hashtbl.t) Hashtbl.t
; words: (int, string) Hashtbl.t
; last_two_words: (string * string) ref }
let add_word generator w =
generator.last_two_words := (snd !(generator.last_two_words), w) ;
let n = Hashtbl.length generator.words in
Hashtbl.add generator.words n w
let get_last_two_words generator =
match !(generator.last_two_words) with
| "", "" ->
[]
| x, "" | "", x ->
[x]
| x, y ->
[x; y]
let init () =
let gen =
{ forward_cache= Hashtbl.create 4096
; backward_cache= Hashtbl.create 4096
; words= Hashtbl.create 4096
; last_two_words= ref ("", "") }
in
add_word gen "\n" ; gen
let triples = function
| w1 :: w2 :: s ->
let _, _, acc =
List.fold_left
(fun (w1, w2, acc) el -> (w2, el, (w1, w2, el) :: acc))
(w1, w2, []) s
in
List.rev acc
| _ ->
[]
let rec add_key cache k v =
match Hashtbl.find cache k with
| exception Not_found ->
Hashtbl.add cache k (Hashtbl.create 64) ;
add_key cache k v
| tbl -> (
match Hashtbl.find tbl v with
| exception Not_found ->
Hashtbl.add tbl v 0
| n ->
Hashtbl.replace tbl v (n + 1) )
let feed generator msg =
let splitted = String.split_on_char ' ' msg in
let splitted = splitted @ ["\n"] in
let triples = triples (get_last_two_words generator @ splitted) in
List.iter
(fun (w1, w2, w3) -> add_key generator.forward_cache (w1, w2) w3)
triples ;
List.iter
(fun (w1, w2, w3) -> add_key generator.backward_cache (w3, w2) w1)
triples ;
List.iter (fun el -> add_word generator el) splitted
let select_seed generator seed_word backward =
let dir = if backward then -1 else 1 in
match seed_word with
| None ->
let seed_word = ref "\n" in
let next_word = ref "\n" in
while !seed_word = "\n" || !next_word = "\n" do
let seed = 1 + Random.int (Hashtbl.length generator.words - 2) in
seed_word := Hashtbl.find generator.words seed ;
next_word := Hashtbl.find generator.words (seed + dir)
done ;
(!seed_word, !next_word)
| Some w ->
let possible_indexes =
Hashtbl.fold
(fun k v acc -> if v = w then k :: acc else acc)
generator.words []
in
if possible_indexes = [] then failwith "select_seed" ;
let index = Random.int (List.length possible_indexes) in
let index = List.nth possible_indexes index in
(w, Hashtbl.find generator.words (index + dir))
let generate_markov_text generator max_size seed backward =
let seed_word, next_word =
match seed with
| None, None ->
select_seed generator None backward
| Some x, None | None, Some x ->
select_seed generator (Some x) backward
| Some x, Some y ->
(x, y)
in
let cache =
if backward then generator.backward_cache else generator.forward_cache
in
let w1, w2 =
if Random.int 3 = 0 && Hashtbl.mem cache ("\n", seed_word) then
("\n", seed_word)
else (seed_word, next_word)
in
let w1 = ref w1 in
let w2 = ref w2 in
let gen_words = ref [] in
let exception Stop in
( try
for _ = 0 to max_size do
gen_words := !w1 :: !gen_words ;
let tbl =
match Hashtbl.find cache (!w1, !w2) with
| exception Not_found ->
raise Stop
| tbl ->
tbl
in
let cache_n = Hashtbl.fold (fun _ v acc -> acc + v) tbl 0 in
let i = if cache_n = 0 then 0 else Random.int cache_n in
let exception Found of string in
let new_word =
match
Hashtbl.fold
(fun k v acc ->
let acc = acc + v in
if i <= acc then raise (Found k) else acc)
tbl 0
with
| exception Found s ->
s
| _ ->
raise Stop
in
w1 := !w2 ;
w2 := new_word
done
with Stop -> () ) ;
if not (!w2 = "\n") then gen_words := !w2 :: !gen_words ;
let gen_words = !gen_words in
let gen_words = List.filter (fun el -> not (el = "\n")) gen_words in
let buff = Buffer.create 512 in
( match if not backward then List.rev gen_words else gen_words with
| [x] ->
Buffer.add_string buff x
| x :: s ->
Buffer.add_string buff x ;
List.iter (fun el -> Buffer.add_string buff (" " ^ el)) s
| [] ->
() ) ;
Buffer.contents buff

3
test/dune

@ -0,0 +1,3 @@
(test
(name test)
(libraries omg))

105
test/test.ml

@ -0,0 +1,105 @@
let source = "foo bar baz qux"
let test_triples () =
assert (
Omg.triples ["foo"; "bar"; "baz"; "qux"]
= [("foo", "bar", "baz"); ("bar", "baz", "qux")] )
let test_forward () =
let wanted = ["foo bar baz qux"; "bar baz qux"; "baz qux"] in
for _ = 1 to 1000 do
let generator = Omg.init () in
Omg.feed generator source ;
let generated = Omg.generate_markov_text generator 30 (None, None) false in
assert (List.mem generated wanted)
done ;
let seen = List.map (fun el -> (el, ref false)) wanted in
for _ = 1 to 1000 do
let generator = Omg.init () in
Omg.feed generator source ;
let generated = Omg.generate_markov_text generator 30 (None, None) false in
List.iter (fun (el, seen) -> if el = generated then seen := true) seen
done ;
List.iter
(fun (el, seen) ->
if not !seen then failwith (Format.sprintf "didn't see #%s#" el))
seen
let test_backward () =
let wanted = ["foo bar baz qux"; "foo bar baz"; "foo bar"] in
for _ = 1 to 1000 do
let generator = Omg.init () in
Omg.feed generator source ;
let generated = Omg.generate_markov_text generator 30 (None, None) true in
assert (List.mem generated wanted)
done ;
let seen = List.map (fun el -> (el, ref false)) wanted in
for _ = 1 to 1000 do
let generator = Omg.init () in
Omg.feed generator source ;
let generated = Omg.generate_markov_text generator 30 (None, None) true in
List.iter (fun (el, seen) -> if el = generated then seen := true) seen
done ;
List.iter
(fun (el, seen) ->
if not !seen then failwith (Format.sprintf "didn't see #%s#" el))
seen
let test_force_seed_forward () =
let wanted = "bar baz qux" in
for _ = 1 to 1000 do
let generator = Omg.init () in
Omg.feed generator source ;
let generated =
Omg.generate_markov_text generator 30 (Some "bar", None) false
in
if not (generated = wanted) then
failwith
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted)
done ;
for _ = 1 to 1000 do
let generator = Omg.init () in
Omg.feed generator source ;
let generated =
Omg.generate_markov_text generator 30 (Some "bar", Some "baz") false
in
if not (generated = wanted) then
failwith
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted)
done
let test_force_seed_backward () =
let wanted = "foo bar" in
for _ = 1 to 1000 do
let generator = Omg.init () in
Omg.feed generator source ;
let generated =
Omg.generate_markov_text generator 30 (Some "bar", None) true
in
if not (generated = wanted) then
failwith
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted)
done ;
for _ = 1 to 1000 do
let generator = Omg.init () in
Omg.feed generator source ;
let generated =
Omg.generate_markov_text generator 30 (Some "bar", Some "foo") true
in
if not (generated = wanted) then
failwith
(Format.sprintf "generated = #%s# and wanted = #%s#@." generated wanted)
done
let _ =
Format.printf "testing triples...@." ;
test_triples () ;
Format.printf "testing forward...@." ;
test_forward () ;
Format.printf "testing backward...@." ;
test_backward () ;
Format.printf "testing force seed forward...@." ;
test_force_seed_forward () ;
Format.printf "test force seed backward...@." ;
test_force_seed_backward () ;
Format.printf "Tests are OK !@."
Loading…
Cancel
Save