昨日載せたコードを少し修正してみた。辞書ファイルへの書き込みも加えた。
MarkovChain.rb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# Tells whether +word+ carries one of the sentence terminators (".", "?", "!")
# that paragraphSplit's regex uses to cut the text into sentences.
#
# @param word [String]
# @return [Boolean]
def sentence_terminated?(word)
  word.include?(".") || word.include?("?") || word.include?("!")
end

# Reads the file at +text+ and flattens it into a list of whitespace-separated
# words. The text is cut into sentences after every ".", "?" or "!", and the
# first word of each sentence is prefixed with the "%START%" marker.
#
# @param text [String] path of the text file to read
# @return [Array<String>] all words in reading order
def paragraphSplit(text)
  words = []
  # File.read instead of Kernel#open: the latter may run a shell command when
  # the path starts with "|".
  File.read(text).scan(/[^.?!]*./).each do |sentence|
    sentence.split.each_with_index do |word, index|
      words << (index.zero? ? "%START%" + word : word)
    end
  end
  words
end

# Generates one random sentence by walking the Markov dictionary.
#
# The walk starts from a random triple whose first word carries "%START%",
# then repeatedly appends the last two words of a random triple whose first
# word equals the current suffix, until the suffix carries "%END%".
#
# @param markov [Array<Array<String>>, nil] triples as built by markovDic
# @return [String, nil] the sentence with the markers removed, or nil when no
#   complete sentence could be produced (empty dictionary, no start triple,
#   a suffix without followers, or no "%END%" within 100 steps)
def writeASentence(markov)
  return nil if markov.nil? || markov.empty?

  starts = markov.select { |first, _second, _third| first.include?("%START%") }
  return nil if starts.empty?

  # Index the triples by their first word once, instead of rescanning the
  # whole dictionary on every step.
  followers_by_word = markov.group_by(&:first)
  max_steps = 100

  first, second, third = starts.sample
  sentence = first + " " + second + " " + third
  suffix = third
  steps = 1

  until suffix.include?("%END%")
    return nil if steps >= max_steps

    followers = followers_by_word[suffix]
    # Dead end: nothing in the dictionary continues from this word.
    return nil if followers.nil?

    _, second, third = followers.sample
    sentence << " " << second << " " << third
    suffix = third
    steps += 1
  end

  sentence.gsub("%START%", "").gsub("%END%", "")
end

# Builds the Markov dictionary: one [word, next, next-next] triple for every
# position in +words+ that does not itself end a sentence. The third element
# is "%END%" when the second word ends a sentence (or the list runs out), and
# gets "%END%" appended when the third word ends a sentence.
#
# @param words [Array<String>, nil] words as produced by paragraphSplit
# @return [Array<Array<String>>] the triples; empty when fewer than three
#   words are given
def markovDic(words)
  markov = []
  return markov if words.nil? || words.size < 3

  (0..words.size - 2).each do |i|
    first = words[i]
    second = words[i + 1]
    third = words[i + 2]
    next if sentence_terminated?(first)

    markov << if third.nil? || sentence_terminated?(second)
                [first, second, "%END%"]
              elsif sentence_terminated?(third)
                [first, second, third + "%END%"]
              else
                [first, second, third]
              end
  end
  markov
end
MarkovChainTestKiche.rb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
# Driver script: builds a Markov dictionary from ./MINEDUC.txt, prints ten
# generated sentences, then saves the dictionary to ./MarkovDic.txt.

# require_relative resolves against this script's directory and does not
# depend on a Windows-style backslash path.
require_relative 'MarkovChain'

words = paragraphSplit('./MINEDUC.txt')
markov = markovDic(words)

1.upto(10) do |count|
  sentence = writeASentence(markov)
  puts "Sentence " + count.to_s + ": "
  puts sentence
  puts ""
end

# Dictionary file format: one triple per line, elements separated by a space.
File.open('./MarkovDic.txt', 'w') do |f|
  markov.each do |a, b, c|
    f.puts(a + " " + b + " " + c)
  end
end
辞書ファイルの読み込みはこの様に:
1 2 3 4 5 6 7 |
# Rebuild the Markov dictionary from the saved file: every line holds one
# whitespace-separated triple.
markov = File.foreach('./MarkovDic.txt').map do |entry|
  # values_at keeps exactly three slots per row, padding with nil when a line
  # has fewer tokens and ignoring any extra ones.
  entry.split.values_at(0, 1, 2)
end
単純だけど必要なことは全てこれらのコードで出来ている。しかし、コードを見ると統計分析を行っているような書き方になっている。癖が抜けないなぁと思う。