Kotz’i’jに恋して15：マルコフ連鎖(マヤ・キチェ語その２)

昨日載せたコードを少し修正してみた。辞書ファイルへの書き込みも加えた。

MarkovChain.rb

# Reads the text file at +text+ and returns all of its words in order,
# with the first word of every sentence prefixed by "%START%".
#
# The text is cut after each '.', '?' or '!'; trailing text that has no
# terminator is kept as a sentence of its own. Words are the
# whitespace-separated pieces of each sentence.
#
# text - path of the file to read.
#
# Returns an Array of Strings (empty when the file holds no words).
def paragraphSplit(text)
  # File.read always treats its argument as a file path; Kernel#open may
  # interpret a leading "|" as a command to run.
  File.read(text).scan(/[^.?!]*./).flat_map do |sentence|
    first, *rest = sentence.split
    # Whitespace-only fragments between sentences contribute nothing.
    first ? ["%START%" + first, *rest] : []
  end
end

# Generates one random sentence from a Markov dictionary.
#
# markov - Array of [a, b, c] String triples. A row whose first element
#          contains "%START%" may open a sentence; a row whose third
#          element contains "%END%" closes it.
#
# The sentence starts with a random opening row and is extended by
# repeatedly picking a random row whose first element equals the last
# element emitted, appending that row's second and third elements.
#
# Returns the sentence with the "%START%"/"%END%" markers removed.
# Returns nil when the dictionary has no opening row, or when no "%END%"
# marker is reached within 100 rows. If the chain hits a word that no row
# starts with, the text built so far is returned instead of raising.
def writeASentence(markov)
  starts = markov.select { |head, _middle, _tail| head.include?("%START%") }
  return nil if starts.empty?

  first, second, suffix = starts.sample
  pieces = [first, second, suffix]
  steps = 1

  until suffix.include?("%END%")
    # Length cap: give up after 100 rows without an end marker.
    return nil if steps >= 100

    candidates = markov.select { |head, _middle, _tail| head == suffix }
    # Dead end: nothing follows this word, so stop with what we have.
    break if candidates.empty?

    _head, middle, suffix = candidates.sample
    pieces.push(middle, suffix)
    steps += 1
  end

  pieces.join(" ").gsub("%START%", "").gsub("%END%", "")
end

# Builds the Markov dictionary: one [a, b, c] triple for every pair of
# consecutive words, where c is the word that follows the pair.
#
# words - Array of Strings; sentence-opening words are expected to carry
#         a "%START%" prefix.
#
# A word containing '.', '?' or '!' is treated as the end of a sentence:
# * no row starts with such a word;
# * if the second word ends the sentence, c is just "%END%";
# * if the third word ends the sentence (or is the last word of the
#   text), "%END%" is appended to it.
#
# Returns an Array of triples; empty when fewer than three words are given.
def markovDic(words)
  markov = []
  return markov if words.size < 3

  # '!' is included so the result agrees with a splitter that also cuts
  # sentences at '!'.
  ends_sentence = lambda { |word| word =~ /[.?!]/ }
  last_index = words.size - 1

  (0..words.size - 2).each do |i|
    first = words[i]
    second = words[i + 1]
    third = words[i + 2]
    next if ends_sentence.call(first)

    if third.nil? || ends_sentence.call(second)
      markov << [first, second, "%END%"]
    elsif ends_sentence.call(third) || i + 2 == last_index
      # The end of the text closes the sentence even without punctuation,
      # so no row is left pointing at a word nothing can follow.
      markov << [first, second, third + "%END%"]
    else
      markov << [first, second, third]
    end
  end

  markov
end

# Reads the text file at +text+ and returns all of its words in order,
# with the first word of every sentence prefixed by "%START%".
#
# The text is cut after each '.', '?' or '!'; trailing text that has no
# terminator is kept as a sentence of its own. Words are the
# whitespace-separated pieces of each sentence.
#
# text - path of the file to read.
#
# Returns an Array of Strings (empty when the file holds no words).
def paragraphSplit(text)
  # File.read always treats its argument as a file path; Kernel#open may
  # interpret a leading "|" as a command to run.
  File.read(text).scan(/[^.?!]*./).flat_map do |sentence|
    first, *rest = sentence.split
    # Whitespace-only fragments between sentences contribute nothing.
    first ? ["%START%" + first, *rest] : []
  end
end

# Generates one random sentence from a Markov dictionary.
#
# markov - Array of [a, b, c] String triples. A row whose first element
#          contains "%START%" may open a sentence; a row whose third
#          element contains "%END%" closes it.
#
# The sentence starts with a random opening row and is extended by
# repeatedly picking a random row whose first element equals the last
# element emitted, appending that row's second and third elements.
#
# Returns the sentence with the "%START%"/"%END%" markers removed.
# Returns nil when the dictionary has no opening row, or when no "%END%"
# marker is reached within 100 rows. If the chain hits a word that no row
# starts with, the text built so far is returned instead of raising.
def writeASentence(markov)
  starts = markov.select { |head, _middle, _tail| head.include?("%START%") }
  return nil if starts.empty?

  first, second, suffix = starts.sample
  pieces = [first, second, suffix]
  steps = 1

  until suffix.include?("%END%")
    # Length cap: give up after 100 rows without an end marker.
    return nil if steps >= 100

    candidates = markov.select { |head, _middle, _tail| head == suffix }
    # Dead end: nothing follows this word, so stop with what we have.
    break if candidates.empty?

    _head, middle, suffix = candidates.sample
    pieces.push(middle, suffix)
    steps += 1
  end

  pieces.join(" ").gsub("%START%", "").gsub("%END%", "")
end

# Builds the Markov dictionary: one [a, b, c] triple for every pair of
# consecutive words, where c is the word that follows the pair.
#
# words - Array of Strings; sentence-opening words are expected to carry
#         a "%START%" prefix.
#
# A word containing '.', '?' or '!' is treated as the end of a sentence:
# * no row starts with such a word;
# * if the second word ends the sentence, c is just "%END%";
# * if the third word ends the sentence (or is the last word of the
#   text), "%END%" is appended to it.
#
# Returns an Array of triples; empty when fewer than three words are given.
def markovDic(words)
  markov = []
  return markov if words.size < 3

  # '!' is included so the result agrees with a splitter that also cuts
  # sentences at '!'.
  ends_sentence = lambda { |word| word =~ /[.?!]/ }
  last_index = words.size - 1

  (0..words.size - 2).each do |i|
    first = words[i]
    second = words[i + 1]
    third = words[i + 2]
    next if ends_sentence.call(first)

    if third.nil? || ends_sentence.call(second)
      markov << [first, second, "%END%"]
    elsif ends_sentence.call(third) || i + 2 == last_index
      # The end of the text closes the sentence even without punctuation,
      # so no row is left pointing at a word nothing can follow.
      markov << [first, second, third + "%END%"]
    else
      markov << [first, second, third]
    end
  end

  markov
end

MarkovChainTestKiche.rb

# Driver script: builds a Markov dictionary from ./MINEDUC.txt, prints ten
# generated sentences, then saves the dictionary to ./MarkovDic.txt with
# one space-separated "a b c" row per line.

# A forward slash works on every platform; '.\' is only a path separator
# on Windows.
require './MarkovChain'

words = paragraphSplit('./MINEDUC.txt')
markov = markovDic(words)

1.upto(10) do |count|
  puts "Sentence " + count.to_s + ": "
  puts writeASentence(markov)
  puts ""
end

File.open('./MarkovDic.txt', 'w') do |f|
  markov.each do |a, b, c|
    f.puts(a + " " + b + " " + c)
  end
end

# Driver script: builds a Markov dictionary from ./MINEDUC.txt, prints ten
# generated sentences, then saves the dictionary to ./MarkovDic.txt with
# one space-separated "a b c" row per line.

# A forward slash works on every platform; '.\' is only a path separator
# on Windows.
require './MarkovChain'

words = paragraphSplit('./MINEDUC.txt')
markov = markovDic(words)

1.upto(10) do |count|
  puts "Sentence " + count.to_s + ": "
  puts writeASentence(markov)
  puts ""
end

File.open('./MarkovDic.txt', 'w') do |f|
  markov.each do |a, b, c|
    f.puts(a + " " + b + " " + c)
  end
end

辞書ファイルの読み込みはこの様に：

# Reload a saved dictionary from ./MarkovDic.txt, where every line holds
# one space-separated "a b c" row.
markov = []
File.foreach('./MarkovDic.txt') do |line|
  a, b, c = line.split
  # Skip blank or truncated lines so no row ends up with a nil field.
  next if c.nil?

  markov << [a, b, c]
end

# Reload a saved dictionary from ./MarkovDic.txt, where every line holds
# one space-separated "a b c" row.
markov = []
File.foreach('./MarkovDic.txt') do |line|
  a, b, c = line.split
  # Skip blank or truncated lines so no row ends up with a nil field.
  next if c.nil?

  markov << [a, b, c]
end

単純だけど必要なことは全てこれらのコードで出来ている。しかし、コードを見ると統計分析を行っているような書き方になっている。癖が抜けないなぁと思う。

13 No'j

グアテマラ、マヤ文明とプログラミング/Mayan World and Programming

Kotz’i’jに恋して15：マルコフ連鎖(マヤ・キチェ語その２)

Leave a Reply Cancel reply