bot ahokaiの作成
さっきできた。3時間ぐらいかかった。 http://twitter.com/ahokai
いつかどこかでn-gramモデルを作って文章を連結させると自然になると読んだので、やってみた。
DBに溜めたpostをmecabで分割する所で文字コード関係でつまづいていたが、色々やってたらなんとかなった。
口調が完全に自分と同じなのできもい。
まず自分のpostを収集するDBを作成
migrate_posts.rb
#!/usr/bin/env ruby
# Creates or drops the `posts` table in the SQLite file `db_posts`.
#
# Usage:
#   ruby migrate_posts.rb up    # create the table
#   ruby migrate_posts.rb down  # drop the table
#
# With a missing or unrecognized argument the script prints every stored post,
# then the usage line, and exits with status 1.
require 'rubygems'
require 'active_record'
require File.dirname(__FILE__) + "/model_post.rb"

ActiveRecord::Base.establish_connection(
  :adapter => 'sqlite3',
  #:dbfile => ':memory:',
  :dbfile  => 'db_posts',
  :timeout => 30000
)

# Schema migration for the table that stores collected posts.
class PostMigration < ActiveRecord::Migration
  # Creates the posts table: message text, permalink uri and timestamp.
  def self.up
    create_table(:posts) do |t|
      t.string   :message, :null => false
      t.string   :uri,     :null => false
      # A :time column models only a time of day; :datetime keeps the date as
      # well, which a post timestamp needs.
      t.datetime :time,    :null => false
    end
  end

  # Drops the posts table.
  def self.down
    drop_table :posts
  end
end

direction = ARGV[0]

unless %w[up down].include?(direction)
  # No valid direction given: dump the current contents (if the table is
  # reachable) and show how to call the script.
  begin
    Post.find(:all).each { |post| puts post.to_s }
  rescue StandardError
    puts "couldn't connect dbfile"
  end
  puts 'usage: "ruby migrate_posts.rb up" or "ruby migrate_posts.rb down"'
  exit(1)
end

PostMigration.migrate(direction)
ActiveRecordのmodel
model_post.rb
# ActiveRecord model for a collected post.
class Post < ActiveRecord::Base
  # Renders the record as its time, message and uri separated by single spaces.
  def to_s
    [time, message, uri].join(' ')
  end
end
収集スクリプト。3200件までは取得できた。
store.rb
#!/usr/bin/env ruby
# Walks the Atom user timeline of one Twitter account page by page and stores
# every entry whose uri is not yet present in the SQLite file `db_posts`.
require 'rubygems'
require 'active_record'
require 'feed-normalizer'
require 'open-uri'
require 'kconv'
require File.dirname(__FILE__) + "/model_post.rb"

user = "3631571" # Twitter ID

ActiveRecord::Base.establish_connection(
  :adapter => 'sqlite3',
  #:dbfile => ':memory:',
  :dbfile  => 'db_posts',
  :timeout => 30000
)

# Replaces numeric character references (decimal "&#NNN;" or hexadecimal
# "&#xHH;") in +text+ with the UTF-8 character they denote.
#
# At least one digit is required, so a stray "&#;" is left untouched instead of
# being turned into a NUL character, and hexadecimal references may have one to
# six digits rather than exactly four.
def decode_numeric_entities(text)
  text.gsub(/&#(?:(\d+)|[xX]([0-9a-fA-F]{1,6}));/) do
    codepoint = $1 ? $1.to_i : $2.to_i(16)
    [codepoint].pack('U')
  end
end

(1..160).each do |page|
  uri = "http://twitter.com/statuses/user_timeline/#{user}.atom?page=#{page}"
  feed = FeedNormalizer::FeedNormalizer.parse open(uri)
  puts uri

  if feed.nil?
    # NOTE(review): feed-normalizer is expected to return nil when it cannot
    # parse a document; skip such a page instead of failing on nil.entries.
    warn "could not parse feed: #{uri}"
  else
    feed.entries.each do |e|
      # The uri identifies a post, so an already stored uri is not stored again.
      next if Post.find_by_uri(e.url)

      post = Post.create(
        :uri     => e.url,
        :message => decode_numeric_entities(e.content),
        :time    => e.last_updated
      )
      puts post
    end
  end

  # Pause between page requests.
  sleep 10
end
n-gramを作るためのDBのmigration用
migrate_ngrams.rb
#!/usr/bin/env ruby
# Creates or drops the `ngrams` table in the SQLite file `db_ngrams`.
# Called with "up" or "down"; any other invocation lists the stored n-grams,
# prints the usage line and exits with status 1.
require 'rubygems'
require 'active_record'
require File.dirname(__FILE__) + "/model_ngram.rb"

ActiveRecord::Base.establish_connection(
  :adapter => 'sqlite3',
  #:dbfile => ':memory:',
  :dbfile  => 'db_ngrams',
  :timeout => 30000
)

# Schema migration for the table of 3-grams and their occurrence counts.
class NgramMigration < ActiveRecord::Migration
  # Creates the ngrams table: three word columns plus a counter.
  def self.up
    create_table(:ngrams) do |t|
      t.string :a, :null => false
      t.string :b, :null => false
      t.string :c, :null => false
      t.column :count, :int, :null => false
    end
  end

  # Drops the ngrams table.
  def self.down
    drop_table :ngrams
  end
end

unless %w[up down].include?(ARGV[0])
  # No valid direction: show what is stored (if reachable) and how to call us.
  begin
    Ngram.find(:all).each do |ng|
      puts ng.to_s
    end
  rescue
    puts "couldn't connect dbfile"
  end
  puts 'usage: "ruby migrate_ngrams.rb up" or "ruby migrate_ngrams.rb down"'
  exit(1)
end

puts ARGV[0]
NgramMigration.migrate(ARGV[0])
今回は3-gramにする。active_recordのmodel。
model_ngram.rb
# ActiveRecord model for one 3-gram: the words a, b, c and how often the
# sequence was counted.
class Ngram < ActiveRecord::Base
  # Renders the record as "a b c count", separated by single spaces.
  def to_s
    [a, b, c, count].join(' ')
  end
end
収集した3000ぐらいのpostからn-gramモデルを作成するスクリプト
make3gram.rb
#!/usr/bin/env ruby
# Builds the 3-gram table: reads every stored post from `db_posts`, splits its
# message into words with MeCab, and counts each run of three consecutive
# words in `db_ngrams`.
require 'rubygems'
require 'MeCab'
require 'active_record'
require 'kconv'
require File.dirname(__FILE__) + "/model_post.rb"
require File.dirname(__FILE__) + "/model_ngram.rb"
#$KCODE = 'UTF8'

ActiveRecord::Base.establish_connection(
  :adapter => 'sqlite3',
  #:dbfile => ':memory:',
  :dbfile  => 'db_posts',
  :timeout => 30000
)

mecab = MeCab::Tagger.new('-Ochasen')

# Read all messages now: the connection is pointed at the n-gram database
# right below, after which the posts are no longer reachable through it.
messages = Post.find(:all).map { |post| post.message }

ActiveRecord::Base.establish_connection(
  :adapter => 'sqlite3',
  #:dbfile => ':memory:',
  :dbfile  => 'db_ngrams',
  :timeout => 30000
)

messages.each do |message|
  puts message
  parsed = mecab.parse(message.gsub("shokai: ", ""))

  # Collect the first tab-separated field of every output line as a word.
  # each_line is used because String#each exists only in Ruby 1.8.
  words = []
  parsed.each_line do |line|
    # Drop only the end-of-sentence sentinel, a line consisting of just "EOS";
    # a real word that merely contains "EOS" is kept.
    next if line.chomp == 'EOS'
    words.push(line.split(/\t/)[0])
  end

  # Build the 3-grams; the loop does not run for fewer than three words.
  0.upto(words.size - 3) do |n|
    a, b, c = words[n, 3] # one 3-gram
    puts a + b + c

    ng = Ngram.find(:first, :conditions => ["a=? and b=? and c=?", a, b, c])
    if ng
      ng.count += 1
      ng.save
    else
      Ngram.create(:a => a, :b => b, :c => c, :count => 1)
    end
  end
end
できた3-gramを連結させて文章を作ってtwitterに投稿するスクリプト
post3gram.rb
#!/usr/bin/env ruby
# Generates a sentence by chaining stored 3-grams and posts it to Twitter.
#
# A random 3-gram starts the sentence. Up to 50 times, a random 3-gram whose
# first two words equal the last two words of the current one is chosen and its
# third word is appended. Generation stops early when no 3-gram continues.
require 'rubygems'
require 'MeCab'
require 'active_record'
require 'kconv'
gem 'twitter'
require 'twitter'
require File.dirname(__FILE__) + "/model_ngram.rb"
#$KCODE = 'UTF8'

user = "username"
pass = "password"

ActiveRecord::Base.establish_connection(
  :adapter => 'sqlite3',
  #:dbfile => ':memory:',
  :dbfile  => File.dirname(__FILE__) + '/db_ngrams',
  :timeout => 30000
)

all_ngrams = Ngram.find(:all)
# Without any 3-gram there is nothing to say; stop with a clear message
# instead of failing on a nil record further down.
abort 'no 3-grams stored; run make3gram.rb first' if all_ngrams.empty?

head = all_ngrams[rand(all_ngrams.size)]
puts head.to_s
results = head.a + head.b + head.c

50.times do
  candidates = Ngram.find(:all, :conditions => ["a=? and b=?", head.b, head.c])
  # Dead end: no stored 3-gram continues the current word pair.
  break if candidates.empty?

  head = candidates[rand(candidates.size)]
  puts head.to_s
  results << head.c
end

twit = Twitter::Base.new(user, pass)
twit.update(results.toutf8)
puts results