延滞
自分が読みたいと思った本が、長く返されていないことが何度かあった。なんとなく、これはかなりの数があるのでは、と思いちょっとしたプログラムを作って調べたところ11592冊中83冊もの本が100日以上延滞されていた。図書館のデータベースには貸し出し検索サービスを介してしかアクセスできないので蔵書数がどれほどあるかわからないが、割合として0.7%というのは看過できない数ではないだろうか。
プログラムはAWSでISBNを取得し、そのISBNを使って図書館の検索サービスにアクセスしている。(キーはXで伏せてある。)
# Overdue-book finder.
#
# Two worker threads cooperate through the database:
#   * FindOverdue#amazon queries the Amazon ItemSearch API by title and stores
#     every returned item as an AmazonBook row.
#   * FindOverdue#tachikawa takes the stored ASINs, looks them up in the
#     library's web search and stores each copy's state as a TachikawaBook row.
# Row 1 of History holds the resume position of both workers.
# SIGINT (Ctrl-C) asks both workers to stop, waits for them and prints a summary.
#
# NOTE(review): written against Ruby 1.8 / Rails 2 era APIs (jcode,
# find(:all, ...), URI.escape); the syntax below is kept compatible with that.

require 'jcode'
require 'net/http'
require "rexml/document"
require 'rubygems'
require 'active_record'
require 'thread.rb'
require 'kconv' # provides String#toutf8, which is called on every library response
require 'app/models/history.rb'

Net::HTTP.version_1_2

module AmazonWebService
  module Associates
    # Minimal client for the ItemSearch operation of the Amazon E-Commerce
    # Service (Books index, "Medium" response group).
    class Client
      SERVICE = "Service=AWSECommerceService&"
      OPERATION = "Operation=ItemSearch&SearchIndex=Books&ResponseGroup=Medium&"
      VERSION = "Version=2008-03-03"

      # REXML document of the most recent response; nil before the first search.
      attr_accessor :doc

      # key:: AWS access key id sent with every request.
      def initialize(key = "XXXXXXXXXXXXXXXXXXXX")
        @aws_key = "AWSAccessKeyId=#{key}&"
      end

      # Searches by title. The title is URL-escaped exactly like the keywords
      # in #search_by_keywords; callers pass raw (kana) strings.
      def search_by_title(title, page = nil)
        search "Title=#{URI.escape(title)}&", page
      end

      # Searches by keywords (URL-escaped here).
      def search_by_keywords(keywords, page = nil)
        search "Keywords=#{URI.escape(keywords)}&", page
      end

      # Sends one ItemSearch request and stores the parsed response in #doc.
      #
      # params:: already-encoded query fragment ending in "&".
      # page::   result page to request, or nil to omit the ItemPage parameter.
      def search(params, page)
        page_param = page.nil? ? "" : "ItemPage=#{page}&"
        path = '/onca/xml?' + SERVICE + OPERATION + @aws_key + params + page_param + VERSION
        Net::HTTP.start('ecs.amazonaws.jp', 80) do |http|
          response = http.get(path)
          @doc = REXML::Document.new response.body
        end
      end

      # Items of the last response wrapped as Item hashes
      # (empty when nothing has been fetched yet).
      def items
        (parse("ItemSearchResponse/Items/Item") || []).map { |element| Item.new element }
      end

      # Text of the TotalPages element of the last response, or nil when there
      # is no response yet or the element is missing.
      def total_page
        found = parse("ItemSearchResponse/Items/TotalPages")
        found[0].text unless found.nil? || found[0].nil?
      end

      # Elements of the last response matching +xpath+; nil before the first search.
      def parse(xpath)
        @doc.elements.each(xpath) { nil } unless @doc.nil?
      end
    end

    # One search result reduced to the fields that get stored.
    # A missing element becomes an empty string.
    class Item < HashWithIndifferentAccess
      def initialize(item)
        text_at = Proc.new { |xpath|
          node = item.elements[xpath]
          node.nil? ? "" : node.text
        }
        self[:asin] = text_at.call "ASIN"
        self[:title] = text_at.call "*/Title"
        self[:author] = text_at.call "*/Author"
        self[:publication_date] = text_at.call "*/PublicationDate"
        self[:publisher] = text_at.call "*/Publisher"
      end
    end
  end
end

module TachikawaLibrary
  # Scraper for the web search at www.library.tachikawa.tokyo.jp.
  class Client
    HOST = 'www.library.tachikawa.tokyo.jp'
    PREFIX = "search_tech=direct&high_light=on&list_max_size=500&expanse.k=on&expanse.a=on&expanse.e=on&expanse.u=off&terms_connection=AND&search_condition.0="
    COMMA = "%2C"
    LEFT_PARENTHETIC = "OR%28"
    RIGHT_PARENTHETIC = "%29"
    SUFFIX = "&version=0210&server_host_name=OPAC&text_db_name=TOSHO&target_file=INDEX-F&listing_file=HEAD-F&view_file=HEAD-F"

    # Book records found by the last #search (extended by #get).
    attr_accessor :books

    private

    # Rewrites the returned HTML so that REXML can parse it: lower-cases tag
    # and attribute names, quotes bare attribute values, drops comments,
    # scripts and the first base/meta match, and self-closes input and br tags.
    def normalize_html(doc)
      doc.toutf8.
        gsub(/<([a-z]*)(\s*)/i){"<" +$1.downcase + $2 }.
        gsub(/<\/([a-z]*)(\s*)/i){"</" +$1.downcase + $2 }.
        gsub(/( \w*)=([a-zA-Z0-9][a-zA-Z]*)/i){$1.downcase+'="'+ $2 + '"' }.
        gsub(/( [\w-]*)=\"/i){$1.downcase+'="' }.
        gsub(/<!--((?!-->).)*-->/m){""}.
        gsub(/<script((?!\/script>).)*\/script>/m){""}.
        gsub(/(<input(?:(?!>).)*)(?:>)/m){$1 + "/>" }.
        sub(/(<base .*)>/){""}.sub(/(<meta.*)>/){""}.
        gsub(/<br>/){"<br/>"}
    end

    # Builds the POST body for the given ISBNs: one eq('...') term per ISBN,
    # comma separated and wrapped in OR(...) when there is more than one.
    def makecommand(isbns)
      terms = isbns.map { |isbn| "INDEX-F%3CTOSHO.INDEX-6%3E%3Aeq%28%27#{isbn}%27%29" }
      command = terms.join(COMMA)
      command = LEFT_PARENTHETIC + command + RIGHT_PARENTHETIC if isbns.size > 1
      PREFIX + command + SUFFIX
    end

    public

    # Searches the catalogue for +isbns+ and fills #books with one Book per
    # result link. #books is left empty when the response contains "0件"
    # or when no ISBN was given (no request is sent in that case).
    def search(isbns)
      @books = []
      return @books if isbns.empty?
      Net::HTTP.start(HOST, 80) do |http|
        response = http.post('/cgi-bin/ts2gate.exe/TG2search/TG2search', makecommand(isbns))
        if (/0件/ =~ response.body.toutf8).nil?
          fixed_response = "<html>" + normalize_html(response.body).
            sub(/ shade/){""}.gsub(/<hr>/){""}.
            sub(/(<dd)>/){$1 + "/>" }.
            sub(/<\/h3>/){ ""}
          doc = REXML::Document.new fixed_response
          doc.elements.each("/html/body/form/table[2]/tr/td/a[position() mod 2=0]") do |link|
            @books << Book.new(link)
          end
        end
      end
    end

    # Fetches the detail page of every book in #books, records its state and
    # appends the additional records Book#set_state returns for further rows.
    # Waits 0.2 s between requests.
    def get
      return if @books.nil?
      other_copies = []
      @books.each do |book|
        Net::HTTP.start(HOST, 80) do |http|
          response = http.get(book[:link])
          fixed_response = normalize_html(response.body).
            gsub(/ shade/){""}.gsub(/<hr>/){""}
          doc = REXML::Document.new fixed_response
          other_copies.concat(book.set_state(doc))
        end
        sleep 0.2
      end
      @books += other_copies
    end
  end

  # One catalogue entry; keys are :link, :title, :sdid and, after #set_state,
  # :isbn, :catalog_id, :state, :return_on and :stored_at.
  class Book < HashWithIndifferentAccess
    # link:: REXML <a> element taken from the search result list.
    def initialize(link)
      self[:link] = link.attributes['href']
      self[:title] = link.text
      self[:sdid] = /sdid=([0-9]*)/.match(self[:link]).to_a[1]
    end

    # Reads the ISBN and the state table from the parsed detail page +doc+.
    # Table row 2 is written into this record; every row from 3 on is written
    # into a duplicate of it. Returns the array of those duplicates.
    # NOTE(review): row 1 is presumably the table header - confirm against the page.
    def set_state(doc)
      m = /(?:ISBN :)((?:(?!<br\/>).)*)<br\/>(?:(?: +)*([X\d-]*)(?:(?!件名).)*)/.match(doc.root.to_s)
      isbn = m[2].length == 0 ? m[1] : m[2]
      self[:isbn] = isbn.gsub("-"){""}
      count = doc.elements["count(html/body/table[2]/tr)"]
      dups = []
      if count > 1
        fill_copy self, doc, 2
        (3..count).each do |row|
          copy = self.dup
          fill_copy copy, doc, row
          dups << copy
        end
      end
      dups
    end

    private

    # Cell of the state table selected by row and the position()-div-column test.
    def cell(doc, row, column)
      doc.elements["/html/body/table[2]/tr[#{row}]/td[position() div #{column} = 1]"]
    end

    # Text of the cell's <a> child when it has one, otherwise the cell's own
    # text ("" when that is nil).
    def cell_label(td)
      anchor = td.elements["a"]
      anchor.nil? ? (td.text || "") : anchor.text
    end

    # Copies catalogue id, state, return date and location of table +row+ into +obj+.
    def fill_copy(obj, doc, row)
      obj[:catalog_id] = cell(doc, row, 2).text.sub(/([\d]*)(\s*)/){$1}
      obj[:state] = cell(doc, row, 6).text
      obj[:return_on] = (cell(doc, row, 7).text || "").gsub(/[年月日]/){"/"}
      obj[:stored_at] = cell_label(cell(doc, row, 3)) + ":" + cell_label(cell(doc, row, 8))
    end
  end
end

# Runs the Amazon crawler and the library crawler side by side until SIGINT.
class FindOverdue
  # Number of ISBNs sent to the library in one search request.
  BATCH_SIZE = 4

  attr_accessor :response

  def initialize
  end

  # Starts both workers, installs the SIGINT handler and then idles.
  # The handler sets the exit flag, waits for the workers to finish their
  # current step, prints the progress made in this run and exits.
  def start
    before = Time.now
    @m = Mutex.new
    @exit = false
    h = History.find(1)
    Thread.start { tachikawa }
    sleep 0.1
    Thread.start { amazon }
    Signal.trap("INT") do
      @m.synchronize { @exit = true }
      (ThreadGroup::Default.list - [Thread.current]).each { |th| th.join }
      n = History.find(1)
      # to_s: the stored query is nil when a worker has never run before.
      p "アマゾン:" + h.amazon_last_query.to_s + "->" + n.amazon_last_query.to_s + "," + h.amazon_last_page.to_s + "~" + n.amazon_last_page.to_s
      p "立川図書館:" + h.tachikawa_last_id.to_s + "~" + n.tachikawa_last_id.to_s
      elapsed = Time.now - before
      p((elapsed / 60).to_i.to_s + "分" + (elapsed % 60).to_s + "秒")
      exit
    end
    sleep 0.1
    p "実行中..."
    loop { sleep 1 }
  end

  # Library worker: repeatedly takes the next (up to 50) AmazonBook rows after
  # the stored tachikawa_last_id, looks them up BATCH_SIZE at a time and
  # stores the results. Ends when no rows are left, when the exit flag is set,
  # or after the first error.
  def tachikawa
    p "立川図書館開始:" + Time.now.to_s
    library = TachikawaLibrary::Client.new
    running = true
    while running
      begin
        last_id = History.find(1).tachikawa_last_id || 0
        all_books = AmazonBook.find(:all, :limit=>50, :conditions=>["id > ?", last_id], :order=>"id asc")
        # Nothing left to look up: stop instead of querying an empty ISBN and
        # writing a bogus last id.
        break if all_books.empty?

        before = Time.now
        last_batch = (all_books.size - 1) / BATCH_SIZE
        # Array#[start, length] slices: every book lands in exactly one batch,
        # including a final short one.
        0.step(all_books.size - 1, BATCH_SIZE) do |first|
          batch_started = Time.now
          lookup_batch library, all_books[first, BATCH_SIZE]
          p((Time.now - batch_started).to_s + ":" + (first / BATCH_SIZE).to_s + "/" + last_batch.to_s)
        end
        p all_books.size.to_s + "件終了:" + (Time.now-before).to_s
        History.update(1,{:tachikawa_last_id=>all_books[-1].id})
        @m.synchronize { running = false if @exit }
      rescue => err
        p "error occured!" + err.to_s
        err.backtrace.each { |stack| puts "from:"+stack }
        running = false
      end
    end
    p "立川図書館終了:" + Time.now.to_s
  end

  # Amazon worker: for every title query from the stored one up to "ん",
  # fetches result pages one by one, stores the items and records the position
  # in History after each page. A failed page is retried after the usual pause.
  def amazon
    p "アマゾン開始:" + Time.now.to_s
    a = AmazonWebService::Associates::Client.new
    h = History.find(1)
    q = h.amazon_last_query || "あ"
    # No page stored yet (first run) means: start with page 1.
    page = (h.amazon_last_page || 0) + 1
    stop = false
    (q.."ん").each do |e|
      while fetch_page?(a, page)
        begin
          a.search_by_title e, page
          a.items.each do |item|
            AmazonBook.new(item).save
            sleep 0.5
          end
          History.update(1,{:amazon_last_page=>page,:amazon_last_query=>e})
          p e + "行:" + (page.nil? ? "none" : page.to_s) + "/" + (a.total_page.nil? ? "none" : a.total_page.to_s)
          page = page.nil? ? 1 : page + 1
        rescue => err
          p "error occured on : " + e + ":" + err.to_s
          err.backtrace.each { |stack| puts "from:"+stack }
        end
        # Done on the error path too, so a persistent failure neither spins
        # without pause nor ignores the exit request.
        @m.synchronize { stop = true if @exit }
        sleep 2
        break if stop
      end
      # NOTE(review): the next query starts at page 0; that request replaces
      # the previous query's page count held by the client. Confirm how the
      # service answers ItemPage=0.
      page = 0
      if stop
        p "break!!"
        break
      end
    end
    p "アマゾン終了:" + Time.now.to_s
  end

  private

  # Searches the library for one batch of AmazonBook rows (by ASIN), fetches
  # the detail pages and saves every resulting record without its :title and
  # :link entries.
  def lookup_batch(library, batch)
    library.search(batch.map { |b| b[:asin] })
    library.get
    return if library.books.nil?
    library.books.each do |book|
      TachikawaBook.new(book.reject { |k, v| /(title|link)/ =~ k.to_s }).save
    end
  end

  # Whether +page+ should be requested next for the current query.
  # Pages above 400 are never requested. While the client has no page count,
  # any page is tried; with a positive count, pages up to and including the
  # count are fetched; with a count of zero only page 0 is tried.
  def fetch_page?(client, page)
    return true if page.nil?
    return false unless 401 > page
    total = client.total_page
    return true if total.nil?
    total = total.to_i
    total > 0 ? total >= page : page == 0
  end
end

FindOverdue.new.start