延滞

自分が読みたいと思った本が、長く返されていないことが何度かあった。なんとなく、これはかなりの数があるのでは、と思いちょっとしたプログラムを作って調べたところ11592冊中83冊もの本が100日以上延滞されていた。図書館のデータベースには貸し出し検索サービスを介してしかアクセスできないので蔵書数がどれほどあるかわからないが、割合として0.7%というのは看過できない数ではないだろうか。
プログラムはAWS(Amazon Web Services)で書籍のISBNを取得し、そのISBNを使って図書館の検索サービスにアクセスしている。(AWSのアクセスキーはXで伏せてある。)

require 'jcode'
require 'cgi'
require 'net/http'
require "rexml/document"
require 'rubygems'
require 'active_record'
require 'thread.rb'
require 'app/models/history.rb'

# Select the Net::HTTP 1.2 API before any request is made; the clients below
# read the body from the response object returned by get/post.
Net::HTTP.version_1_2

module AmazonWebService
  module Associates
    # Small client for the Amazon ECS "ItemSearch" operation on the Books
    # index.  A search stores the parsed XML response in +doc+; #items and
    # #total_page then read from that document.
    class Client
      SERVICE="Service=AWSECommerceService&"
      OPERATION="Operation=ItemSearch&SearchIndex=Books&ResponseGroup=Medium&"
      VERSION="Version=2008-03-03"
      attr_accessor :doc

      # key:: AWS access key id sent with every request.  Defaults to the
      #       masked placeholder that used to be hard-coded here.
      def initialize key="XXXXXXXXXXXXXXXXXXXX"
        @aws_key="AWSAccessKeyId=#{key}&"
      end

      # Searches by title.  The title is percent-encoded so that non-ASCII
      # text, spaces, "&" and "=" cannot break the query string.
      def search_by_title title, page=nil
        search "Title=#{CGI.escape(title.to_s)}&", page
      end

      # Searches by free keywords, encoded the same way as titles.
      def search_by_keywords keywords, page=nil
        search "Keywords=#{CGI.escape(keywords.to_s)}&", page
      end

      # Sends one ItemSearch request and keeps the parsed response in +doc+.
      #
      # query:: already-encoded "Name=value&" fragment.
      # page::  result page number, or nil to omit the ItemPage parameter.
      #
      # Returns the parsed REXML::Document.  Network and XML parse errors
      # propagate to the caller.
      def search query, page
        page_param = page.nil? ? "" : "ItemPage=#{page}&"
        Net::HTTP.start('ecs.amazonaws.jp', 80) {|http|
          response = http.get('/onca/xml?' + SERVICE + OPERATION + @aws_key +
                              query + page_param + VERSION)
          @doc = REXML::Document.new response.body
        }
      end

      # Items of the last response wrapped as Item objects; empty when no
      # search has been performed yet.
      def items
        (parse("ItemSearchResponse/Items/Item") || []).map { |node|
          Item.new node
        }
      end

      # Text of the TotalPages element of the last response, or nil when
      # there is no response or the element is missing.
      def total_page
        node = (parse("ItemSearchResponse/Items/TotalPages") || []).first
        node.text unless node.nil?
      end

      # Nodes of the current document matching the XPath +path+, or nil when
      # no document has been loaded.
      def parse path
        # Elements#each hands back the array of matches; the block is a no-op.
        @doc.elements.each(path){nil} unless @doc.nil?
      end
    end

    # Hash view of one Item element of an ItemSearch response.  Each key holds
    # the text of the first element matching its XPath, or "" when no such
    # element exists.
    class Item < HashWithIndifferentAccess
      def initialize item
        [
          [:asin, "ASIN"],
          [:title, "*/Title"],
          [:author, "*/Author"],
          [:publication_date, "*/PublicationDate"],
          [:publisher, "*/Publisher"]
        ].each do |key, xpath|
          node = item.elements[xpath]
          self[key] = node.nil? ? "" : node.text
        end
      end
    end
  end

end
module TachikawaLibrary
  # Screen-scraping client for the Tachikawa library search service.
  # #search posts an ISBN query and collects the result links as Book
  # objects in +books+; #get then fetches each book's detail page and adds
  # the per-copy state.
  #
  # NOTE(review): String#toutf8 is provided by kconv, which this file does not
  # require itself -- presumably it is loaded elsewhere; confirm.
  class Client
    @@prefix = "search_tech=direct&high_light=on&list_max_size=500&expanse.k=on&expanse.a=on&expanse.e=on&expanse.u=off&terms_connection=AND&search_condition.0="
    @@comma="%2C"
    @@left_parenthetic="OR%28"
    @@right_parenthetic="%29"
    @@suffix="&version=0210&server_host_name=OPAC&text_db_name=TOSHO&target_file=INDEX-F&listing_file=HEAD-F&view_file=HEAD-F"
    attr_accessor :books
    private
    # Rewrites the service's HTML so that REXML can parse it.  In order:
    # lower-cases opening and closing tag names, quotes unquoted attribute
    # values and lower-cases attribute names, drops comments and script
    # elements, self-closes input tags, removes the first base and meta tag,
    # and self-closes br tags.
    def normalize_html doc
      doc.toutf8.
        gsub(/<([a-z]*)(\s*)/i){"<" +$1.downcase + $2 }.
        gsub(/<\/([a-z]*)(\s*)/i){"</" +$1.downcase + $2 }.
        gsub(/( \w*)=([a-zA-Z0-9][a-zA-Z]*)/i){$1.downcase+'="'+ $2 + '"' }.
        gsub(/( [\w-]*)=\"/i){$1.downcase+'="' }.
        gsub(/<!--((?!-->).)*-->/m){""}.
        gsub(/<script((?!\/script>).)*\/script>/m){""}.
        gsub(/(<input(?:(?!>).)*)(?:>)/m){$1 + "/>" }.
        sub(/(<base .*)>/){""}.sub(/(<meta.*)>/){""}.
        gsub(/<br>/){"<br/>"}
    end

    # Builds the POST body for an ISBN lookup.  One ISBN gives a single
    # eq(...) condition; several are comma-joined and wrapped in OR(...).
    # Callers are expected to pass at least one ISBN (#search guarantees it).
    def makecommand isbns
      conditions = isbns.map { |isbn|
        "INDEX-F%3CTOSHO.INDEX-6%3E%3Aeq%28%27#{isbn}%27%29"
      }
      command = conditions.join(@@comma)
      if conditions.size > 1
        command = @@left_parenthetic + command + @@right_parenthetic
      end
      @@prefix + command + @@suffix
    end

    public
    # Looks up the given ISBNs and replaces +books+ with one Book per result
    # link.  +books+ is left empty when the response contains "0件".
    #
    # A nil or empty list is answered locally with an empty +books+: sending
    # it would only post a meaningless eq('') condition to the server.
    def search isbns
      @books=[]
      return @books if isbns.nil? || isbns.empty?
      Net::HTTP.start('www.library.tachikawa.tokyo.jp', 80) {|http|
        response = http.post('/cgi-bin/ts2gate.exe/TG2search/TG2search',
                             makecommand(isbns))
        unless /0件/ =~ response.body.toutf8
          # Extra clean-up specific to the result-list page.
          fixed_response = "<html>" + normalize_html(response.body).
            sub(/ shade/){""}.gsub(/<hr>/){""}.
            sub(/(<dd)>/){$1 + "/>" }.
            sub(/<\/h3>/){ ""}
          doc = REXML::Document.new fixed_response
          links = doc.elements.each("/html/body/form/table[2]/tr/td/a[position() mod 2=0]"){nil}
          links.each { |link|
            @books << Book.new(link)
          }
        end
      }
    end

    # Fetches the detail page of every book found by the last #search, lets
    # the book record its state, and appends the additional copies returned
    # by Book#set_state to +books+.  Pauses 0.2s between requests.
    def get
      return if @books.nil?
      extra_copies=[]
      @books.each do |book|
        Net::HTTP.start('www.library.tachikawa.tokyo.jp',80) { |http|
          response = http.get(book[:link])
          fixed_response = normalize_html(response.body).
            gsub(/ shade/){""}.gsub(/<hr>/){""}
          doc = REXML::Document.new fixed_response
          extra_copies << book.set_state(doc)
        }
        sleep 0.2
      end
      # Appended only after the loop so the list is not changed mid-iteration.
      extra_copies.each do |copies| @books += copies; end
    end
  end

  # One search hit, stored as a hash.  Created from a result-list link
  # (:link, :title, :sdid); #set_state later adds :isbn and the details of
  # one copy (:catalog_id, :state, :return_on, :stored_at).
  class Book < HashWithIndifferentAccess
    # link:: anchor element of the result list; its href and text are kept,
    #        and the digits after "sdid=" in the href become :sdid.
    def initialize link
      self[:link] = link.attributes['href']
      self[:title] = link.text
      self[:sdid] = /sdid=([0-9]*)/.match(self[:link]).to_a[1]
    end

    # Reads the detail page +doc+.  Sets :isbn on this book, fills this book
    # from table row 2 when the table has more than one row, and returns an
    # array with one duplicate of this book per further row (3 and up), each
    # filled from its own row.
    # NOTE(review): row 1 is never read -- presumably a header row; confirm.
    def set_state doc
      cell_at = Proc.new { |row,column|
        doc.elements["/html/body/table[2]/tr[#{row}]/td[position() div #{column} = 1]"]
      }
      isbn_match = /(?:ISBN :)((?:(?!<br\/>).)*)<br\/>(?:(?: +)*([X\d-]*)(?:(?!件名).)*)/.match(doc.root.to_s)
      # Prefer the second capture; fall back to the first when it is empty.
      raw_isbn = isbn_match[2].length==0 ? isbn_match[1] : isbn_match[2]
      self[:isbn] = raw_isbn.gsub("-"){""}
      row_count = doc.elements["count(html/body/table[2]/tr)"]
      # Text of the cell's <a> child when it has one, else the cell's own
      # text (or "" when that is nil).
      cell_label = Proc.new { |cell|
        anchor = cell.elements["a"]
        anchor.nil? ? (cell.text || "") : anchor.text
      }
      fill = Proc.new { |target,row|
        target[:catalog_id] = cell_at.call(row,2).text.sub(/([\d]*)(\s*)/){$1}
        target[:state] = cell_at.call(row,6).text
        target[:return_on] = (cell_at.call(row,7).text || "").gsub(/[年月日]/){"/"}
        shelf = cell_at.call(row,3)
        floor = cell_at.call(row,8)
        target[:stored_at] = cell_label.call(shelf) + ":" + cell_label.call(floor)
      }
      copies=[]
      if row_count > 1
        fill.call self, 2
        (3..row_count).each do |row|
          copy = self.dup
          fill.call copy, row
          copies << copy
        end if row_count > 2
      end
      copies
    end
  end
end


# Runs the overdue-book survey with two worker threads:
#
# * #amazon searches Amazon by title and saves every hit as an AmazonBook;
# * #tachikawa reads AmazonBook rows in id order, looks them up in the
#   library in small groups, and saves every copy found as a TachikawaBook.
#
# Both record their progress in History row 1 so a later run continues from
# there.  #start keeps the process alive until it receives SIGINT.
class FindOverdue
  attr_accessor :response
  def initialize
  end

  # Starts both workers, waits for SIGINT, then asks the workers to stop,
  # waits for them, prints a progress summary and exits the process.
  def start
    before = Time.now
    @m = Mutex.new
    @exit = false
    h = History.find(1)
    workers = []
    workers << Thread.start { tachikawa }
    sleep 0.1
    workers << Thread.start { amazon }
    # The handler only raises a flag.  Locking a Mutex or joining threads is
    # not reliable inside a trap handler, so the shutdown itself runs below
    # on the main thread.
    interrupted = false
    Signal.trap("INT") { interrupted = true }
    sleep 0.1
    p "実行中..."
    sleep 0.2 until interrupted
    @m.synchronize { @exit = true }
    workers.each { |worker| worker.join }
    report h, History.find(1), before
    exit
  end

  # Library worker.  Repeatedly takes up to 50 AmazonBook rows with an id
  # above History#tachikawa_last_id, checks them in groups of four, and
  # advances tachikawa_last_id to the last row handled.  When no new rows
  # exist yet it waits a second and polls again.  Stops after the current
  # batch once an exit was requested, or on the first error.
  def tachikawa
    p "立川図書館開始:" + Time.now.to_s
    library = TachikawaLibrary::Client.new
    group_size = 4
    running = true
    while running
      begin
        last_id = History.find(1).tachikawa_last_id || 0
        all_books = AmazonBook.find(:all,
                                    :limit=>50,
                                    :conditions=>["id > ?", last_id],
                                    :order=>"id asc")
        if all_books.empty?
          # Nothing new to check yet; do not touch the history pointer.
          sleep 1
        else
          before = Time.now
          last_group = (all_books.size - 1) / group_size
          0.upto(last_group) do |i|
            group_started = Time.now
            # Array#[start, length]: every book lands in exactly one group.
            check_availability library, all_books[i * group_size, group_size]
            p((Time.now - group_started).to_s + ":" + i.to_s + "/" + last_group.to_s)
          end
          p all_books.size.to_s + "件終了:" + (Time.now-before).to_s
          History.update(1,{:tachikawa_last_id=>all_books[-1].id})
        end
        running = false if exit_requested?
      rescue => err
        p "error occured!" + err.to_s
        err.backtrace.each { |stack| puts "from:"+stack }
        running = false
      end
    end
    p "立川図書館終了:" + Time.now.to_s
  end

  # Amazon worker.  Continues from History#amazon_last_query and the page
  # after History#amazon_last_page, saving every returned item as an
  # AmazonBook and recording query and page after each page.  The exit flag
  # is checked after every attempt, successful or not, so a failing request
  # is retried at most once per pause and can always be interrupted.
  def amazon
    p "アマゾン開始:" + Time.now.to_s
    a = AmazonWebService::Associates::Client.new
    h = History.find(1)
    q = h.amazon_last_query || ""
    page = (h.amazon_last_page || 0) + 1
    stop = false
    # NOTE(review): the upper bound of this range is an empty string, so the
    # range yields almost nothing; the intended bound looks lost -- confirm.
    (q.."").each do |e|
      while more_pages?(a, page)
        begin
          a.search_by_title e, page
          a.items.each do |item|
            AmazonBook.new(item).save
            sleep 0.5
          end
          History.update(1,{:amazon_last_page=>page,:amazon_last_query=>e})
          p e + "行:" + (page.nil? ? "none" : page.to_s) + "/" + (a.total_page.nil? ? "none" : a.total_page.to_s)
          page = page.nil? ? 1 : page + 1
        rescue => err
          # The page is not advanced, so the same page is tried again.
          p "error occured on : " + e + ":" + err.to_s
          err.backtrace.each { |stack| puts "from:"+stack }
        end
        stop = exit_requested?
        sleep 2
        break if stop
      end
      page = 0
      if stop
        p "break!!"
        break
      end
    end
    p "アマゾン終了:" + Time.now.to_s
  end

  private

  # True once #start has asked the workers to stop.
  def exit_requested?
    @m.synchronize { @exit }
  end

  # Looks one group of books up in the library and saves every copy found,
  # without the :title and :link entries.
  def check_availability(library, group)
    keys = if group[0].instance_of?(AmazonBook)
             group.map { |b| b[:asin] }
           else
             group
           end
    library.search keys
    library.get
    (library.books || []).each do |book|
      TachikawaBook.new(book.reject{|k,v|/(title|link)/=~k.to_s }).save
    end
  end

  # Whether the Amazon worker should request +page+ next.  Pages stop at
  # 400; while the client reports a positive total, +page+ must be below it;
  # with a total of zero only page 0 is tried; with no total yet, always.
  def more_pages?(client, page)
    return true if page.nil?
    return false unless 401 > page
    total = client.total_page
    return true if total.nil?
    return total.to_i > page if total.to_i > 0
    page == 0
  end

  # Prints how far both workers advanced between the two History snapshots
  # and how long the run took.
  def report(earlier, latest, started_at)
    p("アマゾン:" + earlier.amazon_last_query.to_s + "->" +
      latest.amazon_last_query.to_s + "," + earlier.amazon_last_page.to_s + "~" +
      latest.amazon_last_page.to_s)
    p("立川図書館:" + earlier.tachikawa_last_id.to_s + "~" +
      latest.tachikawa_last_id.to_s)
    elapsed = Time.now - started_at
    # NOTE(review): minutes and seconds are printed back to back with empty
    # separators; unit suffixes look lost from these literals -- confirm.
    p((elapsed/60).to_i.to_s + "" + (elapsed%60).to_s + "")
  end
end
# Script entry point: start both worker threads and keep running until the
# process receives SIGINT.
FindOverdue.new.start