require 'uri'
require 'net/http'
require 'rexml/document'
require 'zoom'

# Amazon access key; do_request interpolates it into the request URL as
# AWSAccessKeyId. It is read from the AMAZON_DEV_TOKEN environment variable when
# that is set, so the real key does not have to live in this file; otherwise the
# placeholder below is used.
DEV_TOKEN     = ENV.fetch("AMAZON_DEV_TOKEN", "MY_AMAZON_DEV_TOKEN") # insert value here

# Store code => Amazon domain. do_request prefixes the domain with
# "webservices." to build the request host. Frozen so the lookup table cannot
# be modified by accident at runtime.
STORES = {
  "US" => "amazon.com",
  "UK" => "amazon.co.uk",
  "DE" => "amazon.de",
  "JP" => "amazon.co.jp",
  "CA" => "amazon.ca"
}.freeze
  
# Attribute name => predicate written by to_n3. The keys are the attribute
# names collected by parse_item plus the keys this script adds itself
# ("AmazonSubject", "LocSubject", "AmazonReview"). Attributes that are not
# listed here are skipped by to_n3. Frozen so the table cannot be modified by
# accident at runtime.
PROP_MAP = {
  "ISBN" => "dc:identifier",
  "Publisher" => "dc:publisher",
  "Author" => "dc:creator",
  "PublicationDate" => "dct:issued",
  "Title" => "dc:title",
  "EAN" => "dc:identifier",
  "Binding" => "dct:hasFormat",
  "NumberOfPages" => "ex:pages",
  "Studio" => "dc:publisher",
  "Format" => "dct:hasFormat",
  "Director" => "dc:contributor",
  "ReleaseDate" => "dct:issued",
  "RunningTime" => "dct:extent",
  "Actor" => "ex:actor",
  "RegionCode" => "ex:region",
  "ASIN" => "dc:identifier",
  "Artist" => "dc:creator",
  "Label" => "dc:publisher",
  "UPC" => "dc:identifier",
  "AmazonSubject" => "dc:subject",
  "LocSubject" => "dc:subject",
  "AmazonReview" => "dc:description"
  # TODO: number of discs? binding (eg cd)
  # TODO: should use format string, eg "rdf:type <%s>"
}.freeze

# Sends an ItemLookup request to the Amazon web service of one store and
# returns the parsed XML response.
#
# id       - item identifier, sent as ItemId.
# store    - key of STORES selecting the host (default "US").
# id_type  - optional IdType parameter; 'ASIN' is treated like "not given".
# index    - optional SearchIndex parameter.
# response - ResponseGroup parameter (default "Medium"); nil omits it.
#
# Returns a REXML::Document, or nil when the HTTP status is not "200".
# Raises ArgumentError when store is not a key of STORES (previously this
# produced a request to the malformed host "webservices.").
def do_request(id, store="US", id_type=nil, index=nil, response="Medium")
  # Resolve the host first so a bad store code fails immediately, before sleeping.
  host = STORES.fetch(store) { raise ArgumentError, "unknown store: #{store.inspect}" }
  sleep 1 # don't overload amazon...
  id_type = nil if id_type == 'ASIN'
  params = [
    ["Service", "AWSECommerceService"],
    ["AWSAccessKeyId", DEV_TOKEN],
    ["Operation", "ItemLookup"],
    ["ItemId", id]
  ]
  params << ["IdType", id_type] if id_type
  params << ["SearchIndex", index] if index
  params << ["ResponseGroup", response] if response
  # encode_www_form escapes every value, so identifiers read from input that
  # contain spaces, '&' or '#' can neither break URI.parse nor add parameters.
  url = "http://webservices.#{host}/onca/xml?#{URI.encode_www_form(params)}"
  result = Net::HTTP.get_response(URI.parse(url))
  return nil if result.code != "200"
  REXML::Document.new result.body
end

# Flattens one Item element of a lookup response into a Hash that maps an
# attribute name to an Array of its text values.
#
# "ASIN" and "SeeAlso" (taken from DetailPageURL) are always set; every child
# of ItemAttributes that has no child elements of its own is appended under
# its element name, so repeated attributes accumulate in document order.
def parse_item(item)
  fields = {
    "ASIN" => [item.elements['ASIN'].text],
    "SeeAlso" => [item.elements['DetailPageURL'].text]
  }
  item.each_element("ItemAttributes/*") do |node|
    # nested attributes (dimensions, list price etc) are not of interest
    unless node.has_elements?
      (fields[node.name] ||= []) << node.text
    end
  end
  fields
end

# Requests the "Subjects" response group for an item and collects the text of
# every Subject element.
#
# Returns {"AmazonSubject" => [...]} or an empty Hash when the request
# returned nothing or the response lists no subjects.
def get_subjects(asin, store)
  doc = do_request(asin, store, nil, nil, "Subjects")
  return {} unless doc
  names = doc.root.to_enum(:each_element, "Items/Item/Subjects/Subject").map { |node| node.text }
  names.empty? ? {} : {"AmazonSubject" => names}
end

# Requests the "EditorialReview" response group for an item and collects the
# text of every EditorialReview/Content element.
#
# Returns {"AmazonReview" => [...]} or an empty Hash when the request
# returned nothing or the response contains no editorial review.
def get_ed_reviews(asin, store)
  doc = do_request(asin, store, nil, nil, "EditorialReview")
  return {} unless doc

  texts = []
  content_path = "Items/Item/EditorialReviews/EditorialReview/Content"
  doc.root.each_element(content_path) { |content| texts << content.text }

  if texts.empty?
    {}
  else
    {"AmazonReview" => texts}
  end
end

# Converts a 13-character "978..." EAN into the corresponding ISBN-10.
#
# ean - String holding the EAN.
#
# Returns the 10-character ISBN (nine digits plus a check character, which is
# "X" when the check value is 10), or nil when ean is not exactly "978"
# followed by ten digits. Non-digit input used to be converted to a bogus ISBN
# because String#to_i silently yields 0.
def to_isbn(ean)
  return nil unless /\A978\d{10}\z/ =~ ean

  # Drop the "978" prefix and the trailing EAN check digit.
  body = ean[3, 9]
  # ISBN-10 check: weights 10 down to 2 over the nine digits, modulo 11.
  sum = 0
  body.each_char.with_index { |digit, i| sum += (10 - i) * digit.to_i }
  check = (11 - sum % 11) % 11
  body + (check == 10 ? "X" : check.to_s)
end

# Formats an ISBN string as "urn:isbn:" followed by four hyphen-separated
# groups of 1, 3, 5 and 1 characters (fixed positions, not looked up from
# registration-group tables).
def to_isbn_urn(isbn)
  groups = [[0, 1], [1, 3], [4, 5], [9, 1]].map { |start, length| isbn[start, length] }
  "urn:isbn:" + groups.join("-")
end

# Prints one resource description in N3 to standard output.
#
# attrs - Hash of attribute name => Array of values, as built by parse_item
#         or parse_marc_xml.
#
# The subject is derived from the first ISBN, else the first EAN, else the
# first ASIN, else a blank node "[]". "ProductGroup" becomes the type and
# "SeeAlso" becomes rdfs:seeAlso. Every other attribute is written only when
# PROP_MAP has a predicate for it, one statement per value.
def to_n3(attrs)
  if attrs.has_key?("ISBN")
    puts "<#{to_isbn_urn(attrs['ISBN'][0])}>"
  elsif attrs.has_key?("EAN")
    puts "<urn:ean:#{attrs['EAN'][0]}>"
  elsif attrs.has_key?("ASIN")
    puts "<urn:asin:#{attrs['ASIN'][0]}>"
  else
    puts "[]"
  end
  puts "\ta ex:#{attrs['ProductGroup'][0]} ;" if attrs.has_key?('ProductGroup')
  # SeeAlso holds an Array; interpolating the Array itself prints its
  # inspect form (<["http://..."]>) on Ruby >= 1.9, so emit each URL.
  Array(attrs['SeeAlso']).each do |url|
    puts "\trdfs:seeAlso <#{url}> ;"
  end
  attrs.each do |key, value|
    next unless value
    next unless PROP_MAP.has_key?(key)
    prop = PROP_MAP[key]
    value.each do |val|
      # Escape backslashes and double quotes so text containing them (for
      # example HTML in reviews) cannot terminate the """ literal early.
      literal = val.to_s.gsub(/[\\"]/) { |ch| "\\#{ch}" }
      puts "\t#{prop} \"\"\"#{literal}\"\"\" ;"
    end
  end
  puts "\t."
end

# Looks an EAN up in each store / search index combination, in order, and
# returns the ASIN of the first response that reports no error.
#
# ean    - the EAN to look up.
# stores - Array of store codes to try.
# types  - Array of SearchIndex values to try within each store.
#
# Returns the ASIN text, or nil when no combination yields an item.
def find_asin(ean, stores, types)
  stores.each do |store|
    types.each do |type|
      response = do_request(ean, store, "EAN", type, "Small")
      # do_request returns nil for a non-200 reply; try the next combination
      # instead of failing on nil (find_item guards the same way).
      next unless response
      next if response.root.elements["Items/Request/Errors/Error"]
      asin = response.root.elements["Items/Item/ASIN"]
      # An error-free response without an Item/ASIN element is not a hit.
      return asin.text if asin
    end
  end
  nil
end

# Tries the given stores in order and stops at the first one whose lookup
# response carries no Items/Request/Errors/Error element.
#
# Returns [store, response] for that store, or nil when every store either
# returned no response or reported an error.
def find_item(asin, stores)
  stores.each do |store|
    doc = do_request(asin, store)
    next unless doc
    failure = doc.root.elements["Items/Request/Errors/Error"]
    return [store, doc] unless failure
  end
  nil
end

# Appends to map[key] the text of every subfield with the given code found
# inside a datafield with the given tag, creating the Array on first use.
#
# map      - Hash of key => Array, modified in place.
# key      - key under which the values are collected.
# tag      - value of the datafield "tag" attribute to match.
# subfield - value of the subfield "code" attribute to match.
# doc      - parsed document whose root element is searched.
def add_marc(map, key, tag, subfield, doc)
  xpath = "datafield[@tag='#{tag}']/subfield[@code='#{subfield}']"
  doc.root.each_element(xpath) do |node|
    map[key] = [] unless map.has_key?(key)
    map[key].push(node.text)
  end
end

# For every datafield with the given tag, joins the text of all its subfields
# into one string and appends that string to map[key].
#
# The subfield values are joined in reverse document order, separated by sep.
# Datafields without any subfield contribute nothing.
#
# map - Hash of key => Array, modified in place.
# key - key under which the joined strings are collected.
# tag - value of the datafield "tag" attribute to match.
# sep - separator placed between subfield values.
# doc - parsed document whose root element is searched.
def add_marc_join(map, key, tag, sep, doc)
  doc.root.each_element("datafield[@tag='#{tag}']") do |field|
    parts = field.to_enum(:each_element, "subfield").map { |sub| sub.text }
    next if parts.empty?
    bucket = map.has_key?(key) ? map[key] : (map[key] = [])
    bucket.push(parts.reverse.join(sep))
  end
end

# Parses a MARC record serialized as XML and extracts the fields this script
# uses into a Hash of key => Array of values.
#
# Collected (datafield tag / subfield code => key):
#   245/a => "Title", 100/a and 700/a => "Author", 260/b => "Publisher",
#   all subfields of 650 joined with "/" => "subject",
#   082/a and 050/a => "LocSubject".
#
# NOTE(review): "subject" (lower case) has no entry in PROP_MAP, so to_n3
# skips it, and the main loop copies only "LocSubject" from this result -
# confirm whether the 650 values were meant to be emitted.
def parse_marc_xml(xml)
  doc = REXML::Document.new(xml)
  fields = {}

  [
    ["Title", "245", "a"],
    ["Author", "100", "a"],
    ["Author", "700", "a"],
    ["Publisher", "260", "b"]
  ].each { |key, tag, code| add_marc(fields, key, tag, code, doc) }

  add_marc_join(fields, "subject", "650", "/", doc)

  %w[082 050].each { |tag| add_marc(fields, "LocSubject", tag, "a", doc) }

  fields
end

# Script body.
#
# Reads one identifier per line from STDIN, resolves it to an ISBN/ASIN, looks
# it up at the Library of Congress (Z39.50) and at Amazon, and prints the
# result as N3 on STDOUT. Progress and failures are reported on STDERR.
# An optional first command-line argument gives the number of leading input
# lines to skip.
loc_conn = ZOOM::Connection.new
loc_conn.connect('z3950.loc.gov',7090)
loc_conn.database_name = 'Voyager'
loc_conn.preferred_record_syntax = 'MARC'

skip = 0
skip = ARGV[0].to_i if ARGV[0]

STDERR.puts "Skipping: #{skip}"

#STDIN.each do |ean|
while ean = STDIN.gets # textmate bug?
  next if (STDIN.lineno <= skip)
  ean.chomp!
  
  # A "978" EAN is converted to its ISBN-10; any other 10-character input is
  # used as the ASIN directly.
  asin = to_isbn(ean)
  asin = ean if (!asin && ean.size == 10)
  
  if !asin # try EAN lookup via DE and JP
    asin = find_asin(ean, ["DE", "JP"], ["DVD", "Music"])
    if !asin
      STDERR.puts "Line: #{STDIN.lineno} Missing: '#{ean}' (can't map)"
      next
    end
  end
  
  # LOC z39.50 lookup
  # NOTE(review): "@attr 1=7" is presumably the ISBN use attribute - confirm.
  loc_reses = loc_conn.search("@attr 1=7 #{asin}")
  loc_res = nil
  begin
    loc_res = parse_marc_xml(loc_reses[0].xml) if loc_reses[0]
  rescue StandardError => e
    # Best effort: a record that cannot be fetched or parsed is logged and the
    # Amazon lookup still runs. StandardError rather than Exception, so that
    # Ctrl-C (Interrupt) and exit requests are no longer swallowed here.
    STDERR.puts "Line: #{STDIN.lineno} Exception: #{e}"
  end
  
  store, response = find_item(asin, ["UK", "US", "CA", "DE", "JP"])
  
  if !response && !loc_res
    STDERR.puts "Line: #{STDIN.lineno} Missing: '#{ean}' (#{asin})"
  elsif !response && loc_res
    # Only the Library of Congress knows the item: emit its record by itself.
    STDERR.puts "Line: #{STDIN.lineno} Found: '#{ean}' (#{asin}) [LOC]"
    loc_res["ISBN"] = [asin]
    to_n3(loc_res)
  else
    STDERR.puts "Line: #{STDIN.lineno} Found: '#{ean}' (#{asin}) [#{store}]"
    response.root.each_element("Items/Item") do |item|
      result = parse_item(item)
      subjects = get_subjects(asin, store)
      result.merge!(subjects)
      reviews = get_ed_reviews(asin, store)
      result.merge!(reviews)
      result["LocSubject"] = loc_res["LocSubject"] if loc_res # add loc subjects
      to_n3(result)
    end
  end
end

