#!/usr/bin/env ruby

$:.push('/home/hamaji/local/lib/ruby/site_ruby/1.8')
$:.push('/home/hamaji/local/lib/ruby/site_ruby/1.8/i386-freebsd4')

require 'time'
require 'depot'
require 'villa'

require 'nkf'
require 'cgi'

require 'reform'

# Per-site lookup tables.  All three are keyed by the short site code that
# appears as the first field of an id.db record and that the `site` request
# parameter is compared against.  They are frozen because they are read-only
# configuration; an accidental write should fail loudly.

# Base URL that a result's path is appended to when building its link.
SITES = {
  'jw' => 'http://ja.wikipedia.org/wiki/',
  'ew' => 'http://en.wikipedia.org/wiki/',
  'pm' => 'http://planetmath.org/encyclopedia/',
  'ht' => 'http://www12.plala.or.jp/ksp/',
}.freeze

# Suffix appended after the path when building a result's link.
SITESUFS = {
  'jw' => '',
  'ew' => '',
  'pm' => '.html',
  'ht' => '',
}.freeze

# Human-readable site name shown next to each result.
SITENAMES = {
  'jw' => 'ja.Wikipedia',
  'ew' => 'en.Wikipedia',
  'pm' => 'PlanetMath',
  # NKF flags: -E reads the literal as EUC-JP, -w emits UTF-8 (the page charset).
  'ht' => NKF.nkf('-Ew', '物理のかぎしっぽ'),
}.freeze

# Read the request parameters and emit the HTTP header, the page head and the
# search form.

cgi = CGI.new
# Raw query text.  It is kept unescaped here because the search code further
# down consumes it as-is; it is escaped only at the points where it is printed.
q = cgi['q'].to_s
# Optional site code restricting the results; '' means "no restriction".
@targetsite = cgi['site'].to_s

print "Content-Type: text/html; charset=UTF-8\n\n"

# The query is echoed back into the text box, so it must be HTML-escaped to
# stay inside the value attribute.
print %Q(<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html lang="ja">

<head>
 <meta http-equiv="CONTENT-TYPE" CONTENT="text/html; charset=UTF-8">
 <title>Formula Search</title>
 <link rev="MADE" href="mailto:hamaji _at_ nii.ac.jp">
 <link rel="INDEX" href=".">
 <link rel="stylesheet" type="text/css" href="/site.css">
</head>

<body>

<h1>Formula Search</h1>

<form method="GET" action="search.cgi">
<input name="q" value="#{CGI.escapeHTML(q)}">
<input type="submit" value="search">)

# Carry the site restriction over to the next search.  to_s never yields nil,
# so test for emptiness rather than truthiness.
unless @targetsite.empty?
  print %Q(<input type="hidden" name="site" value="#{CGI.escapeHTML(@targetsite)}">)
end

print %Q(</form>

<p>
example: <a href="search.cgi?q=F%3Dma">F=ma</a>, <a href="search.cgi?q=e%5E%7Bjx%7D">e^{jx}</a>, <a href="search.cgi?q=%5Cfrac%7B1%7D%7Bn%7D+%5Csum+x">\\frac{1}{n} \\sum x</a>, <a href="search.cgi?q=%5Csin%5E2x%2B%5Ccos%5E2x">\\sin^2x+\\cos^2x</a>
</p>

<h2>Result</h2>

)

# Print every formula whose key in +mathfile+ starts with the concatenated
# tokens of +rlist+, grouped by page title, as an HTML definition list.
#
# rlist      - Array of String tokens; joined without separator to form the
#              key prefix that is searched for.
# mathfile   - path of the Villa database to scan (default "math.db").  Each
#              value is read as "N,ID": N is an integer added to the key
#              length to form the score denominator, ID is a key into id.db.
# excludemap - Hash whose keys are titles to leave out of the listing (the
#              caller passes the Hash returned by a previous call).
#
# id.db values are read as "site,path,formula" with an optional second line
# holding the label to display instead of the path.
#
# Reads the top-level @targetsite: when it is not '', only records of that
# site are listed.
#
# Returns the Hash of path => [score, id, site, path, formulas, label] that
# was printed, or nil when a message ("not found" / "too many results") was
# printed instead.
#
# NOTE(review): labels and formulas from id.db are printed without HTML
# escaping; confirm whether the database already stores them escaped.
def dump_result(rlist, mathfile = "math.db", excludemap = Hash.new)
  mdb = nil
  iddb = nil
  begin
    mdb = Villa::new(mathfile)
    iddb = Depot::new("id.db")

    prefix = rlist.join('')
    hits = []

    begin
      # Position the cursor on the first key >= prefix and walk forward for as
      # long as the keys still start with the prefix.
      mdb.curjump(prefix, Villa::JFORWARD)
      loop do
        key = mdb.curkey
        break unless key.index(prefix) == 0

        fields = mdb.curval.split(',')
        # Score: share of the stored formula covered by the query, in percent.
        score = prefix.size * 100 / (fields.shift.to_i + key.size)
        hits << [score, fields.shift]

        mdb.curnext
      end
    rescue Villa::ENOITEM
      # Per the QDBM Ruby API the cursor methods raise ENOITEM when there is
      # no record: the prefix sorts after the last key, or the scan stepped
      # past the final record.  Both simply mean "no more matches".
    end

    msg = ''
    msg = "not found" if hits.empty?
    msg = "too many results (#{hits.size} hits)" if hits.size >= 100

    unless msg.empty?
      print %Q(<p>#{msg}\n)
      return nil
    end

    retmap = Hash.new
    hits.each do |score, id|
      main, sub = iddb.get(id).split("\n")
      info = main.split(',')
      site = info.shift
      path = info.shift
      # The formula itself may contain commas; glue the remainder back together.
      formulas = [info.join(',')]

      next if @targetsite != '' && site != @targetsite
      next if excludemap.key?(path)

      if retmap.key?(path)
        # Several formulas on one page: add up the scores, list every formula.
        retmap[path][0] += score
        retmap[path][4].concat(formulas)
      else
        retmap[path] = [score, id, site, path, formulas, sub ? sub : path]
      end
    end

    print ", #{retmap.size} hits. <dl>"
    # Highest score first; ties are broken by ascending record id.
    ordered = retmap.sort_by { |_key, entry| [-entry[0], entry[1].to_i] }
    ordered.each do |_key, entry|
      per, _cnt, site, path, formula, title = entry
      print %Q(<dt><a href="#{SITES[site]}#{CGI.escape(path).gsub('%2F','/')}#{SITESUFS[site]}">#{title}</a> @#{SITENAMES[site]} (#{per}%) <dd> <em>#{formula.join('</em> <dd> <em>')}</em> \n)
    end
    print "</dl>"

    return retmap
  ensure
    mdb.close if mdb
    iddb.close if iddb
  end
end

# Main driver: validate the query, run the exact search and, when that printed
# a listing, a second "similar" search on the token list from intersect().
# Any error is shown on the page; the footer is always printed.
begin
  # Work on a copy, presumably because reform may modify its argument.
  arg = q.dup
  msg = nil
  # The query is user input echoed into the page body; escape it once here.
  shown_q = CGI.escapeHTML(q)

  if (arg.size < 3)
    msg = 'too short'
  elsif ((r = reform(arg)).size < 2)
    msg = 'too short'
  else
    # The token lists are derived from the query, so they are escaped as well.
    print %Q(<p>'#{shown_q}' => '#{CGI.escapeHTML(r.join(' '))}')
    rmap = dump_result(r)
    # rmap is nil when the exact search printed a message instead of a listing;
    # otherwise it is passed on so the same titles are not listed twice.
    if (rmap && ri = intersect(r))
      print %Q(<h2>Similar result</h2>)
      print %Q(<p>'#{shown_q}' => '#{CGI.escapeHTML(ri.join(' '))}')
      dump_result(ri, "int.db", rmap)
    end
  end

  if (msg)
    print %Q(<p>#{shown_q} : #{msg}\n)
  end
rescue
  # Show the failure on the page instead of an empty response.
  print "<pre>\n"
  print CGI.escapeHTML("#{$!.to_s}\n")
  print CGI.escapeHTML("#{$!.backtrace.join("\n")}\n")
  print "</pre>\n"
ensure
  print %Q(
<hr>

<p>
<a href="http://shinh.skr.jp/">home</a>
/
<a href=".">index</a>

<div align="right">
hamaji _at_ nii.ac.jp
/
shinichiro.h
</div>

</body>

</html>
)
end
