package gihyo.lucene.ch3;

/**
 * Copyright 2006 SEKIGUCHI, Koji
 * 
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *  http://www.apache.org/licenses/LICENSE-2.0
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

import java.io.IOException;
import java.io.File;
import java.io.FileInputStream;
import org.xml.sax.SAXException;
import org.apache.commons.digester.Digester;
import org.apache.commons.digester.xmlrules.DigesterLoader;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Indexer that turns a directory of book-description XML files into a
 * Lucene index.
 *
 * <p>Each file in the data directory whose name ends with <code>.xml</code>
 * is parsed by a Commons Digester instance (configured from the
 * {@link #XML_DIGESTER_RULES} classpath resource) into a {@link BookInfo},
 * which is then converted into a Lucene {@link Document}.</p>
 *
 * <p>The protected methods implement the hooks declared by
 * <code>AbstractIndexer</code>; presumably <code>makeIndex()</code> drives
 * them in the order <code>begin()</code>, then
 * <code>hasNext()</code>/<code>next()</code>/<code>getDocument()</code> per
 * record -- confirm against <code>AbstractIndexer</code>.</p>
 */
public class BookIndexer extends AbstractIndexer {

    /** Classpath resource holding the Digester rules used to parse a book file. */
    private static final String XML_DIGESTER_RULES = "book-digester-rules.xml";

    // Names of the Lucene fields written by getDocument().
    // NOTE(review): several of these literals look like they were damaged by a
    // character-encoding conversion (F_AUTHOR is even empty). They are kept
    // exactly as found because changing a field name changes the index
    // schema -- restore them from the original source encoding.
    public static final String F_PUBLISHER = "oŎ";
    public static final String F_CATEGORY = "JeS";
    public static final String F_TITLE = "^Cg";
    public static final String F_AUTHOR = "";
    public static final String F_PAGES = "y[W";
    public static final String F_ISBN = "ISBN";
    public static final String F_DATE = "s";
    public static final String F_PRICE = "i";
    public static final String F_SUMMARY = "Tv";

    /** Location of the index; handed to FSDirectory in getDirectory(). */
    private String indexDir;
    /** Directory that is scanned for <code>.xml</code> book files. */
    private String dataDir;
    /** Entry names found in {@link #dataDir}; populated by begin(). */
    private String[] xmlFiles;
    /** Position of the next entry of {@link #xmlFiles} to examine. */
    private int num;
    /** Number of book files successfully parsed by next(). */
    private int count;
    /** Lazily created index directory, see getDirectory(). */
    private Directory directory;
    /** Parser that maps a book XML file onto a {@link BookInfo}. */
    private Digester digester;

    /**
     * Command-line entry point.
     *
     * @param args <code>args[0]</code> is the index directory,
     *             <code>args[1]</code> is the directory holding the XML files
     * @throws IndexerException if indexing fails
     */
    public static void main( String args[] ) throws IndexerException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if( args.length < 2 ){
            System.err.println( "Usage: java gihyo.lucene.ch3.BookIndexer <indexDir> <dataDir>" );
            System.exit( 1 );
        }
        BookIndexer bi = new BookIndexer( args[0], args[1] );
        bi.makeIndex();
        System.out.println( Integer.toString( bi.count ) + " ̏Ѓf[^o^܂B" );
    }

    /**
     * Creates an indexer and loads the Digester rules from the classpath.
     *
     * @param indexDir location of the index to write
     * @param dataDir  directory containing the book XML files
     * @throws IllegalStateException if the rules resource is not on the classpath
     */
    private BookIndexer( String indexDir, String dataDir ){
        this.indexDir = indexDir;
        this.dataDir = dataDir;
        ClassLoader cl = getClass().getClassLoader();
        java.net.URL rules = cl.getResource( XML_DIGESTER_RULES );
        // getResource() reports a missing resource by returning null.
        if( rules == null ){
            throw new IllegalStateException( "Digester rules not found on classpath: " + XML_DIGESTER_RULES );
        }
        digester = DigesterLoader.createDigester( rules );
    }

    /**
     * Lists the data directory and resets the iteration state.
     *
     * @throws IndexerException if the data directory cannot be listed
     */
    protected void begin() throws IndexerException {
        // File.list() returns null when the path is not a directory or
        // cannot be read; report that instead of failing later with an NPE.
        String[] entries = new File( dataDir ).list();
        if( entries == null ){
            throw new IndexerException( new IOException( "Cannot list data directory: " + dataDir ) );
        }
        xmlFiles = entries;
        num = 0;
        count = 0;
    }

    /**
     * Advances past entries that are not <code>.xml</code> files and reports
     * whether a book file remains. Calling it repeatedly without an
     * intervening next() does not change the result or the record count.
     *
     * @return <code>true</code> if next() can return another record
     */
    protected boolean hasNext() throws IndexerException {
        while( num < xmlFiles.length && !xmlFiles[num].endsWith( ".xml" ) ){
            num++;
        }
        return num < xmlFiles.length;
    }

    /**
     * Parses the current file and moves on to the following entry.
     *
     * @return the parsed {@link BookInfo}
     * @throws IndexerException if the file cannot be read or parsed
     */
    protected Object next() throws IndexerException {
        String path = new File( dataDir, xmlFiles[num++] ).getPath();
        BookInfo bookInfo = getBookInfo( path );
        // Counted only once the file has actually been parsed.
        count++;
        return bookInfo;
    }

    /**
     * Parses one book XML file with the configured Digester.
     *
     * @param path path of the XML file
     * @return the object produced by the Digester, cast to {@link BookInfo}
     * @throws IndexerException wrapping any I/O or SAX failure
     */
    private BookInfo getBookInfo( final String path ) throws IndexerException {
        FileInputStream in = null;
        try{
            in = new FileInputStream( path );
            return (BookInfo)digester.parse( in );
        }
        catch( IOException e ){
            throw new IndexerException( e );
        }
        catch( SAXException e ){
            throw new IndexerException( e );
        }
        finally{
            // Always release the file handle, whether or not parsing succeeded.
            if( in != null ){
                try{
                    in.close();
                }
                catch( IOException ignored ){
                    // Nothing useful can be done about a failed close of a
                    // read-only stream; the parse outcome takes precedence.
                }
            }
        }
    }

    /**
     * Returns the index directory, creating it on first use.
     * NOTE(review): the <code>true</code> flag asks FSDirectory to create the
     * directory; confirm whether that erases an existing index.
     *
     * @return the shared {@link Directory} instance
     * @throws IndexerException if the directory cannot be opened
     */
    protected Directory getDirectory() throws IndexerException {
        if( directory == null ){
            try{
                directory = FSDirectory.getDirectory( indexDir, true );
            }
            catch( IOException e ){
                throw new IndexerException( e );
            }
        }
        return directory;
    }

    /**
     * Converts a {@link BookInfo} into a Lucene document with one field per
     * book attribute. Publisher, pages, ISBN, date and price are indexed
     * un-tokenized; category, title, author and summary are tokenized. The
     * numeric attributes are encoded with NumberTools.longToString().
     * NOTE(review): a null attribute is passed straight to the Field
     * constructor -- confirm how Lucene reacts to a null value.
     *
     * @param record the object returned by next(); must be a {@link BookInfo}
     * @return the document to add to the index
     */
    protected Document getDocument( final Object record ) throws IndexerException {
        BookInfo bookInfo = (BookInfo)record;
        Document doc = new Document();
        doc.add( new Field( F_PUBLISHER, bookInfo.getPublisher(), getFieldStore(), Field.Index.UN_TOKENIZED ) );
        doc.add( new Field( F_CATEGORY, bookInfo.getCategory(), getFieldStore(), Field.Index.TOKENIZED ) );
        doc.add( new Field( F_TITLE, bookInfo.getTitle(), getFieldStore(), Field.Index.TOKENIZED ) );
        doc.add( new Field( F_AUTHOR, bookInfo.getAuthor(), getFieldStore(), Field.Index.TOKENIZED ) );
        doc.add( new Field( F_PAGES, NumberTools.longToString( bookInfo.getPages() ), getFieldStore(), Field.Index.UN_TOKENIZED ) );
        doc.add( new Field( F_ISBN, bookInfo.getIsbn(), getFieldStore(), Field.Index.UN_TOKENIZED ) );
        doc.add( new Field( F_DATE, bookInfo.getDate(), getFieldStore(), Field.Index.UN_TOKENIZED ) );
        doc.add( new Field( F_PRICE, NumberTools.longToString( bookInfo.getPrice() ), getFieldStore(), Field.Index.UN_TOKENIZED ) );
        doc.add( new Field( F_SUMMARY, bookInfo.getSummary(), getFieldStore(), Field.Index.TOKENIZED ) );
        return doc;
    }

    /**
     * Plain mutable bean holding the attributes of one book. It keeps a
     * public no-argument constructor and setter/getter pairs; presumably the
     * Digester rules instantiate it and populate it through the setters --
     * confirm against the rules file.
     */
    public static class BookInfo {

        private String publisher;
        private String category;
        private String title;
        private String author;
        private int pages;
        private String isbn;
        private String date;
        private int price;
        private String summary;

        public void setPublisher( String publisher ){ this.publisher = publisher; }
        public String getPublisher(){ return publisher; }

        public void setCategory( String category ){ this.category = category; }
        public String getCategory(){ return category; }

        public void setTitle( String title ){ this.title = title; }
        public String getTitle(){ return title; }

        public void setAuthor( String author ){ this.author = author; }
        public String getAuthor(){ return author; }

        public void setPages( int pages ){ this.pages = pages; }
        public int getPages(){ return pages; }

        public void setIsbn( String isbn ){ this.isbn = isbn; }
        public String getIsbn(){ return isbn; }

        public void setDate( String date ){ this.date = date; }
        public String getDate(){ return date; }

        public void setPrice( int price ){ this.price = price; }
        public int getPrice(){ return price; }

        public void setSummary( String summary ){ this.summary = summary; }
        public String getSummary(){ return summary; }
    }
}
