package gihyo.lucene.ch3;

/**
 * Copyright 2006 SEKIGUCHI, Koji
 * 
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *  http://www.apache.org/licenses/LICENSE-2.0
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.util.StringTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds a Lucene index from a comma-separated data file, producing one
 * {@link Document} per input line.
 *
 * <p>Each line is expected to hold at least ten columns, read in this order:
 * code, kana, name, four address parts, individual zip, zip and post.
 * Columns may optionally be wrapped in double quotes. Quoted columns that
 * themselves contain the separator are not supported.</p>
 *
 * <p>The protected methods are presumably the hooks driven by
 * {@code AbstractIndexer.makeIndex()}; that class is not visible from this
 * file, so the exact calling sequence should be confirmed there.</p>
 */
public class PostIndexer extends AbstractIndexer {

    /** Character encoding used to decode the data file. */
    private static final String CHARSET = "MS932";
    /** Column separator of the data file. */
    private static final String SEPARATOR = ",";
    /** Quote character that may surround a column value. */
    private static final String QUOTE = "\"";
    /** Number of leading columns that {@link #getDocument(Object)} consumes. */
    private static final int REQUIRED_COLUMNS = 10;

    // NOTE(review): the field-name literals below (and the message printed by
    // main) look garbled, as if the file was written in a legacy Japanese
    // encoding and later decoded with a different one; F_NAME in particular
    // is the empty string in this view. They are reproduced unchanged here -
    // confirm against the original source before relying on them.
    public static final String F_CODE = "ԍ";
    public static final String F_KANA = "Ji";
    public static final String F_NAME = "";
    public static final String F_ADDR = "Z";
    public static final String F_CONTENT = "Rec";
    public static final String F_INDZIP = "ʗX֔ԍ";
    public static final String F_ZIP = "X֔ԍ";
    public static final String F_POST = "X֋ǖ";

    /** Path of the index directory handed to {@link FSDirectory}. */
    private final String indexDir;
    /** Path of the data file to read. */
    private final String dataFile;
    /** Number of lines read so far by {@link #hasNext()}. */
    private int count;
    /** Lazily opened index directory; see {@link #getDirectory()}. */
    private Directory directory;
    /** Reader over the data file; open between {@link #begin()} and {@link #end()}. */
    private BufferedReader br;
    /** Line fetched by the most recent successful {@link #hasNext()} call. */
    private String line;

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the index directory, {@code args[1]} the data file
     * @throws IndexerException if indexing fails
     * @throws IllegalArgumentException if fewer than two arguments are given
     */
    public static void main( String args[] ) throws IndexerException {
        // Fail with a usable message instead of a bare ArrayIndexOutOfBoundsException.
        if( args == null || args.length < 2 ){
            throw new IllegalArgumentException(
                "Usage: java gihyo.lucene.ch3.PostIndexer <indexDir> <dataFile>" );
        }
        PostIndexer pi = new PostIndexer( args[0], args[1] );
        pi.makeIndex();
        System.out.println( Integer.toString( pi.count ) + " ̗X֔ԍf[^o^܂B" );
    }

    /**
     * @param indexDir path of the index directory
     * @param dataFile path of the data file to index
     */
    private PostIndexer( String indexDir, String dataFile ){
        this.indexDir = indexDir;
        this.dataFile = dataFile;
    }

    /**
     * Opens the data file for reading and resets the line counter.
     *
     * @throws IndexerException if the file cannot be opened or the charset is unsupported
     */
    protected void begin() throws IndexerException {
        FileInputStream in = null;
        try{
            in = new FileInputStream( dataFile );
            // UnsupportedEncodingException is an IOException, so a single catch covers both.
            br = new BufferedReader( new InputStreamReader( in, CHARSET ) );
        }
        catch( IOException e ){
            // Do not leak the raw stream when wrapping it in a reader failed.
            if( in != null ){
                try{
                    in.close();
                }
                catch( IOException ignored ){
                    // The original failure is the one worth reporting.
                }
            }
            throw new IndexerException( e );
        }
        count = 0;
    }

    /**
     * Closes the data file reader, if one was opened.
     *
     * @throws IndexerException if closing the reader fails
     */
    protected void end() throws IndexerException {
        // begin() may have failed before the reader was assigned.
        if( br == null ){
            return;
        }
        try{
            br.close();
        }
        catch( IOException e ){
            throw new IndexerException( e );
        }
    }

    /**
     * Reads the next line of the data file and remembers it for {@link #next()}.
     *
     * <p>Every call consumes one line, so each call that returns {@code true}
     * should be followed by exactly one {@link #next()} call.</p>
     *
     * @return {@code true} if a line was read, {@code false} at end of file
     * @throws IndexerException if reading fails
     */
    protected boolean hasNext() throws IndexerException {
        try{
            line = br.readLine();
        }
        catch( IOException e ){
            throw new IndexerException( e );
        }
        if( line == null ){
            return false;
        }
        count++;
        return true;
    }

    /**
     * @return the line fetched by the most recent {@link #hasNext()} call
     * @throws IndexerException never thrown by this implementation
     */
    protected Object next() throws IndexerException {
        return line;
    }

    /**
     * Returns the index directory, opening it on first use.
     *
     * @return the directory for {@code indexDir}
     * @throws IndexerException if the directory cannot be opened
     */
    protected Directory getDirectory() throws IndexerException {
        if( directory == null ){
            try{
                // NOTE(review): the second argument is Lucene's "create" flag;
                // true presumably discards any index already at indexDir - confirm.
                directory = FSDirectory.getDirectory( indexDir, true );
            }
            catch( IOException e ){
                throw new IndexerException( e );
            }
        }
        return directory;
    }

    /**
     * Converts one line of the data file into a Lucene document.
     *
     * @param record the line to convert; must be a {@link String}
     * @return a document holding the code, kana, name, address, content,
     *         individual zip, zip and post fields
     * @throws IndexerException if the record has fewer than ten columns
     */
    protected Document getDocument( final Object record ) throws IndexerException {
        // A negative limit keeps empty columns, so an empty value can no longer
        // shift the columns that follow it (StringTokenizer silently dropped them).
        final String[] columns = ( (String)record ).split( SEPARATOR, -1 );
        if( columns.length < REQUIRED_COLUMNS ){
            // Wrapped in an IOException because that is the only cause type
            // this file shows IndexerException accepting.
            throw new IndexerException( new IOException(
                "Malformed record: expected at least " + REQUIRED_COLUMNS
                + " columns but found " + columns.length + ": " + record ) );
        }

        final String code = unquote( columns[0] );
        final String kana = unquote( columns[1] );
        final String name = unquote( columns[2] );
        // The four address parts are concatenated without any delimiter.
        final String addr = unquote( columns[3] ) + unquote( columns[4] )
            + unquote( columns[5] ) + unquote( columns[6] );
        final String indZip = unquote( columns[7] );
        final String zip = unquote( columns[8] );
        final String post = unquote( columns[9] );
        final String content = name + " " + addr;

        final Document doc = new Document();
        doc.add( new Field( F_CODE, code, getFieldStore(), Field.Index.UN_TOKENIZED ) );
        doc.add( new Field( F_KANA, kana, getFieldStore(), Field.Index.TOKENIZED ) );
        doc.add( new Field( F_NAME, name, getFieldStore(), Field.Index.TOKENIZED ) );
        doc.add( new Field( F_ADDR, addr, getFieldStore(), Field.Index.TOKENIZED ) );
        doc.add( new Field( F_CONTENT, content, getFieldStore(), Field.Index.TOKENIZED ) );
        doc.add( new Field( F_INDZIP, indZip, getFieldStore(), Field.Index.UN_TOKENIZED ) );
        doc.add( new Field( F_ZIP, zip, getFieldStore(), Field.Index.UN_TOKENIZED ) );
        doc.add( new Field( F_POST, post, getFieldStore(), Field.Index.TOKENIZED ) );
        return doc;
    }

    /**
     * Strips one pair of surrounding double quotes from a column value.
     *
     * @param column the raw column text
     * @return the text between the quotes, or {@code column} unchanged when it
     *         is not wrapped in a pair of quotes
     */
    private String unquote( String column ){
        // The length check keeps a lone quote character from matching both
        // startsWith and endsWith, which would make substring(1, 0) throw.
        if( column.length() >= 2 && column.startsWith( QUOTE ) && column.endsWith( QUOTE ) ){
            return column.substring( 1, column.length() - 1 );
        }
        return column;
    }
}
