package nayami.indexer;

/**
 * Copyright 2006 SEKIGUCHI, Koji
 * 
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *  http://www.apache.org/licenses/LICENSE-2.0
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

import java.io.IOException;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.util.Iterator;
// When using POI, uncomment the imports below (reconstructed from a mis-encoded Japanese comment).
//import org.apache.poi.hdf.extractor.WordDocument;
//import java.io.StringWriter;
//import java.io.InputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Document;
import nayami.constant.DocumentTypes;

/**
 * Source iterator that turns article text files into Lucene {@link Document}s
 * of type {@link DocumentTypes#ARTICLE}.
 *
 * Each source file is a plain-text rendering (named with {@link #FILE_EXTENSION}
 * appended) of a "native" document; the native name is recovered by stripping
 * that extension (see {@link #txt2native(String)}).
 *
 * The field factories ({@code getIdField}, {@code getDateField}, ...),
 * {@code getIterator()} and {@code getUrlPrefix()} are not defined here;
 * presumably they are inherited from {@code AbstractFileSourceIterator}.
 */
public class ArticleSourceIterator extends AbstractFileSourceIterator {

    private static final Log log = LogFactory.getLog( ArticleSourceIterator.class );

    // When using POI, change ".txt" to ".doc"
    // (reconstructed from a mis-encoded Japanese comment).
    private static final String FILE_EXTENSION = ".txt";

    /**
     * Returns the extension of the files this iterator handles.
     *
     * @return {@link #FILE_EXTENSION}
     */
    protected String getFileExtension(){
        return FILE_EXTENSION;
    }

    /**
     * Reports whether the underlying iterator has more files.
     *
     * @return the result of {@code getIterator().hasNext()}
     */
    public boolean hasNext(){
        return getIterator().hasNext();
    }

    /**
     * Builds the document for the next file of the underlying iterator.
     *
     * The document receives, in this order: id, date, type, url, title and
     * content fields. The id and the url are based on the native name (the
     * file name without {@link #FILE_EXTENSION}); the date is the last-modified
     * time of the native file.
     *
     * @return the populated document, or {@code null} when the file's text
     *         could not be read (the underlying iterator has still advanced)
     */
    public Document next(){
        Document doc = new Document();
        RelativeFile rf = (RelativeFile)getIterator().next();
        doc.add( getIdField( txt2native( rf.getRelativeFilePath() ) ) );
        doc.add( getDateField( getNativeLastModified( rf.getFile() ) ) );
        doc.add( getTypeField( DocumentTypes.ARTICLE ) );
        doc.add( getUrlField( getUrlPrefix() +
                              rf.getRelativePath() +
                              txt2native( rf.getFile().getName() ) ) );
        String text = getWordText( rf.getFile() );
        // NOTE(review): returning null from next() is kept for backward
        // compatibility; callers are presumably expected to skip null documents.
        if( text == null )
            return null;
        doc.add( getTitleField( getTitle( text ) ) );
        doc.add( getContentField( getContent( text ) ) );
        return doc;
    }

    /* When using POI, enable this method
       (reconstructed from a mis-encoded Japanese comment).
    protected String getWordText( File word ){
	InputStream is = null;
	StringWriter writer = new StringWriter();
	try{
	    is = new FileInputStream( word );
	    WordDocument wd = new WordDocument( is );
	    wd.writeAllText( writer );
	}
	catch( IOException e ){
	    log.error( "error during reading word doc. error = " + e.toString() );
	}
	finally{
	    try{
		if( is != null ) is.close();
	    }
	    catch( IOException e ){
		log.error( "error during closing word doc. error = " + e.toString() );
	    }
	}
	return writer.toString();
    }
    */

    // When using POI, disable this method
    // (reconstructed from a mis-encoded Japanese comment).
    /**
     * Reads the whole text file, decoding it as Shift_JIS.
     *
     * Lines are concatenated without any separator, exactly as before.
     * NOTE(review): for space-delimited text this glues the last word of a line
     * to the first word of the next one - confirm this is intended.
     *
     * @param word the text file to read
     * @return the concatenated lines, or {@code null} if reading failed
     *         (the failure is logged)
     */
    protected String getWordText( File word ){
        // Default output encoding of xdoc2txt, according to the original comment.
        final String CHARSET = "SJIS";
        StringBuffer buffer = new StringBuffer();
        FileInputStream in = null;
        BufferedReader br = null;
        try{
            // The stream is opened on its own so it can still be closed if
            // building the reader fails (e.g. unsupported charset).
            in = new FileInputStream( word );
            br = new BufferedReader( new InputStreamReader( in, CHARSET ) );
            String line = null;
            while( ( line = br.readLine() ) != null )
                buffer.append( line );
        }
        catch( IOException e ){
            // Pass the exception as well so the stack trace is not lost.
            log.error( "error during reading word doc. error = " + e.toString(), e );
            return null;
        }
        finally{
            try{
                // Closing the reader also closes the wrapped stream; fall back
                // to the raw stream when the reader was never created.
                if( br != null ) br.close();
                else if( in != null ) in.close();
            }
            catch( IOException e ){
                log.error( "error during closing word doc. error = " + e.toString(), e );
            }
        }
        return buffer.toString();
    }

    /**
     * Derives a title from the document text.
     *
     * @param text the full document text
     * @return the text itself when it has at most 60 characters, otherwise its
     *         first 60 characters followed by {@code "..."}
     */
    protected String getTitle( String text ){
        if( text.length() > 60 )
            return text.substring( 0, 60 ) + "...";
        return text;
    }

    /**
     * Derives the indexed content from the document text.
     *
     * @param text the full document text
     * @return {@code text} unchanged
     */
    protected String getContent( String text ){
        return text;
    }

    /**
     * Converts the name of a text file into the name of its native document
     * by removing the trailing {@link #FILE_EXTENSION}.
     *
     * Only a true suffix is removed; an occurrence of the extension in the
     * middle of the name (e.g. {@code "a.txt.bak"}) no longer truncates it.
     *
     * @param name a file name or path
     * @return {@code name} without the trailing extension, or an empty string
     *         when {@code name} does not end with the extension
     */
    protected String txt2native( String name ){
        if( name.endsWith( FILE_EXTENSION ) )
            return name.substring( 0, name.length() - FILE_EXTENSION.length() );
        return "";
    }

    /**
     * Returns the last-modified time of the native document that belongs to
     * the given text file.
     *
     * @param file the text file
     * @return {@link File#lastModified()} of the path obtained by applying
     *         {@link #txt2native(String)} to the file's absolute path
     */
    protected long getNativeLastModified( File file ){
        String txtName = file.getAbsolutePath();
        String nativeName = txt2native( txtName );
        return new File( nativeName ).lastModified();
    }
}
