package gihyo.lucene.ch6;

/**
 * Copyright 2006 SEKIGUCHI, Koji
 * 
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *  http://www.apache.org/licenses/LICENSE-2.0
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import net.java.sen.StreamTagger;

/**
 * Lucene {@link Tokenizer} that pulls tokens from a Sen {@link StreamTagger}
 * and emits, for each one, a Lucene {@link Token} built from the Sen token's
 * basic string, its start/end offsets shifted by a running accumulator, and
 * its part-of-speech string as the token type.
 *
 * <p>The accumulator grows by the end offset of the previously returned token
 * every time the tagger reports a token whose start offset is 0.
 * NOTE(review): this presumably compensates for the tagger restarting its
 * offsets at 0 for each chunk of input it analyzes, so that the emitted
 * offsets keep increasing across chunks - confirm against the Sen
 * StreamTagger behavior.
 */
public class AccumPosBasicSenTokenizer extends Tokenizer {

    /** Sen tagger that produces the underlying tokens. */
    protected StreamTagger tagger = null;

    /** Amount added to every start/end offset reported by the tagger. */
    protected int accum;

    /** Unshifted end offset of the most recently returned token. */
    protected int prevEnd;

    /**
     * Creates a tokenizer that reads from the given reader.
     *
     * @param in         source of the text to tokenize; also stored as this
     *                   tokenizer's {@code input}
     * @param configFile configuration file location handed to the
     *                   {@link StreamTagger} constructor
     * @throws IOException if constructing the tagger fails with an I/O error
     */
    public AccumPosBasicSenTokenizer( Reader in, String configFile ) throws IOException {
        input = in;
        tagger = new StreamTagger( input, configFile );
    }

    /**
     * Returns the next token, or {@code null} once the tagger has no more
     * tokens. {@code null} tokens handed back by the tagger are skipped.
     *
     * @return the next token with accumulated offsets, or {@code null} at the
     *         end of the stream
     * @throws IOException if the tagger fails while reading the input
     */
    public Token next() throws IOException {
        // Iterate instead of recursing so that a long run of null tokens
        // cannot exhaust the call stack.
        while( tagger.hasNext() ){
            net.java.sen.Token token = tagger.next();
            if( token == null ){
                continue;
            }

            int start = token.start();
            // A start offset of 0 is taken as the signal to fold the previous
            // token's end offset into the accumulator.
            if( start == 0 ){
                accum += prevEnd;
            }
            int end = token.end();
            // Remember the unshifted end; accum already carries earlier shifts.
            prevEnd = end;

            return new Token( token.getBasicString(),
                              start + accum,
                              end + accum,
                              token.getPos() );
        }
        return null;
    }

    /**
     * Closes this tokenizer by delegating to the superclass implementation.
     *
     * @throws IOException if the superclass close fails
     */
    public void close() throws IOException {
        super.close();
    }
}
