package org.apache.solr.handler.tagger;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/solr/handler/tagger/Tagger.class */
/**
 * Drives a {@link TokenStream} over some input and matches the emitted tokens against the
 * terms of an index field, reporting every surviving match through
 * {@link #tagCallback(int, int, Object)}.
 *
 * <p>Candidate matches are kept in a linked list of {@code TagLL} nodes whose head is held in a
 * one-element array so that the list nodes and this class can share (and replace) the head
 * reference. Once no node of the list can be advanced any further, the list is handed to the
 * configured {@code TagClusterReducer} and whatever remains is emitted via the callback.
 *
 * <p>Usage, as far as this class shows: construct (which resets the token stream), optionally
 * call {@link #enableDocIdsCache(int)}, then call {@link #process()}.
 */
public abstract class Tagger {
    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    private final TokenStream tokenStream;
    private final TermToBytesRefAttribute byteRefAtt;
    private final PositionIncrementAttribute posIncAtt;
    private final OffsetAttribute offsetAtt;
    private final TaggingAttribute taggingAtt;
    private final TagClusterReducer tagClusterReducer;
    private final Terms terms;
    private final Bits liveDocs;
    private final boolean skipAltTokens;
    private final boolean ignoreStopWords;

    /** Optional cache handed to every {@code TermPrefixCursor}; stays {@code null} unless enabled. */
    private Map<BytesRef, IntsRef> docIdsCache;

    /** Ensures the "skipped alternate tokens" warning is logged at most once per instance. */
    private boolean loggedSkippedAltTokenWarning = false;

    /**
     * Creates a tagger, registers the attributes it reads on {@code tokenStream} and resets the
     * stream so that {@link #process()} can start consuming it.
     *
     * @param terms the terms to match tokens against; if {@code null}, {@link #process()} returns
     *     immediately without consuming any token
     * @param liveDocs passed through unchanged to each {@code TermPrefixCursor}
     * @param tokenStream the token stream to consume; it is reset here
     * @param tagClusterReducer reduces a finished cluster of tags before they are emitted
     * @param skipAltTokens if {@code true}, tokens with a position increment below 1 are skipped;
     *     if {@code false}, such a token makes {@link #process()} throw
     * @param ignoreStopWords if {@code false}, a position increment greater than 1 closes the
     *     current cluster before the token is processed
     * @throws IOException if resetting the token stream fails
     */
    public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream, TagClusterReducer tagClusterReducer,
            boolean skipAltTokens, boolean ignoreStopWords) throws IOException {
        this.terms = terms;
        this.liveDocs = liveDocs;
        this.tokenStream = tokenStream;
        this.skipAltTokens = skipAltTokens;
        this.ignoreStopWords = ignoreStopWords;
        this.byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
        this.posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
        this.offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
        this.taggingAtt = tokenStream.addAttribute(TaggingAttribute.class);
        tokenStream.reset();
        this.tagClusterReducer = tagClusterReducer;
    }

    /**
     * Enables the doc-id cache that is passed to every {@code TermPrefixCursor} created by
     * {@link #process()}.
     *
     * @param initSize initial capacity of the cache; values {@code <= 0} leave the cache untouched
     */
    public void enableDocIdsCache(int initSize) {
        if (initSize > 0) {
            this.docIdsCache = new HashMap<>(initSize);
        }
    }

    /**
     * Consumes the whole token stream, emitting tags through
     * {@link #tagCallback(int, int, Object)} as clusters complete, and finally calls
     * {@link TokenStream#end()}. The stream is not closed here.
     *
     * @throws IOException if reading the token stream or the terms fails
     * @throws IllegalStateException if a token with a position increment below 1 is seen while
     *     {@code skipAltTokens} is disabled
     * @throws IllegalArgumentException if a token that has to be looked up is zero-length
     */
    public void process() throws IOException {
        if (terms == null) {
            return;
        }

        // One-element array acting as a shared, replaceable pointer to the list head.
        final TagLL[] head = new TagLL[1];
        // Kept across iterations until a TagLL takes it over (see below).
        TermPrefixCursor cursor = null;
        // Remembers whether any alternate token was skipped, for the warning after the loop.
        boolean skippedTokens = false;

        while (tokenStream.incrementToken()) {
            if (log.isTraceEnabled()) {
                log.trace("Token: {}, posInc: {},  offset: [{},{}]",
                        byteRefAtt, posIncAtt.getPositionIncrement(), offsetAtt.startOffset(), offsetAtt.endOffset());
            }

            // -- Alternate tokens (posInc == 0): skip them or fail, depending on configuration.
            if (posIncAtt.getPositionIncrement() < 1) {
                if (!skipAltTokens) {
                    throw new IllegalStateException("Query Analyzer generates alternate Tokens (posInc == 0). Please adapt your Analyzer configuration or enable 'skipAltTokens' to skip such tokens. NOTE: enabling 'skipAltTokens' might result in wrong tagging results if the index time analyzer is not configured accordingly. For detailed information see https://github.com/OpenSextant/SolrTextTagger/pull/11#issuecomment-24936225");
                }
                skippedTokens = true;
                log.trace("  ... ignored token");
                continue;
            }

            // -- A gap in positions ends the current cluster unless gaps are to be ignored.
            if (!ignoreStopWords && posIncAtt.getPositionIncrement() > 1) {
                log.trace("   - posInc > 1 ... mark cluster as done");
                advanceTagsAndProcessClusterIfDone(head, null);
            }

            // The term is needed when the token is taggable OR when there are open tags that
            // may still be extended by it; otherwise the token is skipped (term == null).
            final BytesRef term;
            if (taggingAtt.isTaggable() || head[0] != null) {
                term = byteRefAtt.getBytesRef();
                if (term.length == 0) {
                    throw new IllegalArgumentException("term: " + term.utf8ToString() + " analyzed to a zero-length token");
                }
            } else {
                term = null;
            }

            // -- Advance the open tags with this term; emits the cluster if none could advance.
            advanceTagsAndProcessClusterIfDone(head, term);

            // -- Only taggable tokens may start a new tag.
            if (taggingAtt.isTaggable() && term != null) {
                if (cursor == null) {
                    cursor = new TermPrefixCursor(terms.iterator(), liveDocs, docIdsCache);
                }
                if (cursor.advance(term)) {
                    TagLL newTail = new TagLL(head, cursor, offsetAtt.startOffset(), offsetAtt.endOffset(), null);
                    // The cursor was handed to the new tag; a fresh one is created when next needed.
                    cursor = null;
                    appendToTail(head, newTail);
                }
            }
        }

        // Flush whatever is still open at end of input.
        advanceTagsAndProcessClusterIfDone(head, null);
        assert head[0] == null;

        if (!loggedSkippedAltTokenWarning && skippedTokens) {
            loggedSkippedAltTokenWarning = true;
            log.warn("{}{}{}{}",
                    "The Tagger skipped some alternate tokens (tokens with posInc == 0) ",
                    "while processing text. This may cause problems with some Analyzer ",
                    "configurations (e.g. query time synonym expansion). For details see ",
                    "https://github.com/OpenSextant/SolrTextTagger/pull/11#issuecomment-24936225");
        }

        tokenStream.end();
    }

    /** Makes {@code newTail} the head of an empty list, or links it after the current last node. */
    private static void appendToTail(TagLL[] head, TagLL newTail) {
        if (head[0] == null) {
            head[0] = newTail;
            return;
        }
        TagLL last = head[0];
        while (last.nextTag != null) {
            last = last.nextTag;
        }
        last.addAfterLL(newTail);
    }

    /**
     * Advances every tag in the list with {@code term}. If none of them advanced and the list is
     * non-empty, the cluster is complete: it is reduced, each remaining tag is reported through
     * {@link #tagCallback(int, int, Object)}, and the list is cleared.
     *
     * @param head shared pointer to the first tag of the list ({@code head[0]} may be {@code null})
     * @param term the current term, or {@code null} to advance with "no term"; in that case
     *     {@code -1} is passed as the end offset
     * @throws IOException propagated from advancing the tags
     */
    private void advanceTagsAndProcessClusterIfDone(TagLL[] head, BytesRef term) throws IOException {
        // -- Advance tags
        final int endOffset = term != null ? offsetAtt.endOffset() : -1;
        boolean anyAdvance = false;
        for (TagLL t = head[0]; t != null; t = t.nextTag) {
            // Non-short-circuit OR: every tag must be advanced, even after one reported success.
            anyAdvance |= t.advance(term, endOffset);
        }

        // -- Process cluster if done
        if (anyAdvance || head[0] == null) {
            return;
        }
        tagClusterReducer.reduce(head);
        for (TagLL t = head[0]; t != null; t = t.nextTag) {
            assert t.value != null;
            tagCallback(t.startOffset, t.endOffset, t.value);
        }
        head[0] = null;
    }

    /**
     * Invoked once for every tag that remains after a cluster has been reduced.
     *
     * @param startOffset the tag's {@code startOffset}
     * @param endOffset the tag's {@code endOffset}
     * @param docIdsKey the tag's {@code value}; never {@code null} when assertions are enabled.
     *     Presumably meant to be resolved via {@link #lookupDocIds(Object)} - confirm against
     *     implementations.
     */
    protected abstract void tagCallback(int startOffset, int endOffset, Object docIdsKey);

    /**
     * Casts the given object to {@link IntsRef}.
     *
     * @param docIdsKey the object to cast
     * @return {@code docIdsKey} as an {@link IntsRef}
     * @throws ClassCastException if {@code docIdsKey} is not an {@link IntsRef}
     */
    public IntsRef lookupDocIds(Object docIdsKey) {
        return (IntsRef) docIdsKey;
    }
}
