package com.aliasi.chunk;

import com.aliasi.corpus.ObjectHandler;
import com.aliasi.hmm.HmmCharLmEstimator;
import com.aliasi.lm.LanguageModel;
import com.aliasi.lm.NGramBoundaryLM;
import com.aliasi.lm.NGramProcessLM;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Compilable;
import com.aliasi.util.Strings;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.HashMap;

/* loaded from: input_file:com/aliasi/chunk/CharLmRescoringChunker.class */
/**
 * Trainable rescoring chunker that combines a {@link CharLmHmmChunker} base
 * chunker with character language models: one {@link NGramProcessLM} that is
 * trained on the text lying outside of chunks, and one
 * {@link NGramBoundaryLM} per chunk type that is trained on the text of
 * chunks of that type.
 *
 * <p>Training input arrives through {@link #handle(Chunking)},
 * {@link #trainDictionary(CharSequence, String)} and
 * {@link #trainOut(CharSequence)}. {@link #compileTo(ObjectOutput)} writes
 * the trained models to an object output.
 *
 * <p>Each chunk type is associated with a single code character. Codes are
 * handed out downwards starting at {@code U+FFFC}; the text outside of chunks
 * is framed with the code characters of its neighbours before it is used to
 * train the out-of-chunk model.
 */
public class CharLmRescoringChunker extends AbstractCharLmRescoringChunker<CharLmHmmChunker, NGramProcessLM, NGramBoundaryLM> implements ObjectHandler<Chunking>, Compilable {

    /** Code character (65534) placed in front of the first out-of-chunk segment of a training text. */
    private static final char TEXT_START_CODE = '\uFFFE';

    /** Code character (65533) placed after the last out-of-chunk segment of a training text. */
    private static final char TEXT_END_CODE = '\uFFFD';

    /** Code character (65532) given to the first chunk type created; later types count down from it. */
    private static final char FIRST_TYPE_CODE = '\uFFFC';

    /**
     * Character (65535) passed as the fourth constructor argument of every
     * per-type {@link NGramBoundaryLM}.
     * NOTE(review): presumably that model's boundary character - confirm against NGramBoundaryLM.
     */
    private static final char TYPE_LM_CTOR_CHAR = '\uFFFF';

    /** First argument used when a per-type {@link NGramBoundaryLM} is created (constructor parameter {@code i2}). */
    final int mNGram;

    /** Second argument used when a per-type {@link NGramBoundaryLM} is created (constructor parameter {@code i3}). */
    final int mNumChars;

    /** Third argument used when a per-type {@link NGramBoundaryLM} is created (constructor parameter {@code d}). */
    final double mInterpolationRatio;

    /** Code character that the next previously unseen chunk type will receive. */
    char mNextCodeChar;

    /**
     * Helper object that {@link CharLmRescoringChunker#compileTo(ObjectOutput)}
     * writes to the output in place of the chunker itself.
     * {@link #writeExternal(ObjectOutput)} emits the compiled models and
     * {@link #read(ObjectInput)} rebuilds a chunker from the same layout.
     */
    static class Externalizer extends AbstractExternalizable {
        private static final long serialVersionUID = 3555143657918695241L;

        /** Chunker to be written; {@code null} when the no-argument constructor was used. */
        final CharLmRescoringChunker mChunker;

        /** Creates an externalizer without a chunker attached. */
        public Externalizer() {
            this(null);
        }

        /**
         * Creates an externalizer for the given chunker.
         *
         * @param charLmRescoringChunker chunker whose models {@link #writeExternal(ObjectOutput)} will write
         */
        public Externalizer(CharLmRescoringChunker charLmRescoringChunker) {
            mChunker = charLmRescoringChunker;
        }

        /**
         * Writes, in this order: the compiled base chunker, the number of
         * chunkings rescored, the number of chunk types, then for every type
         * its name, its code character and its compiled language model, and
         * finally the compiled out-of-chunk language model.
         *
         * @param objectOutput destination of the compiled form
         * @throws IOException if writing to {@code objectOutput} fails
         */
        @Override
        public void writeExternal(ObjectOutput objectOutput) throws IOException {
            final CharLmRescoringChunker chunker = mChunker;

            ((CharLmHmmChunker) chunker.baseChunker()).compileTo(objectOutput);
            objectOutput.writeInt(chunker.numChunkingsRescored());

            // Snapshot the type names so the count and the entries written agree.
            final String[] types = (String[]) chunker.mTypeToLM.keySet().toArray(Strings.EMPTY_STRING_ARRAY);
            objectOutput.writeInt(types.length);
            for (String type : types) {
                objectOutput.writeUTF(type);
                objectOutput.writeChar(chunker.typeToChar(type));
                ((NGramBoundaryLM) chunker.mTypeToLM.get(type)).compileTo(objectOutput);
            }

            chunker.outLM().compileTo(objectOutput);
        }

        /**
         * Reads the layout produced by {@link #writeExternal(ObjectOutput)}
         * and assembles a chunker from it.
         *
         * @param objectInput source of the compiled form
         * @return an {@link AbstractCharLmRescoringChunker} built from the objects read
         * @throws ClassNotFoundException if a class of a stored object cannot be found
         * @throws IOException if reading from {@code objectInput} fails
         */
        @Override
        public Object read(ObjectInput objectInput) throws ClassNotFoundException, IOException {
            final NBestChunker baseChunker = (NBestChunker) objectInput.readObject();
            final int numChunkingsRescored = objectInput.readInt();
            final int numTypes = objectInput.readInt();

            final HashMap<String, Character> typeToChar = new HashMap<String, Character>();
            final HashMap<String, LanguageModel.Sequence> typeToLm = new HashMap<String, LanguageModel.Sequence>();
            for (int remaining = numTypes; remaining > 0; --remaining) {
                // Per-type record: name, code character, language model.
                final String type = objectInput.readUTF();
                final char code = objectInput.readChar();
                final LanguageModel.Sequence typeLm = (LanguageModel.Sequence) objectInput.readObject();
                typeToChar.put(type, Character.valueOf(code));
                typeToLm.put(type, typeLm);
            }

            // The out-of-chunk model is the last object in the stream.
            final LanguageModel.Process outLm = (LanguageModel.Process) objectInput.readObject();
            return new AbstractCharLmRescoringChunker(baseChunker, numChunkingsRescored, outLm, typeToChar, typeToLm);
        }
    }

    /**
     * Creates an untrained chunker.
     *
     * @param tokenizerFactory tokenizer factory handed to the underlying {@link CharLmHmmChunker}
     * @param i second argument of the superclass constructor; the compiled form stores
     *          {@code numChunkingsRescored()} in the matching position
     * @param i2 first argument for the {@link HmmCharLmEstimator} and for every language model created
     * @param i3 second argument for the {@link HmmCharLmEstimator} and for every language model created
     * @param d third argument for the {@link HmmCharLmEstimator} and for every language model created
     */
    public CharLmRescoringChunker(TokenizerFactory tokenizerFactory, int i, int i2, int i3, double d) {
        super(new CharLmHmmChunker(tokenizerFactory, new HmmCharLmEstimator(i2, i3, d)),
              i,
              new NGramProcessLM(i2, i3, d),
              new HashMap(),
              new HashMap());
        mNGram = i2;
        mNumChars = i3;
        mInterpolationRatio = d;
        mNextCodeChar = FIRST_TYPE_CODE;
    }

    /**
     * Creates an untrained chunker, forwarding an extra flag to the
     * underlying {@link CharLmHmmChunker}.
     *
     * @param tokenizerFactory tokenizer factory handed to the underlying {@link CharLmHmmChunker}
     * @param i second argument of the superclass constructor; the compiled form stores
     *          {@code numChunkingsRescored()} in the matching position
     * @param i2 first argument for the {@link HmmCharLmEstimator} and for every language model created
     * @param i3 second argument for the {@link HmmCharLmEstimator} and for every language model created
     * @param d third argument for the {@link HmmCharLmEstimator} and for every language model created
     * @param z third argument of the {@link CharLmHmmChunker} constructor
     */
    public CharLmRescoringChunker(TokenizerFactory tokenizerFactory, int i, int i2, int i3, double d, boolean z) {
        super(new CharLmHmmChunker(tokenizerFactory, new HmmCharLmEstimator(i2, i3, d), z),
              i,
              new NGramProcessLM(i2, i3, d),
              new HashMap(),
              new HashMap());
        mNGram = i2;
        mNumChars = i3;
        mInterpolationRatio = d;
        mNextCodeChar = FIRST_TYPE_CODE;
    }

    /**
     * Trains on one chunking. The chunking is first passed to the base
     * chunker. Then, walking the chunks in the order given by
     * {@code orderedSet(chunking)}, the text between consecutive chunks
     * trains the out-of-chunk model (framed by the code characters of the
     * neighbouring chunks) and the text of each chunk trains the model of
     * its type.
     *
     * @param chunking training chunking
     * @throws IllegalArgumentException if a chunk starts before the previous chunk ended
     */
    @Override
    public void handle(Chunking chunking) {
        ((ObjectHandler) baseChunker()).handle(chunking);

        final String text = chunking.charSequence().toString();
        int outStart = 0;                  // where the pending out-of-chunk segment begins
        char leftCode = TEXT_START_CODE;   // code character preceding the pending segment
        for (Chunk chunk : orderedSet(chunking)) {
            final int chunkStart = chunk.start();
            final int chunkEnd = chunk.end();
            if (outStart > chunkStart) {
                throw new IllegalArgumentException("Chunk overlap for chunk=" + chunk + " in chunking=" + chunking);
            }
            final String type = chunk.type();
            createTypeIfNecessary(type);
            final char typeCode = typeToChar(type);
            trainOutLM(text.substring(outStart, chunkStart), leftCode, typeCode);
            trainTypeLM(type, text.substring(chunkStart, chunkEnd));
            outStart = chunkEnd;
            leftCode = typeCode;
        }
        // Trailing text after the last chunk (the whole text when there are no chunks).
        trainOutLM(text.substring(outStart), leftCode, TEXT_END_CODE);
    }

    /**
     * Writes this chunker to the output by writing an {@link Externalizer}
     * wrapped around it.
     *
     * @param objectOutput destination of the compiled form
     * @throws IOException if writing to {@code objectOutput} fails
     */
    @Override
    public void compileTo(ObjectOutput objectOutput) throws IOException {
        final Externalizer externalizer = new Externalizer(this);
        objectOutput.writeObject(externalizer);
    }

    /**
     * Trains with a dictionary entry: the entry is passed to the base
     * chunker and also used to train the language model of its type.
     *
     * @param charSequence text of the dictionary entry
     * @param str chunk type of the entry
     */
    public void trainDictionary(CharSequence charSequence, String str) {
        final CharLmHmmChunker hmmChunker = (CharLmHmmChunker) baseChunker();
        hmmChunker.trainDictionary(charSequence, str);
        trainTypeLM(str, charSequence);
    }

    /**
     * Trains the out-of-chunk language model directly on the given text,
     * without adding any framing code characters.
     *
     * @param charSequence text to train on
     */
    public void trainOut(CharSequence charSequence) {
        outLM().train(charSequence);
    }

    /**
     * Registers a chunk type if it has no code character yet: assigns the
     * next free code character (moving the counter one step down) and
     * creates an empty language model for the type.
     *
     * @param str chunk type
     */
    void createTypeIfNecessary(String str) {
        if (!mTypeToChar.containsKey(str)) {
            final char code = mNextCodeChar--;
            mTypeToChar.put(str, Character.valueOf(code));
            mTypeToLM.put(str, new NGramBoundaryLM(mNGram, mNumChars, mInterpolationRatio, TYPE_LM_CTOR_CHAR));
        }
    }

    /**
     * Trains the language model of a chunk type, creating the type first
     * when it is not yet known.
     *
     * @param str chunk type
     * @param charSequence text to train on
     */
    void trainTypeLM(String str, CharSequence charSequence) {
        createTypeIfNecessary(str);
        final NGramBoundaryLM typeLm = (NGramBoundaryLM) mTypeToLM.get(str);
        typeLm.train(charSequence);
    }

    /**
     * Trains the out-of-chunk language model on {@code str} framed by two
     * code characters, then decrements the unigram count of the leading
     * code character in the model's substring counter.
     *
     * @param str out-of-chunk text segment
     * @param c code character placed before the segment
     * @param c2 code character placed after the segment
     */
    void trainOutLM(String str, char c, char c2) {
        // char + String is string concatenation here, not integer addition.
        final String framed = c + str + c2;
        outLM().train(framed);
        outLM().substringCounter().decrementUnigram(c);
    }
}
