package com.aliasi.test.unit.chunk;

import com.aliasi.chunk.BioTagChunkCodec;
import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.ChunkFactory;
import com.aliasi.chunk.Chunking;
import com.aliasi.chunk.ChunkingImpl;
import com.aliasi.chunk.TagChunkCodec;
import com.aliasi.crf.ForwardBackwardTagLattice;
import com.aliasi.sentences.SentenceChunker;
import com.aliasi.symbol.SymbolTable;
import com.aliasi.tag.StringTagging;
import com.aliasi.tag.TagLattice;
import com.aliasi.tag.Tagging;
import com.aliasi.test.unit.Asserts;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Math;
import com.aliasi.util.ScoredObject;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import junit.framework.Assert;
import org.junit.Test;

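/**
 * Unit tests for {@link BioTagChunkCodec}: conversion between chunkings and BIO
 * taggings, tag-sequence legality checks, and n-best chunk extraction from tag
 * lattices.
 *
 * <p>Source reconstructed from the class file
 * {@code com/aliasi/test/unit/chunk/BioTagChunkCodecTest.class}.
 */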
public class BioTagChunkCodecTest {
    /**
     * Encodes a chunking that marks characters 0-11 of the text as a sentence chunk
     * and checks the tags assigned to the tokens at positions 2 and 3.
     */
    @Test
    public void testToTagging() {
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, false);
        ChunkingImpl chunking = new ChunkingImpl("This chunk. This is more.");
        chunking.add(ChunkFactory.createChunk(0, 11, SentenceChunker.SENTENCE_CHUNK_TYPE));
        Tagging<String> tagging = codec.toTagging(chunking);
        // assertEquals takes (expected, actual); keeping that order makes a failure
        // message report the literal as the expected value.
        Assert.assertEquals("I_S", tagging.tag(2));
        Assert.assertEquals("O", tagging.tag(3));
        System.out.println(tagging);
    }

    /**
     * Checks n-best chunk extraction on hand-built lattices (empty, one token with
     * one chunk type, one token with two chunk types, and a result limit of one),
     * then cross-checks randomly generated lattices against brute-force enumeration.
     */
    @Test
    public void testNBestChunks() {
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, false);

        // A lattice without tokens must yield no chunks.
        ForwardBackwardTagLattice<String> emptyLattice = new ForwardBackwardTagLattice<String>(
                Collections.<String>emptyList(),
                Collections.<String>emptyList(),
                new double[0][0],
                new double[0][0],
                new double[0][0][0],
                0.0d);
        Assert.assertFalse(codec.nBestChunks(emptyLattice, new int[0], new int[0], 100).hasNext());

        // Single token, single chunk type.
        ForwardBackwardTagLattice<String> perLattice = new ForwardBackwardTagLattice<String>(
                Arrays.asList("John"),
                Arrays.asList("O", "B_PER", "I_PER"),
                new double[][] {{-1.0d, -3.0d, -100.0d}},
                new double[][] {{0.0d, 0.0d, 0.0d}},
                new double[0][0][0],
                -1.5d);
        assertIterator(
                codec.nBestChunks(perLattice, new int[] {0}, new int[] {4}, 100),
                ChunkFactory.createChunk(0, 4, "PER", -1.5d));

        // Single token, two competing chunk types; results are expected best-first.
        ForwardBackwardTagLattice<String> perLocLattice = new ForwardBackwardTagLattice<String>(
                Arrays.asList("John"),
                Arrays.asList("O", "B_PER", "I_PER", "B_LOC", "I_LOC"),
                new double[][] {{-1.0d, -3.0d, -100.0d, -5.0d, -200.0d}},
                new double[][] {{0.0d, 0.0d, 0.0d, 0.0d, 0.0d}},
                new double[0][0][0],
                -2.5d);
        assertIterator(
                codec.nBestChunks(perLocLattice, new int[] {0}, new int[] {4}, 100),
                ChunkFactory.createChunk(0, 4, "PER", -0.5d),
                ChunkFactory.createChunk(0, 4, "LOC", -2.5d));
        // Limiting the result count to one keeps only the first chunk.
        assertIterator(
                codec.nBestChunks(perLocLattice, new int[] {0}, new int[] {4}, 1),
                ChunkFactory.createChunk(0, 4, "PER", -0.5d));

        // Fixed seed keeps the random lattices reproducible between runs.
        Random random = new Random(42L);
        assertRandomLattice(random, "John", new HashSet<String>(Arrays.asList("PER")), 100, codec);
        assertRandomLattice(random, "John", new HashSet<String>(Arrays.asList("PER", "LOC")), 100, codec);
        assertRandomLattice(random, "John ran", new HashSet<String>(Arrays.asList("PER", "LOC")), 100, codec);
        assertRandomLattice(random, "Mary jumped", new HashSet<String>(Arrays.asList("PER", "LOC")), 100, codec);
        assertRandomLattice(random, "Mary likes John", new HashSet<String>(Arrays.asList("PER", "LOC")), 100, codec);
    }

    /**
     * Tokenizes {@code str}, builds a random lattice over the resulting tokens and
     * the given chunk types, and verifies the codec's n-best chunks for it.
     *
     * @param random source of randomness for the lattice scores
     * @param str text to tokenize
     * @param set chunk types from which the lattice tags are derived
     * @param i maximum number of n-best results to request
     * @param tagChunkCodec codec under test
     */
    void assertRandomLattice(Random random, String str, Set<String> set, int i, TagChunkCodec tagChunkCodec) {
        List<Integer> tokenStarts = new ArrayList<Integer>();
        List<Integer> tokenEnds = new ArrayList<Integer>();
        List<String> tokens = new ArrayList<String>();
        Tokenizer tokenizer = IndoEuropeanTokenizerFactory.INSTANCE.tokenizer(str.toCharArray(), 0, str.length());
        Iterator<String> tokenIt = tokenizer.iterator();
        while (tokenIt.hasNext()) {
            tokens.add(tokenIt.next());
            // The positions are read right after the token is produced, so they
            // describe the token that was just added.
            tokenStarts.add(Integer.valueOf(tokenizer.lastTokenStartPosition()));
            tokenEnds.add(Integer.valueOf(tokenizer.lastTokenEndPosition()));
        }
        assertNBestChunks(
                randomLattice(set, tokens, random),
                toInts(tokenStarts),
                toInts(tokenEnds),
                i,
                str,
                tagChunkCodec);
    }

    /**
     * Copies a list of boxed integers into a primitive array, preserving order.
     *
     * @param list values to copy
     * @return array with one element per list entry
     */
    static int[] toInts(List<Integer> list) {
        int[] unboxed = new int[list.size()];
        int pos = 0;
        for (Integer value : list) {
            unboxed[pos++] = value.intValue();
        }
        return unboxed;
    }

    /**
     * Builds a forward-backward lattice with random log scores over the given tokens.
     *
     * <p>The tag list is {@code "O"} followed, for each chunk type, by its begin tag
     * and its in tag, so in tags sit at the even indexes from 2 upward. In tags are
     * given negative-infinity score at the first token, and a transition into an in
     * tag is given negative-infinity score unless it comes from the begin tag or the
     * in tag of the same type.
     *
     * @param set chunk types from which the tags are derived
     * @param list tokens of the lattice; must not be empty
     * @param random source of the random scores
     * @return lattice whose forward, backward and normalizer values are computed
     *     from the random first-token scores and random transition scores
     * @throws IllegalArgumentException if {@code list} is empty
     */
    TagLattice<String> randomLattice(Set<String> set, List<String> list, Random random) {
        if (list.isEmpty()) {
            throw new IllegalArgumentException("randomLattice requires at least one token");
        }

        List<String> tags = new ArrayList<String>(1 + (2 * set.size()));
        tags.add("O");
        for (String type : set) {
            tags.add(BioTagChunkCodec.BEGIN_TAG_PREFIX + type);
            tags.add(BioTagChunkCodec.IN_TAG_PREFIX + type);
        }
        int numTokens = list.size();
        int numTags = tags.size();

        // Forward scores for the first token: random, except that no in tag may start.
        double[][] logForwards = new double[numTokens][numTags];
        logForwards[0] = randomArray(random, numTags);
        for (int inTag = 2; inTag < numTags; inTag += 2) {
            logForwards[0][inTag] = Double.NEGATIVE_INFINITY;
        }

        // Random transitions, indexed [token][previous tag][next tag].
        double[][][] logTransitions = new double[numTokens - 1][numTags][];
        for (int n = 0; n < logTransitions.length; n++) {
            for (int prev = 0; prev < numTags; prev++) {
                logTransitions[n][prev] = randomArray(random, numTags);
            }
        }
        // An in tag (index inTag) may only follow its begin tag (inTag - 1) or itself.
        for (int n = 0; n < logTransitions.length; n++) {
            for (int inTag = 2; inTag < numTags; inTag += 2) {
                for (int prev = 0; prev < numTags; prev++) {
                    if (prev != inTag - 1 && prev != inTag) {
                        logTransitions[n][prev][inTag] = Double.NEGATIVE_INFINITY;
                    }
                }
            }
        }

        // Forward pass: each cell is the log-sum over previous tags.
        double[] scratch = new double[numTags];
        for (int n = 1; n < numTokens; n++) {
            for (int next = 0; next < numTags; next++) {
                for (int prev = 0; prev < numTags; prev++) {
                    scratch[prev] = logForwards[n - 1][prev] + logTransitions[n - 1][prev][next];
                }
                logForwards[n][next] = Math.logSumOfExponentials(scratch);
            }
        }
        double logZ = Math.logSumOfExponentials(logForwards[numTokens - 1]);

        // Backward pass: the last token's row stays at its initial 0.0 and earlier
        // rows are filled from right to left.
        double[][] logBackwards = new double[numTokens][numTags];
        for (int n = numTokens - 2; n >= 0; n--) {
            for (int tag = 0; tag < numTags; tag++) {
                for (int next = 0; next < numTags; next++) {
                    scratch[next] = logBackwards[n + 1][next] + logTransitions[n][tag][next];
                }
                logBackwards[n][tag] = Math.logSumOfExponentials(scratch);
            }
        }
        return new ForwardBackwardTagLattice<String>(list, tags, logForwards, logBackwards, logTransitions, logZ);
    }

    /**
     * Creates an array of random values, each computed as {@code -10.0} times the
     * next double drawn from {@code random}.
     *
     * @param random source of the draws; one draw is consumed per element, in index order
     * @param i length of the array to create
     * @return the newly filled array
     */
    static double[] randomArray(Random random, int i) {
        double[] values = new double[i];
        int idx = 0;
        while (idx < values.length) {
            values[idx] = (-10.0d) * random.nextDouble();
            idx++;
        }
        return values;
    }

    /**
     * Compares the codec's n-best chunks for a lattice, in order, against the
     * brute-force result, and checks that the codec's iterator is exhausted afterwards.
     *
     * @param tagLattice lattice to extract chunks from
     * @param iArr token start offsets
     * @param iArr2 token end offsets
     * @param i maximum number of results to request and compare
     * @param str underlying text
     * @param tagChunkCodec codec under test
     */
    void assertNBestChunks(TagLattice tagLattice, int[] iArr, int[] iArr2, int i, String str, TagChunkCodec tagChunkCodec) {
        List<Chunk> expectedChunks = bruteForce(tagLattice, iArr, iArr2, str, tagChunkCodec);
        Iterator<Chunk> actualChunks = tagChunkCodec.nBestChunks(tagLattice, iArr, iArr2, i);
        int rank = 0;
        while (rank < i && actualChunks.hasNext()) {
            assertEqualScoredChunks(expectedChunks.get(rank), actualChunks.next());
            rank++;
        }
        Assert.assertFalse(actualChunks.hasNext());
    }

    /**
     * Asserts that two chunks have the same start, end and type, and that their
     * scores differ by at most 0.1.
     *
     * @param chunk expected chunk
     * @param chunk2 actual chunk
     */
    void assertEqualScoredChunks(Chunk chunk, Chunk chunk2) {
        // Span first, then label, then score.
        Assert.assertEquals(chunk.start(), chunk2.start());
        Assert.assertEquals(chunk.end(), chunk2.end());

        Assert.assertEquals(chunk.type(), chunk2.type());

        final double scoreTolerance = 0.1d;
        Assert.assertEquals(chunk.score(), chunk2.score(), scoreTolerance);
    }

    /**
     * Computes reference chunk scores by enumerating every tag sequence of the lattice.
     *
     * <p>Each sequence accepted by {@code tagChunkCodec.legalTags} is scored as the
     * first token's forward score minus the lattice normalizer, plus every transition
     * score along the sequence, plus the last token's backward score. Every chunk in
     * the decoded chunking collects that score, and a chunk's final score is the
     * log-sum of all scores it collected.
     *
     * @param tagLattice lattice to enumerate
     * @param iArr token start offsets
     * @param iArr2 token end offsets
     * @param str underlying text
     * @param tagChunkCodec codec used to check legality and decode taggings
     * @return scored chunks sorted with {@code ScoredObject.reverseComparator()};
     *     empty if the lattice has no tokens
     */
    List<Chunk> bruteForce(TagLattice<String> tagLattice, int[] iArr, int[] iArr2, String str, TagChunkCodec tagChunkCodec) {
        if (tagLattice.numTokens() == 0) {
            return new ArrayList<Chunk>(0);
        }
        SymbolTable tagSymbolTable = tagLattice.tagSymbolTable();
        int lastToken = tagLattice.numTokens() - 1;

        // Every possible assignment of a tag to each token.
        List<String[]> allTaggings = new ArrayList<String[]>();
        add(allTaggings, 0, tagLattice, new String[tagLattice.numTokens()]);

        Map<Chunk, List<Double>> chunkToLogProbs = new HashMap<Chunk, List<Double>>();
        for (String[] tags : allTaggings) {
            StringTagging tagging = new StringTagging(tagLattice.tokenList(), Arrays.asList(tags), str, iArr, iArr2);
            if (!tagChunkCodec.legalTags(tags)) {
                continue;
            }
            Chunking chunking = tagChunkCodec.toChunking(tagging);

            double logProb = tagLattice.logForward(0, tagSymbolTable.symbolToID(tags[0])) - tagLattice.logZ();
            for (int n = 1; n < tags.length; n++) {
                logProb += tagLattice.logTransition(
                        n - 1,
                        tagSymbolTable.symbolToID(tags[n - 1]),
                        tagSymbolTable.symbolToID(tags[n]));
            }
            logProb += tagLattice.logBackward(lastToken, tagSymbolTable.symbolToID(tags[lastToken]));

            for (Chunk chunk : chunking.chunkSet()) {
                List<Double> logProbs = chunkToLogProbs.get(chunk);
                if (logProbs == null) {
                    logProbs = new ArrayList<Double>();
                    chunkToLogProbs.put(chunk, logProbs);
                }
                logProbs.add(Double.valueOf(logProb));
            }
        }

        List<Chunk> scoredChunks = new ArrayList<Chunk>();
        for (Map.Entry<Chunk, List<Double>> entry : chunkToLogProbs.entrySet()) {
            Chunk chunk = entry.getKey();
            List<Double> logProbs = entry.getValue();
            double[] unboxed = new double[logProbs.size()];
            for (int k = 0; k < unboxed.length; k++) {
                unboxed[k] = logProbs.get(k).doubleValue();
            }
            scoredChunks.add(ChunkFactory.createChunk(
                    chunk.start(), chunk.end(), chunk.type(), Math.logSumOfExponentials(unboxed)));
        }
        Collections.sort(scoredChunks, ScoredObject.reverseComparator());
        return scoredChunks;
    }

    /**
     * Recursively enumerates every tag sequence of the lattice, appending a copy of
     * each completed sequence to {@code list}.
     *
     * @param list receives one array per complete tag sequence
     * @param i index of the token position to fill next
     * @param tagLattice lattice supplying the token count and the tags
     * @param strArr working buffer holding the tags chosen so far; overwritten in place
     */
    void add(List<String[]> list, int i, TagLattice<String> tagLattice, String[] strArr) {
        if (i != tagLattice.numTokens()) {
            int tagId = 0;
            while (tagId < tagLattice.numTags()) {
                strArr[i] = tagLattice.tag(tagId);
                add(list, i + 1, tagLattice, strArr);
                tagId++;
            }
        } else {
            // The buffer is reused across branches, so a snapshot is stored.
            list.add(strArr.clone());
        }
    }

    /**
     * Asserts that the iterator yields exactly the given chunks, in order, and
     * nothing more. Chunks are compared by start, end, type and score (within 0.1).
     *
     * @param it iterator under test
     * @param chunkArr expected chunks in the expected order
     */
    void assertIterator(Iterator<Chunk> it, Chunk... chunkArr) {
        for (int k = 0; k < chunkArr.length; k++) {
            Assert.assertTrue(it.hasNext());
            assertEqualScoredChunks(chunkArr[k], it.next());
        }
        Assert.assertFalse(it.hasNext());
    }

    /**
     * Checks which tag sequences {@code legalTagSubSequence} accepts and rejects.
     */
    @Test
    public void testLegalTagSubSequence() {
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, false);

        // Single tags.
        Assert.assertTrue(codec.legalTagSubSequence("O"));
        Assert.assertTrue(codec.legalTagSubSequence("B_PER"));
        Assert.assertTrue(codec.legalTagSubSequence("I_PER"));
        Assert.assertFalse(codec.legalTagSubSequence("F"));
        Assert.assertFalse(codec.legalTagSubSequence("M_PER"));

        // Accepted multi-tag sequences.
        Assert.assertTrue(codec.legalTagSubSequence("O", "B_PER"));
        Assert.assertTrue(codec.legalTagSubSequence("I_PER", "O"));
        Assert.assertTrue(codec.legalTagSubSequence("B_PER", "B_PER"));
        Assert.assertTrue(codec.legalTagSubSequence("B_PER", "I_PER"));
        Assert.assertTrue(codec.legalTagSubSequence("B_PER", "I_PER", "I_PER"));
        Assert.assertTrue(codec.legalTagSubSequence("B_PER", "I_PER", "I_PER", "O"));
        Assert.assertTrue(codec.legalTagSubSequence("O", "B_PER", "I_PER", "I_PER", "O"));

        // Rejected multi-tag sequences.
        Assert.assertFalse(codec.legalTagSubSequence("O", "I_PER"));
        Assert.assertFalse(codec.legalTagSubSequence("B_LOC", "I_PER"));
    }

    /**
     * Checks which complete tag sequences {@code legalTags} accepts and rejects.
     */
    @Test
    public void testLegalTags() {
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, false);

        // Single tags.
        Assert.assertTrue(codec.legalTags("O"));
        Assert.assertTrue(codec.legalTags("B_PER"));
        Assert.assertFalse(codec.legalTags("F"));
        Assert.assertFalse(codec.legalTags("M_PER"));

        // Accepted sequences.
        Assert.assertTrue(codec.legalTags("O", "B_PER"));
        Assert.assertTrue(codec.legalTags("B_PER", "B_PER"));
        Assert.assertTrue(codec.legalTags("B_PER", "I_PER"));
        Assert.assertTrue(codec.legalTags("B_PER", "I_PER", "I_PER"));
        Assert.assertTrue(codec.legalTags("B_PER", "I_PER", "I_PER", "O"));
        Assert.assertTrue(codec.legalTags("O", "B_PER", "I_PER", "I_PER", "O"));

        // Rejected sequences.
        Assert.assertFalse(codec.legalTags("I_PER", "O"));
        Assert.assertFalse(codec.legalTags("I_PER"));
        Assert.assertFalse(codec.legalTags("O", "I_PER"));
        Assert.assertFalse(codec.legalTags("B_LOC", "I_PER"));
    }

    /**
     * Checks the tag set the codec derives from the chunk types {@code PER} and
     * {@code LOC}.
     */
    @Test
    public void testBioCodecTagSet() {
        Set<String> expectedTags = new HashSet<String>(Arrays.asList("O", "B_PER", "I_PER", "B_LOC", "I_LOC"));
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, false);
        Set<String> chunkTypes = new HashSet<String>(Arrays.asList("PER", "LOC"));
        Assert.assertEquals(expectedTags, codec.tagSet(chunkTypes));
    }

    /**
     * Exercises encodability checks on a series of chunkings: some expected to
     * round-trip through the codec and some expected to be rejected.
     */
    @Test
    public void testEncodable() throws IOException, ClassNotFoundException {
        TagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, true);

        // Four chunks, added out of text order.
        ChunkingImpl multiChunk = new ChunkingImpl("John Jones Mary and Mr. J. J. Jones ran to Washington.");
        Chunk per0To10 = ChunkFactory.createChunk(0, 10, "PER");
        Chunk per11To15 = ChunkFactory.createChunk(11, 15, "PER");
        Chunk per24To35 = ChunkFactory.createChunk(24, 35, "PER");
        Chunk loc43To53 = ChunkFactory.createChunk(43, 53, "LOC");
        multiChunk.add(per11To15);
        multiChunk.add(loc43To53);
        multiChunk.add(per24To35);
        multiChunk.add(per0To10);
        assertEncodable(codec, multiChunk);

        // Two chunks covering overlapping character ranges.
        ChunkingImpl overlapping = new ChunkingImpl("John ran");
        Chunk per0To4 = ChunkFactory.createChunk(0, 4, "PER");
        Chunk loc0To8 = ChunkFactory.createChunk(0, 8, "LOC");
        overlapping.add(per0To4);
        overlapping.add(loc0To8);
        assertNotEncodable(codec, overlapping);

        // Chunk covering characters 0-5.
        ChunkingImpl endsAtFive = new ChunkingImpl("John ran");
        endsAtFive.add(ChunkFactory.createChunk(0, 5, "PER"));
        assertNotEncodable(codec, endsAtFive);

        // Chunk covering characters 1-4; the check is issued twice on the same chunking.
        ChunkingImpl startsAtOne = new ChunkingImpl("John ran");
        startsAtOne.add(ChunkFactory.createChunk(1, 4, "PER"));
        assertNotEncodable(codec, startsAtOne);
        assertNotEncodable(codec, startsAtOne);

        // Zero-length chunk.
        ChunkingImpl zeroLength = new ChunkingImpl("John ran");
        zeroLength.add(ChunkFactory.createChunk(5, 5, "LOC"));
        assertNotEncodable(codec, zeroLength);

        // Checked once with no chunks, then with a single chunk over the whole text.
        ChunkingImpl wholeText = new ChunkingImpl("John ran");
        Assert.assertTrue(codec.isEncodable(wholeText));
        wholeText.add(ChunkFactory.createChunk(0, 8, "LOC"));
        assertEncodable(codec, wholeText);

        // Two separate chunks inside a longer sentence.
        ChunkingImpl sentence = new ChunkingImpl("Mr. John Jones ran to Washington.");
        Chunk per4To14 = ChunkFactory.createChunk(4, 14, "PER");
        Chunk loc22To32 = ChunkFactory.createChunk(22, 32, "LOC");
        sentence.add(per4To14);
        sentence.add(loc22To32);
        assertEncodable(codec, sentence);
    }

    /**
     * Checks one tagging that the codec is expected to decode and one that it is
     * expected to reject.
     */
    @Test
    public void testDecodable() throws IOException, ClassNotFoundException {
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, true);

        List<String> goodTokens = Arrays.asList("John", "ran", "to", "Washington", "DC");
        List<String> goodTags = Arrays.asList("B_PER", "O", "O", "B_LOC", "I_LOC");
        StringTagging decodable = new StringTagging(
                goodTokens,
                goodTags,
                "John ran to Washington DC",
                new int[] {0, 5, 9, 12, 23},
                new int[] {4, 8, 11, 22, 25});
        assertDecodable(codec, decodable);

        // Here "Johnny" is supplied as the two tokens "John" and "ny".
        List<String> badTokens = Arrays.asList("John", "ny", "ran", ".");
        List<String> badTags = Arrays.asList("B_PER", "I_PER", "O", "O");
        StringTagging notDecodable = new StringTagging(
                badTokens,
                badTags,
                "Johnny ran.",
                new int[] {0, 4, 7, 10},
                new int[] {4, 6, 10, 11});
        assertNotDecodable(codec, notDecodable);
    }

    /**
     * Runs the encodability round-trip check with the given codec and then again
     * with a copy of the codec obtained through serialization.
     *
     * @param tagChunkCodec codec under test; cast to {@code Serializable} for the copy
     * @param chunking chunking expected to be encodable
     */
    void assertEncodable(TagChunkCodec tagChunkCodec, Chunking chunking) throws IOException, ClassNotFoundException {
        assertEncodable2(tagChunkCodec, chunking);
        Serializable serializableCodec = (Serializable) tagChunkCodec;
        Object roundTripped = AbstractExternalizable.serializeDeserialize(serializableCodec);
        assertEncodable2((TagChunkCodec) roundTripped, chunking);
    }

    /**
     * Asserts that the chunking is encodable, that its tagging is decodable, and
     * that converting chunking to tagging to chunking to tagging reproduces equal values.
     *
     * @param tagChunkCodec codec under test
     * @param chunking chunking expected to be encodable
     */
    void assertEncodable2(TagChunkCodec tagChunkCodec, Chunking chunking) {
        Assert.assertTrue(tagChunkCodec.isEncodable(chunking));

        StringTagging encoded = tagChunkCodec.toStringTagging(chunking);
        Assert.assertTrue(tagChunkCodec.isDecodable(encoded));

        Chunking decoded = tagChunkCodec.toChunking(encoded);
        Assert.assertEquals(chunking, decoded);

        StringTagging reEncoded = tagChunkCodec.toStringTagging(decoded);
        Assert.assertEquals(encoded, reEncoded);
    }

    /**
     * Asserts that the codec reports the chunking as not encodable and that
     * converting it to a tagging throws {@code IllegalArgumentException}.
     *
     * @param tagChunkCodec codec under test
     * @param chunking chunking expected to be rejected
     */
    void assertNotEncodable(TagChunkCodec tagChunkCodec, Chunking chunking) {
        Assert.assertFalse(tagChunkCodec.isEncodable(chunking));
        boolean rejected = false;
        try {
            tagChunkCodec.toTagging(chunking);
        } catch (IllegalArgumentException expected) {
            rejected = true;
            Asserts.succeed();
        }
        if (!rejected) {
            Assert.fail();
        }
    }

    /**
     * Runs the decodability round-trip check with the given codec and then again
     * with a copy of the codec obtained through serialization.
     *
     * @param tagChunkCodec codec under test; cast to {@code Serializable} for the copy
     * @param stringTagging tagging expected to be decodable
     */
    void assertDecodable(TagChunkCodec tagChunkCodec, StringTagging stringTagging) throws IOException, ClassNotFoundException {
        assertDecodable2(tagChunkCodec, stringTagging);
        Serializable serializableCodec = (Serializable) tagChunkCodec;
        Object roundTripped = AbstractExternalizable.serializeDeserialize(serializableCodec);
        assertDecodable2((TagChunkCodec) roundTripped, stringTagging);
    }

    /**
     * Asserts that the tagging is decodable, that its chunking is encodable, and
     * that converting tagging to chunking to tagging to chunking reproduces equal values.
     *
     * @param tagChunkCodec codec under test
     * @param stringTagging tagging expected to be decodable
     */
    void assertDecodable2(TagChunkCodec tagChunkCodec, StringTagging stringTagging) {
        Assert.assertTrue(tagChunkCodec.isDecodable(stringTagging));

        Chunking decoded = tagChunkCodec.toChunking(stringTagging);
        Assert.assertTrue(tagChunkCodec.isEncodable(decoded));

        StringTagging reEncoded = tagChunkCodec.toStringTagging(decoded);
        Assert.assertEquals(stringTagging, reEncoded);

        Chunking reDecoded = tagChunkCodec.toChunking(reEncoded);
        Assert.assertEquals(decoded, reDecoded);
    }

    /**
     * Asserts that the codec reports the tagging as not decodable and that
     * converting it to a chunking throws {@code IllegalArgumentException}.
     *
     * @param tagChunkCodec codec under test
     * @param stringTagging tagging expected to be rejected
     */
    void assertNotDecodable(TagChunkCodec tagChunkCodec, StringTagging stringTagging) {
        Assert.assertFalse(tagChunkCodec.isDecodable(stringTagging));
        boolean rejected = false;
        try {
            tagChunkCodec.toChunking(stringTagging);
        } catch (IllegalArgumentException expected) {
            rejected = true;
            Asserts.succeed();
        }
        if (!rejected) {
            Assert.fail();
        }
    }
}
