package com.aliasi.test.unit.dict;

import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.ChunkFactory;
import com.aliasi.chunk.Chunking;
import com.aliasi.chunk.ChunkingImpl;
import com.aliasi.dict.DictionaryEntry;
import com.aliasi.dict.ExactDictionaryChunker;
import com.aliasi.dict.MapDictionary;
import com.aliasi.dict.TrieDictionary;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.LowerCaseTokenizerFactory;
import com.aliasi.tokenizer.RegExTokenizerFactory;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.xml.XHtmlWriter;
import java.io.IOException;
import java.util.Set;
import junit.framework.Assert;
import org.junit.Test;

/* loaded from: input_file:com/aliasi/test/unit/dict/ExactDictionaryChunkerTest.class */
/**
 * Unit tests for {@link ExactDictionaryChunker}.
 *
 * <p>Every chunker under test is built with
 * {@code new ExactDictionaryChunker(dictionary, tokenizerFactory, flag3, flag4)}.
 * Judging by the expectations in {@link #testOverlapsCase()} and
 * {@link #testCaseSensitivity()}, the third flag selects "return all matches"
 * (as opposed to a single non-overlapping match) and the fourth flag selects
 * case-sensitive matching.
 */
public class ExactDictionaryChunkerTest {

    /** Shared general-purpose tokenizer factory used by most tests. */
    static final TokenizerFactory TOKENIZER_FACTORY = IndoEuropeanTokenizerFactory.INSTANCE;

    /** Token pattern: a run of ASCII letters or a run of digits. */
    static final String regex = "[a-zA-Z]+|[0-9]+";

    /** Tokenizer factory that splits text according to {@link #regex}. */
    static final TokenizerFactory REGEX_TOKENIZER_FACTORY = new RegExTokenizerFactory(regex);

    /**
     * A lower-casing tokenizer wrapped around a regex tokenizer must still let
     * the dictionary phrase "p-53" be found inside "mP53wt".
     */
    @Test
    public void testComposedFactories() {
        TokenizerFactory lowerCasedRegexFactory =
            new LowerCaseTokenizerFactory(new RegExTokenizerFactory("([a-z]+)|([A-Z]+)|([0-9]+)"));
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("p-53", "entry1"));
        ExactDictionaryChunker chunker =
            new ExactDictionaryChunker(dictionary, lowerCasedRegexFactory, true, true);

        Set<Chunk> chunks = chunker.chunk("bar mP53wt.").chunkSet();

        Assert.assertEquals(1, chunks.size());
        Assert.assertEquals("entry1", chunks.iterator().next().type());
    }

    /** The regex tokenizer factory exists and tokenizing a sentence yields a non-null result. */
    @Test
    public void testNulls() {
        String text = "P53 should match both as should p53.";
        Assert.assertNotNull(REGEX_TOKENIZER_FACTORY);
        Assert.assertNotNull(
            REGEX_TOKENIZER_FACTORY.tokenizer(text.toCharArray(), 0, text.length()).tokenize());
    }

    /**
     * With the fourth constructor flag off, "P53" and "p53" are interchangeable:
     * each occurrence in the text is reported once per dictionary category.
     */
    @Test
    public void testCaseSensitivity2() {
        TrieDictionary<String> dictionary = new TrieDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("P53", "human"));
        dictionary.addEntry(new DictionaryEntry<String>("p53", "mouse"));
        ExactDictionaryChunker chunker =
            new ExactDictionaryChunker(dictionary, REGEX_TOKENIZER_FACTORY, true, false);

        assertChunking(chunker, "P53 should match both as should p53.",
            ChunkFactory.createChunk(0, 3, "mouse", 1.0),
            ChunkFactory.createChunk(0, 3, "human", 1.0),
            ChunkFactory.createChunk(32, 35, "mouse", 1.0),
            ChunkFactory.createChunk(32, 35, "human", 1.0));
    }

    /**
     * Under the letters-or-digits tokenizer, "p-53" and "p53" are expected to
     * match each other, so both spellings in the text carry both categories.
     */
    @Test
    public void testTokenSensitivity() {
        TrieDictionary<String> dictionary = new TrieDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("p-53", "human"));
        dictionary.addEntry(new DictionaryEntry<String>("p53", "mouse"));
        ExactDictionaryChunker chunker =
            new ExactDictionaryChunker(dictionary, REGEX_TOKENIZER_FACTORY, true, true);

        assertChunking(chunker, "p53 should match both as should p-53.",
            ChunkFactory.createChunk(0, 3, "mouse", 1.0),
            ChunkFactory.createChunk(0, 3, "human", 1.0),
            ChunkFactory.createChunk(32, 36, "mouse", 1.0),
            ChunkFactory.createChunk(32, 36, "human", 1.0));
    }

    /** Chunking with an empty dictionary must complete normally and report no chunks. */
    @Test
    public void testEmptyDictionary() {
        ExactDictionaryChunker chunker =
            new ExactDictionaryChunker(new MapDictionary<String>(), TOKENIZER_FACTORY, true, false);

        Chunking chunking = chunker.chunk("John ran");

        Assert.assertTrue("an empty dictionary must not produce chunks",
                          chunking.chunkSet().isEmpty());
    }

    /**
     * "50 Cent" is found at both positions when the fourth flag is off, and
     * only at the exactly-cased position when it is on.
     */
    @Test
    public void testCaseSensitivity() {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("50 Cent", "PERSON", 1.0));
        dictionary.addEntry(new DictionaryEntry<String>("xyz120 DVD Player", "DB_ID_1232", 1.0));
        String text = "50 Cent is worth more than 50 cent.";
        Chunk exactCaseMatch = ChunkFactory.createChunk(0, 7, "PERSON", 1.0);
        Chunk lowerCaseMatch = ChunkFactory.createChunk(27, 34, "PERSON", 1.0);

        assertChunking(new ExactDictionaryChunker(dictionary, TOKENIZER_FACTORY, true, false),
                       text, lowerCaseMatch, exactCaseMatch);
        assertChunking(new ExactDictionaryChunker(dictionary, TOKENIZER_FACTORY, true, true),
                       text, exactCaseMatch);
    }

    /**
     * Exercises all four combinations of the two constructor flags against
     * overlapping dictionary phrases, once on all-lower-case text and once on
     * mixed-case text.
     */
    @Test
    public void testOverlapsCase() {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("john smith", "PER", 7.0));
        dictionary.addEntry(new DictionaryEntry<String>("smith and barney", "ORG", 3.0));
        dictionary.addEntry(new DictionaryEntry<String>("smith", "LOC", 2.0));
        dictionary.addEntry(new DictionaryEntry<String>("smith", "PER", 5.0));

        String lowerText = "john smith and barney";
        String mixedText = "JohN smith AND Barney";

        Chunk johnSmithPer = ChunkFactory.createChunk(0, 10, "PER", 7.0);
        Chunk smithPer = ChunkFactory.createChunk(5, 10, "PER", 5.0);
        Chunk smithLoc = ChunkFactory.createChunk(5, 10, "LOC", 2.0);
        Chunk smithAndBarneyOrg = ChunkFactory.createChunk(5, 21, "ORG", 3.0);

        // Third flag on, fourth flag on: only the exactly-cased "smith" survives in mixed text.
        ExactDictionaryChunker allMatchesCased =
            new ExactDictionaryChunker(dictionary, TOKENIZER_FACTORY, true, true);
        assertChunking(allMatchesCased, lowerText,
                       johnSmithPer, smithPer, smithLoc, smithAndBarneyOrg);
        assertChunking(allMatchesCased, mixedText, smithPer, smithLoc);

        // Third flag off, fourth flag on: a single chunk is expected for each text.
        ExactDictionaryChunker singleMatchCased =
            new ExactDictionaryChunker(dictionary, TOKENIZER_FACTORY, false, true);
        assertChunking(singleMatchCased, lowerText, johnSmithPer);
        assertChunking(singleMatchCased, mixedText, smithPer);

        // Third flag on, fourth flag off: casing of the text makes no difference.
        ExactDictionaryChunker allMatchesUncased =
            new ExactDictionaryChunker(dictionary, TOKENIZER_FACTORY, true, false);
        assertChunking(allMatchesUncased, lowerText,
                       johnSmithPer, smithPer, smithLoc, smithAndBarneyOrg);
        assertChunking(allMatchesUncased, mixedText,
                       johnSmithPer, smithPer, smithLoc, smithAndBarneyOrg);

        // Both flags off: one chunk, regardless of casing.
        ExactDictionaryChunker singleMatchUncased =
            new ExactDictionaryChunker(dictionary, TOKENIZER_FACTORY, false, false);
        assertChunking(singleMatchUncased, lowerText, johnSmithPer);
        assertChunking(singleMatchUncased, mixedText, johnSmithPer);
    }

    /** A two-token phrase is found only when both tokens appear adjacent and in order. */
    @Test
    public void testBoundaries() {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("john smith", "PER", 7.0));
        ExactDictionaryChunker chunker =
            new ExactDictionaryChunker(dictionary, TOKENIZER_FACTORY, true, true);

        assertJohnSmithBoundaries(chunker);
    }

    /**
     * Chunks {@code str} with the given chunker and asserts that the result
     * equals a {@link ChunkingImpl} over the same text holding exactly the
     * supplied chunks.
     *
     * @param exactDictionaryChunker chunker under test
     * @param str                    text to chunk
     * @param chunkArr               expected chunks; pass none to expect an empty chunking
     */
    void assertChunking(ExactDictionaryChunker exactDictionaryChunker, String str, Chunk... chunkArr) {
        ChunkingImpl expected = new ChunkingImpl(str);
        for (Chunk expectedChunk : chunkArr) {
            expected.add(expectedChunk);
        }
        Chunking actual = exactDictionaryChunker.chunk(str);
        // Several inputs are checked per test, so name the text in the failure message.
        Assert.assertEquals("chunking of \"" + str + "\"", expected, actual);
    }

    /**
     * Checks the phrase-boundary expectations for a chunker whose dictionary
     * contains the single entry "john smith" / PER / 7.0.
     */
    private void assertJohnSmithBoundaries(ExactDictionaryChunker chunker) {
        assertChunking(chunker, "john");
        assertChunking(chunker, "smith john");
        assertChunking(chunker, "john smith", ChunkFactory.createChunk(0, 10, "PER", 7.0));
        assertChunking(chunker, "john smith smith", ChunkFactory.createChunk(0, 10, "PER", 7.0));
        assertChunking(chunker, "john smith frank", ChunkFactory.createChunk(0, 10, "PER", 7.0));
        assertChunking(chunker, "then john smith", ChunkFactory.createChunk(5, 15, "PER", 7.0));
        assertChunking(chunker, "john john smith", ChunkFactory.createChunk(5, 15, "PER", 7.0));
        assertChunking(chunker, "john john smith smith", ChunkFactory.createChunk(5, 15, "PER", 7.0));
    }

    /** Builds the person/organisation dictionary shared by the suffix and serialization tests. */
    private static MapDictionary<String> nameDictionary() {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("john smith", "PER", 7.0));
        dictionary.addEntry(new DictionaryEntry<String>("Barry J. Jones III", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("Barry", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("Jones", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("J. Barry Johnson", "PER", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("I.B.M.", "ORG", 5.0));
        dictionary.addEntry(new DictionaryEntry<String>("Dean Witter", "PER", 3.0));
        dictionary.addEntry(new DictionaryEntry<String>("Dean Witter", "ORG", 7.0));
        return dictionary;
    }

    /**
     * When a longer phrase starts to match but then fails, a shorter phrase
     * embedded in the text must still be reported.
     */
    @Test
    public void testSuffixes() {
        MapDictionary<String> dictionary = nameDictionary();
        dictionary.addEntry(new DictionaryEntry<String>("a b c", "PER", 7.0));
        // Single-token phrase "b"; the expected chunk (2,3) in "a b d" below is that token.
        dictionary.addEntry(new DictionaryEntry<String>("b", "PER", 5.2));
        ExactDictionaryChunker chunker =
            new ExactDictionaryChunker(dictionary, TOKENIZER_FACTORY, true, true);

        assertChunking(chunker, "a b d", ChunkFactory.createChunk(2, 3, "PER", 5.2));
        assertChunking(chunker, "J. Barry Warwick", ChunkFactory.createChunk(3, 8, "PER", 5.0));
    }

    /**
     * A chunker with a one-entry dictionary keeps its flags and its chunking
     * behaviour after a serialize/deserialize round trip.
     */
    @Test
    public void testSerialization() throws IOException, ClassNotFoundException {
        MapDictionary<String> dictionary = new MapDictionary<String>();
        dictionary.addEntry(new DictionaryEntry<String>("john smith", "PER", 7.0));
        ExactDictionaryChunker original =
            new ExactDictionaryChunker(dictionary, TOKENIZER_FACTORY, true, true);
        Assert.assertTrue(original.caseSensitive());
        Assert.assertTrue(original.returnAllMatches());
        assertChunking(original, "john");

        ExactDictionaryChunker restored =
            (ExactDictionaryChunker) AbstractExternalizable.serializeDeserialize(original);

        Assert.assertTrue(restored.caseSensitive());
        Assert.assertTrue(restored.returnAllMatches());
        assertJohnSmithBoundaries(restored);
    }

    /**
     * Same round trip as {@link #testSerialization()}, but with the larger
     * name dictionary so that overlapping and embedded phrases are covered.
     */
    @Test
    public void testSerialization2() throws IOException, ClassNotFoundException {
        ExactDictionaryChunker original =
            new ExactDictionaryChunker(nameDictionary(), TOKENIZER_FACTORY, true, true);
        Assert.assertTrue(original.caseSensitive());
        Assert.assertTrue(original.returnAllMatches());

        ExactDictionaryChunker restored =
            (ExactDictionaryChunker) AbstractExternalizable.serializeDeserialize(original);

        Assert.assertTrue(restored.caseSensitive());
        Assert.assertTrue(restored.returnAllMatches());
        assertChunking(restored, "john");
        assertChunking(restored, "smith john");
        assertChunking(restored, "J. Barry Warwick", ChunkFactory.createChunk(3, 8, "PER", 5.0));
        assertChunking(restored, "john smith smith", ChunkFactory.createChunk(0, 10, "PER", 7.0));
        assertChunking(restored, "john smith frank", ChunkFactory.createChunk(0, 10, "PER", 7.0));
        assertChunking(restored, "then john smith", ChunkFactory.createChunk(5, 15, "PER", 7.0));
        assertChunking(restored, "john john smith", ChunkFactory.createChunk(5, 15, "PER", 7.0));
        assertChunking(restored, "john john smith smith", ChunkFactory.createChunk(5, 15, "PER", 7.0));
    }
}
