package com.aliasi.test.unit.tokenizer;

import com.aliasi.test.unit.Asserts;
import com.aliasi.tokenizer.CharacterTokenizerFactory;
import com.aliasi.tokenizer.EnglishStopTokenizerFactory;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.LowerCaseTokenizerFactory;
import com.aliasi.tokenizer.NGramTokenizerFactory;
import com.aliasi.tokenizer.RegExTokenizerFactory;
import com.aliasi.tokenizer.Tokenization;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.tokenizer.WhitespaceNormTokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Strings;
import java.io.IOException;
import java.util.Arrays;
import junit.framework.Assert;
import org.junit.Test;

/* loaded from: input_file:com/aliasi/test/unit/tokenizer/TokenizationTest.class */
/**
 * Unit tests for {@link Tokenization}.
 *
 * <p>The tests cover the explicit five-argument constructor (one accepted
 * argument set and five rejected ones), agreement between a factory-built and
 * a hand-built instance, the accessor methods, inequality of tokenizations of
 * different texts, and agreement between the character-slice constructor and
 * the string constructor for several tokenizer factories.
 */
public class TokenizationTest {
    /** Text used by the explicit-constructor tests. */
    private static final String TWO_TOKEN_TEXT = "John ran";

    /** Text used by the factory-versus-explicit and accessor tests. */
    private static final String SENTENCE_TEXT = "John ran.\n";

    /** Characters placed before the text when exercising the character-slice constructor. */
    private static final String SLICE_PREFIX = "AB";

    /** Characters placed after the text when exercising the character-slice constructor. */
    private static final String SLICE_SUFFIX = "C";

    /**
     * Calls the five-argument constructor with the text {@code "John ran"} and
     * the tokens {@code "John"} and {@code "ran"}.
     *
     * @param numWhitespaces number of empty strings to pass as the whitespace list
     * @param starts token start offsets to pass through unchanged
     * @param ends token end offsets to pass through unchanged
     * @return the constructed tokenization
     */
    private static Tokenization twoTokenTokenization(int numWhitespaces, int[] starts, int[] ends) {
        String[] whitespaces = new String[numWhitespaces];
        Arrays.fill(whitespaces, Strings.EMPTY_STRING);
        return new Tokenization(
                TWO_TOKEN_TEXT,
                Arrays.asList("John", "ran"),
                Arrays.asList(whitespaces),
                starts,
                ends);
    }

    /** Two tokens, three whitespace strings and matching offset arrays construct successfully. */
    @Test
    public void testConstructorBase() {
        Tokenization built = twoTokenTokenization(3, new int[]{0, 5}, new int[]{4, 8});
        Assert.assertNotNull(built);
    }

    /** Expects rejection when only two whitespace strings accompany two tokens. */
    @Test(expected = IllegalArgumentException.class)
    public void testConstructorEx1() {
        Tokenization built = twoTokenTokenization(2, new int[]{0, 5}, new int[]{4, 8});
        Assert.assertNotNull(built);
    }

    /** Expects rejection when a start offset is negative. */
    @Test(expected = IllegalArgumentException.class)
    public void testConstructorEx2() {
        Tokenization built = twoTokenTokenization(3, new int[]{0, -5}, new int[]{4, 8});
        Assert.assertNotNull(built);
    }

    /** Expects rejection when an end offset (9) exceeds the 8-character text. */
    @Test(expected = IllegalArgumentException.class)
    public void testConstructorEx3() {
        Tokenization built = twoTokenTokenization(3, new int[]{0, 5}, new int[]{4, 9});
        Assert.assertNotNull(built);
    }

    /** Expects rejection when three start offsets are given for two tokens. */
    @Test(expected = IllegalArgumentException.class)
    public void testConstructorEx4() {
        Tokenization built = twoTokenTokenization(3, new int[]{0, 5, 6}, new int[]{4, 8});
        Assert.assertNotNull(built);
    }

    /** Expects rejection when a single start offset is given for two tokens. */
    @Test(expected = IllegalArgumentException.class)
    public void testConstructorEx5() {
        Tokenization built = twoTokenTokenization(3, new int[]{0}, new int[]{4, 8});
        Assert.assertNotNull(built);
    }

    /**
     * Compares, via {@code Asserts.assertFullEquals}, a tokenization produced by
     * the Indo-European tokenizer factory with one assembled from explicit
     * tokens, whitespaces and offsets.
     */
    @Test
    public void testDirect() {
        Tokenization fromFactory = new Tokenization(SENTENCE_TEXT, IndoEuropeanTokenizerFactory.INSTANCE);
        Tokenization byHand = new Tokenization(
                SENTENCE_TEXT,
                Arrays.asList("John", "ran", "."),
                Arrays.asList(Strings.EMPTY_STRING, Strings.SINGLE_SPACE_STRING, Strings.EMPTY_STRING, "\n"),
                new int[]{0, 5, 8},
                new int[]{4, 8, 9});
        Asserts.assertFullEquals(fromFactory, byHand);
    }

    /** Checks every accessor of a factory-built tokenization against the expected values. */
    @Test
    public void testGetters() {
        String[] expectedTokens = {"John", "ran", "."};
        String[] expectedWhitespaces = {
            Strings.EMPTY_STRING, Strings.SINGLE_SPACE_STRING, Strings.EMPTY_STRING, "\n"
        };
        int[] expectedStarts = {0, 5, 8};
        int[] expectedEnds = {4, 8, 9};

        Tokenization tok = new Tokenization(SENTENCE_TEXT, IndoEuropeanTokenizerFactory.INSTANCE);

        Assert.assertEquals(SENTENCE_TEXT, tok.text());
        Assert.assertEquals(expectedTokens.length, tok.numTokens());

        // Indexed accessors, one group at a time so the assertion order is stable.
        for (int i = 0; i < expectedTokens.length; i++) {
            Assert.assertEquals(expectedTokens[i], tok.token(i));
        }
        for (int i = 0; i < expectedWhitespaces.length; i++) {
            Assert.assertEquals(expectedWhitespaces[i], tok.whitespace(i));
        }
        for (int i = 0; i < expectedStarts.length; i++) {
            Assert.assertEquals(expectedStarts[i], tok.tokenStart(i));
        }
        for (int i = 0; i < expectedEnds.length; i++) {
            Assert.assertEquals(expectedEnds[i], tok.tokenEnd(i));
        }

        // List-valued and array-valued accessors.
        Assert.assertEquals(Arrays.asList(expectedTokens), tok.tokenList());
        Assert.assertEquals(Arrays.asList(expectedWhitespaces), tok.whitespaceList());
        org.junit.Assert.assertArrayEquals(expectedTokens, tok.tokens());
        org.junit.Assert.assertArrayEquals(expectedWhitespaces, tok.whitespaces());
    }

    /**
     * Tokenizations of different texts must not be equal, in either direction,
     * including when one side is a copy returned by
     * {@code AbstractExternalizable.serializeDeserialize}.
     */
    @Test
    public void testDifferent() throws IOException {
        Tokenization john = new Tokenization("John ran.", IndoEuropeanTokenizerFactory.INSTANCE);
        Tokenization mary = new Tokenization("Mary ran.", IndoEuropeanTokenizerFactory.INSTANCE);
        Assert.assertFalse(john.equals(mary));
        Assert.assertFalse(mary.equals(john));

        Tokenization johnCopy = (Tokenization) AbstractExternalizable.serializeDeserialize(john);
        Tokenization maryCopy = (Tokenization) AbstractExternalizable.serializeDeserialize(mary);
        Assert.assertFalse(john.equals(maryCopy));
        Assert.assertFalse(mary.equals(johnCopy));
        Assert.assertFalse(johnCopy.equals(mary));
        Assert.assertFalse(maryCopy.equals(john));
    }

    /** Runs the constructor-agreement check over a handful of sample texts. */
    @Test
    public void testToks() {
        String[] samples = {
            Strings.EMPTY_STRING,
            "John",
            "John ran.",
            "he does not run and i do   be\nwalk."
        };
        for (String sample : samples) {
            assertTokenizations(sample);
        }
    }

    /**
     * Runs the constructor-agreement check for {@code str} against a fixed set of
     * tokenizer factories.
     *
     * @param str text to tokenize
     */
    void assertTokenizations(String str) {
        TokenizerFactory[] factories = {
            IndoEuropeanTokenizerFactory.INSTANCE,
            CharacterTokenizerFactory.INSTANCE,
            new RegExTokenizerFactory("\\s+"),
            new NGramTokenizerFactory(2, 3),
            new LowerCaseTokenizerFactory(IndoEuropeanTokenizerFactory.INSTANCE),
            new EnglishStopTokenizerFactory(new LowerCaseTokenizerFactory(IndoEuropeanTokenizerFactory.INSTANCE)),
            new WhitespaceNormTokenizerFactory(IndoEuropeanTokenizerFactory.INSTANCE)
        };
        assertTokenizations(str, factories);
    }

    /**
     * For each factory, builds one tokenization from a slice of a character
     * array holding {@code "AB" + str + "C"} and one directly from {@code str},
     * then compares them with {@code Asserts.assertFullEquals}.
     *
     * @param str text to tokenize
     * @param tokenizerFactoryArr factories to check, in order
     */
    void assertTokenizations(String str, TokenizerFactory... tokenizerFactoryArr) {
        for (int i = 0; i < tokenizerFactoryArr.length; i++) {
            TokenizerFactory factory = tokenizerFactoryArr[i];
            // The slice starts after the prefix and spans exactly the text's length.
            char[] padded = (SLICE_PREFIX + str + SLICE_SUFFIX).toCharArray();
            Tokenization fromSlice = new Tokenization(padded, SLICE_PREFIX.length(), str.length(), factory);
            Tokenization fromString = new Tokenization(str, factory);
            Asserts.assertFullEquals(fromSlice, fromString);
        }
    }
}
