package utils;

import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.Chunking;
import com.aliasi.sentences.MedlineSentenceModel;
import com.aliasi.sentences.SentenceChunker;
import com.aliasi.sentences.SentenceModel;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.RegExTokenizerFactory;
import com.aliasi.tokenizer.TokenizerFactory;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import relations.TData;

/* loaded from: input_file:utils/SentenceSplitter.class */
/**
 * Static helpers for splitting text into sentences and word tokens using LingPipe.
 *
 * <p>Sentence splitting uses {@link #SENTENCE_CHUNKER}, which is built from the
 * Indo-European tokenizer and the Medline sentence model. Word splitting uses the
 * separate regex-based {@link #TOKENIZER}.
 */
public class SentenceSplitter {
    /** Tokenizer factory handed to the sentence chunker. */
    static final TokenizerFactory TOKENIZER_FACTORY = IndoEuropeanTokenizerFactory.INSTANCE;

    /**
     * Tokenizer factory used by {@link #wordSpliter(String)}. The pattern matches either a
     * (possessively quantified) run of slashes, plus signs, hyphens, apostrophes, digits and
     * letters, or else any single non-whitespace character.
     *
     * <p>Intentionally left non-final: other classes in this package may reassign it.
     */
    static TokenizerFactory TOKENIZER = new RegExTokenizerFactory("(\\/|\\+|-|'|\\d|\\p{L})++|\\S");

    /** Sentence boundary model used by {@link #SENTENCE_CHUNKER}. */
    static final SentenceModel SENTENCE_MODEL = new MedlineSentenceModel();

    /** Chunker that produces one chunk per detected sentence. */
    static final SentenceChunker SENTENCE_CHUNKER = new SentenceChunker(TOKENIZER_FACTORY, SENTENCE_MODEL);

    /* loaded from: input_file:utils/SentenceSplitter$BioSemSentence.class */
    /**
     * A sentence together with its character offsets. When created by
     * {@link SentenceSplitter#spliter(String)}, {@code begin} and {@code end} are the chunk's
     * start and end offsets and {@code text} is the substring between them.
     */
    public static class BioSemSentence {
        /** Sentence text. */
        public String text;
        /** Start offset of the sentence (chunk start when produced by {@code spliter}). */
        public int begin;
        /** End offset of the sentence (chunk end when produced by {@code spliter}). */
        public int end;
        /** Not assigned anywhere in this class; presumably filled in by other package code. */
        List<TData> prots;

        /**
         * Creates a sentence with the given text and offsets.
         *
         * @param str sentence text
         * @param i   begin offset
         * @param i2  end offset
         */
        public BioSemSentence(String str, int i, int i2) {
            this.text = str;
            this.begin = i;
            this.end = i2;
        }

        /** Creates an empty sentence; all fields keep their default values. */
        public BioSemSentence() {
        }
    }

    /**
     * Splits {@code str} into sentences using {@link #SENTENCE_CHUNKER}.
     *
     * <p>If the chunker finds no sentence, a diagnostic line is printed to standard output and
     * an empty list is returned. Sentences are added in the iteration order of the chunk set
     * returned by the chunker.
     *
     * @param str text to split; must not be {@code null}
     * @return a new mutable list with one {@link BioSemSentence} per sentence chunk
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static List<BioSemSentence> spliter(String str) {
        char[] chars = str.toCharArray();
        List<BioSemSentence> sentences = new ArrayList<BioSemSentence>();
        Chunking chunking = SENTENCE_CHUNKER.chunk(chars, 0, chars.length);
        Set<Chunk> sentenceChunks = chunking.chunkSet();
        if (sentenceChunks.isEmpty()) {
            System.out.println("No sentence found in input '" + str + "'.");
            return sentences;
        }
        // Offsets reported by the chunks index into the chunking's own character sequence.
        String chunkedText = chunking.charSequence().toString();
        for (Chunk sentenceChunk : sentenceChunks) {
            int start = sentenceChunk.start();
            int end = sentenceChunk.end();
            sentences.add(new BioSemSentence(chunkedText.substring(start, end), start, end));
        }
        return sentences;
    }

    /**
     * Tokenizes {@code str} with {@link #TOKENIZER}.
     *
     * @param str text to tokenize; must not be {@code null}
     * @return a two-element array: index 0 is the list passed as the first argument of
     *         LingPipe's {@code Tokenizer.tokenize} (the tokens) and index 1 is the list passed
     *         as the second argument (the whitespaces)
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static List<String>[] wordSpliter(String str) {
        // Java forbids creating a generic array directly, so a raw ArrayList[] is created and
        // converted. This is safe because both elements are ArrayList<String> instances
        // constructed right here; the runtime array type stays ArrayList[] as before.
        @SuppressWarnings({"unchecked", "rawtypes"})
        List<String>[] tokensAndWhitespaces =
                new ArrayList[] {new ArrayList<String>(), new ArrayList<String>()};
        char[] chars = str.toCharArray();
        TOKENIZER.tokenizer(chars, 0, chars.length)
                .tokenize(tokensAndWhitespaces[0], tokensAndWhitespaces[1]);
        return tokensAndWhitespaces;
    }

    /**
     * Demo entry point: tokenizes a fixed sample sentence and prints one token per line.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        List<String> tokens = wordSpliter("In Th17 cells that expressed PRO31 as well as PRO32 mRNA (Figure6C), PRO33 and PRO34 mRNA were undetectable (data not shown), whereas that of PRO35 was high (Figure6C) (Ivanov etal., 2007).")[0];
        for (String token : tokens) {
            System.out.println(token);
        }
    }
}
