package com.xebia.functional.tokenizer;

import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Ref;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.sequences.SequencesKt;
import kotlin.text.MatchResult;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

/* compiled from: GptBytePairEncoding.kt */
@Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��^\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0010\b\n��\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010!\n��\n\u0002\u0010 \n\u0002\b\f\n\u0002\u0010\u0012\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010\u000b\n\u0002\b\u0005\b��\u0018��2\u00020\u0001B\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003¢\u0006\u0002\u0010\u0004J3\u0010\u0010\u001a\u00020\b2\f\u0010\u0011\u001a\b\u0012\u0004\u0012\u00020\b0\u00122\f\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\b0\u00142\b\u0010\u0015\u001a\u0004\u0018\u00010\bH\u0002¢\u0006\u0002\u0010\u0016J#\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\b0\u00142\u0006\u0010\u0018\u001a\u00020\u0007H\u0002ø\u0001\u0001ø\u0001��¢\u0006\u0004\b\u0019\u0010\u001aJ\u0010\u0010\u001b\u001a\u00020\b2\u0006\u0010\u001c\u001a\u00020\nH\u0016J\u0010\u0010\u001d\u001a\u00020\b2\u0006\u0010\u001c\u001a\u00020\nH\u0016J\u0016\u0010\u001e\u001a\u00020\n2\f\u0010\u001f\u001a\b\u0012\u0004\u0012\u00020\b0\u0014H\u0016J\u0016\u0010 
\u001a\u00020!2\f\u0010\u001f\u001a\b\u0012\u0004\u0012\u00020\b0\u0014H\u0016J\u0010\u0010\"\u001a\u00020!2\u0006\u0010#\u001a\u00020\bH\u0002J\u0018\u0010$\u001a\b\u0012\u0004\u0012\u00020\b0\u00142\b\u0010\u001c\u001a\u0004\u0018\u00010\nH\u0016J\u001a\u0010$\u001a\u00020%2\b\u0010\u001c\u001a\u0004\u0018\u00010\n2\u0006\u0010\u0015\u001a\u00020\bH\u0016J!\u0010&\u001a\u00020%2\b\u0010\u001c\u001a\u0004\u0018\u00010\n2\b\u0010\u0015\u001a\u0004\u0018\u00010\bH\u0002¢\u0006\u0002\u0010'J\u0016\u0010(\u001a\b\u0012\u0004\u0012\u00020\b0\u00142\u0006\u0010\u001c\u001a\u00020\nH\u0016J\u0018\u0010(\u001a\u00020%2\u0006\u0010\u001c\u001a\u00020\n2\u0006\u0010\u0015\u001a\u00020\bH\u0016J!\u0010)\u001a\u00020%2\b\u0010\u001c\u001a\u0004\u0018\u00010\n2\b\u0010\u0015\u001a\u0004\u0018\u00010\bH\u0002¢\u0006\u0002\u0010'J=\u0010*\u001a\u0004\u0018\u00010\b2\u0006\u0010\u0018\u001a\u00020\u00072\f\u0010+\u001a\b\u0012\u0004\u0012\u00020,0\u00142\u0006\u0010-\u001a\u00020\b2\u0006\u0010.\u001a\u00020\bH\u0002ø\u0001\u0001ø\u0001��¢\u0006\u0004\b/\u00100J\u001f\u00101\u001a\u0002022\b\u00103\u001a\u0004\u0018\u00010\b2\u0006\u00104\u001a\u00020\bH\u0002¢\u0006\u0002\u00105J\u001f\u00106\u001a\u0002022\b\u00103\u001a\u0004\u0018\u00010\b2\u0006\u00104\u001a\u00020\bH\u0002¢\u0006\u0002\u00105R\u001d\u0010\u0005\u001a\u000e\u0012\u0004\u0012\u00020\u0007\u0012\u0004\u0012\u00020\b0\u0006X\u0082\u0004ø\u0001��¢\u0006\u0002\n��R\u0014\u0010\t\u001a\u00020\nX\u0096\u0004¢\u0006\b\n��\u001a\u0004\b\u000b\u0010\fR\u000e\u0010\r\u001a\u00020\u000eX\u0082\u0004¢\u0006\u0002\n��R\u001a\u0010\u000f\u001a\u000e\u0012\u0004\u0012\u00020\n\u0012\u0004\u0012\u00020\b0\u0006X\u0082\u0004¢\u0006\u0002\n��\u0082\u0002\u000b\n\u0002\b\u0019\n\u0005\b¡\u001e0\u0001¨\u00067"}, d2 = {"Lcom/xebia/functional/tokenizer/GptBytePairEncoding;", "Lcom/xebia/functional/tokenizer/Encoding;", "params", "Lcom/xebia/functional/tokenizer/GptBytePairEncodingParams;", 
"(Lcom/xebia/functional/tokenizer/GptBytePairEncodingParams;)V", "encoder", "Lcom/xebia/functional/tokenizer/TokenEncoder;", "Lcom/xebia/functional/tokenizer/ImmutableByteArray;", "", "name", "", "getName", "()Ljava/lang/String;", "pattern", "Lkotlin/text/Regex;", "specialTokensEncoder", "addTokens", "out", "", "tokensToAdd", "", "maxTokens", "(Ljava/util/List;Ljava/util/List;Ljava/lang/Integer;)I", "bytePairMerge", "piece", "bytePairMerge-ce7gLuk", "([B)Ljava/util/List;", "countTokens", "text", "countTokensOrdinary", "decode", "tokens", "decodeBytes", "", "decodeToken", "token", "encode", "Lcom/xebia/functional/tokenizer/EncodingResult;", "encodeInternal", "(Ljava/lang/String;Ljava/lang/Integer;)Lcom/xebia/functional/tokenizer/EncodingResult;", "encodeOrdinary", "encodeOrdinaryInternal", "getRank", "parts", "Lcom/xebia/functional/tokenizer/PieceIndexToRank;", "startIndex", "skip", "getRank-3mex4yY", "([BLjava/util/List;II)Ljava/lang/Integer;", "maxTokenCountNotReached", "", "maxTokenCount", "tokenCount", "(Ljava/lang/Integer;I)Z", "maxTokenCountReached", "xef-tokenizer"})
@SourceDebugExtension({"SMAP\nGptBytePairEncoding.kt\nKotlin\n*S Kotlin\n*F\n+ 1 GptBytePairEncoding.kt\ncom/xebia/functional/tokenizer/GptBytePairEncoding\n+ 2 _Sequences.kt\nkotlin/sequences/SequencesKt___SequencesKt\n+ 3 ImmutableByteArray.kt\ncom/xebia/functional/tokenizer/ImmutableByteArray\n+ 4 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,256:1\n1313#2,2:257\n9#3:259\n33#3:261\n1#4:260\n*S KotlinDebug\n*F\n+ 1 GptBytePairEncoding.kt\ncom/xebia/functional/tokenizer/GptBytePairEncoding\n*L\n58#1:257,2\n166#1:259\n251#1:261\n*E\n"})
/* loaded from: input_file:com/xebia/functional/tokenizer/GptBytePairEncoding.class */
public final class GptBytePairEncoding implements Encoding {

    /** Name of this encoding, as supplied by the constructor parameters; returned by {@link #getName()}. */
    @NotNull
    private final String name;

    /** Regex whose successive matches split the input text into pieces before they are encoded. */
    @NotNull
    private final Regex pattern;

    /** Two-way lookup between byte sequences and token ids for ordinary (non-special) tokens. */
    @NotNull
    private final TokenEncoder<ImmutableByteArray, Integer> encoder;

    /** Two-way lookup between special-token strings and their token ids. */
    @NotNull
    private final TokenEncoder<String, Integer> specialTokensEncoder;

    /**
     * Builds an encoding from the supplied parameters.
     *
     * @param gptBytePairEncodingParams source of the name, the splitting regex and the two
     *        token tables; must not be null
     */
    public GptBytePairEncoding(@NotNull GptBytePairEncodingParams gptBytePairEncodingParams) {
        Intrinsics.checkNotNullParameter(gptBytePairEncodingParams, "params");
        this.name = gptBytePairEncodingParams.getName();
        this.pattern = gptBytePairEncodingParams.getRegex();
        // GptBytePairEncoding$encoder$1 is a compiler-generated adapter bound to
        // ImmutableByteArray.Companion; presumably it converts the raw keys of the supplied
        // table into ImmutableByteArray keys -- TODO confirm against the Kotlin source.
        this.encoder = TokenEncoderKt.TokenEncoder(gptBytePairEncodingParams.getEncoder(), new GptBytePairEncoding$encoder$1(ImmutableByteArray.Companion));
        this.specialTokensEncoder = TokenEncoderKt.TokenEncoder(gptBytePairEncodingParams.getSpecialTokensEncoder());
    }

    /**
     * Returns the name this encoding was constructed with.
     *
     * @return the encoding name, never null
     */
    @Override // com.xebia.functional.tokenizer.Encoding
    @NotNull
    public String getName() {
        return name;
    }

    /**
     * Encodes the given text without any limit on the number of tokens.
     *
     * @param str text to encode; {@code null} yields an empty token list
     * @return the token ids produced for {@code str}
     * @throws UnsupportedOperationException if {@code str} contains a special token
     */
    @Override // com.xebia.functional.tokenizer.Encoding
    @NotNull
    public List<Integer> encode(@Nullable String str) {
        EncodingResult unbounded = encodeInternal(str, null);
        return unbounded.getTokens();
    }

    /**
     * Encodes the given text, producing at most {@code i} tokens.
     *
     * @param str text to encode; {@code null} yields an empty result
     * @param i   maximum number of tokens to produce
     * @return the encoding result for {@code str}
     * @throws UnsupportedOperationException if {@code str} contains a special token
     */
    @Override // com.xebia.functional.tokenizer.Encoding
    @NotNull
    public EncodingResult encode(@Nullable String str, int i) {
        Integer maxTokens = Integer.valueOf(i);
        return encodeInternal(str, maxTokens);
    }

    /**
     * Shared implementation behind both {@code encode} overloads.
     *
     * <p>Rejects any text that contains one of the known special tokens, then hands the
     * text over to {@code encodeOrdinaryInternal}.
     *
     * @param str text to encode, may be null
     * @param num maximum number of tokens, or null for no limit
     * @return an empty, non-truncated result for null input; otherwise the ordinary encoding
     * @throws UnsupportedOperationException if {@code str} contains a special token
     */
    private final EncodingResult encodeInternal(String str, Integer num) {
        if (str != null) {
            for (Iterator<String> specialTokens = this.specialTokensEncoder.getDecodedTokens().iterator(); specialTokens.hasNext(); ) {
                String specialToken = specialTokens.next();
                boolean present = StringsKt.contains$default(str, specialToken, false, 2, (Object) null);
                if (present) {
                    throw new UnsupportedOperationException("Encoding special tokens is not supported yet.");
                }
            }
            return encodeOrdinaryInternal(str, num);
        }
        return new EncodingResult(CollectionsKt.emptyList(), false);
    }

    /**
     * Encodes the given text without checking for special tokens and without a token limit.
     *
     * @param str text to encode; must not be null
     * @return the token ids produced for {@code str}
     */
    @Override // com.xebia.functional.tokenizer.Encoding
    @NotNull
    public List<Integer> encodeOrdinary(@NotNull String str) {
        Intrinsics.checkNotNullParameter(str, "text");
        EncodingResult unbounded = encodeOrdinaryInternal(str, null);
        return unbounded.getTokens();
    }

    /**
     * Encodes the given text without checking for special tokens, producing at most
     * {@code i} tokens.
     *
     * @param str text to encode; must not be null
     * @param i   maximum number of tokens to produce
     * @return the encoding result for {@code str}
     */
    @Override // com.xebia.functional.tokenizer.Encoding
    @NotNull
    public EncodingResult encodeOrdinary(@NotNull String str, int i) {
        Intrinsics.checkNotNullParameter(str, "text");
        Integer maxTokens = Integer.valueOf(i);
        return encodeOrdinaryInternal(str, maxTokens);
    }

    /**
     * Encodes {@code str} piece by piece, optionally stopping once a token budget is used up.
     *
     * <p>The text is split with {@code pattern}. Each piece is looked up as a whole token
     * first; if it is not a known token it is broken down by byte-pair merging. When a limit
     * is given, trailing tokens are dropped until the remaining tokens decode to a prefix of
     * the input, so the result never ends in the middle of a character.
     *
     * @param str text to encode, may be null
     * @param num maximum number of tokens, or null for no limit
     * @return for null input an empty result with the flag unset; with a limit, the longest
     *         token prefix that decodes to a prefix of {@code str}, with the flag set when
     *         that decoded prefix is shorter than {@code str}; without a limit, all tokens
     *         with the flag unset
     */
    private final EncodingResult encodeOrdinaryInternal(String str, final Integer num) {
        if (str == null) {
            return new EncodingResult(CollectionsKt.emptyList(), false);
        }
        List<Integer> builder = CollectionsKt.createListBuilder();
        // Mutable holder so the lambda below can observe the running token count.
        final Ref.IntRef tokenCount = new Ref.IntRef();
        // Stop pulling further regex matches as soon as the token budget is exhausted.
        Iterator<MatchResult> matches = SequencesKt.takeWhile(
                Regex.findAll$default(this.pattern, str, 0, 2, (Object) null),
                (MatchResult match) -> Boolean.valueOf(maxTokenCountNotReached(num, tokenCount.element))
        ).iterator();
        while (matches.hasNext()) {
            byte[] piece = ImmutableByteArray.Companion.m21fromliLppeg(StringsKt.encodeToByteArray(matches.next().getValue()));
            if (this.encoder.containsDecodedToken(ImmutableByteArray.m16boximpl(piece))) {
                // The whole piece is a single known token.
                builder.add(this.encoder.encode(ImmutableByteArray.m16boximpl(piece)));
                tokenCount.element++;
            } else {
                // Split the piece by byte-pair merging; addTokens enforces the limit.
                tokenCount.element += addTokens(builder, m6bytePairMergece7gLuk(piece), num);
            }
        }
        List<Integer> tokens = CollectionsKt.build(builder);
        if (num != null) {
            // Drop trailing tokens one at a time until what is left decodes to a prefix of
            // the input. The empty list decodes to "", so the last iteration always matches.
            for (int dropped = 0; dropped <= tokens.size(); dropped++) {
                List<Integer> candidate = tokens.subList(0, tokens.size() - dropped);
                String decoded = decode(candidate);
                if (StringsKt.startsWith$default(str, decoded, false, 2, (Object) null)) {
                    return new EncodingResult(candidate, str.length() > decoded.length());
                }
            }
        }
        return new EncodingResult(tokens, false);
    }

    /**
     * Appends tokens to the output list, respecting an optional overall limit.
     *
     * @param list  output list that receives the tokens
     * @param list2 tokens to append
     * @param num   maximum total size of {@code list}, or null for no limit
     * @return the number of tokens actually appended
     */
    private final int addTokens(List<Integer> list, List<Integer> list2, Integer num) {
        List<Integer> accepted = list2;
        if (num != null) {
            // Only as many tokens as still fit under the limit are taken.
            int remaining = num.intValue() - list.size();
            int take = Math.min(remaining, list2.size());
            accepted = list2.subList(0, take);
        }
        list.addAll(accepted);
        return accepted.size();
    }

    /**
     * Counts the tokens that {@code encode(String)} produces for the given text.
     *
     * @param str text to measure; must not be null
     * @return the number of tokens
     * @throws UnsupportedOperationException if {@code str} contains a special token
     */
    @Override // com.xebia.functional.tokenizer.Encoding
    public int countTokens(@NotNull String str) {
        Intrinsics.checkNotNullParameter(str, "text");
        List<Integer> tokens = encode(str);
        return tokens.size();
    }

    /**
     * Counts the tokens that {@code encodeOrdinary(String)} produces for the given text.
     *
     * @param str text to measure; must not be null
     * @return the number of tokens
     */
    @Override // com.xebia.functional.tokenizer.Encoding
    public int countTokensOrdinary(@NotNull String str) {
        Intrinsics.checkNotNullParameter(str, "text");
        List<Integer> tokens = encodeOrdinary(str);
        return tokens.size();
    }

    /**
     * Decodes a list of token ids back into text.
     *
     * @param list token ids to decode; must not be null
     * @return the string obtained by decoding the concatenated token bytes
     * @throws IllegalArgumentException if a token id is unknown
     */
    @Override // com.xebia.functional.tokenizer.Encoding
    @NotNull
    public String decode(@NotNull List<Integer> list) {
        Intrinsics.checkNotNullParameter(list, "tokens");
        byte[] rawBytes = decodeBytes(list);
        return StringsKt.decodeToString(rawBytes);
    }

    /**
     * Decodes a list of token ids into the concatenation of their byte sequences.
     *
     * @param list token ids to decode; must not be null
     * @return the bytes of every token, in order, joined into one array
     * @throws IllegalArgumentException if a token id is unknown
     */
    @Override // com.xebia.functional.tokenizer.Encoding
    @NotNull
    public byte[] decodeBytes(@NotNull List<Integer> list) {
        Intrinsics.checkNotNullParameter(list, "tokens");
        // Accumulate raw bytes directly instead of boxing every byte into a List<Byte>.
        ByteArrayOutputStream joined = new ByteArrayOutputStream();
        for (Integer token : list) {
            byte[] tokenBytes = decodeToken(token.intValue());
            joined.write(tokenBytes, 0, tokenBytes.length);
        }
        return joined.toByteArray();
    }

    /**
     * Splits a piece into tokens by repeatedly merging the adjacent pair with the lowest rank.
     *
     * <p>{@code parts} holds one entry per boundary position in {@code bArr} (length + 1
     * entries). The rank stored at entry {@code k} is the rank of the byte range that starts
     * at entry {@code k} and ends at entry {@code k + 2}, or {@code Integer.MAX_VALUE} when
     * that range is not a known token. Merging removes the boundary between the two halves.
     */
    /* renamed from: bytePairMerge-ce7gLuk, reason: not valid java name */
    private final List<Integer> m6bytePairMergece7gLuk(byte[] bArr) {
        int boundaryCount = bArr.length + 1;
        List<PieceIndexToRank> parts = new ArrayList<>(boundaryCount);
        for (int position = 0; position < boundaryCount; position++) {
            parts.add(new PieceIndexToRank(position, Integer.MAX_VALUE));
        }

        // Seed the rank of every adjacent byte pair.
        int pairCount = parts.size() - 2;
        for (int k = 0; k < pairCount; k++) {
            Integer pairRank = m7getRank3mex4yY(bArr, parts, k, 0);
            if (pairRank != null) {
                parts.get(k).setRank(pairRank.intValue());
            }
        }

        while (parts.size() > 1) {
            // Find the first entry with the lowest rank.
            int bestIndex = 0;
            int bestRank = Integer.MAX_VALUE;
            int candidateCount = parts.size() - 1;
            for (int k = 0; k < candidateCount; k++) {
                int candidateRank = parts.get(k).getRank();
                if (candidateRank < bestRank) {
                    bestRank = candidateRank;
                    bestIndex = k;
                }
            }
            if (bestRank == Integer.MAX_VALUE) {
                // No remaining pair forms a known token.
                break;
            }

            // Re-rank the merged entry and its left neighbour as if the boundary at
            // bestIndex + 1 were already gone (skip = 1), then remove that boundary.
            PieceIndexToRank merged = parts.get(bestIndex);
            Integer mergedRank = m7getRank3mex4yY(bArr, parts, bestIndex, 1);
            merged.setRank(mergedRank != null ? mergedRank.intValue() : Integer.MAX_VALUE);
            if (bestIndex > 0) {
                PieceIndexToRank previous = parts.get(bestIndex - 1);
                Integer previousRank = m7getRank3mex4yY(bArr, parts, bestIndex - 1, 1);
                previous.setRank(previousRank != null ? previousRank.intValue() : Integer.MAX_VALUE);
            }
            parts.remove(bestIndex + 1);
        }

        // Each pair of neighbouring boundaries now delimits one token.
        List<Integer> tokens = CollectionsKt.createListBuilder(parts.size());
        int tokenCount = parts.size() - 1;
        for (int k = 0; k < tokenCount; k++) {
            tokens.add(this.encoder.encode(ImmutableByteArray.m16boximpl(ImmutableByteArray.m10getBytesBetweenVUPqfk(bArr, parts.get(k).getIndex(), parts.get(k + 1).getIndex()))));
        }
        return CollectionsKt.build(tokens);
    }

    /**
     * Tells whether the token limit has been reached.
     *
     * @param num maximum number of tokens, or null for no limit
     * @param i   number of tokens produced so far
     * @return {@code true} when a limit is set and {@code i} is at or above it
     */
    private final boolean maxTokenCountReached(Integer num, int i) {
        if (num == null) {
            return false;
        }
        return num.intValue() <= i;
    }

    /**
     * Tells whether more tokens may still be produced.
     *
     * @param num maximum number of tokens, or null for no limit
     * @param i   number of tokens produced so far
     * @return {@code true} when there is no limit or {@code i} is still below it
     */
    /* JADX INFO: Access modifiers changed from: private */
    public final boolean maxTokenCountNotReached(Integer num, int i) {
        return num == null || num.intValue() > i;
    }

    /**
     * Looks up the rank of the byte range that starts at part {@code i} and ends at part
     * {@code i + i2 + 2}.
     *
     * @param bArr the piece being merged
     * @param list current boundary entries of the piece
     * @param i    index of the entry where the range starts
     * @param i2   number of extra entries to skip when locating the end of the range
     * @return the rank of the range, or null when the end entry does not exist or the range
     *         is not a known token
     */
    /* renamed from: getRank-3mex4yY, reason: not valid java name */
    private final Integer m7getRank3mex4yY(byte[] bArr, List<PieceIndexToRank> list, int i, int i2) {
        int endPart = i + i2 + 2;
        if (endPart < list.size()) {
            int from = list.get(i).getIndex();
            int to = list.get(endPart).getIndex();
            return this.encoder.encodeIfPresent(ImmutableByteArray.m16boximpl(ImmutableByteArray.m10getBytesBetweenVUPqfk(bArr, from, to)));
        }
        return null;
    }

    /**
     * Resolves a single token id to its bytes.
     *
     * <p>The ordinary token table is consulted first, then the special-token table.
     *
     * @param i token id to decode
     * @return a fresh copy of the ordinary token's bytes, or the encoded bytes of the
     *         special-token string
     * @throws IllegalArgumentException if the id is in neither table
     */
    private final byte[] decodeToken(int i) {
        ImmutableByteArray ordinary = this.encoder.decodeIfPresent(Integer.valueOf(i));
        byte[] ordinaryBytes = ordinary != null ? ordinary.m17unboximpl() : null;
        if (ordinaryBytes != null) {
            // Copy so callers cannot modify the array held by the token table.
            return Arrays.copyOf(ordinaryBytes, ordinaryBytes.length);
        }
        String special = this.specialTokensEncoder.decodeIfPresent(Integer.valueOf(i));
        if (special != null) {
            return StringsKt.encodeToByteArray(special);
        }
        throw new IllegalArgumentException("Unknown token for decoding: " + i);
    }
}
