package de.datexis.retrieval.preprocess;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringEscapeUtils;
import org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/datexis/retrieval/preprocess/WikipediaUrlPreprocessor.class */
public class WikipediaUrlPreprocessor implements TokenPreProcess {
    protected final Logger log = LoggerFactory.getLogger(getClass());

    public String preProcess(String str) {
        return cleanWikiPageTitle(str);
    }

    public static String cleanWikiPageTitle(String str) {
        String replaceAll = str.replaceFirst("^.+\\/wiki\\/", "").replaceFirst("#.+$", "").replaceAll("%(?![0-9A-F][0-9A-F])", "%25");
        try {
            replaceAll = URLDecoder.decode(replaceAll, "UTF-8");
        } catch (UnsupportedEncodingException | IllegalArgumentException e) {
        }
        return StringEscapeUtils.unescapeHtml(replaceAll).replace(" ", "_").trim();
    }
}
