package com.digitalpebble.stormcrawler.parse.filter;

import com.digitalpebble.stormcrawler.Metadata;
import com.digitalpebble.stormcrawler.parse.ParseFilter;
import com.digitalpebble.stormcrawler.parse.ParseResult;
import java.util.Locale;
import org.apache.storm.shade.org.apache.commons.lang.StringUtils;
import org.w3c.dom.DocumentFragment;

/* loaded from: input_file:com/digitalpebble/stormcrawler/parse/filter/MimeTypeNormalization.class */
/**
 * Parse filter that reduces the {@code parse.Content-Type} metadata value of a document to a
 * short label and stores it under the {@code format} metadata key.
 *
 * <p>The label is one of {@code unknown}, {@code html}, {@code pdf}, {@code word},
 * {@code excel}, {@code powerpoint}, {@code video}, {@code image}, {@code audio} or
 * {@code other}.
 */
public class MimeTypeNormalization extends ParseFilter {
    /**
     * Reads the first {@code parse.Content-Type} value from the metadata of the entry for
     * {@code str} and sets the {@code format} key of that same metadata to the matching label.
     *
     * @param str key used to look up the entry in {@code parseResult}
     * @param bArr not used by this filter
     * @param documentFragment not used by this filter
     * @param parseResult holder of the metadata that is read and updated
     */
    @Override // com.digitalpebble.stormcrawler.parse.ParseFilter
    public void filter(String str, byte[] bArr, DocumentFragment documentFragment, ParseResult parseResult) {
        Metadata metadata = parseResult.get(str).getMetadata();
        String contentType = metadata.getFirstValue("parse.Content-Type");
        metadata.setValue("format", normalize(contentType));
    }

    /**
     * Maps a raw content type to its short format label.
     *
     * <p>Checks run in a fixed order and the first match wins: substring matches for the
     * document families first, then prefix matches for the generic media types.
     *
     * @param contentType raw content type; may be {@code null} or blank
     * @return {@code "unknown"} for a blank value, {@code "other"} when nothing matches,
     *     otherwise the matching label
     */
    private static String normalize(String contentType) {
        if (StringUtils.isBlank(contentType)) {
            return "unknown";
        }
        // Locale.ROOT keeps the comparison independent of the JVM default locale; with a
        // Turkish default locale "IMAGE/PNG" would lowercase to "ımage/png" and never match.
        String lowered = contentType.toLowerCase(Locale.ROOT);
        if (lowered.contains("html")) {
            return "html";
        }
        if (lowered.contains("pdf")) {
            return "pdf";
        }
        if (lowered.contains("word")) {
            return "word";
        }
        if (lowered.contains("excel")) {
            return "excel";
        }
        if (lowered.contains("powerpoint")) {
            return "powerpoint";
        }
        if (lowered.startsWith("video/")) {
            return "video";
        }
        if (lowered.startsWith("image/")) {
            return "image";
        }
        if (lowered.startsWith("audio/")) {
            return "audio";
        }
        return "other";
    }
}
