package org.osjava.scraping;

import com.generationjava.config.Config;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.List;
import java.util.Locale;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.lang.StringUtils;
import org.osjava.norbert.NoRobotClient;
import org.osjava.norbert.NoRobotException;
import org.osjava.oscube.container.Session;

/* loaded from: input_file:org/osjava/scraping/AbstractHttpFetcher.class */
public abstract class AbstractHttpFetcher implements Fetcher {
    private static final String SESSION_CACHE_CODE = "HTTPCLIENT";

    public abstract int getDefaultPort();

    protected abstract void startSession(URL url, int i, HttpClient httpClient, Config config, Session session);

    @Override // org.osjava.scraping.Fetcher
    public Page fetch(String str, Config config, Session session) throws FetchingException {
        PostMethod getMethod;
        List<String> list;
        List<String> list2;
        int indexOf;
        try {
            String str2 = null;
            if (config.has("method") && "POST".equalsIgnoreCase(new StringBuffer().append("").append(config.get("method")).toString()) && (indexOf = str.indexOf("?")) != -1) {
                str2 = str.substring(indexOf + 1);
                str = str.substring(0, indexOf);
            }
            URL url = new URL(str);
            if (!config.has("norobots.override")) {
                String str3 = "osjava-scraping-engine";
                if (config.has("header") && (list2 = config.getList("header")) != null) {
                    for (String str4 : list2) {
                        String substringBefore = StringUtils.substringBefore(str4, "=");
                        String substringAfter = StringUtils.substringAfter(str4, "=");
                        if ("User-Agent".equals(substringBefore)) {
                            str3 = substringAfter;
                        }
                    }
                }
                if (checkIllegal(url, str3)) {
                    throw new FetchingException(new StringBuffer().append("Not allowed to fetch url: ").append(str).append(" due to the NoRobots RFQ. ").toString());
                }
            }
            HttpClient httpClient = (HttpClient) session.get(SESSION_CACHE_CODE);
            if (httpClient == null) {
                httpClient = new HttpClient();
                session.put(SESSION_CACHE_CODE, httpClient);
            }
            if (str2 != null) {
                PostMethod postMethod = new PostMethod(url.getFile());
                String[] split = StringUtils.split(str2, "&");
                for (int i = 0; i < split.length; i++) {
                    String[] split2 = StringUtils.split(split[i], "=");
                    if (split2.length == 2) {
                        postMethod.addParameter(split2[0], split2[1]);
                    } else {
                        System.err.println(new StringBuffer().append("Bad post pair: ").append(split[i]).toString());
                    }
                }
                getMethod = postMethod;
            } else {
                getMethod = new GetMethod(url.getFile());
            }
            if (config.has("header") && (list = config.getList("header")) != null) {
                for (String str5 : list) {
                    getMethod.addRequestHeader(StringUtils.substringBefore(str5, "="), StringUtils.substringAfter(str5, "="));
                }
            }
            int port = url.getPort();
            if (port == -1) {
                port = getDefaultPort();
            }
            startSession(url, port, httpClient, config, session);
            if (config.has("timeout")) {
                httpClient.setTimeout(config.getInt("timeout"));
            }
            int executeMethod = httpClient.executeMethod(getMethod);
            if (executeMethod != 200) {
                throw new FetchingException(new StringBuffer().append("Unable to fetch from ").append(str).append(" due to error code ").append(executeMethod).toString());
            }
            Header responseHeader = getMethod.getResponseHeader("Content-Type");
            String str6 = "unknown";
            if (responseHeader != null) {
                str6 = responseHeader.toExternalForm().toLowerCase();
                if (!str6.startsWith("content-type: text") && !str6.startsWith("content-type: plain")) {
                    throw new FetchingException(new StringBuffer().append("Not going to fetch a non-text file from ").append(str).append(". Type is: ").append(str6).toString());
                }
            }
            String responseBodyAsString = getMethod.getResponseBodyAsString();
            getMethod.releaseConnection();
            MemoryPage memoryPage = new MemoryPage(responseBodyAsString, str6);
            String stringBuffer = new StringBuffer().append(url.getProtocol()).append("://").append(url.getHost()).toString();
            if (url.getPort() != -1) {
                stringBuffer = new StringBuffer().append(stringBuffer).append(":").append(url.getPort()).toString();
            }
            String path = url.getPath();
            int lastIndexOf = path.lastIndexOf("/");
            if (lastIndexOf != -1) {
                stringBuffer = new StringBuffer().append(stringBuffer).append(path.substring(0, lastIndexOf)).toString();
            }
            memoryPage.setDocumentBase(stringBuffer);
            return memoryPage;
        } catch (IOException e) {
            throw new FetchingException(new StringBuffer().append("Error fetching from ").append(str).append(". ").append(e.getMessage()).toString(), e);
        }
    }

    private boolean checkIllegal(URL url, String str) throws MalformedURLException {
        NoRobotClient noRobotClient = new NoRobotClient(str);
        try {
            noRobotClient.parse(toBase(url));
            return !noRobotClient.isUrlAllowed(url);
        } catch (NoRobotException e) {
            return false;
        }
    }

    private URL toBase(URL url) throws MalformedURLException {
        return new URL(new StringBuffer().append(url.getProtocol()).append("://").append(url.getHost()).append(url.getPort() == -1 ? "" : new StringBuffer().append(":").append(url.getPort()).toString()).append("/").toString());
    }
}
