package org.osjava.scraping.parser;

import com.generationjava.config.Config;
import com.generationjava.scrape.HtmlScraper;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import org.osjava.oscube.container.Result;
import org.osjava.oscube.container.Session;
import org.osjava.oscube.container.SingleResult;
import org.osjava.scraping.AbstractParser;
import org.osjava.scraping.Page;
import org.osjava.scraping.ParsingException;

/* loaded from: input_file:org/osjava/scraping/parser/UrlScraper.class */
/**
 * Parser that scrapes a single URL out of an HTML page and returns it as a
 * {@link SingleResult} wrapping a {@link URL}.
 *
 * <p>Subclasses supply the page-specific extraction by implementing
 * {@link #scrapeUrl(HtmlScraper)}; this class takes care of reading the page
 * and turning the scraped text into an absolute URL.
 */
public abstract class UrlScraper extends AbstractParser {
    /**
     * Reads the page, asks the subclass for the URL text, and resolves it.
     *
     * <p>The scraped text is handled in one of three ways:
     * <ul>
     *   <li>contains {@code "://"}: used as-is as an absolute URL;</li>
     *   <li>starts with {@code "/"}: appended to the protocol/host/port of the
     *       page's document base;</li>
     *   <li>anything else: appended to the page's document base after a
     *       {@code "/"} separator.</li>
     * </ul>
     *
     * @param page    the page to scrape; its content and document base are read
     * @param config  configuration; only its context is used, for the error message
     * @param session not used by this implementation
     * @return a {@link SingleResult} holding the resolved {@link URL}
     * @throws ParsingException if the page cannot be read, no URL is scraped,
     *                          or the resulting text is not a valid URL
     */
    @Override // org.osjava.scraping.AbstractParser, org.osjava.scraping.Parser
    public Result parse(Page page, Config config, Session session) throws ParsingException {
        HtmlScraper htmlScraper = new HtmlScraper();
        try {
            htmlScraper.scrape(page.readAsString());
            String scrapeUrl = scrapeUrl(htmlScraper);
            if (scrapeUrl == null) {
                throw new ParsingException("Unable to find url for : " + config.getContext());
            }
            try {
                URL target;
                if (scrapeUrl.indexOf("://") != -1) {
                    // Already absolute.
                    target = new URL(scrapeUrl);
                } else if (scrapeUrl.startsWith("/")) {
                    // Root-relative: scrapeUrl brings its own leading slash, so the
                    // site root must not end with one (avoids "http://host//path").
                    target = new URL(toBase(new URL(page.getDocumentBase())) + scrapeUrl);
                } else {
                    // NOTE(review): this assumes the document base has no trailing
                    // slash and names a directory rather than a file - confirm
                    // against Page.getDocumentBase().
                    target = new URL(page.getDocumentBase() + "/" + scrapeUrl);
                }
                return new SingleResult(target);
            } catch (MalformedURLException e) {
                throw new ParsingException("Unable to parse url: " + scrapeUrl, e);
            }
        } catch (IOException e2) {
            throw new ParsingException("Unable to read page. ", e2);
        }
    }

    /**
     * Extracts the URL text from an already-loaded scraper.
     *
     * @param htmlScraper scraper that has been fed the page content
     * @return the scraped URL text (absolute, root-relative or relative), or
     *         {@code null} if none was found, which {@code parse} reports as
     *         a {@link ParsingException}
     */
    protected abstract String scrapeUrl(HtmlScraper htmlScraper);

    /**
     * Reduces a URL to its site root: protocol, host and (when explicitly
     * present) port, with no path and no trailing slash.
     *
     * @param url the URL to reduce
     * @return the site root, e.g. {@code http://example.com:8080}
     * @throws MalformedURLException if the reduced form is not a valid URL
     */
    private URL toBase(URL url) throws MalformedURLException {
        // getPort() returns -1 when the URL carries no explicit port.
        String port = url.getPort() == -1 ? "" : ":" + url.getPort();
        return new URL(url.getProtocol() + "://" + url.getHost() + port);
    }
}
