package it.unimi.dsi.law.warc.tool;

import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.law.warc.io.GZWarcRecord;
import it.unimi.dsi.law.warc.io.InspectableBufferedInputStream;
import it.unimi.dsi.law.warc.io.WarcRecord;
import it.unimi.dsi.law.warc.parser.HTMLParser;
import it.unimi.dsi.law.warc.parser.Parser;
import it.unimi.dsi.law.warc.util.WarcHttpResponse;
import it.unimi.dsi.logging.ProgressLogger;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:it/unimi/dsi/law/warc/tool/SummarizeWarc.class */
/**
 * Command-line tool that produces a summary of a WARC file: it copies to the
 * output only the records whose host is known to a given host function, and at
 * most a fixed number of records for each such host.
 */
public class SummarizeWarc {
    private static final Logger LOGGER = LoggerFactory.getLogger(SummarizeWarc.class);
    /** Size, in bytes, of the buffers wrapped around the input and output streams by {@link #main(String[])}. */
    static final int IO_BUFFER_SIZE = 65536;

    /**
     * Copies to {@code outputStream} at most {@code i} records per known host.
     *
     * <p>For each record read, the host of the record's subject URI is looked up in
     * {@code object2LongFunction}; a result of {@code -1} means the host is skipped,
     * any other value is used as the index of that host's counter.
     *
     * @param fastBufferedInputStream the stream the records are read from.
     * @param z whether records are read as {@link GZWarcRecord}s rather than plain {@link WarcRecord}s.
     * @param object2LongFunction maps a host to its counter index, or to {@code -1} if the host must be skipped;
     *        its {@code size()} determines the number of counters.
     * @param i the maximum number of records written for each host.
     * @param outputStream the stream the selected records are written to (always as plain {@link WarcRecord}s).
     * @throws IOException if reading or writing fails.
     * @throws WarcRecord.FormatException if a record cannot be read.
     * @throws IllegalArgumentException if {@code object2LongFunction} reports a negative size.
     */
    public static void run(FastBufferedInputStream fastBufferedInputStream, boolean z, Object2LongFunction<CharSequence> object2LongFunction, int i, OutputStream outputStream) throws IOException, WarcRecord.FormatException {
        final WarcRecord inRecord = z ? new GZWarcRecord() : new WarcRecord();
        final WarcRecord outRecord = new WarcRecord();
        final ProgressLogger progress = new ProgressLogger(LOGGER, "records");
        final int[] recordsPerHost = newPerHostCounters(object2LongFunction);
        progress.logInterval = 10000L;
        progress.start("Summarizing...");
        while (inRecord.read(fastBufferedInputStream) != -1) {
            progress.update();
            final int hostIndex = (int) object2LongFunction.getLong(inRecord.header.subjectUri.getHost());
            if (hostIndex != -1 && recordsPerHost[hostIndex] < i) {
                recordsPerHost[hostIndex]++;
                outRecord.copy(inRecord);
                outRecord.write(outputStream);
            }
        }
        progress.done();
    }

    /**
     * Same selection as {@link #run(FastBufferedInputStream, boolean, Object2LongFunction, int, OutputStream)},
     * but every selected record is additionally inspected: when it can be read as an HTTP
     * response whose status is in the 3xx class, it is parsed and the redirect location
     * reported by the parser is logged at debug level.
     *
     * @param fastBufferedInputStream the stream the records are read from.
     * @param z whether records are read as {@link GZWarcRecord}s rather than plain {@link WarcRecord}s.
     * @param object2LongFunction maps a host to its counter index, or to {@code -1} if the host must be skipped;
     *        its {@code size()} determines the number of counters.
     * @param i the maximum number of records written for each host.
     * @param outputStream the stream the selected records are written to (always as plain {@link WarcRecord}s).
     * @throws IOException if reading or writing fails.
     * @throws WarcRecord.FormatException if a record cannot be read.
     * @throws IllegalArgumentException if {@code object2LongFunction} reports a negative size.
     */
    public static void runWithRedirects(FastBufferedInputStream fastBufferedInputStream, boolean z, Object2LongFunction<CharSequence> object2LongFunction, int i, OutputStream outputStream) throws IOException, WarcRecord.FormatException {
        final WarcRecord inRecord = z ? new GZWarcRecord() : new WarcRecord();
        final WarcRecord outRecord = new WarcRecord();
        final HTMLParser parser = new HTMLParser();
        final ProgressLogger progress = new ProgressLogger(LOGGER, "records");
        final WarcHttpResponse response = new WarcHttpResponse();
        final InspectableBufferedInputStream inspectableBlock = new InspectableBufferedInputStream();
        final int[] recordsPerHost = newPerHostCounters(object2LongFunction);
        progress.logInterval = 10000L;
        progress.start("Summarizing...");
        while (inRecord.read(fastBufferedInputStream) != -1) {
            progress.update();
            final int hostIndex = (int) object2LongFunction.getLong(inRecord.header.subjectUri.getHost());
            if (hostIndex != -1 && recordsPerHost[hostIndex] < i) {
                recordsPerHost[hostIndex]++;
                // Route the record block through the inspectable wrapper so that it can be
                // consumed by the parser and then rewound before being copied to the output.
                inspectableBlock.connect(inRecord.block);
                inRecord.block = inspectableBlock;
                inspectableBlock.fillAndRewind();
                if (response.fromWarcRecord(inRecord) && response.status() / 100 == 3) {
                    parser.parse(response, Parser.NULL_LINK_RECEIVER);
                    LOGGER.debug("Got a redirect to: {}", parser.location());
                }
                inspectableBlock.rewind();
                outRecord.copy(inRecord);
                outRecord.write(outputStream);
                inspectableBlock.close();
            }
        }
        progress.done();
    }

    /**
     * Allocates one zeroed counter per key of the given host function.
     *
     * @param hosts the host function whose {@code size()} gives the number of counters.
     * @return a new array of {@code hosts.size()} zeroes.
     * @throws IllegalArgumentException if {@code hosts.size()} is negative.
     */
    private static int[] newPerHostCounters(final Object2LongFunction<CharSequence> hosts) {
        final int numberOfHosts = hosts.size();
        if (numberOfHosts < 0) {
            // Fail with a readable message instead of a bare NegativeArraySizeException.
            throw new IllegalArgumentException("The host function reports size " + numberOfHosts + "; a function with a known, nonnegative size is required");
        }
        return new int[numberOfHosts];
    }

    /**
     * Parses the command line, loads the host function, and runs
     * {@link #run(FastBufferedInputStream, boolean, Object2LongFunction, int, OutputStream)}
     * from the chosen input (a file named {@code <warcFile>.warc[.gz]}, or standard input
     * when the basename is {@code -}) to standard output.
     *
     * @param strArr the command-line arguments.
     * @throws Exception if argument parsing, loading the host function, or summarizing fails.
     */
    public static void main(String[] strArr) throws Exception {
        SimpleJSAP simpleJSAP = new SimpleJSAP(SummarizeWarc.class.getName(), "Summarizator for warc files.", new Parameter[]{new Switch("gzip", 'z', "gzip", "Tells if the warc is compressed."), new UnflaggedOption("warcFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, false, "The Warc input file basename (if - stdin will be used)."), new UnflaggedOption("hostsFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, false, "The filename of the signed minimal perfect hash of hosts to put in the summary"), new UnflaggedOption("maxPages", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, true, false, "The maximum number of pages per host to put in the summary.")});
        final JSAPResult arguments = simpleJSAP.parse(strArr);
        if (simpleJSAP.messagePrinted()) {
            System.exit(1);
        }
        final boolean gzip = arguments.getBoolean("gzip");
        final String warcBasename = arguments.getString("warcFile");
        // The serialized object is trusted to be a host function; the cast cannot be checked at runtime.
        @SuppressWarnings("unchecked")
        final Object2LongFunction<CharSequence> hosts = (Object2LongFunction<CharSequence>) BinIO.loadObject(arguments.getString("hostsFile"));
        final int maxPages = arguments.getInt("maxPages");

        final InputStream rawInput;
        if (warcBasename.equals("-")) {
            rawInput = System.in;
        } else {
            rawInput = new FileInputStream(new File(warcBasename + ".warc" + (gzip ? ".gz" : "")));
        }

        // try-with-resources closes both streams even when summarizing fails part-way through.
        try (FastBufferedInputStream input = new FastBufferedInputStream(rawInput, IO_BUFFER_SIZE);
                FastBufferedOutputStream output = new FastBufferedOutputStream(System.out, IO_BUFFER_SIZE)) {
            run(input, gzip, hosts, maxPages, output);
        }
    }
}
