package org.galagosearch.core.parse;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;

/* loaded from: input_file:org/galagosearch/core/parse/TrecTextParser.class */
/**
 * Reads TREC-formatted text from a {@link BufferedReader} and returns one
 * {@link Document} per {@code <DOC>...</DOC>} record.
 *
 * <p>Parsing is line-oriented: a tag is only recognized when it appears at the
 * very start of a line.
 */
public class TrecTextParser implements DocumentStreamParser {
    /** Opening tags of the sections whose lines are copied into the document text. */
    private static final String[] START_TAGS = {
        "<TEXT>", "<HEADLINE>", "<TITLE>", "<HL>", "<HEAD>",
        "<TTL>", "<DD>", "<DATE>", "<LP>", "<LEADPARA>"
    };

    /** Closing tags; the entry at index {@code i} closes {@code START_TAGS[i]}. */
    private static final String[] END_TAGS = {
        "</TEXT>", "</HEADLINE>", "</TITLE>", "</HL>", "</HEAD>",
        "</TTL>", "</DD>", "</DATE>", "</LP>", "</LEADPARA>"
    };

    private static final String DOCNO_OPEN = "<DOCNO>";
    private static final String DOCNO_CLOSE = "</DOCNO>";

    BufferedReader reader;

    /**
     * Creates a parser that reads from the given reader.
     *
     * @param bufferedReader the source of TREC text; this class never closes it
     */
    public TrecTextParser(BufferedReader bufferedReader) throws FileNotFoundException, IOException {
        this.reader = bufferedReader;
    }

    /**
     * Consumes lines until one starts with the given prefix.
     *
     * @param str the prefix to look for at the start of a line
     * @return the first line starting with {@code str}, or {@code null} if the
     *         stream ends first
     * @throws IOException if reading from the underlying reader fails
     */
    public String waitFor(String str) throws IOException {
        String line;
        while ((line = this.reader.readLine()) != null) {
            if (line.startsWith(str)) {
                return line;
            }
        }
        return null;
    }

    /**
     * Skips ahead to the next line starting with {@code <DOCNO>} and extracts the
     * trimmed text between {@code <DOCNO>} and {@code </DOCNO>}. The closing tag
     * may be on a later line; intermediate lines are concatenated without a
     * separator.
     *
     * @return the document number, or {@code null} if the stream ends before a
     *         complete {@code <DOCNO>...</DOCNO>} element has been read
     * @throws IOException if reading from the underlying reader fails
     */
    public String parseDocNumber() throws IOException {
        String text = waitFor(DOCNO_OPEN);
        if (text == null) {
            // Stream ended before any <DOCNO> line was found.
            return null;
        }
        while (!text.contains(DOCNO_CLOSE)) {
            String line = this.reader.readLine();
            if (line == null) {
                // Stream ended inside the element: there is no closing tag to cut at.
                return null;
            }
            text = text + line;
        }
        int start = text.indexOf(DOCNO_OPEN) + DOCNO_OPEN.length();
        int end = text.indexOf(DOCNO_CLOSE);
        return text.substring(start, end).trim();
    }

    /**
     * Reads the next {@code <DOC>} record from the stream.
     *
     * <p>After the document number is parsed, lines are consumed until a line
     * starting with {@code </DOC>} or the end of the stream. Only lines inside
     * one of the recognized sections (see {@code START_TAGS}) are kept, each
     * followed by a newline, including the lines carrying the opening and
     * closing tags.
     *
     * @return the next document, or {@code null} if no further {@code <DOC>}
     *         line or no complete document number could be read
     * @throws IOException if reading from the underlying reader fails
     */
    @Override
    public Document nextDocument() throws IOException {
        if (waitFor("<DOC>") == null) {
            return null;
        }
        String identifier = parseDocNumber();
        if (identifier == null) {
            // Truncated record at end of stream; nothing usable to return.
            return null;
        }

        StringBuilder text = new StringBuilder();
        int openTag = -1; // index into START_TAGS of the section being read, or -1 when outside
        String line;
        while ((line = this.reader.readLine()) != null && !line.startsWith("</DOC>")) {
            if (line.startsWith("<")) {
                if (openTag >= 0 && line.startsWith(END_TAGS[openTag])) {
                    // Closing line of the current section: keep it, then leave the section.
                    openTag = -1;
                    text.append(line).append('\n');
                } else if (openTag < 0) {
                    openTag = findStartTag(line);
                }
            }
            if (openTag >= 0) {
                text.append(line).append('\n');
            }
        }
        return new Document(identifier, text.toString());
    }

    /** Returns the index of the start tag that {@code line} begins with, or -1 if none matches. */
    private static int findStartTag(String line) {
        for (int i = 0; i < START_TAGS.length; i++) {
            if (line.startsWith(START_TAGS[i])) {
                return i;
            }
        }
        return -1;
    }
}
