/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.process;

import edu.stanford.nlp.international.arabic.process.ArabicLexer;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.AbstractTokenizer;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.util.StringUtils;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.Properties;

/**
 * Tokenizer for Arabic text that pulls tokens from an {@link ArabicLexer}
 * and skips any token whose word is the empty string.
 *
 * <p>Behavior of the underlying lexer is controlled by a {@link Properties}
 * object that is handed to the lexer unchanged; each enabled option is stored
 * as a key mapped to the string {@code "true"}.
 *
 * @param <T> the token type produced by this tokenizer
 */
public class ArabicTokenizer<T extends HasWord>
extends AbstractTokenizer<T> {
    /** Source of raw tokens; constructed once per tokenizer. */
    private final ArabicLexer lexer;

    /** Option set enabled by {@link #atbFactory()}; populated in the static initializer. */
    private static final Properties atbOptions;

    static {
        atbOptions = new Properties();
        String optionsStr = "normArDigits,normArPunc,normAlif,removeDiacritics,removeTatweel,removeQuranChars";
        for (String option : optionsStr.split(",")) {
            atbOptions.setProperty(option, "true");
        }
    }

    /**
     * Creates a tokenizer that produces {@link CoreLabel} tokens.
     *
     * @param r the character source to tokenize
     * @param lexerProperties options passed through to the {@link ArabicLexer}
     * @return a new tokenizer reading from {@code r}
     */
    public static ArabicTokenizer<CoreLabel> newArabicTokenizer(Reader r, Properties lexerProperties) {
        return new ArabicTokenizer<CoreLabel>(r, new CoreLabelTokenFactory(), lexerProperties);
    }

    /**
     * Creates a tokenizer over the given reader.
     *
     * @param r the character source to tokenize
     * @param tf factory the lexer uses to build tokens
     * @param lexerProperties options passed through to the {@link ArabicLexer}
     */
    public ArabicTokenizer(Reader r, LexedTokenFactory<T> tf, Properties lexerProperties) {
        this.lexer = new ArabicLexer(r, tf, lexerProperties);
    }

    /**
     * Returns the next token whose word is non-empty, or {@code null} once the
     * lexer returns {@code null}.
     *
     * @throws RuntimeIOException if the lexer raises an {@link IOException}
     */
    @Override
    @SuppressWarnings("unchecked") // the lexer's result is cast to T, exactly as before; not checked at runtime
    protected T getNext() {
        try {
            HasWord nextToken;
            do {
                nextToken = (HasWord)this.lexer.next();
                // Keep pulling while the lexer hands back tokens with an empty word.
            } while (nextToken != null && nextToken.word().length() == 0);
            return (T)nextToken;
        }
        catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Returns a new {@link CoreLabel} tokenizer factory with no options set.
     */
    public static TokenizerFactory<CoreLabel> factory() {
        return ArabicTokenizerFactory.newTokenizerFactory();
    }

    /**
     * Returns a new {@link CoreLabel} tokenizer factory with every option in
     * {@code atbOptions} enabled (the {@code -atb} command-line configuration).
     */
    public static TokenizerFactory<CoreLabel> atbFactory() {
        TokenizerFactory<CoreLabel> tf = ArabicTokenizerFactory.newTokenizerFactory();
        for (String option : atbOptions.stringPropertyNames()) {
            tf.setOptions(option);
        }
        return tf;
    }

    /**
     * Command-line entry point: tokenizes UTF-8 text from standard input and
     * writes the tokens to standard output, space-separated, one input line
     * per output line. A summary is printed to standard error at the end.
     *
     * <p>Every command-line option name is forwarded to the tokenizer factory.
     * Note that the reported token count includes the {@code *NL*} newline
     * marker tokens, since the counter is incremented before the marker check.
     *
     * @param args command-line options; a first argument containing
     *             {@code "help"} prints usage and exits with status -1
     */
    public static void main(String[] args) {
        if (args.length > 0 && args[0].contains("help")) {
            System.err.printf("Usage: java %s [OPTIONS] < file%n", ArabicTokenizer.class.getName());
            System.err.printf("%nOptions:%n");
            System.err.println("   -help : Print this message. See javadocs for all normalization options.");
            System.err.println("   -atb  : Tokenization for the parsing experiments in Green and Manning (2010)");
            System.exit(-1);
        }

        Properties tokenizerOptions = StringUtils.argsToProperties(args);
        TokenizerFactory<CoreLabel> tf = tokenizerOptions.containsKey("atb")
                ? ArabicTokenizer.atbFactory()
                : ArabicTokenizer.factory();
        for (String option : tokenizerOptions.stringPropertyNames()) {
            tf.setOptions(option);
        }
        // Ask for newline marker tokens so that input lines can be counted and reproduced.
        tf.setOptions("tokenizeNLs");

        int nLines = 0;
        int nTokens = 0;
        String encoding = "UTF-8";
        try {
            Tokenizer<CoreLabel> tokenizer = tf.getTokenizer(new InputStreamReader(System.in, encoding));
            boolean printSpace = false;
            while (tokenizer.hasNext()) {
                ++nTokens;
                String word = tokenizer.next().word();
                if (word.equals("*NL*")) {
                    ++nLines;
                    printSpace = false;
                    System.out.println();
                    continue;
                }
                if (printSpace) {
                    System.out.print(" ");
                }
                System.out.print(word);
                printSpace = true;
            }
        }
        catch (UnsupportedEncodingException e) {
            // Fail loudly instead of printing a trace and then reporting a bogus "Done!" summary.
            throw new RuntimeIOException(e);
        }
        System.err.printf("Done! Tokenized %d lines (%d tokens)%n", nLines, nTokens);
    }

    /**
     * Factory that builds {@link ArabicTokenizer} instances sharing one set of
     * lexer options.
     *
     * <p>Options accumulate on the factory: every call to
     * {@link #setOptions(String)} or {@link #getTokenizer(Reader, String)}
     * adds to {@code lexerProperties}, and the same {@link Properties} object
     * is passed to each tokenizer created afterwards.
     *
     * @param <T> the token type produced by the tokenizers
     */
    public static class ArabicTokenizerFactory<T extends HasWord>
    implements TokenizerFactory<T>,
    Serializable {
        private static final long serialVersionUID = 946818805507187330L;

        /** Token factory handed to every tokenizer this factory creates. */
        protected final LexedTokenFactory<T> factory;

        /** Accumulated lexer options; each enabled option maps to {@code "true"}. */
        protected Properties lexerProperties = new Properties();

        /**
         * Returns a new factory producing {@link CoreLabel} tokenizers with no options set.
         */
        public static TokenizerFactory<CoreLabel> newTokenizerFactory() {
            return new ArabicTokenizerFactory<CoreLabel>(new CoreLabelTokenFactory());
        }

        private ArabicTokenizerFactory(LexedTokenFactory<T> factory) {
            this.factory = factory;
        }

        /**
         * Returns a tokenizer over {@code r}, typed as an iterator.
         */
        @Override
        public Iterator<T> getIterator(Reader r) {
            return this.getTokenizer(r);
        }

        /**
         * Returns a new tokenizer over {@code r} using the options set so far.
         */
        @Override
        public Tokenizer<T> getTokenizer(Reader r) {
            return new ArabicTokenizer<T>(r, this.factory, this.lexerProperties);
        }

        /**
         * Enables each option named in a comma-separated list.
         *
         * <p>Surrounding whitespace around each name is ignored and empty
         * entries are skipped, so {@code "a, b,"} enables {@code a} and
         * {@code b}. A {@code null} argument enables nothing.
         *
         * @param options comma-separated option names; may be {@code null}
         */
        @Override
        public void setOptions(String options) {
            if (options == null) {
                return;
            }
            for (String option : options.split(",")) {
                String name = option.trim();
                if (name.length() > 0) {
                    this.lexerProperties.setProperty(name, "true");
                }
            }
        }

        /**
         * Enables the given extra options on this factory and returns a new
         * tokenizer over {@code r}. The extra options stay enabled for
         * tokenizers created later.
         *
         * @param r the character source to tokenize
         * @param extraOptions comma-separated option names; may be {@code null}
         */
        @Override
        public Tokenizer<T> getTokenizer(Reader r, String extraOptions) {
            this.setOptions(extraOptions);
            return this.getTokenizer(r);
        }
    }
}

