/*
 * Decompiled with CFR 0.152.
 */
package de.jetwick.snacktory;

import de.jetwick.snacktory.ArticleTextExtractor;
import de.jetwick.snacktory.Converter;
import de.jetwick.snacktory.JResult;
import de.jetwick.snacktory.SCache;
import de.jetwick.snacktory.SHelper;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Downloads resources over HTTP and hands HTML documents to an
 * {@link ArticleTextExtractor}, optionally resolving redirects first and
 * optionally consulting an {@link SCache}.
 *
 * <p>Request headers (user agent, referrer, accept, language, charset and
 * cache-control) are configurable through the setters below.
 */
public class HtmlFetcher {
    /** Maximum number of additional redirect hops {@link #getResolvedUrl} follows. */
    private static final int MAX_REDIRECT_HOPS = 10;

    /**
     * Domains for which a redirect target is resolved once more when it shows
     * up in a {@code Location} header. Shared by all instances; never modified
     * after class initialisation.
     */
    private static final Set<String> FURTHER_RESOLVE_NECESSARY = createFurtherResolveDomains();

    private static final Logger logger;

    private String referrer = "http://jetsli.de/crawler";
    // Built once from the initial referrer; setReferrer() does not update it.
    private String userAgent = "Mozilla/5.0 (compatible; Jetslide; +" + this.referrer + ")";
    private String cacheControl = "max-age=0";
    private String language = "en-us";
    private String accept = "application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
    private String charset = "UTF-8";
    /** Optional result cache; {@code null} disables caching. */
    private SCache cache;
    /** Number of cache hits served by {@link #getFromCache}. */
    private AtomicInteger cacheCounter = new AtomicInteger(0);
    /** Maximum length of the extracted text; a negative value means "no limit". */
    private int maxTextLength = -1;
    private ArticleTextExtractor extractor = new ArticleTextExtractor();

    /** Builds the set backing {@link #FURTHER_RESOLVE_NECESSARY}. */
    private static Set<String> createFurtherResolveDomains() {
        String[] domains = {
            "bit.ly", "cli.gs", "deck.ly", "fb.me", "feedproxy.google.com",
            "flic.kr", "fur.ly", "goo.gl", "is.gd", "ink.co", "j.mp",
            "lnkd.in", "on.fb.me", "ow.ly", "plurl.us", "sns.mx", "snurl.com",
            "su.pr", "t.co", "tcrn.ch", "tl.gd", "tiny.cc", "tinyurl.com",
            "tmi.me", "tr.im", "twurl.nl"
        };
        Set<String> result = new LinkedHashSet<String>();
        for (String domain : domains) {
            result.add(domain);
        }
        return result;
    }

    /**
     * Command line helper: reads {@code urls.txt} from the working directory,
     * takes the first double-quoted string of every line as a URL, downloads
     * it and stores the body as {@code <domain>.html}. A domain that was seen
     * before is written to {@code <domain>2.html} instead.
     *
     * <p>Lines that do not contain a pair of double quotes are skipped with a
     * warning.
     *
     * @param args ignored
     * @throws Exception if reading, fetching or writing fails
     */
    public static void main(String[] args) throws Exception {
        HtmlFetcher fetcher = new HtmlFetcher();
        Set<String> existing = new LinkedHashSet<String>();
        BufferedReader reader = new BufferedReader(new FileReader("urls.txt"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                int index1 = line.indexOf("\"");
                int index2 = index1 < 0 ? -1 : line.indexOf("\"", index1 + 1);
                if (index2 < 0) {
                    // No quoted URL on this line; substring() would throw.
                    logger.warn("skipping line without quoted url: " + line);
                    continue;
                }
                String url = line.substring(index1 + 1, index2);
                String domainStr = SHelper.extractDomain(url, true);
                // add() returns false when the domain was already present.
                String counterStr = existing.add(domainStr) ? "" : "2";
                String html = fetcher.fetchAsString(url, 20000);
                String outFile = domainStr + counterStr + ".html";
                BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
                try {
                    writer.write(html);
                } finally {
                    writer.close();
                }
            }
        } finally {
            reader.close();
        }
    }

    /** Sets the extractor that {@link #fetchAndExtract} feeds downloaded HTML to. */
    public void setExtractor(ArticleTextExtractor extractor) {
        this.extractor = extractor;
    }

    /** @return the extractor used by {@link #fetchAndExtract} */
    public ArticleTextExtractor getExtractor() {
        return this.extractor;
    }

    /**
     * Sets the result cache.
     *
     * @param cache the cache to use, or {@code null} to disable caching
     * @return this fetcher, for chaining
     */
    public HtmlFetcher setCache(SCache cache) {
        this.cache = cache;
        return this;
    }

    /** @return the configured cache, or {@code null} if none is set */
    public SCache getCache() {
        return this.cache;
    }

    /** @return the number of cache hits since creation or the last {@link #clearCacheCounter()} */
    public int getCacheCounter() {
        return this.cacheCounter.get();
    }

    /**
     * Resets the cache hit counter to zero.
     *
     * @return this fetcher, for chaining
     */
    public HtmlFetcher clearCacheCounter() {
        this.cacheCounter.set(0);
        return this;
    }

    /**
     * Sets the maximum length of the text kept by {@link #lessText}.
     *
     * @param maxTextLength the limit in characters; a negative value disables truncation
     * @return this fetcher, for chaining
     */
    public HtmlFetcher setMaxTextLength(int maxTextLength) {
        this.maxTextLength = maxTextLength;
        return this;
    }

    /** @return the maximum text length; negative means unlimited */
    public int getMaxTextLength() {
        return this.maxTextLength;
    }

    /** Sets the value sent as the {@code Accept} request header. */
    public void setAccept(String accept) {
        this.accept = accept;
    }

    /** Sets the value sent as the {@code content-charset} request header. */
    public void setCharset(String charset) {
        this.charset = charset;
    }

    /** Sets the value sent as the {@code Cache-Control} request header. */
    public void setCacheControl(String cacheControl) {
        this.cacheControl = cacheControl;
    }

    /** @return the value sent as the {@code Accept-Language} request header */
    public String getLanguage() {
        return this.language;
    }

    /** Sets the value sent as the {@code Accept-Language} request header. */
    public void setLanguage(String language) {
        this.language = language;
    }

    /** @return the value sent as the {@code Referer} request header */
    public String getReferrer() {
        return this.referrer;
    }

    /**
     * Sets the value sent as the {@code Referer} request header. The user
     * agent string is not changed by this call.
     *
     * @return this fetcher, for chaining
     */
    public HtmlFetcher setReferrer(String referrer) {
        this.referrer = referrer;
        return this;
    }

    /** @return the value sent as the {@code User-Agent} request header */
    public String getUserAgent() {
        return this.userAgent;
    }

    /** Sets the value sent as the {@code User-Agent} request header. */
    public void setUserAgent(String userAgent) {
        this.userAgent = userAgent;
    }

    /** @return the value sent as the {@code Accept} request header */
    public String getAccept() {
        return this.accept;
    }

    /** @return the value sent as the {@code Cache-Control} request header */
    public String getCacheControl() {
        return this.cacheControl;
    }

    /** @return the value sent as the {@code content-charset} request header */
    public String getCharset() {
        return this.charset;
    }

    /**
     * Fetches {@code url} and extracts its content into a {@link JResult}.
     *
     * <p>The URL is first normalised through {@code SHelper.removeHashbang}
     * and the Google/Facebook redirect helpers. When {@code resolve} is set,
     * the URL is additionally resolved via {@link #getResolvedUrl}. Cached
     * results are returned as soon as one is found. Otherwise a new result is
     * put into the cache <em>before</em> the download starts, filled in, and
     * finally {@code notifyAll()} is invoked on it.
     *
     * @param url     the URL to fetch
     * @param timeout connect and read timeout in milliseconds
     * @param resolve whether to resolve redirects before fetching
     * @return the (possibly cached) result; an otherwise empty result carrying
     *         only the URL if resolving failed
     * @throws Exception if downloading or extracting fails
     */
    public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception {
        String originalUrl = url;
        url = SHelper.removeHashbang(url);
        String gUrl = SHelper.getUrlFromUglyGoogleRedirect(url);
        if (gUrl != null) {
            url = gUrl;
        } else {
            gUrl = SHelper.getUrlFromUglyFacebookRedirect(url);
            if (gUrl != null) {
                url = gUrl;
            }
        }

        if (resolve) {
            JResult cached = this.getFromCache(url, originalUrl);
            if (cached != null) {
                return cached;
            }
            String resUrl = this.getResolvedUrl(url, timeout);
            // Null-check first: a null result means "keep the url as it is".
            if (resUrl != null) {
                if (resUrl.isEmpty()) {
                    // NOTE(review): a warning that is only emitted when debug
                    // logging is on; kept as found.
                    if (logger.isDebugEnabled()) {
                        logger.warn("resolved url is empty. Url is: " + url);
                    }
                    JResult emptyResult = new JResult();
                    if (this.cache != null) {
                        this.cache.put(url, emptyResult);
                    }
                    return emptyResult.setUrl(url);
                }
                // Only a longer resolved url replaces the current one.
                if (resUrl.trim().length() > url.length()) {
                    url = SHelper.useDomainOfFirstArg4Second(url, resUrl);
                }
            }
        }

        JResult cached = this.getFromCache(url, originalUrl);
        if (cached != null) {
            return cached;
        }

        JResult result = new JResult();
        result.setUrl(url);
        result.setOriginalUrl(originalUrl);
        result.setDate(SHelper.estimateDate(url));
        // Publish the still-empty result under both keys before the download.
        if (this.cache != null) {
            this.cache.put(originalUrl, result);
            this.cache.put(url, result);
        }

        String lowerUrl = url.toLowerCase();
        boolean skipDownload = SHelper.isDoc(lowerUrl) || SHelper.isApp(lowerUrl) || SHelper.isPackage(lowerUrl);
        if (!skipDownload) {
            if (SHelper.isVideo(lowerUrl) || SHelper.isAudio(lowerUrl)) {
                result.setVideoUrl(url);
            } else if (SHelper.isImage(lowerUrl)) {
                result.setImageUrl(url);
            } else {
                this.extractor.extractContent(result, this.fetchAsString(url, timeout));
                if (result.getFaviconUrl().isEmpty()) {
                    result.setFaviconUrl(SHelper.getDefaultFavicon(url));
                }
                result.setFaviconUrl(fixUrl(url, result.getFaviconUrl()));
                result.setImageUrl(fixUrl(url, result.getImageUrl()));
                result.setVideoUrl(fixUrl(url, result.getVideoUrl()));
                result.setRssUrl(fixUrl(url, result.getRssUrl()));
            }
        }
        result.setText(this.lessText(result.getText()));
        // Wake up threads waiting on the result's monitor, if any.
        synchronized (result) {
            result.notifyAll();
        }
        return result;
    }

    /**
     * Truncates {@code text} to the configured maximum length.
     *
     * @param text the text to shorten, may be {@code null}
     * @return an empty string for {@code null}; the first
     *         {@link #getMaxTextLength()} characters if the limit is
     *         non-negative and exceeded; otherwise {@code text} unchanged
     */
    public String lessText(String text) {
        if (text == null) {
            return "";
        }
        if (this.maxTextLength >= 0 && text.length() > this.maxTextLength) {
            return text.substring(0, this.maxTextLength);
        }
        return text;
    }

    /** Delegates to {@code SHelper.useDomainOfFirstArg4Second(url, urlOrPath)}. */
    private static String fixUrl(String url, String urlOrPath) {
        return SHelper.useDomainOfFirstArg4Second(url, urlOrPath);
    }

    /**
     * Same as {@link #fetchAsString(String, int, boolean)} with all optional
     * request headers enabled.
     */
    public String fetchAsString(String urlAsString, int timeout) throws MalformedURLException, IOException {
        return this.fetchAsString(urlAsString, timeout, true);
    }

    /**
     * Downloads the resource at {@code urlAsString}, following redirects, and
     * returns its body as a string.
     *
     * @param urlAsString            the URL to download
     * @param timeout                connect and read timeout in milliseconds
     * @param includeSomeGooseOptions whether to also send the language,
     *                               charset, referrer and cache-control headers
     * @return the body as produced by the {@link Converter}
     * @throws MalformedURLException if the URL cannot be parsed
     * @throws IOException           if the connection or reading fails
     */
    public String fetchAsString(String urlAsString, int timeout, boolean includeSomeGooseOptions) throws MalformedURLException, IOException {
        HttpURLConnection hConn = this.createUrlConnection(urlAsString, timeout, includeSomeGooseOptions);
        hConn.setInstanceFollowRedirects(true);
        InputStream is = hConn.getInputStream();
        String res;
        try {
            String enc = Converter.extractEncoding(hConn.getContentType());
            res = this.createConverter(urlAsString).streamToString(is, enc);
        } finally {
            // Release the stream even if the conversion fails. Harmless if the
            // converter already closed it.
            is.close();
        }
        if (logger.isDebugEnabled()) {
            logger.debug(res.length() + " FetchAsString:" + urlAsString);
        }
        return res;
    }

    /**
     * Creates the converter used to turn a response stream into a string.
     * Subclasses may override this to supply a different converter.
     */
    public Converter createConverter(String url) {
        return new Converter(url);
    }

    /**
     * Resolves one level of HTTP redirection for {@code urlAsString} using a
     * {@code HEAD} request without automatic redirect following.
     *
     * <p>If the redirect target belongs to one of the known shortener domains
     * it is resolved again, for at most {@value #MAX_REDIRECT_HOPS} further
     * hops.
     *
     * @param urlAsString the URL to resolve
     * @param timeout     connect and read timeout in milliseconds
     * @return {@code urlAsString} itself for a 200 response or a response
     *         that is not a 3xx with a {@code Location} header; the redirect
     *         target for a 3xx response; an empty string if any exception
     *         occurred
     */
    public String getResolvedUrl(String urlAsString, int timeout) {
        return this.resolveUrl(urlAsString, timeout, MAX_REDIRECT_HOPS);
    }

    /**
     * Implementation of {@link #getResolvedUrl} that stops re-resolving once
     * {@code remainingHops} is used up, so that shorteners redirecting to each
     * other cannot recurse without bound.
     */
    private String resolveUrl(String urlAsString, int timeout, int remainingHops) {
        String newUrl = null;
        int responseCode = -1;
        try {
            HttpURLConnection hConn = this.createUrlConnection(urlAsString, timeout, true);
            hConn.setInstanceFollowRedirects(false);
            // Only the status line and headers are of interest.
            hConn.setRequestMethod("HEAD");
            hConn.connect();
            responseCode = hConn.getResponseCode();
            hConn.getInputStream().close();
            if (responseCode == HttpURLConnection.HTTP_OK) {
                return urlAsString;
            }

            newUrl = hConn.getHeaderField("Location");
            if (responseCode / 100 != 3 || newUrl == null) {
                return urlAsString;
            }

            newUrl = newUrl.replaceAll(" ", "+");
            if (urlAsString.startsWith("http://bit.ly") || urlAsString.startsWith("http://is.gd")) {
                newUrl = encodeUriFromHeader(newUrl);
            }
            if (remainingHops > 0 && FURTHER_RESOLVE_NECESSARY.contains(SHelper.extractDomain(newUrl, true))) {
                newUrl = this.resolveUrl(newUrl, timeout, remainingHops - 1);
            }
            return newUrl;
        } catch (Exception ex) {
            logger.error("getResolvedUrl:" + urlAsString + " Error:" + ex.getMessage());
            return "";
        } finally {
            if (logger.isDebugEnabled()) {
                logger.debug(responseCode + " url:" + urlAsString + " resolved:" + newUrl);
            }
        }
    }

    /**
     * Percent-encodes every character of {@code badLocation} whose value is
     * {@code 0x80} or above as {@code %XX} (upper-case hex of the char value,
     * at least two digits); characters below {@code 0x80} are copied as is.
     *
     * @param badLocation a {@code Location} header value, must not be {@code null}
     * @return the encoded string
     */
    static String encodeUriFromHeader(String badLocation) {
        StringBuilder sb = new StringBuilder();
        for (char ch : badLocation.toCharArray()) {
            if (ch < '\u0080') {
                sb.append(ch);
            } else {
                // The cast is required: the X conversion rejects a Character
                // argument with IllegalFormatConversionException.
                sb.append(String.format("%%%02X", (int) ch));
            }
        }
        return sb.toString();
    }

    /**
     * Opens (but does not connect) a proxy-less HTTP connection to
     * {@code urlAsStr} with the configured request headers and with
     * {@code timeout} as both connect and read timeout.
     */
    private HttpURLConnection createUrlConnection(String urlAsStr, int timeout, boolean includeSomeGooseOptions) throws MalformedURLException, IOException {
        URL url = new URL(urlAsStr);
        HttpURLConnection hConn = (HttpURLConnection)url.openConnection(Proxy.NO_PROXY);
        hConn.setRequestProperty("User-Agent", this.userAgent);
        hConn.setRequestProperty("Accept", this.accept);
        if (includeSomeGooseOptions) {
            hConn.setRequestProperty("Accept-Language", this.language);
            hConn.setRequestProperty("content-charset", this.charset);
            hConn.addRequestProperty("Referer", this.referrer);
            hConn.setRequestProperty("Cache-Control", this.cacheControl);
        }
        hConn.setConnectTimeout(timeout);
        hConn.setReadTimeout(timeout);
        return hConn;
    }

    /**
     * Looks up {@code url} in the cache. On a hit the cached result gets its
     * URL and original URL overwritten and the hit counter is incremented.
     *
     * @return the cached result, or {@code null} if there is no cache or no entry
     */
    private JResult getFromCache(String url, String originalUrl) throws Exception {
        if (this.cache == null) {
            return null;
        }
        JResult res = this.cache.get(url);
        if (res == null) {
            return null;
        }
        res.setUrl(url);
        res.setOriginalUrl(originalUrl);
        this.cacheCounter.addAndGet(1);
        return res;
    }

    static {
        SHelper.enableCookieMgmt();
        SHelper.enableUserAgentOverwrite();
        SHelper.enableAnySSL();
        logger = LoggerFactory.getLogger(HtmlFetcher.class);
    }
}

