当前位置: 代码迷 >> JavaScript >> 需要在PhantomJS中打开一个URL数组
  详细解决方案

需要在PhantomJS中打开一个URL数组

热度:26   发布时间:2023-06-05 14:19:26.0

我用 PhantomJS 写了一个脚本,它的作用是从某个特定页面中抓取一些元素,这一部分工作正常。

这是代码:

var page = new WebPage(), testindex = 0, loadInProgress = false, fs = require('fs'), i = 0, j = 0, k = 0;

page.onConsoleMessage = function(msg) { console.log(msg); };
page.onLoadStarted    = function() { loadInProgress = true; console.log("load started"); };
page.onLoadFinished   = function() { loadInProgress = false; console.log("load finished"); };

// Sets the User Agent
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.107 Safari/537.36';

// Enable/Disable Javascript
// page.settings.javascriptEnabled = false;

// Ordered list of step functions. The polling loop after this array calls
// them one at a time, advancing `testindex` after each call.
var steps = [

    function() { //Load Page
        // open() is called without a callback, so injectJs below runs
        // immediately rather than after the page has loaded.
        page.open("http://www.example.com/mobiles/");
        // NOTE(review): injectJs is given an http URL here; PhantomJS documents
        // injectJs as loading a local file and includeJs as the remote-URL
        // variant — confirm. Nothing in this script's steps calls jQuery.
        page.injectJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js");
    },

    function() { //Fetch Products
        // Receives every value the page passes to window.callPhantom and
        // writes it to product-list.csv using mode 'w+'.
        page.onCallback = function(result) {
            var fs = require('fs');
            fs.write('product-list.csv', result, 'w+');
        };

        // The function below is executed inside the loaded page. `i` and `j`
        // are assigned without `var`, so they are globals of that page.
        page.evaluate(function() {
            var arr_mainList = new Array();
            var arr_innerList = new Array();

            try {
                // One iteration per <ul> found in the first "item_grid" element;
                // the same index selects the matching "lap_thu_box" element.
                for (i = 0; i < (document.getElementsByClassName("item_grid")[0].getElementsByTagName("ul").length); i++) {
                    arr_mainList.push(document.getElementsByClassName("lap_thu_box")[i]);

                    // Cell 1: text of the first link inside the box's first <h3>.
                    window.callPhantom(arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].textContent + ", ");
                    //window.callPhantom(arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href + ", ");

                    // NOTE(review): this is the part the question text after the
                    // script reports as stopping the run. getElementsByClassName
                    // is called on the value returned by window.open, not on a
                    // document — presumably this throws and control jumps to the
                    // catch below, skipping every remaining product; confirm.
                    var myWindow = window.open(arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href);
                    console.log(myWindow.getElementsByClassName("item_desc")[0].textContent);
                    myWindow.close();

                    // NOTE(review): 43 is an unexplained threshold on the link
                    // length; innerURL is only logged, never exported.
                    if (arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href.length > 43) {
                        var innerURL = arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href;
                        console.log(innerURL);
                    }

                    // Next cell: the "data-original" attribute of the box's first <img>.
                    window.callPhantom(arr_mainList[i].getElementsByTagName("img")[0].getAttribute("data-original") + ", ");

                    // arr_innerList receives the same element as arr_mainList[i].
                    arr_innerList.push(arr_mainList[i]); 

                    // Next cell: the <li> texts with commas removed, joined by
                    // " | "; the last one is followed by ", " instead.
                    for (j = 0; j < (document.getElementsByClassName("lap_thu_box")[i].getElementsByTagName("ul")[0].getElementsByTagName("li").length); j++) {                 
                        if ((j+1) < document.getElementsByClassName("lap_thu_box")[i].getElementsByTagName("ul")[0].getElementsByTagName("li").length) {
                            window.callPhantom(arr_innerList[i].getElementsByTagName("li")[j].textContent.replace(/,/g, "") + " | ");
                        }
                        else {
                            window.callPhantom(arr_innerList[i].getElementsByTagName("li")[j].textContent.replace(/,/g, "") + ", ");
                        }
                    };
                    //window.callPhantom(", ");
                    // Last cell: text of the first "cat_price" element with commas
                    // removed, followed by the row terminator.
                    window.callPhantom(arr_innerList[i].getElementsByClassName("cat_price")[0].textContent.replace(/,/g, ""));
                    window.callPhantom("\n");
                };

                // NOTE(review): this assignment runs inside the page, so it
                // presumably does not reach the PhantomJS-side loadInProgress
                // flag read by the polling loop — confirm.
                loadInProgress = true;
                console.log("Successful.");
            }
            catch(ex) {
                // Any exception raised above ends the loop; it is only logged.
                console.log("Failed: " + ex);
            }
        });
    }
];

// Polling driver for `steps`.
// Every 5 seconds: if no page load is pending, run the next step and advance
// `testindex`. Once no step is left, stop polling and exit PhantomJS 100 ms
// later. The handle is declared with `var` (it used to be an implicit global)
// and the timer is cleared so the exit is scheduled exactly once.
var interval = setInterval(function () {
    if (!loadInProgress && typeof steps[testindex] === "function") {
        console.log("step " + (testindex + 1));
        steps[testindex]();
        testindex++;
    }

    if (typeof steps[testindex] !== "function") {
        // All steps have been started: cancel further ticks before exiting.
        clearInterval(interval);
        setTimeout(function () {
            //fs.write('product-list.html', page.content, 'w');
            console.log("test complete!");
            phantom.exit();
        }, 100);
    }
}, 5000);

现在,如果我运行该程序,所有信息都能写入 csv 文件。 但是一旦执行到 window.open,PhantomJS 就会停止。 我知道无法在 page.evaluate 内部打开新页面。 但是我需要获取每个产品的描述,并把它写入 csv 文件,用来代替产品链接。 我已经搜索了好几个小时,任何帮助都将不胜感激。 注意:我的限制是必须使用 PhantomJS。

我已经稍微修改了您的脚本,现在您可以用它实现想要的功能。 请注意不要一次抓取太多条目,否则会遇到内存问题。 因此,如果目标网站有分页,请为分页另写一个函数来处理。 在这段代码中,我假设您需要的是每个设备的描述,但您也可以用同样的方式访问其他元素。

注意:您可能知道,跨域策略不允许我们通过 JavaScript / jQuery 访问 iframe 的内容,这会是一个很大的障碍。 您必须在 cmd / 终端中执行脚本时添加

--web-security=no

这个标志。

var page = new WebPage(), innerPage = new WebPage(), testindex = 0, loadInProgress = false, fs = require('fs'), i = 0, j = 0, k = 0;

page.onConsoleMessage = function(msg) { console.log(msg); };
page.onLoadStarted    = function() { loadInProgress = true; console.log("load started"); };
page.onLoadFinished   = function() { loadInProgress = false; console.log("load finished"); };

// Sets the User Agent
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.107 Safari/537.36';

// Enable/Disable Javascript
//page.settings.javascriptEnabled = false;

//IMPORTANT FLAGS
//--web-security=yes/no

/**
 * Calls `next` once jQuery is usable inside the currently loaded page.
 *
 * If the page already exposes `window.jQuery`, `next` runs immediately.
 * Otherwise jQuery is fetched with page.includeJs, which loads a remote
 * script and calls back when it is ready. page.injectJs is not used because
 * it reads a file from the local file system, not a URL.
 *
 * @param {Function} next - continuation to run when jQuery is available.
 */
function ensureJQuery(next) {
    var alreadyLoaded = page.evaluate(function () {
        return typeof window.jQuery === "function";
    });

    if (alreadyLoaded) {
        next();
        return;
    }
    page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", next);
}

/**
 * Appends one <iframe id="myIframeN"> per product to the listing page, each
 * pointing at that product's detail URL, so the fetch step can read the
 * description out of the frame.
 */
function embedDetailPages() {
    page.evaluate(function () {
        // Runs inside the page; only page-side globals are visible here.
        var $ = window.jQuery;
        var boxes = document.getElementsByClassName("lap_thu_box");
        var i, detailUrl;

        try {
            $("#main1").append('<div id="inner-data_iframes"></div>');

            for (i = 0; i < document.getElementsByClassName("item_grid")[0].getElementsByTagName("ul").length; i++) {
                detailUrl = boxes[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href;
                // Set id/src through attr() so the URL never has to be spliced into markup.
                $("<iframe></iframe>")
                    .attr({ id: "myIframe" + i, src: detailUrl })
                    .appendTo("#inner-data_iframes");
                window.document.body.scrollTop = document.body.scrollHeight;
            }
            console.log("Mission Successful.");
        }
        catch (ex) {
            console.log("Failed to add iFrame: " + ex);
        }
    });
}

// Ordered list of step functions; the polling loop after this array runs them
// one at a time.
var steps = [
    function () { // Load the listing page and embed every product's detail page
        page.open("http://www.example.com/mobiles-apple/", function (status) {
            if (status !== "success") {
                console.log("Failed to load listing page: " + status);
                return;
            }
            ensureJQuery(embedDetailPages);
        });
    },

    function () { // Fetch products
        // Receives every row the page passes to window.callPhantom and writes
        // it to product-list.csv.
        page.onCallback = function (result) {
            var fs = require('fs');
            fs.write('product-list.csv', result, 'w+');
        };

        page.evaluate(function () {
            // Runs inside the page; only page-side globals are visible here.
            var $ = window.jQuery;

            // Returns the comma-stripped description HTML found in the
            // index-th detail iframe, or "" when it cannot be read (frame not
            // loaded, no ".item_desc" element, jQuery missing, or access
            // refused — the script is meant to run with --web-security=no).
            function readDescription(index) {
                var html = null;
                try {
                    if ($) {
                        html = $("#myIframe" + index).contents().find(".item_desc").html();
                    }
                }
                catch (ex) {
                    console.log("Description " + index + " unavailable: " + ex);
                }
                return (typeof html === "string") ? html.replace(/,/g, "") : "";
            }

            try {
                var boxes = document.getElementsByClassName("lap_thu_box");
                var i, j, box, link, specItems, specs, row;

                for (i = 0; i < document.getElementsByClassName("item_grid")[0].getElementsByTagName("ul").length; i++) {
                    box = boxes[i];
                    link = box.getElementsByTagName("h3")[0].getElementsByTagName("a")[0];

                    // Spec cell: the <li> texts of the box's first <ul>, commas removed.
                    specItems = box.getElementsByTagName("ul")[0].getElementsByTagName("li");
                    specs = [];
                    for (j = 0; j < specItems.length; j++) {
                        specs.push(specItems[j].textContent.replace(/,/g, ""));
                    }

                    // Columns: name, description, image URL, specs, price.
                    // The spec column is always emitted, even when empty, so
                    // every row has the same number of cells.
                    row = [
                        link.textContent,
                        readDescription(i),
                        box.getElementsByTagName("img")[0].getAttribute("data-original"),
                        specs.join(" | "),
                        box.getElementsByClassName("cat_price")[0].textContent.replace(/,/g, "")
                    ].join(", ") + "\n";

                    // One bridge call per finished row instead of one per cell.
                    window.callPhantom(row);
                }

                console.log("Successful.");
            }
            catch (ex) {
                console.log("Failed: " + ex);
            }
        });
    }
];

// Polling driver for `steps`.
// Every 5 seconds: if no page load is pending, run the next step and advance
// `testindex`. Once no step is left, stop polling and exit PhantomJS 100 ms
// later. The handle is declared with `var` (it used to be an implicit global)
// and the timer is cleared so the exit is scheduled exactly once.
var interval = setInterval(function () {
    if (!loadInProgress && typeof steps[testindex] === "function") {
        console.log("step " + (testindex + 1));
        steps[testindex]();
        testindex++;
    }

    if (typeof steps[testindex] !== "function") {
        // All steps have been started: cancel further ticks before exiting.
        clearInterval(interval);
        setTimeout(function () {
            //fs.write('product-list.html', page.content, 'w');
            console.log("test complete!");
            phantom.exit();
        }, 100);
    }
}, 5000);