需要使用 Selenium 从多个网页抓取数据

网页:http://www.forbes.com/companies/icbc/

package selenium; import java.util.List; import java.util.concurrent.TimeUnit; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.openqa.selenium.By; import org.openqa.selenium.By.ByTagName; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebElement; import org.openqa.selenium.ie.InternetExplorerDriver; import org.openqa.selenium.support.ui.ExpectedConditions; import org.openqa.selenium.support.ui.WebDriverWait; public class ForbesTest { WebDriver driver; String url; @Before public void setUp() throws Exception { System.setProperty("webdriver.ie.driver","D:\\IEDriverServer_x64_2.53.1\\IEDriverServer.exe"); driver=new InternetExplorerDriver(); driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS); url="http://www.forbes.com/companies/icbc/"; driver.get(url); } @After public void tearDown() throws Exception { driver.quit(); driver.close(); } @Test public void test() throws InterruptedException { Thread.sleep(10000); WebElement tab=driver.findElement(By.className("large")); Thread.sleep(1000); String text= tab.getText(); System.out.println(text); WebElement col1=driver.findElement(By.tagName("dt")); //Thread.sleep(1000); String industry= col1.getText(); if(industry.matches("Industry")){ System.out.println(industry); WebElement col2=driver.findElement(By.tagName("dd")); //Thread.sleep(1000); String industryName= col2.getText(); System.out.println(industryName); } String forbesWebsite= driver.getCurrentUrl(); System.out.println(forbesWebsite); WebElement nextPage=driver.findElement(By.className("next-number")); nextPage.click(); driver.close(); } } 

我想获取以下字段:排名、公司、国家、销售额、销售额排名、利润、利润排名、资产、资产排名、市值、市值排名、行业、成立时间、公司网站、员工人数、总部所在城市、CEO 姓名、Forbes.com 上的公司信息页面以及年份

要获取行业的文本:

 // Locate the first <dd> that follows the node whose text contains "Industry", then read its text.
 WebElement industryValue = driver.findElement(
         By.xpath("//*[contains(text(),'Industry')]//following::dd[1]"));
 String industryName = industryValue.getText();

要获取成立时间的文本:

 // Locate the first <dd> that follows the node whose text contains "Founded", then read its text.
 WebElement foundedValue = driver.findElement(
         By.xpath("//*[contains(text(),'Founded')]//following::dd[1]"));
 String Founded = foundedValue.getText();

所以你只需要把下面 XPath 中的 String 替换为要获取的字段的标签文本:

 xpath = //*[contains(text(),'String')]//following::dd[1]