知识大全 Java获取网络文件并插入数据库

Posted 数据库

篇首语:案头见蠹鱼,犹胜凡俦侣。本文由小常识网(cha138.com)小编为大家整理,主要介绍了知识大全 Java获取网络文件并插入数据库相关的知识,希望对你有一定的参考价值。

Java获取网络文件并插入数据库:以下文字资料是由(全榜网 www.cha138.com)小编为大家搜集整理后发布的内容,让我们赶快一起来看一下吧!

  抓取各大网站的数据并插入数据库,这样就不用为没有数据而烦恼了。

  获取百度的歌曲名、歌手和链接:

  package webTools;

  import java io BufferedReader;

  import java io IOException;

  import java io InputStreamReader;

  import java io UnsupportedEncodingException;

  import MalformedURLException;

  import URL;

  import java util ArrayList;

  import java util HashMap;

  import java util List;

  import java util regex Matcher;

  import java util regex Pattern;

  import dbTools DBTools;

  /**
   * Downloads an HTML page, pulls table cells and links out of it with regular expressions,
   * and stores one (songName, songPerson, songHref) row per matched cell through DBTools.
   *
   * NOTE(review): this listing lost punctuation during extraction (dots, braces, commas,
   * quotes, digits and some tag-like fragments of the regex literals), so it does not
   * compile as shown. Restore the literals from the original source before relying on it.
   */
  public class IOTOWeb

  /**
   * Reads the resource at the given address line by line and returns the lines concatenated.
   *
   * Lines are appended without re-adding a separator. Any MalformedURLException,
   * UnsupportedEncodingException or IOException is printed and whatever was read so far
   * is returned.
   *
   * NOTE(review): the reader is closed only after the loop finishes normally; there is no
   * finally block, so an exception while reading leaves it open.
   *
   * @param URL address of the page to download
   * @return the text accumulated before the end of the stream or before a failure
   */
  public String getHtmlContent(String URL)

  URL url = null;

  // NOTE(review): the initial value (presumably an empty string literal) was lost.
  String rowContent = ;

  StringBuffer Content = new StringBuffer();

  try

  url = new URL(URL);

  // NOTE(review): the charset name is truncated to "gb"; presumably a GB-family
  // encoding such as gb2312 - confirm against the original.
  BufferedReader in = new BufferedReader(new InputStreamReader(url

   openStream() gb ));

  while ((rowContent = in readLine()) != null)

  Content append(rowContent);

  

  in close();

   catch (MalformedURLException e)

  // TODO Auto generated catch block

  e printStackTrace();

   catch (UnsupportedEncodingException e)

  // TODO Auto generated catch block

  e printStackTrace();

   catch (IOException e)

  // TODO Auto generated catch block

  e printStackTrace();

  

  return Content toString();

  

  /**
   * Collects every complete match of the table-cell pattern found in the given text.
   *
   * @param Content text to scan
   * @return list holding the full text of each match, in order of appearance
   */
  public List getLink(String Content)

  ArrayList listLink = new ArrayList();

  // NOTE(review): quotes and the "." of the lazy group were stripped from this literal;
  // it looks like a td cell wrapping an anchor with a quoted or bare href - confirm.
  String regex = <td[^>]*>[\\\\(]*<a[^>]*href=(\\ ([^\\ ]*)\\ |\\ ([^\\ ]*)\\ |([^\\\\s>]*))[^>]*>( *?)[\\\\)]*[\\\\s]*</td> ;

  Pattern pattern = pile(regex Pattern DOTALL);

  Matcher matcher = pattern matcher(Content);

  while (matcher find())

  listLink add(matcher group());

  

  return listLink;

  

  /**
   * Collects the href matches found in the given text after post-processing each match
   * with replaceFirst and replace.
   *
   * NOTE(review): the arguments of replaceFirst/replace are damaged; presumably they strip
   * the leading href= and the surrounding quote characters - confirm.
   *
   * @param Content text to scan
   * @return list of post-processed matches, in order of appearance
   */
  public List<String> getHref(String Content)

  String regex;

  List listtHref = new ArrayList();

  regex = href=(\\ ([^\\ ]*)\\ |\\ ([^\\ ]*)\\ |([^\\\\s>]*))\\ ;

  Pattern pa = pile(regex Pattern DOTALL);

  Matcher ma = pa matcher(Content);

  while (ma find())

  listtHref add(ma group() replaceFirst( href=\\ ) replace( \\

   ));

  

  return listtHref;

  

  /**
   * Collects the matches of the performer pattern found in the given text, applying the
   * same replaceFirst/replace post-processing as getHref.
   *
   * NOTE(review): the beginning of this regex literal is missing (it starts at "]*href="),
   * so the exact pattern cannot be recovered from this listing.
   *
   * @param Content text to scan
   * @return list of post-processed matches, in order of appearance
   */
  public List<String> getPerson(String Content)

  String regex;

  List list = new ArrayList();

  regex = ]*href=(\\"([^\\"]*)\\"|\\ ([^\\ ]*)\\ |([^\\\\s>]*))[^>]*>( *?)\\\\ >\\\\(<a[^>]*href=(\\ ([^\\ ]*)\\ |\\ ([^\\ ]*)\\ |([^\\\\s>]*))[^>]*>( *?)\\\\) ;

  Pattern pa = pile(regex Pattern DOTALL);

  Matcher ma = pa matcher(Content);

  while (ma find())

  list add(ma group() replaceFirst( href=\\ ) replace( \\ ));

  

  return list;

  

  /**
   * Collects every complete match of the anchor pattern found in the given text.
   *
   * @param Content text to scan
   * @return list holding the full text of each match, in order of appearance
   */
  public List<String> getSongName(String Content)

  String regex;

  List listPerson = new ArrayList();

  regex = <a[^>]*href=(\\ ([^\\ ]*)\\ |\\ ([^\\ ]*)\\ |([^\\\\s>]*))[^>]*>( *?)</a>\\\\s ;

  Pattern pa = pile(regex Pattern DOTALL);

  Matcher ma = pa matcher(Content);

  while (ma find())

  listPerson add(ma group());

  

  return listPerson;

  

  /**
   * Concatenates every match of the list-table pattern found in the given text.
   *
   * NOTE(review): the attribute values inside the table pattern (width, cellpadding,
   * cellspacing) lost their digits - confirm against the target page.
   *
   * @param Content full page text
   * @return all matches joined together, or an empty string when nothing matches
   */
  public String getMainContent(String Content)

  String regex = <table width=\\ %\\ align=\\ center\\ cellpadding=\\ \\ cellspacing=\\ \\ class=\\ list\\ >( *?)</table> ;

  StringBuffer mainContent = new StringBuffer();

  Pattern pattern = pile(regex Pattern DOTALL);

  Matcher matcher = pattern matcher(Content);

  while (matcher find())

  mainContent append(matcher group());

  

  return mainContent toString();

  

  /**
   * Returns the given string after a replaceAll call.
   *
   * NOTE(review): the pattern and replacement arguments are damaged; presumably this
   * removes markup tags from the text - confirm.
   *
   * @param s text to process
   * @return the processed text
   */
  public String outTag(final String s)

  return s replaceAll( < *?> );

  

  // Database helper created once per IOTOWeb instance and used for the inserts below.
  DBTools dbTools = new DBTools();

  /**
   * Downloads the page, narrows it to the list table, and inserts one row into the song
   * table for every matched table cell.
   *
   * @param URL address of the page to process
   * @throws Throwable whatever the extraction steps or the database update throw
   */
  public void getFromBaiduMap (String URL) throws Throwable

  // NOTE(review): ContentMap is created here but not used anywhere in this method.
  HashMap ContentMap = new HashMap();

  String Content = getHtmlContent(URL);

  String mainContent = getMainContent(Content);

  List listLink = getLink(mainContent);

  // NOTE(review): the loop start values and list indices below lost their digits.
  for (int j = ; j < listLink size(); j++)

  String tdTag = listLink get(j) toString();

  List songNameList = getSongName(tdTag);

  // NOTE(review): no emptiness check is visible before this get call, so a cell without
  // an anchor match would make it throw.
  String songName = outTag(songNameList get( ) toString());

  List personList = getPerson(tdTag);

  String songPerson = ;

  if (personList size() != )

  // Each iteration assigns songPerson, so only the last matched performer is kept.
  for (int n = ; n < personList size(); n++)

  // System out println(personList get(n) toString());

  songPerson = outTag(personList get(n) toString());

  

   else

  // Placeholder stored when no performer was matched.
  songPerson = 无 ;

  

  // System out print(songNameList get( ) toString());

  List hrefList = getHref(songNameList get( ) toString());

  String songHref = hrefList get( ) toString();

  System out println();

  // Values are bound through placeholders rather than concatenated into the statement.
  String sql = insert into song(songName songPerson songHref) values(? ? ?) ;

  ArrayList list_values = new ArrayList();

  list_values add(songName);

  list_values add(songPerson);

  list_values add(songHref);

  dbTools update(sql list_values);

  

  

  

  DBTools 数据库连接类:

  package dbTools;

  import java util ArrayList;

  import java sql *;

  /**
   * Small JDBC helper that opens one MySQL connection and runs parameterized statements on it.
   *
   * <p>Each call creates its own statement and closes it (and any result set) before
   * returning, including when binding or execution fails.
   *
   * <p>NOTE(review): the published listing lost its punctuation and digits. The driver class
   * name and the port (3306, the MySQL default) were restored by convention - confirm both
   * against the real deployment. The credentials are hard-coded as in the original; move them
   * to configuration before using this outside a demo.
   */
  public class DBTools {

    /** Connection shared by all calls; stays {@code null} when the constructor could not connect. */
    private Connection connection;

    /**
     * Loads the MySQL driver and opens the connection.
     *
     * <p>Failures are printed and not rethrown, as in the original, so constructing the helper
     * never fails; the problem is reported again when a statement is first requested.
     */
    public DBTools() {
      try {
        Class.forName("com.mysql.jdbc.Driver");
      } catch (ClassNotFoundException e) {
        e.printStackTrace();
      }
      try {
        connection = DriverManager.getConnection(
            "jdbc:mysql://localhost:3306/TestURL", "root", "zhuyi");
      } catch (SQLException e) {
        e.printStackTrace();
      }
    }

    /**
     * Runs a parameterized SELECT and returns every row as an array of column strings.
     *
     * @param sql statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders in order; {@code null} means none
     * @return one {@code String[]} per row, in result order; empty when there are no rows
     * @throws Throwable an {@link SQLException} when no connection is available or the
     *     statement fails
     */
    public ArrayList query(String sql, ArrayList list_values) throws Throwable {
      ArrayList<String[]> listRows = new ArrayList<String[]>();
      PreparedStatement statement = prepare(sql, list_values);
      try {
        ResultSet rows = statement.executeQuery();
        try {
          // The column count is fixed for the whole result, so read it once.
          int columnCount = rows.getMetaData().getColumnCount();
          while (rows.next()) {
            String[] rowinfo = new String[columnCount];
            for (int i = 0; i < columnCount; i++) {
              rowinfo[i] = rows.getString(i + 1); // JDBC columns are 1-based
            }
            listRows.add(rowinfo);
          }
        } finally {
          rows.close();
        }
      } finally {
        statement.close();
      }
      return listRows;
    }

    /**
     * Runs a parameterized INSERT, UPDATE or DELETE.
     *
     * @param sql statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders in order; {@code null} means none
     * @throws Throwable an {@link SQLException} when no connection is available or the
     *     statement fails
     */
    public void update(String sql, ArrayList list_values) throws Throwable {
      PreparedStatement statement = prepare(sql, list_values);
      try {
        statement.executeUpdate();
      } finally {
        statement.close();
      }
    }

    /** Creates a statement for {@code sql} and binds the values; closes it if binding fails. */
    private PreparedStatement prepare(String sql, ArrayList list_values) throws SQLException {
      if (connection == null) {
        throw new SQLException(
            "No database connection is available; see the error printed when DBTools was created");
      }
      PreparedStatement statement = connection.prepareStatement(sql);
      boolean bound = false;
      try {
        int count = (list_values == null) ? 0 : list_values.size();
        for (int i = 0; i < count; i++) {
          statement.setObject(i + 1, list_values.get(i)); // JDBC parameters are 1-based
        }
        bound = true;
        return statement;
      } finally {
        if (!bound) {
          statement.close();
        }
      }
    }
  }

  Servlet调用

  代码

  package controller;

  import java io IOException;

  import java io PrintWriter;

  import java util List;

  import javax servlet ServletException;

  import javax servlet HttpServlet;

  import javax servlet HttpServletRequest;

  import javax servlet HttpServletResponse;

  import webTools IOTOWeb;

  /**
   * Servlet whose GET handler triggers the IOTOWeb scrape-and-insert run and whose POST
   * handler writes a fixed HTML page.
   *
   * NOTE(review): this listing lost punctuation, quotes and digits during extraction and
   * does not compile as shown.
   */
  public class TestURL extends HttpServlet

  /**
   * Creates the servlet; only delegates to the superclass constructor.
   */

  public TestURL()

  super();

  

  /**
   * Releases the servlet; only delegates to the superclass implementation.
   */

  public void destroy()

  super destroy(); // Just puts destroy string in log

  // Put your code here

  

  /**
   * Handles GET by creating an IOTOWeb and calling getFromBaiduMap on a fixed address.
   *
   * Any Throwable raised by that call is printed and not rethrown. Nothing is written to
   * the response in this method.
   *
   * NOTE(review): the address literal is damaged (only the query-string fragments remain);
   * restore it from the original before use.
   *
   * @param request
   *            the request sent by the client to the server
   * @param response
   *            the response sent by the server to the client
   * @throws ServletException
   *             declared by the signature; not raised by the visible code
   * @throws IOException
   *             declared by the signature; not raised by the visible code
   */

  public void doGet(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException

  try

  IOTOWeb iotoWeb = new IOTOWeb();

  iotoWeb getFromBaiduMap ( ?id= ?top );

   catch (Throwable e)

  // TODO Auto generated catch block

  e printStackTrace();

  

  

  /**
   * Handles POST by writing a small fixed HTML page that names this class, then flushing
   * and closing the writer.
   *
   * NOTE(review): the content-type literal is truncated after the slash - confirm the
   * intended subtype.
   *
   * @param request
   *            the request sent by the client to the server
   * @param response
   *            the response sent by the server to the client
   * @throws ServletException
   *             declared by the signature; not raised by the visible code
   * @throws IOException
   *             if obtaining the response writer fails
   */

  public void doPost(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException

  response setContentType( text/ );

  PrintWriter out = response getWriter();

  out

   println( <!DOCTYPE HTML PUBLIC \\ //W C//DTD HTML Transitional//EN\\ > );

  out println( <HTML> );

  out println(   <HEAD><TITLE>A Servlet</TITLE></HEAD> );

  out println(   <BODY> );

  out print(     This is );

  out print(this getClass());

  out println( using the POST method );

  out println(   </BODY> );

  out println( </HTML> );

  out flush();

  out close();

  

  /**
   * Initializes the servlet; the body is empty in this listing.
   *
   * @throws ServletException
   *             declared by the signature; not raised by the visible code
   */

  public void init() throws ServletException

  // Put your code here

  

  

  获取金书网的图书名

  代码

  package webTools;

  import java io BufferedReader;

  import java io InputStreamReader;

  import URL;

  import java util ArrayList;

  import java util List;

  import java util regex Matcher;

  import java util regex Pattern;

  import dbTools DBTools;

  /**
   * Downloads a book page, extracts the first matching span as the book name, and inserts
   * it into the bookinfo table through DBTools.
   *
   * NOTE(review): this listing lost punctuation, quotes and digits during extraction and
   * does not compile as shown.
   */
  public class GetBook

  /**
   * Reads the resource at the given address line by line and returns the lines concatenated
   * without separators.
   *
   * NOTE(review): the reader is closed only when reading completes normally; an exception
   * while reading propagates without closing it. The charset name is truncated to "gb";
   * presumably a GB-family encoding - confirm.
   *
   * @param URL address of the page to download
   * @return the concatenated page text
   * @throws Throwable any failure while opening or reading the resource
   */
  public String getHtmlContent(String URL) throws Throwable

  URL url = null;

  String rowContent = ;

  StringBuffer Content = new StringBuffer();

  url = new URL(URL);

  BufferedReader in = new BufferedReader(new InputStreamReader(url

   openStream() gb ));

  while ((rowContent = in readLine()) != null)

  Content append(rowContent);

  

  in close();

  return Content toString();

  

  /**
   * Returns the first match of the span pattern in the given text.
   *
   * NOTE(review): the class name inside the pattern lost at least one character (likely a
   * digit), and the initial value of bookName (presumably an empty string) is missing.
   *
   * @param Content page text to scan
   * @return the full text of the first match, or the initial value when nothing matches
   */
  public String getBookName(String Content)

  String bookName = ;

  String regex = <span class=\\ style \\ >[^>]*</span> ;

  Pattern pattern = pile(regex Pattern DOTALL);

  Matcher matcher = pattern matcher(Content);

  // Only the first match is used.
  if (matcher find())

  bookName = matcher group();

  

  return bookName;

  

  /**
   * Returns the given string after a replaceAll call.
   *
   * NOTE(review): the pattern and replacement arguments are damaged; presumably this
   * removes markup tags - confirm.
   *
   * @param s text to process
   * @return the processed text
   */
  public String outTag(final String s)

  return s replaceAll( < *?> );

  

  // Database helper created once per GetBook instance.
  DBTools dbtools = new DBTools();

  /**
   * Downloads the page, extracts the book name, and inserts it into bookinfo when the
   * name passes the null/equals check below.
   *
   * @param URL address of the book page
   * @throws Throwable any failure from the download or the database update
   */
  public void getFromJINSHU(String URL) throws Throwable

  String Content = getHtmlContent(URL);

  String bookName = outTag(getBookName(Content));

  // NOTE(review): the receiver of equals was lost; presumably an empty-string literal, which
  // would make this skip pages where no name was found - confirm.
  if (bookName != null && ! equals(bookName))

  System out println(bookName);

  String sql = insert into bookinfo(bookName) values(?) ;

  ArrayList list_values = new ArrayList();

  list_values add(bookName);

  dbtools update(sql list_values);

  

  

  

  调用Servlet

  代码

  package controller;

  import java io IOException;

  import java io PrintWriter;

  import javax servlet ServletException;

  import javax servlet HttpServlet;

  import javax servlet HttpServletRequest;

  import javax servlet HttpServletResponse;

  import webTools GetBook;

  /**
   * Servlet that walks a range of book page numbers and feeds each page to GetBook.
   *
   * NOTE(review): this listing lost punctuation, quotes and digits during extraction and
   * does not compile as shown; the loop bounds and the address fragments are missing.
   */
  public class TestBook extends HttpServlet

  /**
   * Creates the servlet; only delegates to the superclass constructor.
   */

  public TestBook()

  super();

  

  /**
   * Releases the servlet; only delegates to the superclass implementation.
   */

  public void destroy()

  super destroy(); // Just puts destroy string in log

  // Put your code here

  

  /**
   * Page counter shared by doGet and doPost.
   *
   * It is an instance field, so its value carries over from one call to the next; the
   * loops below never reset it. NOTE(review): servlet containers commonly serve concurrent
   * requests from a single instance, which would make this unsynchronized counter unsafe -
   * confirm the deployment model. The initial value was lost in extraction.
   */

  int i = ;

  /**
   * Handles GET by continuing the page loop from the current value of i.
   *
   * When a page fails, i is advanced and doPost is called; doPost runs the same loop and
   * calls back into doGet on its own failures, so every failure adds a nested call.
   * After the nested call returns, the loop increment advances i once more.
   *
   * @param request
   *            the request sent by the client to the server
   * @param response
   *            the response sent by the server to the client
   * @throws ServletException
   *             declared by the signature
   * @throws IOException
   *             declared by the signature
   */
  public void doGet(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException

  GetBook bookinfo = new GetBook();

  // NOTE(review): the upper bound of the loop was lost in extraction.
  for (; i < ; i++)

  // NOTE(review): host and file-extension fragments of this address were lost.
  String bookURL = /booksinfo/ / + i

  + l ;

  try

  bookinfo getFromJINSHU(bookURL);

   catch (Throwable e)

  // The failure itself is not logged; the page is skipped and processing hands over to doPost.
  i++;

  doPost(request response);

  

  

  

  /**
   * Handles POST with the same page loop as doGet, handing over to doGet when a page
   * fails (see the recursion note on doGet).
   *
   * @param request
   *            the request sent by the client to the server
   * @param response
   *            the response sent by the server to the client
   * @throws ServletException
   *             declared by the signature
   * @throws IOException
   *             declared by the signature
   */

  public void doPost(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException

  GetBook bookinfo = new GetBook();

  for (; i < ; i++)

  String bookURL = /booksinfo/ / + i

  + l ;

  try

  bookinfo getFromJINSHU(bookURL);

   catch (Throwable e)

  // The failure itself is not logged; the page is skipped and processing hands over to doGet.
  i++;

  doGet(request response);

  

  

  

  /**
   * Initializes the servlet; the body is empty in this listing.
   *
   * @throws ServletException
   *             declared by the signature; not raised by the visible code
   */

  public void init() throws ServletException

  // Put your code here

  

  

cha138/Article/program/Java/hx/201311/25707

相关参考

知识大全 Java获取网络主机信息

Java获取网络主机信息  以下文字资料是由(全榜网网www.cha138.com)小编为大家搜集整理后发布的内容,让我们赶快一起来看一下吧!  InetAddress类

知识大全 Java对话框获取目录、文件路径

Java对话框获取目录、文件路径  以下文字资料是由(全榜网网www.cha138.com)小编为大家搜集整理后发布的内容,让我们赶快一起来看一下吧!  publicFi

知识大全 在JAVA文件中获取该项目的相对路径

在JAVA文件中获取该项目的相对路径  以下文字资料是由(全榜网网www.cha138.com)小编为大家搜集整理后发布的内容,让我们赶快一起来看一下吧!

知识大全 Java实现获取指定路径下的指定格式的文件

Java实现获取指定路径下的指定格式的文件  以下文字资料是由(全榜网网www.cha138.com)小编为大家搜集整理后发布的内容,让我们赶快一起来看一下吧!  获取指

知识大全 用Java制作网络文件下载系统

用Java制作网络文件下载系统  以下文字资料是由(全榜网网www.cha138.com)小编为大家搜集整理后发布的内容,让我们赶快一起来看一下吧!  由于Linux操作

知识大全 Java实现基于http协议的网络文件下载

Java实现基于http协议的网络文件下载  以下文字资料是由(全榜网网www.cha138.com)小编为大家搜集整理后发布的内容,让我们赶快一起来看一下吧!  HTT

知识大全 java资源获取分析

  在开发 Java 程序的过程中,我们经常要做的一件事就是获取资源。那么什么是资源呢?说白了,在计算机里那就是一堆数据,只是这堆数据对我们的 Java 程序有多种表现形式,一般来说有 File、URL、InputStrea

知识大全 在*.SQL文件里获取并使用变量

在*.SQL文件里获取并使用变量  以下文字资料是由(全榜网网www.cha138.com)小编为大家搜集整理后发布的内容,让我们赶快一起来看一下吧!  熟悉Oracle

知识大全 网络数据流的java处理

  郭洪锋()该文章对编写客户服务器应用的java程序员有所帮助可以解决程序在对方出现故障的时候继续稳定运行  前言java程序要处理很多的网络数据网络数据发送和接收以及数据流的处理是java程序要特

知识大全 Java网络数据库编程及其应用

Java网络数据库编程及其应用  以下文字资料是由(全榜网网www.cha138.com)小编为大家搜集整理后发布的内容,让我们赶快一起来看一下吧!  随着电子商务及动态