知识大全 Java获取网络文件并插入数据库

  抓取各大网站的数据并插入数据库，这样就不用为没有数据而烦恼了。

  获取百度的歌曲名、歌手和链接！

  package webTools;

  import java io BufferedReader;

  import java io IOException;

  import java io InputStreamReader;

  import java io UnsupportedEncodingException;

  import MalformedURLException;

  import URL;

  import java util ArrayList;

  import java util HashMap;

  import java util List;

  import java util regex Matcher;

  import java util regex Pattern;

  import dbTools DBTools;

  // Scrapes a song listing page: downloads the text behind a URL, picks out table cells,
  // anchors and href values with regular expressions, and stores one row per song through
  // DBTools.
  // NOTE(review): this listing appears to have lost punctuation during extraction (member
  // access dots, braces, quotes, commas and digits are absent), so it does not compile as
  // shown. The comments below describe only what the remaining tokens show.
  public class IOTOWeb

  // Opens the given URL, reads the stream line by line and returns everything read as one
  // string. Each line is appended directly to the buffer, so no separator is inserted
  // between lines.
  // All three catch clauses only print the stack trace, after which the method still
  // returns whatever was appended before the failure (possibly an empty string).
  // NOTE(review): the charset argument survives only as "gb"; the rest of the name seems
  // to be missing - confirm the intended encoding.
  // NOTE(review): "in close()" is on the success path only, so the reader is not closed
  // when an exception is thrown while reading.
  public String getHtmlContent(String URL)

  URL url = null;

  String rowContent = ;

  StringBuffer Content = new StringBuffer();


  url = new URL(URL);

  BufferedReader in = new BufferedReader(new InputStreamReader(url

   openStream() gb ));

  while ((rowContent = in readLine()) != null)

  Content append(rowContent);


  in close();

   catch (MalformedURLException e)

  // TODO Auto generated catch block

  e printStackTrace();

   catch (UnsupportedEncodingException e)

  // TODO Auto generated catch block

  e printStackTrace();

   catch (IOException e)

  // TODO Auto generated catch block

  e printStackTrace();


  return Content toString();


  // Returns, in match order, the full text of every match of a pattern that starts at
  // "<td", contains an "<a ... href=" anchor and ends at "</td>".
  // NOTE(review): "pile(regex Pattern DOTALL)" is presumably Pattern.compile(regex,
  // Pattern.DOTALL) with its prefix lost; the quote characters inside the regex are
  // missing as well - confirm against the original file.
  public List getLink(String Content)

  ArrayList listLink = new ArrayList();

  String regex = <td[^>]*>[\\\\(]*<a[^>]*href=(\\ ([^\\ ]*)\\ |\\ ([^\\ ]*)\\ |([^\\\\s>]*))[^>]*>( *?)[\\\\)]*[\\\\s]*</td> ;

  Pattern pattern = pile(regex Pattern DOTALL);

  Matcher matcher = pattern matcher(Content);

  while (matcher find())

  listLink add(matcher group());


  return listLink;


  // Collects one entry per match of a pattern beginning with "href=". Each match is passed
  // through replaceFirst(...) and replace(...) before it is stored.
  // NOTE(review): the statement that adds to the list is cut off in this listing, so the
  // exact text removed from each match cannot be confirmed from here.
  // The method is declared to return List<String> but builds a raw List.
  public List<String> getHref(String Content)

  String regex;

  List listtHref = new ArrayList();

  regex = href=(\\ ([^\\ ]*)\\ |\\ ([^\\ ]*)\\ |([^\\\\s>]*))\\ ;

  Pattern pa = pile(regex Pattern DOTALL);

  Matcher ma = pa matcher(Content);

  while (ma find())

  listtHref add(ma group() replaceFirst( href=\\ ) replace( \\



  return listtHref;


  // Collects one entry per match of a pattern containing two "href=" anchors, the second
  // of them wrapped in escaped parentheses. Each match is passed through replaceFirst(...)
  // and replace(...) before it is stored.
  // NOTE(review): the regex text begins with "]*href=", i.e. in the middle of a character
  // class; its opening appears to be missing - confirm against the original file.
  public List<String> getPerson(String Content)

  String regex;

  List list = new ArrayList();

  regex = ]*href=(\\"([^\\"]*)\\"|\\ ([^\\ ]*)\\ |([^\\\\s>]*))[^>]*>( *?)\\\\ >\\\\(<a[^>]*href=(\\ ([^\\ ]*)\\ |\\ ([^\\ ]*)\\ |([^\\\\s>]*))[^>]*>( *?)\\\\) ;

  Pattern pa = pile(regex Pattern DOTALL);

  Matcher ma = pa matcher(Content);

  while (ma find())

  list add(ma group() replaceFirst( href=\\ ) replace( \\ ));


  return list;


  // Returns the full text of every match of an "<a ... href=...> ... </a>" pattern followed
  // by a whitespace escape. The local list is named listPerson although it holds the
  // anchor matches used as song names by getFromBaiduMap.
  public List<String> getSongName(String Content)

  String regex;

  List listPerson = new ArrayList();

  regex = <a[^>]*href=(\\ ([^\\ ]*)\\ |\\ ([^\\ ]*)\\ |([^\\\\s>]*))[^>]*>( *?)</a>\\\\s ;

  Pattern pa = pile(regex Pattern DOTALL);

  Matcher ma = pa matcher(Content);

  while (ma find())

  listPerson add(ma group());


  return listPerson;


  // Concatenates every match of a "<table ... class= list ...> ... </table>" pattern and
  // returns the result as one string; nothing is appended when there is no match, so the
  // result is then empty.
  // NOTE(review): the attribute values inside the pattern (width, cellpadding,
  // cellspacing) are missing from this listing.
  public String getMainContent(String Content)

  String regex = <table width=\\ %\\ align=\\ center\\ cellpadding=\\ \\ cellspacing=\\ \\ class=\\ list\\ >( *?)</table> ;

  StringBuffer mainContent = new StringBuffer();

  Pattern pattern = pile(regex Pattern DOTALL);

  Matcher matcher = pattern matcher(Content);

  while (matcher find())

  mainContent append(matcher group());


  return mainContent toString();


  // Applies replaceAll to s with a pattern that starts with "<" and ends with ">".
  // NOTE(review): presumably this strips markup tags by replacing them with an empty
  // string; the replacement argument is not visible here - confirm.
  public String outTag(final String s)

  return s replaceAll( < *?> );


  // Database helper created once per IOTOWeb instance by this field initializer.
  DBTools dbTools = new DBTools();

  // Downloads the page at URL, narrows it with getMainContent, and for every cell returned
  // by getLink derives a song name, a performer and a link, then inserts them into the
  // "song" table through dbTools update.
  // ContentMap is created but not used anywhere in the visible code.
  // NOTE(review): the index arguments of the "get( )" calls and the loop start values are
  // missing from this listing. There is no visible emptiness check before the song-name
  // and href lists are indexed.
  public void getFromBaiduMap (String URL) throws Throwable

  HashMap ContentMap = new HashMap();

  String Content = getHtmlContent(URL);

  String mainContent = getMainContent(Content);

  List listLink = getLink(mainContent);

  for (int j = ; j < listLink size(); j++)

  String tdTag = listLink get(j) toString();

  List songNameList = getSongName(tdTag);

  String songName = outTag(songNameList get( ) toString());

  List personList = getPerson(tdTag);

  String songPerson = ;

  if (personList size() != )

  // songPerson is reassigned on every pass, so only the last performer match is kept.
  for (int n = ; n < personList size(); n++)

  // System out println(personList get(n) toString());

  songPerson = outTag(personList get(n) toString());



  // NOTE(review): presumably the else branch of the size check above, storing a
  // placeholder when no performer was matched - confirm.
  songPerson = 无 ;


  // System out print(songNameList get( ) toString());

  List hrefList = getHref(songNameList get( ) toString());

  String songHref = hrefList get( ) toString();

  System out println();

  // Values are bound to the "?" placeholders by dbTools update in list order.
  String sql = insert into song(songName songPerson songHref) values(? ? ?) ;

  ArrayList list_values = new ArrayList();

  list_values add(songName);

  list_values add(songPerson);

  list_values add(songHref);

  dbTools update(sql list_values);





  package dbTools;

  import java util ArrayList;

  import java sql *;

  /**
   * Small JDBC helper that opens one MySQL connection when constructed and runs
   * parameterised statements on it.
   *
   * <p>Construction never throws: a missing driver or a failed connection attempt is
   * reported on standard error and the instance is left without a connection. In that
   * state {@link #query} and {@link #update} fail with an {@link SQLException} that says
   * so, instead of a bare {@code NullPointerException}.
   *
   * <p>Every statement and result set is local to the call that created it and is closed
   * in a {@code finally} block, so nothing leaks when execution fails part-way.
   *
   * <p>NOTE(review): the driver class name, port and credentials were rebuilt from a
   * listing that had lost its punctuation and digits; port 3306 is an assumption - confirm.
   * The credentials are hard-coded as in the original and should move to configuration.
   */
  public class DBTools {

    private static final String DRIVER_CLASS = "com.mysql.jdbc.Driver";
    private static final String JDBC_URL = "jdbc:mysql://localhost:3306/TestURL";
    private static final String DB_USER = "root";
    private static final String DB_PASSWORD = "zhuyi";

    /** The single connection used by this helper; {@code null} when connecting failed. */
    private final Connection connection;

    /**
     * Loads the JDBC driver and tries to connect. Failures are reported on standard error
     * and leave the helper unconnected rather than aborting construction.
     */
    public DBTools() {
      try {
        Class.forName(DRIVER_CLASS);
      } catch (ClassNotFoundException e) {
        System.err.println("DBTools: JDBC driver " + DRIVER_CLASS + " not found: " + e);
      }

      Connection opened = null;
      try {
        opened = DriverManager.getConnection(JDBC_URL, DB_USER, DB_PASSWORD);
      } catch (SQLException e) {
        System.err.println("DBTools: cannot connect to " + JDBC_URL + ": " + e);
      }
      connection = opened;
    }

    /**
     * Runs a parameterised SELECT and returns every row as an array of column strings.
     *
     * @param sql         statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders in list order; {@code null} is
     *                    treated as "no parameters"
     * @return one {@code String[]} per row, each holding the columns in result order
     * @throws Throwable  an {@link SQLException} when there is no connection or the
     *                    statement fails
     */
    public ArrayList<String[]> query(String sql, ArrayList<?> list_values) throws Throwable {
      ArrayList<String[]> listRows = new ArrayList<String[]>();
      PreparedStatement statement = prepare(sql, list_values);
      try {
        ResultSet rows = statement.executeQuery();
        try {
          // The column count is fixed for the whole result set, so read it once.
          int columnCount = rows.getMetaData().getColumnCount();
          while (rows.next()) {
            String[] rowinfo = new String[columnCount];
            for (int i = 0; i < columnCount; i++) {
              rowinfo[i] = rows.getString(i + 1); // JDBC columns are 1-based
            }
            listRows.add(rowinfo);
          }
        } finally {
          rows.close();
        }
      } finally {
        statement.close();
      }
      return listRows;
    }

    /**
     * Runs a parameterised INSERT, UPDATE or DELETE.
     *
     * @param sql         statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders in list order; {@code null} is
     *                    treated as "no parameters"
     * @throws Throwable  an {@link SQLException} when there is no connection or the
     *                    statement fails
     */
    public void update(String sql, ArrayList<?> list_values) throws Throwable {
      PreparedStatement statement = prepare(sql, list_values);
      try {
        statement.executeUpdate();
      } finally {
        statement.close();
      }
    }

    /** Prepares {@code sql} and binds {@code values}; closes the statement if binding fails. */
    private PreparedStatement prepare(String sql, ArrayList<?> values) throws SQLException {
      if (connection == null) {
        throw new SQLException("DBTools has no open database connection to " + JDBC_URL);
      }
      PreparedStatement statement = connection.prepareStatement(sql);
      boolean bound = false;
      try {
        int count = (values == null) ? 0 : values.size();
        for (int i = 0; i < count; i++) {
          statement.setObject(i + 1, values.get(i)); // JDBC parameters are 1-based
        }
        bound = true;
        return statement;
      } finally {
        if (!bound) {
          statement.close();
        }
      }
    }
  }





  package controller;

  import java io IOException;

  import java io PrintWriter;

  import java util List;

  import javax servlet ServletException;

  import javax servlet HttpServlet;

  import javax servlet HttpServletRequest;

  import javax servlet HttpServletResponse;

  import webTools IOTOWeb;

  // Servlet whose GET handler runs the IOTOWeb scrape-and-insert routine and whose POST
  // handler writes a fixed HTML page.
  // NOTE(review): this listing appears to have lost punctuation during extraction (dots,
  // braces, quotes, commas, digits and the Javadoc delimiters), so the lines starting with
  // "*" below are the remains of the original Javadoc blocks.
  public class TestURL extends HttpServlet


  * Constructor of the object


  // No-argument constructor; no statements are visible in its body.
  public TestURL()




  * Destruction of the servlet <br>


  // Delegates to the superclass destroy(); nothing else is done here.
  public void destroy()

  super destroy(); // Just puts destroy string in log

  // Put your code here



  * The doGet method of the servlet <br>


  * This method is called when a form has its tag value method equals to get


  * @param request

  *            the request send by the client to the server

  * @param response

  *            the response send by the server to the client

  * @throws ServletException

  *             if an error occurred

  * @throws IOException

  *             if an error occurred


  // Creates a new IOTOWeb for this request and calls getFromBaiduMap on it. Any Throwable
  // is caught and only its stack trace is printed; the visible code writes nothing to the
  // response and does not read the request.
  // NOTE(review): the URL argument survives only as "?id= ?top"; the host and the
  // parameter values are missing from this listing.
  public void doGet(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException


  IOTOWeb iotoWeb = new IOTOWeb();

  iotoWeb getFromBaiduMap ( ?id= ?top );

   catch (Throwable e)

  // TODO Auto generated catch block

  e printStackTrace();




  * The doPost method of the servlet <br>


  * This method is called when a form has its tag value method equals to

  * post


  * @param request

  *            the request send by the client to the server

  * @param response

  *            the response send by the server to the client

  * @throws ServletException

  *             if an error occurred

  * @throws IOException

  *             if an error occurred


  // Sets the response content type, then writes a small fixed HTML document that names
  // this class and the POST method, and finally flushes and closes the writer.
  // NOTE(review): the content type survives only as "text/" and the first println has no
  // visible receiver (presumably "out") - confirm against the original file.
  public void doPost(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException

  response setContentType( text/ );

  PrintWriter out = response getWriter();


   println( <!DOCTYPE HTML PUBLIC \\ //W C//DTD HTML Transitional//EN\\ > );

  out println( <HTML> );

  out println(   <HEAD><TITLE>A Servlet</TITLE></HEAD> );

  out println(   <BODY> );

  out print(     This is );

  out print(this getClass());

  out println( using the POST method );

  out println(   </BODY> );

  out println( </HTML> );

  out flush();

  out close();



  * Initialization of the servlet <br>


  * @throws ServletException

  *             if an error occurs


  // Initialization hook; no statements are visible in its body.
  public void init() throws ServletException

  // Put your code here





  package webTools;

  import java io BufferedReader;

  import java io InputStreamReader;

  import URL;

  import java util ArrayList;

  import java util List;

  import java util regex Matcher;

  import java util regex Pattern;

  import dbTools DBTools;

  // Downloads a page, extracts a book name from a <span> element with a regular
  // expression and inserts it into the "bookinfo" table through DBTools.
  // NOTE(review): this listing appears to have lost punctuation during extraction (dots,
  // braces, quotes, commas and digits), so it does not compile as shown. The comments
  // describe only what the remaining tokens show.
  public class GetBook

  // Opens the given URL, reads the stream line by line and returns everything read as one
  // string. Each line is appended directly to the buffer, so no separator is inserted
  // between lines. No exception is caught here; failures propagate to the caller.
  // NOTE(review): the charset argument survives only as "gb" - confirm the intended
  // encoding. The reader is closed on the success path only.
  public String getHtmlContent(String URL) throws Throwable

  URL url = null;

  String rowContent = ;

  StringBuffer Content = new StringBuffer();

  url = new URL(URL);

  BufferedReader in = new BufferedReader(new InputStreamReader(url

   openStream() gb ));

  while ((rowContent = in readLine()) != null)

  Content append(rowContent);


  in close();

  return Content toString();


  // Returns the full text of the first match of a "<span class= style...>...</span>"
  // pattern in Content, or the initial value of bookName when nothing matches.
  // NOTE(review): the initial value and part of the class attribute in the pattern are
  // missing from this listing (presumably an empty string and a "style" class name ending
  // in digits) - confirm.
  public String getBookName(String Content)

  String bookName = ;

  String regex = <span class=\\ style \\ >[^>]*</span> ;

  Pattern pattern = pile(regex Pattern DOTALL);

  Matcher matcher = pattern matcher(Content);

  if (matcher find())

  bookName = matcher group();


  return bookName;


  // Applies replaceAll to s with a pattern that starts with "<" and ends with ">".
  // NOTE(review): presumably this strips markup tags by replacing them with an empty
  // string; the replacement argument is not visible here - confirm.
  public String outTag(final String s)

  return s replaceAll( < *?> );


  // Database helper created once per GetBook instance by this field initializer.
  DBTools dbtools = new DBTools();

  // Downloads the page at URL, extracts the book name and removes its tags. When the
  // result passes the null / equals check it is printed and inserted into "bookinfo"
  // through dbtools update.
  // NOTE(review): the receiver of "equals(bookName)" is missing; presumably the check
  // skips empty names - confirm.
  public void getFromJINSHU(String URL) throws Throwable

  String Content = getHtmlContent(URL);

  String bookName = outTag(getBookName(Content));

  if (bookName != null && ! equals(bookName))

  System out println(bookName);

  // The single "?" placeholder is bound to bookName by dbtools update.
  String sql = insert into bookinfo(bookName) values(?) ;

  ArrayList list_values = new ArrayList();

  list_values add(bookName);

  dbtools update(sql list_values);






  package controller;

  import java io IOException;

  import java io PrintWriter;

  import javax servlet ServletException;

  import javax servlet HttpServlet;

  import javax servlet HttpServletRequest;

  import javax servlet HttpServletResponse;

  import webTools GetBook;

  // Servlet that walks a numbered range of book pages and feeds each one to
  // GetBook getFromJINSHU; the GET and POST handlers run the same loop.
  // NOTE(review): this listing appears to have lost punctuation during extraction (dots,
  // braces, quotes, commas, digits and the Javadoc delimiters), so the lines starting with
  // "*" below are the remains of the original Javadoc blocks.
  public class TestBook extends HttpServlet


  * Constructor of the object


  // No-argument constructor; no statements are visible in its body.
  public TestBook()




  * Destruction of the servlet <br>


  // Delegates to the superclass destroy(); nothing else is done here.
  public void destroy()

  super destroy(); // Just puts destroy string in log

  // Put your code here



  * The doGet method of the servlet <br>


  * This method is called when a form has its tag value method equals to get


  * @param request

  *            the request send by the client to the server

  * @param response

  *            the response send by the server to the client

  * @throws ServletException

  *             if an error occurred

  * @throws IOException

  *             if an error occurred


  // Page counter shared by doGet and doPost. It is an instance field and neither loop
  // resets it, so its value carries over between calls on the same servlet instance.
  // NOTE(review): the initial value is missing from this listing. If the container serves
  // concurrent requests with one instance, this counter is shared between them - confirm.
  int i = ;

  // Builds one book URL per value of i and passes it to getFromJINSHU. When that call
  // throws, the Throwable is discarded and doPost is invoked with the same request and
  // response.
  // NOTE(review): doPost runs the same loop and calls back into doGet on failure, and i is
  // only advanced by the loop increment. This looks like unbounded mutual recursion when
  // the same page keeps failing - confirm and consider skipping the page instead.
  // NOTE(review): the loop bound and most of the URL literal are missing from this listing.
  public void doGet(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException

  GetBook bookinfo = new GetBook();

  for (; i < ; i++)

  String bookURL = /booksinfo/ / + i

  + l ;


  bookinfo getFromJINSHU(bookURL);

   catch (Throwable e)


  doPost(request response);





  * The doPost method of the servlet <br>


  * This method is called when a form has its tag value method equals to

  * post


  * @param request

  *            the request send by the client to the server

  * @param response

  *            the response send by the server to the client

  * @throws ServletException

  *             if an error occurred

  * @throws IOException

  *             if an error occurred


  // Same loop as doGet: builds one book URL per value of i and passes it to
  // getFromJINSHU. When that call throws, the Throwable is discarded and doGet is invoked
  // with the same request and response.
  // NOTE(review): see doGet - the two handlers call each other on failure without
  // advancing i, and nothing is written to the response in the visible code.
  public void doPost(HttpServletRequest request HttpServletResponse response)

  throws ServletException IOException

  GetBook bookinfo = new GetBook();

  for (; i < ; i++)

  String bookURL = /booksinfo/ / + i

  + l ;


  bookinfo getFromJINSHU(bookURL);

   catch (Throwable e)


  doGet(request response);





  * Initialization of the servlet <br>


  * @throws ServletException

  *             if an error occurs


  // Initialization hook; no statements are visible in its body.
  public void init() throws ServletException

  // Put your code here





