JavaWeb 新冠肺炎知识与防护自动问答系统
1、课设说明
1. 问题描述
新冠肺炎给全国人民带来巨大的威胁与灾难。为了更加有效地战胜它,首先需要尽可能了解它的特性与相关的防护知识。本选题的目的是:使用Java语言与技术,设计并实现一个基于因特网平台的“关于新冠肺炎知识与防护自动问答系统”。
2. 系统主要功能
1)知识库扩充或增加更新功能。系统能人工或自动从互联网上爬取(或抽取或获取)与“新冠肺炎”相关的知识并整理成系统内部的某种格式(支持全文检索的方式),存储入库。
2)人机问答子系统。提供丰富的(各种友好的界面–如可能的话如:语音界面?卡通拟人化界面?)人机界面,用于进行人机问答。
3)系统分为客户机部分、服务器部分、数据库部分。客户机部分可以是:网页形式、微信小程序形式、微信公众号形式、app形式、PC机上运行的正常程序的形式。服务器部分:用于与客户机进行网络通信与信息传输部分、互联网新冠相关知识爬取子系统部分、知识整理、索引、入库部分。数据库部分:支持全文问答的数据库组织与管理。
4)也可以做一个单独的PC机程序(上述三部分集成在一个程序中),单独安装并运行。
2、系统实现
该问答系统采用Maven技术,MVC三层架构,主要分为:
系统功能框架图如下:
1. 准备工作
1)导入所需的maven依赖
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
| <dependencies> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.11</version> </dependency> <dependency> <groupId>javax.servlet</groupId> <artifactId>javax.servlet-api</artifactId> <version>4.0.1</version> </dependency> <dependency> <groupId>javax.servlet.jsp</groupId> <artifactId>jsp-api</artifactId> <version>2.2</version> </dependency> <dependency> <groupId>javax.servlet.jsp.jstl</groupId> <artifactId>jstl-api</artifactId> <version>1.2</version> </dependency> <dependency> <groupId>taglibs</groupId> <artifactId>standard</artifactId> <version>1.1.2</version> </dependency> <dependency> <groupId>mysql</groupId> <artifactId>mysql-connector-java</artifactId> <version>8.0.23</version> </dependency> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpcore</artifactId> <version>4.4.14</version> </dependency>
<dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> <version>4.5.13</version> </dependency> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.13.1</version> </dependency>
<dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-lang3</artifactId> <version>3.12.0</version> </dependency>
<dependency> <groupId>org.projectlombok</groupId> <artifactId>lombok</artifactId> <version>1.18.18</version> <scope>provided</scope> </dependency>
<dependency> <groupId>com.hankcs</groupId> <artifactId>hanlp</artifactId> <version>portable-1.8.0</version> </dependency>
<dependency> <groupId>commons-collections</groupId> <artifactId>commons-collections</artifactId> <version>3.2.2</version> </dependency>
</dependencies>
|
2)编写连接数据库的配置文件,命名为db.properties
1 2 3 4
| driver=com.mysql.cj.jdbc.Driver url=jdbc:mysql://localhost:3306/crawler?serverTimezone=UTC username=root password=333
|
3)编写操作数据库的公共类BaseDao
在类加载时读取db.properties配置文件,完成初始化
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
| package com.zqy.dao;
import java.io.IOException; import java.io.InputStream; import java.sql.*; import java.util.Properties;
/**
 * Shared JDBC helper for the DAO layer.
 *
 * <p>Connection settings are read once, at class-load time, from {@code db.properties} on the
 * classpath (keys: {@code driver}, {@code url}, {@code username}, {@code password}).
 */
public class BaseDao {
    private static String driver;
    private static String url;
    private static String username;
    private static String password;

    static {
        Properties prop = new Properties();
        // try-with-resources closes the stream; a null resource is simply skipped on close.
        try (InputStream is = BaseDao.class.getClassLoader().getResourceAsStream("db.properties")) {
            if (is == null) {
                // Previously this case blew up class initialisation with a NullPointerException.
                System.err.println("db.properties was not found on the classpath; database access is unavailable");
            } else {
                prop.load(is);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        driver = prop.getProperty("driver");
        url = prop.getProperty("url");
        username = prop.getProperty("username");
        password = prop.getProperty("password");
    }

    /**
     * Opens a new database connection using the loaded settings.
     *
     * @return a connection, or {@code null} when the settings are missing or connecting fails
     */
    public static Connection getConn() {
        if (driver == null || url == null) {
            System.err.println("Database driver/url is not configured; cannot open a connection");
            return null;
        }
        Connection conn = null;
        try {
            Class.forName(driver);
            conn = DriverManager.getConnection(url, username, password);
        } catch (Exception e) {
            // Kept broad on purpose: callers rely on a null return instead of an exception.
            e.printStackTrace();
        }
        return conn;
    }

    /**
     * Runs a parameterised query.
     *
     * <p>The {@code ps} and {@code rs} arguments are accepted only for backward compatibility and
     * are ignored: Java passes references by value, so assigning to them never reached the caller
     * and the statement could not be closed from outside. The statement created here is marked
     * close-on-completion, so closing the returned result set also releases the statement.
     *
     * @param conn   open connection
     * @param ps     ignored
     * @param rs     ignored
     * @param sql    SQL text with {@code ?} placeholders
     * @param params values bound to the placeholders in order; may be {@code null} for none
     * @return the result set; the caller must close it
     * @throws SQLException if preparing or executing the statement fails
     */
    public static ResultSet execute(Connection conn, PreparedStatement ps, ResultSet rs, String sql, Object[] params) throws SQLException {
        PreparedStatement statement = conn.prepareStatement(sql);
        try {
            statement.closeOnCompletion();
            bindParams(statement, params);
            return statement.executeQuery();
        } catch (SQLException | RuntimeException e) {
            // No result set will ever be handed out, so release the statement here.
            try {
                statement.close();
            } catch (SQLException closeError) {
                e.addSuppressed(closeError);
            }
            throw e;
        }
    }

    /**
     * Runs a parameterised insert/update/delete and closes the statement before returning.
     *
     * @param conn   open connection
     * @param ps     ignored; kept for backward compatibility
     * @param sql    SQL text with {@code ?} placeholders
     * @param params values bound to the placeholders in order; may be {@code null} for none
     * @return the number of affected rows
     * @throws SQLException if preparing or executing the statement fails
     */
    public static int execute(Connection conn, PreparedStatement ps, String sql, Object[] params) throws SQLException {
        try (PreparedStatement statement = conn.prepareStatement(sql)) {
            bindParams(statement, params);
            return statement.executeUpdate();
        }
    }

    /** Binds {@code params} to the 1-based placeholders of {@code statement}. */
    private static void bindParams(PreparedStatement statement, Object[] params) throws SQLException {
        if (params == null) {
            return;
        }
        for (int i = 0; i < params.length; i++) {
            statement.setObject(i + 1, params[i]);
        }
    }

    /**
     * Closes the given resources in the order result set, statement, connection.
     * Any argument may be {@code null}.
     *
     * @return {@code true} when every non-null resource closed without error
     */
    public static boolean closeResource(Connection conn, PreparedStatement ps, ResultSet rs) {
        boolean flag = true;
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
                flag = false;
            }
        }
        if (ps != null) {
            try {
                ps.close();
            } catch (SQLException e) {
                e.printStackTrace();
                flag = false;
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
                flag = false;
            }
        }
        return flag;
    }
}
|
4)表设计
- 用户表(user)设计
字段名 |
类型 |
长度 |
约束 |
自增 |
备注 |
id |
int |
0 |
Pk |
√ |
主键 |
userName |
varchar |
255 |
Unique |
|
用户名 |
userPassword |
varchar |
255 |
Not null |
|
密码 |
- 新冠问答表(covid)设计
字段名 |
类型 |
长度 |
约束 |
自增 |
备注 |
id |
Int |
0 |
Pk |
√ |
主键 |
question |
Varchar |
255 |
Unique |
|
问题 |
answer |
Text |
0 |
Not null |
|
答案 |
creationDate |
datetime |
0 |
|
|
更新日期 |
5) 主类User、QA的编写
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
| package com.zqy.pojo;
/** Plain data holder for one row of the {@code user} table. */
public class User {

    /** Primary key. */
    private int id;
    /** Login name. */
    private String userName;
    /** Password as stored in the table. */
    private String userPassword;

    /** @return the primary key */
    public int getId() {
        return this.id;
    }

    /** @param id the primary key */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the login name */
    public String getUserName() {
        return this.userName;
    }

    /** @param userName the login name */
    public void setUserName(String userName) {
        this.userName = userName;
    }

    /** @return the stored password */
    public String getUserPassword() {
        return this.userPassword;
    }

    /** @param userPassword the password to store */
    public void setUserPassword(String userPassword) {
        this.userPassword = userPassword;
    }
}
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
| package com.zqy.pojo;
import java.util.Date;
/** Plain data holder for one question/answer row of the {@code covid} table. */
public class QA {

    /** Primary key. */
    private int id;
    /** Question text. */
    private String question;
    /** Answer text. */
    private String answer;
    /** Timestamp recorded when the row was stored. */
    private Date creationDate;

    /** @return the primary key */
    public int getId() {
        return this.id;
    }

    /** @param id the primary key */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the question text */
    public String getQuestion() {
        return this.question;
    }

    /** @param question the question text */
    public void setQuestion(String question) {
        this.question = question;
    }

    /** @return the answer text */
    public String getAnswer() {
        return this.answer;
    }

    /** @param answer the answer text */
    public void setAnswer(String answer) {
        this.answer = answer;
    }

    /** @return the stored timestamp (the same instance that was set, not a copy) */
    public Date getCreationDate() {
        return this.creationDate;
    }

    /** @param creationDate the timestamp to store (kept by reference) */
    public void setCreationDate(Date creationDate) {
        this.creationDate = creationDate;
    }
}
|
6)编写CharacterEncodingFilter(字符编码过滤器)类
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
| package com.zqy.filter;
import javax.servlet.*; import java.io.IOException;
/**
 * Servlet filter that forces UTF-8 on both the request and the response before
 * passing the exchange down the chain.
 */
public class CharacterEncodingFilter implements Filter {

    /** Encoding applied to every request and response. */
    private static final String ENCODING = "utf-8";

    /** Nothing to initialise. */
    @Override
    public void init(FilterConfig filterConfig) throws ServletException {
    }

    /** Sets the character encoding on both sides, then continues the filter chain. */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain)
            throws IOException, ServletException {
        request.setCharacterEncoding(ENCODING);
        response.setCharacterEncoding(ENCODING);
        chain.doFilter(request, response);
    }

    /** Nothing to release. */
    @Override
    public void destroy() {
    }
}
|
web.xml中对应配置
1 2 3 4 5 6 7 8 9
| <filter> <filter-name>CharacterEncodingFilter</filter-name> <filter-class>com.zqy.filter.CharacterEncodingFilter</filter-class> </filter> <filter-mapping> <filter-name>CharacterEncodingFilter</filter-name> <url-pattern>/*</url-pattern> </filter-mapping>
|
2. 用户模块
业务流程图如下:
采用自底向上实现的方式编写代码
1)UserDao接口的编写
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| package com.zqy.dao.user;
import com.zqy.pojo.User; import java.sql.Connection; import java.sql.SQLException;
/** Data-access operations on the {@code user} table. */
public interface UserDao {

    /**
     * Inserts a new user.
     *
     * @return {@code true} when exactly one row was inserted
     * @throws SQLException on a database error
     */
    boolean register(Connection conn, String userName, String userPassword) throws SQLException;

    /**
     * Looks up the user matching both name and password.
     *
     * @return the matching user, or {@code null} when there is none
     * @throws SQLException on a database error
     */
    User login(Connection conn, String userName, String userPassword) throws SQLException;

    /**
     * Replaces the password of the user with the given id.
     *
     * @return {@code true} when exactly one row was updated
     * @throws SQLException on a database error
     */
    boolean updatePwd(Connection conn, int id, String userPassword) throws SQLException;
}
|
2)接口实现类UserDaoImpl的编写
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
| package com.zqy.dao.user.impl;
import com.zqy.dao.BaseDao; import com.zqy.dao.user.UserDao; import com.zqy.pojo.User; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException;
/**
 * JDBC implementation of {@link UserDao}.
 *
 * <p>Every statement and result set is opened in try-with-resources so it is released even when
 * the query throws. The connection itself is owned (and closed) by the caller.
 *
 * <p>NOTE(review): passwords are stored and compared in plain text, as in the original design.
 * Hashing them (bcrypt/argon2) needs a data migration and is therefore not done here.
 */
public class UserDaoImpl implements UserDao {

    /**
     * Inserts a new user row.
     *
     * @return {@code true} when exactly one row was inserted; {@code false} when {@code conn} is null
     * @throws SQLException on a database error (for example a duplicate user name)
     */
    @Override
    public boolean register(Connection conn, String userName, String userPassword) throws SQLException {
        if (conn == null) {
            return false;
        }
        String sql = "insert into user(userName,userPassword) VALUES (?,?)";
        try (PreparedStatement pstm = conn.prepareStatement(sql)) {
            pstm.setString(1, userName);
            pstm.setString(2, userPassword);
            return pstm.executeUpdate() == 1;
        }
    }

    /**
     * Finds the user whose name and password both match.
     *
     * @return the user, or {@code null} when nothing matches or {@code conn} is null
     * @throws SQLException on a database error
     */
    @Override
    public User login(Connection conn, String userName, String userPassword) throws SQLException {
        if (conn == null) {
            return null;
        }
        String sql = "select id,userName,userPassword from user where userName = ? and userPassword = ?";
        try (PreparedStatement pstm = conn.prepareStatement(sql)) {
            pstm.setString(1, userName);
            pstm.setString(2, userPassword);
            try (ResultSet rs = pstm.executeQuery()) {
                if (!rs.next()) {
                    return null;
                }
                User user = new User();
                user.setId(rs.getInt("id"));
                user.setUserName(rs.getString("userName"));
                user.setUserPassword(rs.getString("userPassword"));
                return user;
            }
        }
    }

    /**
     * Sets a new password for the user with the given id.
     *
     * @return {@code true} when exactly one row was updated; {@code false} when {@code conn} is null
     * @throws SQLException on a database error
     */
    @Override
    public boolean updatePwd(Connection conn, int id, String userPassword) throws SQLException {
        if (conn == null) {
            return false;
        }
        String sql = "update user set userPassword=? where id=?";
        try (PreparedStatement pstm = conn.prepareStatement(sql)) {
            pstm.setString(1, userPassword);
            pstm.setInt(2, id);
            return pstm.executeUpdate() == 1;
        }
    }
}
|
3)UserService业务层接口的编写
1 2 3 4 5 6 7 8 9 10 11 12
| package com.zqy.service.user;
import com.zqy.pojo.User;
/** Business-layer operations for user accounts. */
public interface UserService {

    /** @return the user matching the credentials, or {@code null} when login fails */
    User login(String userName, String userPassword);

    /** @return {@code true} when the account was created */
    boolean register(String userName, String userPassword);

    /** @return {@code true} when the password of user {@code id} was changed */
    boolean updatePwd(int id, String userPassword);
}
|
4)接口实现类UserServiceImpl的编写
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
| package com.zqy.service.user.impl;
import com.zqy.dao.BaseDao; import com.zqy.dao.user.UserDao; import com.zqy.dao.user.impl.UserDaoImpl; import com.zqy.pojo.User; import com.zqy.service.user.UserService;
import java.sql.Connection; import java.sql.SQLException;
/**
 * Default {@link UserService}: obtains a connection from {@link BaseDao}, delegates to
 * {@link UserDao}, and always closes the connection afterwards. Database errors are printed
 * and reported to the caller as a failed result ({@code null} / {@code false}).
 */
public class UserServiceImpl implements UserService {

    private UserDao userDao;

    public UserServiceImpl() {
        this.userDao = new UserDaoImpl();
    }

    /** @return the matching user, or {@code null} on bad credentials or a database error */
    @Override
    public User login(String userName, String userPassword) {
        Connection conn = BaseDao.getConn();
        try {
            return userDao.login(conn, userName, userPassword);
        } catch (SQLException e) {
            e.printStackTrace();
            return null;
        } finally {
            BaseDao.closeResource(conn, null, null);
        }
    }

    /** @return {@code true} when the account was inserted, {@code false} otherwise */
    @Override
    public boolean register(String userName, String userPassword) {
        Connection conn = BaseDao.getConn();
        try {
            return userDao.register(conn, userName, userPassword);
        } catch (SQLException e) {
            e.printStackTrace();
            return false;
        } finally {
            BaseDao.closeResource(conn, null, null);
        }
    }

    /** @return {@code true} when the password was updated, {@code false} otherwise */
    @Override
    public boolean updatePwd(int id, String userPassword) {
        Connection conn = BaseDao.getConn();
        try {
            return userDao.updatePwd(conn, id, userPassword);
        } catch (SQLException e) {
            e.printStackTrace();
            return false;
        } finally {
            BaseDao.closeResource(conn, null, null);
        }
    }
}
|
5)用户模块Servlet层的编写
1. LoginServlet的编写,负责处理用户的注册、登录
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
| package com.zqy.servlet.user;
import com.zqy.pojo.User; import com.zqy.service.user.UserService; import com.zqy.service.user.impl.UserServiceImpl; import com.zqy.util.Constants;
import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import java.io.IOException;
/**
 * Handles both registration and login, selected by the {@code operation} request parameter
 * ({@code "register"} registers; anything else logs in).
 *
 * <p>Input is validated on the server as well as in the page script, because the browser-side
 * checks can be bypassed. GET is still accepted for backward compatibility, but the form should
 * submit with POST so the password does not end up in URLs and access logs.
 */
public class LoginServlet extends HttpServlet {

    private static final String LOGIN_PAGE = "loginAndRegister.jsp";

    @Override
    protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        String op = req.getParameter("operation");
        String userName = req.getParameter("userName");
        String userPassword = req.getParameter("userPassword");

        // Reject missing credentials before touching the database.
        if (isBlank(userName) || isBlank(userPassword)) {
            fail(req, resp, "请输入用户名和密码");
            return;
        }

        UserService userService = new UserServiceImpl();
        if ("register".equals(op)) {
            // Same limits as the page script enforces.
            if (userName.length() < 3 || userName.length() > 8) {
                fail(req, resp, "用户名长度在3~8之间");
                return;
            }
            if (userPassword.length() < 6 || userPassword.length() > 18) {
                fail(req, resp, "密码长度在6~18之间");
                return;
            }
            if (userService.register(userName, userPassword)) {
                req.setAttribute("msg", "<font color='green'>注册成功,请输入用户名密码登陆</font>");
            } else {
                req.setAttribute("msg", "注册失败,请检查!");
            }
            req.getRequestDispatcher(LOGIN_PAGE).forward(req, resp);
        } else {
            User user = userService.login(userName, userPassword);
            if (user != null) {
                req.getSession().setAttribute(Constants.USER_SESSION, user);
                req.getRequestDispatcher("main.jsp").forward(req, resp);
            } else {
                fail(req, resp, "用户名或密码错误");
            }
        }
    }

    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        doGet(req, resp);
    }

    /** Returns to the login page showing {@code message}. */
    private void fail(HttpServletRequest req, HttpServletResponse resp, String message)
            throws ServletException, IOException {
        req.setAttribute("msg", message);
        req.getRequestDispatcher(LOGIN_PAGE).forward(req, resp);
    }

    /** True when {@code value} is null or contains only whitespace. */
    private static boolean isBlank(String value) {
        return value == null || value.trim().isEmpty();
    }
}
|
2. UpdatePwdServlet的编写,负责处理用户修改密码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
| package com.zqy.servlet.user;
import com.zqy.pojo.User; import com.zqy.service.user.UserService; import com.zqy.service.user.impl.UserServiceImpl; import com.zqy.util.Constants;
import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import java.io.IOException;
public class UpdatePwdServlet extends HttpServlet {
public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { UserService userService = new UserServiceImpl();
String oldpass = request.getParameter("oldpass");
User user = (User)request.getSession().getAttribute(Constants.USER_SESSION); if(oldpass.equals(user.getUserPassword())){ String newpass = request.getParameter("newpass"); boolean flag = userService.updatePwd(user.getId(),newpass); if(flag){ user.setUserPassword(newpass); request.getSession().setAttribute(Constants.USER_SESSION, user); request.setAttribute("success", "密码修改成功"); request.getRequestDispatcher("updatePassword.jsp").forward(request, response); }else{ request.setAttribute("error", "密码修改失败"); request.getRequestDispatcher("updatePassword.jsp").forward(request, response); } }else{ request.setAttribute("error", "原始密码不正确"); request.getRequestDispatcher("updatePassword.jsp").forward(request, response); }
}
public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { doGet(request,response); }
}
|
3. 对应在web.xml文件中添加请求映射
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
| <servlet> <servlet-name>login</servlet-name> <servlet-class>com.zqy.servlet.user.LoginServlet</servlet-class> </servlet> <servlet-mapping> <servlet-name>login</servlet-name> <url-pattern>/login</url-pattern> </servlet-mapping>
<servlet> <servlet-name>updatePwd</servlet-name> <servlet-class>com.zqy.servlet.user.UpdatePwdServlet</servlet-class> </servlet> <servlet-mapping> <servlet-name>updatePwd</servlet-name> <url-pattern>/updatePwd</url-pattern> </servlet-mapping>
|
6)前端jsp页面代码
1. loginAndRegister.jsp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
| <%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%>
<%-- Combined login / registration page. Both buttons submit the same form to /login;
     the hidden "operation" field tells the servlet which action to perform. --%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<base href="<%=basePath%>">
<title>登录注册界面</title>
<style>
table{
    position:absolute;
    top:5cm;
    left:15cm;
    width:400px;
    height:250px;
    border:2px solid #2BA8E8;
}
.input{
    width:260px;
    height:30px;
    font-size:20px;
}
.btn{
    cursor:pointer;
    height:40px;
    width:70px;
    font-size:20px;
    font-weight: bold;
}
.title{
    font-style: italic;
}
</style>
</head>
<body bgcolor="#f5f5f5" text="black">
<!-- form start -->
<%-- POST keeps the password out of the URL, browser history and server access logs. --%>
<form name="form1" method="post">
    <input type="hidden" name="operation" value="register"/>
    <table border="1">
        <tr>
            <th colspan="2" style="font-size:30px;font-family: 华文行楷">用户登陆注册</th>
        </tr>
        <tr>
            <th colspan="2" style="color:red" id="msg">${msg}</th>
        </tr>
        <tr>
            <td class="title">USERNAME:</td>
            <td><input type="text" class="input" name="userName"></td>
        </tr>
        <tr>
            <td class="title">PASSWORD:</td>
            <td><input type="password" class="input" name="userPassword"></td>
        </tr>
        <tr>
            <td colspan="2" align="center">
                <input type="button" value="登陆" class="btn" onclick="return login()"/>
                <input type="button" value="注册" class="btn" onclick="return register()"/>
            </td>
        </tr>
        <tr>
            <td colspan="2" align="center">copyRight ©<i> 2020 Zhang QingYang</i></td>
        </tr>
    </table>
</form>
<!-- form end -->
</body>
<script>
    var form1 = document.form1;
    var msg = document.getElementById("msg");

    // Validates that both fields are filled in, then submits as a login request.
    function login(){
        form1.operation.value = "login";
        var username = form1.userName.value;
        var password = form1.userPassword.value;
        if(username.length<1){
            msg.innerText="请输入用户名";
            return false;
        }else{
            msg.innerText="";
        }
        if(password.length<1){
            msg.innerText="请输入密码";
            return false;
        }else{
            msg.innerText="";
        }
        form1.action="${pageContext.request.contextPath}/login";
        form1.submit();
    }

    // Validates field lengths, then submits as a registration request.
    function register(){
        form1.operation.value = "register";
        var username = form1.userName.value;
        var password = form1.userPassword.value;
        if(username.length<1){
            msg.innerText="请输入用户名";
            return false;
        }else if(username.length<3 || username.length>8){
            msg.innerText="用户名长度在3~8之间";
            return false;
        }else{
            msg.innerText="";
        }
        if(password.length<1){
            msg.innerText="请输入密码";
            return false;
        }else if(password.length<6 || password.length>18){
            msg.innerText="密码长度在6~18之间";
            return false;
        }else{
            msg.innerText="";
        }
        form1.action="${pageContext.request.contextPath}/login";
        form1.submit();
    }
</script>
</html>
|
在web.xml中设置成欢迎界面
1 2 3 4
| <welcome-file-list> <welcome-file>loginAndRegister.jsp</welcome-file> </welcome-file-list>
|
在web.xml中配置映射请求(即上文“对应在web.xml文件中添加请求映射”中的login配置,同一个servlet只需声明一次,无需重复添加)
1 2 3 4 5 6 7 8 9
| <servlet> <servlet-name>login</servlet-name> <servlet-class>com.zqy.servlet.user.LoginServlet</servlet-class> </servlet> <servlet-mapping> <servlet-name>login</servlet-name> <url-pattern>/login</url-pattern> </servlet-mapping>
|
2. updatePassword.jsp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
| <%@ page language="java" contentType="text/html; charset=utf-8" pageEncoding="utf-8"%>
<%-- Password-change page. The form posts oldpass/newpass to /updatePwd after the
     browser-side checks in check() succeed. --%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<base href="<%=basePath%>">
<title>My JSP 'updatePassword.jsp' starting page</title>
<style>
.input{
    height:30px;
    width:250px;
    font-size:20px;
}
table{
    position:absolute;
    left:30%;
    border:2px solid purple;
}
.btn{
    cursor:pointer;
    height:40px;
    width:70px;
    font-size:20px;
    font-weight: bold;
    border:0;
    background-color: green;
    color:white;
}
</style>
</head>
<body bgcolor="#f5f5f5">
<form onsubmit="return check()" name="form1" method="post">
    <table>
        <tr>
            <th colspan="3" style="font-size:25px;font-style: oblique">用户密码修改</th>
        </tr>
        <tr>
            <th colspan="3" style="color:green">${success}</th>
        </tr>
        <tr>
            <td align="right">原始密码:</td>
            <td><input type="password" name="oldpass" class="input"></td>
            <td width="200px" id="e1" style="color:red;font-weight:bold">${error}</td>
        </tr>
        <tr>
            <td align="right">新密码:</td>
            <td><input type="password" name="newpass" class="input"></td>
            <td width="200px" id="e2" style="color:red;font-weight:bold"></td>
        </tr>
        <tr>
            <td align="right">重复密码:</td>
            <td><input type="password" name="repass" class="input" onblur="return reCheck()"></td>
            <td width="200px" id="e3" style="color:red;font-weight:bold"></td>
        </tr>
        <tr>
            <th colspan="3">
                <input type="submit" value="提交" class="btn"/>
                <input type="button" value="取消" class="btn"/>
            </th>
        </tr>
    </table>
</form>
</body>
<script>
    // Checks that the repeated password is present and equal to the new password.
    function reCheck(){
        var form1 = document.form1;
        var newpass = form1.newpass.value;
        var repass = form1.repass.value;
        var e3 = document.getElementById("e3");
        if(repass.length<1){
            e3.innerText="请输入重复密码";
            return false;
        }else{
            if(repass != newpass){
                e3.innerText="两次密码不一致";
                return false;
            }else{
                e3.innerText="";
                return true;
            }
        }
    }

    // Form-level validation; returning false cancels the submit.
    function check(){
        var form1 = document.form1;
        var oldpass = form1.oldpass.value;
        var newpass = form1.newpass.value;
        var e1 = document.getElementById("e1");
        var e2 = document.getElementById("e2");
        var e3 = document.getElementById("e3");
        if(oldpass.length < 1){
            e1.innerText="请输入原始密码";
            return false;
        }else{
            e1.innerText="";
        }
        if(newpass.length < 1){
            e2.innerText="请输入新密码";
            return false;
        }else{
            e2.innerText="";
        }
        var v = reCheck();
        if(v == false){
            return false;
        }
        if(oldpass == newpass){
            e2.innerText = "密码未被改变";
            return false;
        }else{
            e2.innerText="";
        }
        // Prefix the context path: a bare "/updatePwd" only works when the app is deployed at the server root.
        form1.action="${pageContext.request.contextPath}/updatePwd";
    }
</script>
</html>
|
在web.xml中配置映射请求(即上文“对应在web.xml文件中添加请求映射”中的updatePwd配置,同一个servlet只需声明一次,无需重复添加)
1 2 3 4 5 6 7 8 9
| <servlet> <servlet-name>updatePwd</servlet-name> <servlet-class>com.zqy.servlet.user.UpdatePwdServlet</servlet-class> </servlet> <servlet-mapping> <servlet-name>updatePwd</servlet-name> <url-pattern>/updatePwd</url-pattern> </servlet-mapping>
|
7)运行测试,略
3. 问答模块
1)★ 新冠信息的爬取
这里我尝试过爬取百度知道有关新冠肺炎的知识信息,但由于其反爬机制较为强大并未成功,经过尝试后选择对搜狗问问进行爬取。
1. 分析网页
https://www.sogou.com/sogou?query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E&insite=wenwen.sogou.com&pid=sogou-wsse-a9e18cb5dd9d3ab4&rcer=&page=1&ie=utf8
可以发现URL中的page参数表示页数,所以可以通过改变page=后面的数值来设置爬取的页数。
这些a标签中的href就是每个问答对的链接,通过Jsoup的select("a[target='_blank']")可以爬取到这些href。
获取到上述链接后,对每一个链接进行访问,进入到详细的问答页面,同样F12查看网页源代码,找到问题和答案部分。
同样对他们的标签进行分析,使用Jsoup进行解析,代码见下文。
可以看到网页源代码中的答案有许多无效字符,使用正则表达式进行无效字符的剔除即可。
问题字数较少,所以仅保留中文字符即可,正则表达式:replaceAll("[^\\u4E00-\\u9FA5]", "")
针对答案的特殊字符,使用正则表达式:replaceAll("[\\u25c6~\\u25c7]|[\\——]|[\\▲]|[\\△]]", "")
2. 编写HttpUtil工具类,负责获取HTML页面
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
| package com.zqy.util.crawler;
import org.apache.http.HttpHost; import org.apache.http.HttpStatus; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.util.EntityUtils;
import java.io.IOException; import java.util.Random;
/**
 * Fetches HTML pages over HTTP.
 *
 * <p>All instances share one connection pool and one client. The previous version created a
 * new {@code PoolingHttpClientConnectionManager} (and client) per instance and never closed
 * them, so a crawler that builds one {@code HttpUtil} per URL leaked a pool for every page.
 */
public class HttpUtil {

    /** Process-wide connection pool: at most 100 connections in total, 10 per host. */
    private static final PoolingHttpClientConnectionManager POOL = createPool();

    /** Process-wide client bound to {@link #POOL}. */
    private static final CloseableHttpClient CLIENT =
            HttpClients.custom().setConnectionManager(POOL).build();

    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/74.0.3729.169 Safari/537.36";

    public HttpUtil() {
    }

    private static PoolingHttpClientConnectionManager createPool() {
        PoolingHttpClientConnectionManager pool = new PoolingHttpClientConnectionManager();
        pool.setMaxTotal(100);
        pool.setDefaultMaxPerRoute(10);
        return pool;
    }

    /**
     * Downloads the page at {@code url} and decodes it as UTF-8.
     *
     * @param url absolute URL to fetch
     * @return the response body, or an empty string when the URL is blank or malformed, the
     *         request fails, the status is not 200, or the response has no body
     */
    public String doGetHtml(String url) {
        if (url == null || url.trim().isEmpty()) {
            return "";
        }
        HttpGet httpGet;
        try {
            httpGet = new HttpGet(url);
        } catch (IllegalArgumentException e) {
            // Malformed URL scraped from a page: report it and skip instead of aborting the crawl.
            e.printStackTrace();
            return "";
        }
        // A browser-like User-Agent, since the default client identifier is commonly rejected.
        httpGet.setHeader("User-Agent", USER_AGENT);
        httpGet.setConfig(this.getConfig());

        // Closing the response returns the connection to the pool.
        try (CloseableHttpResponse response = CLIENT.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK
                    && response.getEntity() != null) {
                return EntityUtils.toString(response.getEntity(), "utf-8");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return "";
    }

    /** Per-request timeouts: 20 s to connect, 20 s to lease a pooled connection, 10 s between packets. */
    private RequestConfig getConfig() {
        return RequestConfig.custom()
                .setConnectTimeout(20000)
                .setConnectionRequestTimeout(20000)
                .setSocketTimeout(10000)
                .build();
    }
}
|
3. 编写Spider类,负责解析HTML页面,返回爬取结果
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
| package com.zqy.util.crawler;
import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.util.ArrayList;
/**
 * Crawls Sogou Wenwen search results for COVID-19 questions and extracts question/answer pairs.
 *
 * <p>The collected links and pairs accumulate in instance fields, so use a fresh instance per
 * crawl unless accumulation across pages is intended.
 */
public class Spider {

    /** Keeps only characters in the CJK range U+4E00..U+9FA5. */
    private static final String NON_CHINESE = "[^\\u4E00-\\u9FA5]";

    /**
     * Decorative characters stripped from answers: the two diamond bullets, '~', the em dash and
     * the two triangle bullets. The previous pattern ended in "[\\△]]", which only matched a
     * triangle followed by a literal ']' and therefore never removed a plain triangle.
     */
    private static final String ANSWER_NOISE = "[\\u25c6\\u25c7~—▲△]";

    private final HttpUtil httpUtil = new HttpUtil();
    private ArrayList<String> hrefList = new ArrayList<>();
    private ArrayList<ArrayList<String>> QAs = new ArrayList<>();

    /**
     * Loads one search-result page and appends the detail-page links found on it to the
     * internal link list. Result items without a link are skipped.
     *
     * @param page result page number passed as the {@code page} query parameter
     */
    public void getHref(int page) {
        String url = "https://www.sogou.com/sogou?query=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E&insite=wenwen.sogou.com&pid=sogou-wsse-a9e18cb5dd9d3ab4&rcer=&page="+page+"&ie=utf8";
        String html = httpUtil.doGetHtml(url);
        Document document = Jsoup.parse(html);
        for (Element postItem : document.getElementsByClass("fb")) {
            String href = postItem.select("a[target='_blank']").attr("href");
            if (!href.isEmpty()) {
                hrefList.add(href);
            }
        }
    }

    /**
     * Extracts the question and answer from one detail page.
     *
     * @param url detail-page URL
     * @return a two-element list {@code [question, answer]}, or an empty list when the page could
     *         not be fetched or either part is missing or blank after cleaning
     */
    public ArrayList<String> getQuestionAndAnswer(String url) {
        ArrayList<String> pair = new ArrayList<>();
        Document document = Jsoup.parse(httpUtil.doGetHtml(url));

        Element title = document.getElementById("question_title");
        // getElementsByTag never returns null; an empty result must be checked before get(0).
        Elements preTags = document.getElementsByTag("pre");
        if (title == null || preTags.isEmpty()) {
            return pair;
        }

        String question = title.text().replaceAll(NON_CHINESE, "");
        String answer = preTags.get(0).text().replaceAll(ANSWER_NOISE, "").trim();
        if (question.isEmpty() || answer.isEmpty()) {
            return pair;
        }
        pair.add(question);
        pair.add(answer);
        return pair;
    }

    /**
     * Crawls one result page and every detail page it links to.
     *
     * @param page result page number
     * @return all complete {@code [question, answer]} pairs collected by this instance so far
     */
    public ArrayList<ArrayList<String>> spiderQuestionAndAnswer(int page) {
        getHref(page);
        for (String href : hrefList) {
            ArrayList<String> pair = getQuestionAndAnswer(href);
            // Only complete pairs are passed on, so consumers can index 0 and 1 safely.
            if (pair.size() == 2) {
                QAs.add(pair);
            }
        }
        return QAs;
    }
}
|
4. 设置定时任务,实现定时爬取
我尝试过一次爬取多个页面,当一次爬取5个页面时就会被反爬机制捕获,并将导致短时间内无法再次进行爬取,于是这里采用设置定时任务的方式,每隔固定时间爬取一个页面的数据存入数据库,从而避免了IP的封杀。
- 建立一个TaskListener类,实现ServletContextListener接口,在应用启动时调用TimerManager类的默认构造方法
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
| package com.zqy.servlet.time;
import com.zqy.util.crawler.TimerManager;
import javax.servlet.ServletContextEvent; import javax.servlet.ServletContextListener;
public class TaskListener implements ServletContextListener { public void contextInitialized(ServletContextEvent sce) { new TimerManager(); }
public void contextDestroyed(ServletContextEvent sce) { } }
|
在web.xml中设置任务监听器
1 2 3 4
| <listener> <listener-class>com.zqy.servlet.time.TaskListener</listener-class> </listener>
|
- 这里引入常量类Constants,将频繁更改的固定数值放入其中,方便后续代码的优化。
1 2 3 4 5 6 7 8 9 10 11 12 13
| package com.zqy.util;
/** Application-wide settings gathered in one place. */
public class Constants {

    /** Session attribute key under which the logged-in user is stored. */
    public static final String USER_SESSION = "userSession";

    /** Next search-result page to crawl; mutable, advanced after each crawl. */
    public static int page = 13;

    /** Interval between two crawl runs, in milliseconds (one day). */
    public static final long PERIOD_DAY = 24L * 60L * 60L * 1000L;

    /** Hour of day (0-23) at which the crawl runs. */
    public static final int UPDATE_HOUR = 13;

    /** Minute of the hour at which the crawl runs. */
    public static final int UPDATE_MINUTE = 0;

    /** Second of the minute at which the crawl runs. */
    public static final int UPDATE_SECOND = 0;

    /** Number of best-matching answers returned for a question. */
    public static final int bestAnsNum = 3;
}
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
| package com.zqy.util.crawler;
import com.zqy.util.Constants;
import java.util.Calendar; import java.util.Date; import java.util.Timer;
/**
 * Schedules the crawl {@link Task} to run once per {@code Constants.PERIOD_DAY}, starting at the
 * next occurrence of the configured hour/minute/second.
 */
public class TimerManager {

    /** Computes the first run time and starts the timer. */
    public TimerManager() {
        Calendar calendar = Calendar.getInstance();
        calendar.set(Calendar.HOUR_OF_DAY, Constants.UPDATE_HOUR);
        calendar.set(Calendar.MINUTE, Constants.UPDATE_MINUTE);
        calendar.set(Calendar.SECOND, Constants.UPDATE_SECOND);
        // Without this the first run inherits the current millisecond and fires slightly late.
        calendar.set(Calendar.MILLISECOND, 0);

        Date firstRun = calendar.getTime();
        // Today's slot has already passed: start tomorrow instead of firing immediately.
        if (firstRun.before(new Date())) {
            firstRun = this.addDay(firstRun, 1);
        }
        // Logged after the adjustment so the printed time is the one actually scheduled.
        System.out.println("第一次执行定时任务的时间:" + firstRun);

        // NOTE(review): the timer thread is non-daemon and is never cancelled here.
        Timer timer = new Timer();
        Task task = new Task();
        timer.schedule(task, firstRun, Constants.PERIOD_DAY);
    }

    /**
     * Returns {@code date} shifted by {@code num} days.
     *
     * @param date base date
     * @param num  number of days to add; may be negative
     * @return the shifted date
     */
    public Date addDay(Date date, int num) {
        Calendar startDT = Calendar.getInstance();
        startDT.setTime(date);
        startDT.add(Calendar.DAY_OF_MONTH, num);
        return startDT.getTime();
    }
}
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
| package com.zqy.util.crawler;
import com.zqy.service.qa.QAService; import com.zqy.service.qa.impl.QAServiceImpl; import com.zqy.util.Constants; import org.junit.Test; import java.util.ArrayList; import java.util.TimerTask;
/**
 * Timer task that crawls one search-result page and stores the question/answer pairs it finds.
 *
 * <p>The page counter in {@code Constants.page} advances only after a page was crawled, so a
 * failed crawl is retried on the next run.
 */
public class Task extends TimerTask {

    /**
     * Crawls the current page and persists the result.
     *
     * <p>Failures are caught and logged: {@code java.util.Timer} cancels itself permanently when
     * a task throws, which would silently stop every future crawl. The JUnit {@code @Test}
     * annotation that used to sit on this method was removed because this is production code.
     */
    @Override
    public void run() {
        try {
            System.out.println("当前爬取页号为:" + Constants.page);
            Spider spider = new Spider();
            ArrayList<ArrayList<String>> qaArray = spider.spiderQuestionAndAnswer(Constants.page);
            Constants.page += 1;
            QAService qaService = new QAServiceImpl();
            qaService.updateQA(qaArray);
        } catch (RuntimeException e) {
            e.printStackTrace();
        }
    }
}
|
5. 运行测试,略
2)QADao接口的编写
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
| package com.zqy.dao.qa;
import com.zqy.pojo.QA; import java.sql.Connection; import java.sql.SQLException; import java.util.ArrayList; import java.util.List;
/** Data-access operations on the {@code covid} question/answer table. */
public interface QADao {

    /**
     * Stores one question/answer pair.
     *
     * @return {@code true} when a row was inserted
     * @throws SQLException on a database error
     */
    boolean updateQA(Connection conn, String question, String answer) throws SQLException;

    /**
     * Finds the stored entries most similar to {@code question}.
     *
     * @return formatted result texts, best match first
     * @throws SQLException on a database error
     */
    ArrayList<String> getBestAns(Connection conn, String question) throws SQLException;

    /**
     * Reads the answer of the entry with the given id.
     *
     * @throws SQLException on a database error
     */
    String getAns(Connection conn, int id) throws SQLException;

    /**
     * Computes how many pages of {@code pageSize} entries exist.
     *
     * @throws SQLException on a database error
     */
    int getTotalPage(Connection connection, int pageSize) throws SQLException;

    /**
     * Reads one page of entries.
     *
     * @param currentPage 1-based page number
     * @throws SQLException on a database error
     */
    List<QA> findAllQA(Connection connection, int pageSize, int currentPage) throws SQLException;
}
|
3)接口实现类QADaoImpl的编写
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
| package com.zqy.dao.qa.impl;
import com.zqy.dao.BaseDao; import com.zqy.dao.qa.QADao; import com.zqy.pojo.QA; import com.zqy.pojo.User; import com.zqy.service.qa.QAService; import com.zqy.service.qa.impl.QAServiceImpl; import com.zqy.util.Constants; import com.zqy.util.textSimilarity.CosineSimilarity; import org.junit.Test; import java.sql.*; import java.util.*; import java.util.Date;
/**
 * JDBC implementation of {@link QADao} for the {@code covid} table.
 *
 * <p>Statements and result sets are opened in try-with-resources so they are released even when
 * a query throws. The connection is owned (and closed) by the caller.
 */
public class QADaoImpl implements QADao {

    /** A stored question together with its similarity to the user's question. */
    private static final class ScoredQuestion {
        final int id;
        final String question;
        final double score;

        ScoredQuestion(int id, String question, double score) {
            this.id = id;
            this.question = question;
            this.score = score;
        }
    }

    /**
     * Inserts a question/answer pair stamped with the current time. Uses {@code insert ignore},
     * so a question that already exists is skipped.
     *
     * @return {@code true} when a row was inserted; {@code false} for a duplicate or a null connection
     * @throws SQLException on a database error
     */
    @Override
    public boolean updateQA(Connection conn, String question, String answer) throws SQLException {
        if (conn == null) {
            return false;
        }
        String sql = "insert ignore into covid(question,answer,creationDate) VALUES (?,?,?)";
        try (PreparedStatement pstm = conn.prepareStatement(sql)) {
            pstm.setString(1, question);
            pstm.setString(2, answer);
            pstm.setTimestamp(3, new Timestamp(System.currentTimeMillis()));
            return pstm.executeUpdate() == 1;
        }
    }

    /**
     * Scores every stored question against {@code question} with cosine similarity and returns
     * the top {@code Constants.bestAnsNum} entries as formatted text.
     *
     * @return up to {@code Constants.bestAnsNum} texts, best match first; fewer when the table
     *         holds fewer rows; empty when {@code conn} is null
     * @throws SQLException on a database error
     */
    @Override
    public ArrayList<String> getBestAns(Connection conn, String question) throws SQLException {
        ArrayList<String> ans = new ArrayList<>();
        if (conn == null) {
            return ans;
        }

        // Only id and question are needed for scoring; answers are fetched for the winners only.
        List<ScoredQuestion> scored = new ArrayList<>();
        try (PreparedStatement pstm = conn.prepareStatement("select id, question from covid");
             ResultSet rs = pstm.executeQuery()) {
            while (rs.next()) {
                String candidate = rs.getString("question");
                double score = CosineSimilarity.getSimilarity(candidate, question);
                scored.add(new ScoredQuestion(rs.getInt("id"), candidate, score));
            }
        }

        // Highest score first.
        scored.sort((a, b) -> Double.compare(b.score, a.score));

        // The table may hold fewer rows than requested; the old fixed-count loop threw here.
        int limit = Math.min(Constants.bestAnsNum, scored.size());
        for (int i = 0; i < limit; i++) {
            ScoredQuestion best = scored.get(i);
            StringBuilder sb = new StringBuilder();
            sb.append("问题:").append(best.question).append("\n");
            sb.append("答案:").append(getAns(conn, best.id)).append('\n');
            sb.append("答案评分:").append(best.score).append("\n\n");
            ans.add(sb.toString());
        }
        return ans;
    }

    /**
     * Reads the answer stored for the entry with the given id.
     *
     * @return the answer, or an empty string when there is no such row or {@code conn} is null
     * @throws SQLException on a database error
     */
    @Override
    public String getAns(Connection conn, int id) throws SQLException {
        String ans = "";
        if (conn == null) {
            return ans;
        }
        try (PreparedStatement pstm = conn.prepareStatement("select answer from covid where id = ?")) {
            pstm.setInt(1, id);
            try (ResultSet rs = pstm.executeQuery()) {
                while (rs.next()) {
                    ans = rs.getString("answer");
                }
            }
        }
        return ans;
    }

    /**
     * Computes the number of pages needed to show every entry.
     *
     * @param pageSize entries per page; must be positive
     * @return the page count (rounded up), or 0 when {@code conn} is null
     * @throws IllegalArgumentException if {@code pageSize} is not positive
     * @throws SQLException on a database error
     */
    @Override
    public int getTotalPage(Connection conn, int pageSize) throws SQLException {
        if (conn == null) {
            return 0;
        }
        if (pageSize <= 0) {
            throw new IllegalArgumentException("pageSize must be positive: " + pageSize);
        }
        int totalNum = 0;
        try (PreparedStatement pstm = conn.prepareStatement("select count(*) from covid");
             ResultSet rs = pstm.executeQuery()) {
            if (rs.next()) {
                totalNum = rs.getInt(1);
            }
        }
        // Ceiling division.
        return (totalNum + pageSize - 1) / pageSize;
    }

    /**
     * Reads one page of entries ordered by id, so consecutive pages never overlap or skip rows.
     *
     * @param pageSize    entries per page; a non-positive value yields an empty list
     * @param currentPage 1-based page number; values below 1 are treated as 1
     * @return the entries of that page; empty when {@code conn} is null
     * @throws SQLException on a database error
     */
    @Override
    public List<QA> findAllQA(Connection conn, int pageSize, int currentPage) throws SQLException {
        List<QA> qaList = new ArrayList<>();
        if (conn == null || pageSize <= 0) {
            return qaList;
        }
        // A page number below 1 would produce a negative offset, which MySQL rejects.
        int offset = (Math.max(currentPage, 1) - 1) * pageSize;
        try (PreparedStatement pstm = conn.prepareStatement("select * from covid order by id limit ?, ?")) {
            pstm.setInt(1, offset);
            pstm.setInt(2, pageSize);
            try (ResultSet rs = pstm.executeQuery()) {
                while (rs.next()) {
                    QA qa = new QA();
                    qa.setId(rs.getInt("id"));
                    qa.setQuestion(rs.getString("question"));
                    qa.setAnswer(rs.getString("answer"));
                    // Same column name that updateQA writes.
                    qa.setCreationDate(rs.getTimestamp("creationDate"));
                    qaList.add(qa);
                }
            }
        }
        return qaList;
    }
}
|
4)QAService业务层接口的编写
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
| package com.zqy.service.qa;
import com.zqy.pojo.QA;
import java.util.ArrayList; import java.util.List;
/**
 * Business-layer operations of the question/answer module.
 */
public interface QAService {

    /**
     * Stores a batch of question/answer rows.
     *
     * @param qaArray the rows to store, each row being a list of strings
     * @return a success flag as reported by the implementation
     */
    boolean updateQA(ArrayList<ArrayList<String>> qaArray);

    /**
     * Finds the best answers for a question.
     *
     * @param question the question text to look up
     * @return the matching answers as display strings
     */
    ArrayList<String> getBestAns(String question);

    /**
     * @param pageSize number of entries per page
     * @return the total number of pages for that page size
     */
    int getTotalPage(int pageSize);

    /**
     * @param pageSize    number of entries per page
     * @param currentPage the page to load
     * @return the entries of the requested page
     */
    List<QA> findAllQA(int pageSize, int currentPage);
}
|
5)接口实现类QAServiceImpl的编写
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
| package com.zqy.service.qa.impl;
import com.zqy.dao.BaseDao; import com.zqy.dao.qa.QADao; import com.zqy.dao.qa.impl.QADaoImpl; import com.zqy.pojo.QA; import com.zqy.service.qa.QAService; import org.junit.Test;
import java.sql.Connection; import java.sql.SQLException; import java.util.ArrayList; import java.util.List;
/**
 * Default {@link QAService}: obtains a connection from {@link BaseDao} for each call, delegates
 * to a {@link QADao} and always releases the connection afterwards.
 *
 * <p>SQL failures are printed and turned into an empty/default result, so callers never
 * receive {@code null}.
 */
public class QAServiceImpl implements QAService {
    private final QADao qaDao;

    public QAServiceImpl() {
        qaDao = new QADaoImpl();
    }

    /**
     * Inserts every row of {@code qaArray}; element 0 of a row is the question, element 1 the
     * answer. Progress is printed for each row that was actually inserted.
     *
     * @return the outcome of the LAST insert attempted (not whether all rows were inserted);
     *         {@code false} when the batch is empty
     */
    @Override
    public boolean updateQA(ArrayList<ArrayList<String>> qaArray) {
        Connection connection = BaseDao.getConn();
        boolean flag = false;
        try {
            for (int i = 0; i < qaArray.size(); i++) {
                ArrayList<String> row = qaArray.get(i);
                String question = row.get(0);
                String answer = row.get(1);
                flag = qaDao.updateQA(connection, question, answer);
                if (flag) {
                    System.out.println("已完成数据载入:" + (i + 1) + " question:" + question);
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            BaseDao.closeResource(connection, null, null);
        }
        return flag;
    }

    /**
     * Returns the best-matching answers for {@code question}.
     *
     * @return the formatted answers; an empty list (never {@code null}) when the lookup fails
     */
    @Override
    public ArrayList<String> getBestAns(String question) {
        Connection connection = BaseDao.getConn();
        // Start from an empty list so a failed query cannot hand null to the servlet layer.
        ArrayList<String> ans = new ArrayList<>();
        try {
            ArrayList<String> found = qaDao.getBestAns(connection, question);
            if (found != null) {
                ans = found;
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            BaseDao.closeResource(connection, null, null);
        }
        return ans;
    }

    /**
     * @return the number of pages for {@code pageSize}, or {@code 0} when the query fails
     */
    @Override
    public int getTotalPage(int pageSize) {
        Connection connection = BaseDao.getConn();
        int totalPage = 0;
        try {
            totalPage = qaDao.getTotalPage(connection, pageSize);
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            BaseDao.closeResource(connection, null, null);
        }
        return totalPage;
    }

    /**
     * @return the entries of page {@code currentPage}, or an empty list when the query fails
     */
    @Override
    public List<QA> findAllQA(int pageSize, int currentPage) {
        Connection connection = BaseDao.getConn();
        List<QA> qaList = new ArrayList<>();
        try {
            qaList = qaDao.findAllQA(connection, pageSize, currentPage);
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            BaseDao.closeResource(connection, null, null);
        }
        return qaList;
    }
}
|
6)问答模块Servlet层的编写
1. SearchServlet的编写,负责处理用户的问题搜索请求(加入了自然语言处理,在数据库中进行问题的相似度匹配)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
| package com.zqy.servlet.qa;
import com.zqy.service.qa.QAService; import com.zqy.service.qa.impl.QAServiceImpl; import com.zqy.service.user.UserService; import com.zqy.service.user.impl.UserServiceImpl; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import java.io.IOException; import java.util.ArrayList;
/**
 * Handles question searches: asks the {@link QAService} for the best answers and forwards
 * the concatenated result plus a status message to {@code search.jsp}.
 */
public class SearchServlet extends HttpServlet {

    /**
     * Reads the {@code question} parameter and sets the request attributes {@code question},
     * {@code flag} and {@code answer} before forwarding to {@code search.jsp}.
     */
    @Override
    protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        QAService qaService = new QAServiceImpl();

        String question = req.getParameter("question");
        req.setAttribute("question", question);

        ArrayList<String> bestAns = qaService.getBestAns(question);
        if (bestAns == null) {
            // The service contract does not rule out null; treat it as "nothing found".
            bestAns = new ArrayList<>();
        }

        String topScore = null;
        StringBuilder shown = new StringBuilder();
        for (int i = 0; i < bestAns.size(); i++) {
            String entry = bestAns.get(i);
            shown.append(entry);
            if (i == 0) {
                // Each entry ends with "答案评分:<score>", so the score follows the last colon.
                topScore = entry.substring(entry.lastIndexOf(":") + 1).trim();
            }
        }

        String flag;
        if (bestAns.isEmpty()) {
            flag = "未找到相关答案!";
        } else if ("1.0".equals(topScore)) {
            flag = "已找到精确答案!";
        } else {
            flag = "已找到近似答案!";
        }
        req.setAttribute("flag", flag);

        req.setAttribute("answer", shown.toString());
        req.getRequestDispatcher("search.jsp").forward(req, resp);
    }

    /** POST requests are handled exactly like GET requests. */
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        doGet(req, resp);
    }
}
|
2. ShowQAServlet的编写,负责向前端展示数据库的部分信息。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
| package com.zqy.servlet.qa;
import com.zqy.pojo.QA; import com.zqy.service.qa.QAService; import com.zqy.service.qa.impl.QAServiceImpl; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import java.io.IOException; import java.util.List;
/**
 * Shows the stored question/answer entries one page at a time via {@code showQA.jsp}.
 */
public class ShowQAServlet extends HttpServlet {

    /** Number of entries shown per page. */
    private static final int PAGE_SIZE = 10;

    /**
     * Reads the {@code currentPage} parameter, clamps it to the valid page range and forwards
     * the page's entries together with {@code currentPage} and {@code totalPage}.
     */
    @Override
    protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        int requestedPage = parsePage(req.getParameter("currentPage"));

        QAService qaService = new QAServiceImpl();
        int totalPage = qaService.getTotalPage(PAGE_SIZE);

        // Clamp to [1, totalPage]; the outer max keeps the page at 1 when the table is empty
        // (totalPage == 0), so the DAO never receives page 0 and a negative offset.
        int currentPage = Math.max(1, Math.min(requestedPage, totalPage));

        List<QA> qaList = qaService.findAllQA(PAGE_SIZE, currentPage);

        req.setAttribute("currentPage", currentPage);
        req.setAttribute("totalPage", totalPage);
        req.setAttribute("qaList", qaList);

        req.getRequestDispatcher("showQA.jsp").forward(req, resp);
    }

    /** POST requests are handled exactly like GET requests. */
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        doGet(req, resp);
    }

    /** Parses the page parameter, falling back to page 1 when it is missing or not a number. */
    private static int parsePage(String raw) {
        if (raw == null) {
            return 1;
        }
        try {
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException ignored) {
            // A malformed page number is user input, not a server error: show the first page.
            return 1;
        }
    }
}
|
3. 对应在web.xml文件中添加请求映射
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
| <servlet> <servlet-name>SearchServlet</servlet-name> <servlet-class>com.zqy.servlet.qa.SearchServlet</servlet-class> </servlet> <servlet-mapping> <servlet-name>SearchServlet</servlet-name> <url-pattern>/search</url-pattern> </servlet-mapping>
<servlet> <servlet-name>ShowQAServlet</servlet-name> <servlet-class>com.zqy.servlet.qa.ShowQAServlet</servlet-class> </servlet> <servlet-mapping> <servlet-name>ShowQAServlet</servlet-name> <url-pattern>/show</url-pattern> </servlet-mapping>
|
7)前端jsp页面代码
1. main.jsp
1 2 3 4 5 6 7 8 9 10 11 12 13
| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> <title>My JSP 'main.jsp' starting page</title> </head> <frameset rows="25%,*" border="5px" bordercolor="black" frameborder="yes"> <frame src="top.jsp" name="top" noresize scrolling="no"/> <frameset cols="15%,*"> <frame src="left.jsp" name="left" noresize/> <frame src="updatePassword.jsp" name="right"/> </frameset> </frameset> </html>
|
2. left.jsp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
| <%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%> <% String path = request.getContextPath(); String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; %>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> <base href="<%=basePath%>"> <title>My JSP 'left.jsp' starting page</title> </head> <style> ul{ list-style-type:none; <!--background-color:#1D9BE7;--> border:2px solid purple; width:120px; height:240px; } ul li{ padding:15px; } a{ text-decoration: none; color:black; } a:HOVER { text-decoration:underline; color:red; } </style> <body bgcolor="#ffdead"> <ul> <li><a href="search.jsp" target="right">问题搜索</a></li> <li><a href="show?currentPage=1" target="right">常见问题</a></li> <li><a href="updatePassword.jsp" target="right">密码修改</a></li> <li><a href="https://coderzqy.github.io/" target="main">友情链接</a></li> <li><a href="javascript:void(0)">帮助</a></li> </ul> </body> </html>
|
3. top.jsp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
| <%@ page language="java" import="java.util.*" pageEncoding="utf-8"%> <% String path = request.getContextPath(); String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; %>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> <base href="<%=basePath%>"> <title>My JSP 'top.jsp' starting page</title> </head> <style> h1{ font-size:45px; font-family:华文行楷; color:black } #time{ position:absolute; left:75%; } </style> <body bgcolor="#ffdead" onload="getNowTime()"> <div style="text-align: center;"><h1>新冠肺炎知识与防护自动问答系统<br>(COVID-19 Intelligent QA System)</h1></div> <span style="text-indent:3cm">欢迎你,${userSession.userName}</span> <span id="time">当前时间: <span id="nowtime"></span> <a href="javascript:window.location.replace('loginAndRegister.jsp')" target="_top">退出系统</a></span> </body> <script> function getNowTime(){ var showtime = document.getElementById("nowtime"); var date = new Date(); var year = date.getFullYear(); var month = date.getMonth()+1; if(month.toString().length < 2){ month = "0"+month; } var day = date.getDate(); if(day.toString().length < 2){ day = "0"+day; } var hour = date.getHours(); if(hour.toString().length < 2){ hour = "0"+hour; } var minu = date.getMinutes(); if(minu.toString().length < 2){ minu = "0"+minu; } var second = date.getSeconds(); if(second.toString().length <2){ second = "0"+second; } var nowtime = year+"/"+month+"/"+day+" "+hour+":"+minu+":"+second; showtime.innerHTML="<font color='black'>"+nowtime+"</font>"; window.setTimeout("getNowTime()",1000); } </script> </html>
|
4. search.jsp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
| <%@ page language="java" contentType="text/html; charset=utf-8" pageEncoding="utf-8"%> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> <% String path = request.getContextPath(); String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; %>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> <base href="<%=basePath%>"> <title>My JSP 'updatePassword.jsp' starting page</title> </head> <style> .input{ height:30px; width:600px; font-size:20px; align:"center"; } .btn{ cursor:pointer; height:30px; width:70px; font-size:20px; font-weight: bold; border:0; background-color: green; color:white; } .boxes{ font-size:18px; color:rebeccapurple; } </style>
<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%>
<body bgcolor="#f5f5f5">
<%-- The question is user input and the answer is stored text: both are HTML-escaped with c:out. --%>
<form onsubmit="return check()" name="form1">
    <table>
        <tr>
            <th colspan="2" style="font-size:25;font-style: oblique">新冠肺炎知识搜索</th>
        </tr>
        <tr>
            <th colspan="2" style="color:red">${flag}</th>
        </tr>
        <tr>
            <td><input type="text" name="question" class="input" value="<c:out value='${question}'/>"></td>
            <td><input type="submit" value="搜索" class="btn"/></td>
            <td width="200px" id="e1" style="color:red;font-weight:bold">${error}</td>
        </tr>
    </table>
</form>
<textarea rows="20" cols="120" readonly="readonly" class="boxes"><c:out value="${answer}"/></textarea>

</body>
<script>
    // Rejects an empty question; otherwise points the form at the search servlet.
    function check(){
        var form1 = document.form1;
        var question = form1.question.value;

        var e1 = document.getElementById("e1");
        if(question.length < 1){
            e1.innerText="问题不能为空";
            return false;
        }
        e1.innerText="";
        // Relative URL: resolved against <base href>, so it also works when the application
        // is deployed under a non-root context path ("/search" would skip the context path).
        form1.action="search";
        return true;
    }
</script>
</html>
|
5. showQA.jsp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
| <%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%> <% String path = request.getContextPath(); String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; %> <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> <base href="<%=basePath%>"> <title>My JSP 'showQA.jsp.jsp' starting page</title> </head>
<body bgcolor="#f5f5f5">
<%-- Question and answer text comes from the database; c:out HTML-escapes it before display. --%>
<form>
    <table border="1" cellpadding="0" cellspacing="0" bordercolor="black" width="800px">
        <tr>
            <th colspan="5">新冠知识数据库信息</th>
        </tr>
        <tr>
            <th>序号</th>
            <th>问题</th>
            <th>答案</th>
        </tr>
        <c:forEach items="${qaList}" var="qa">
            <tr onmouseover="this.bgColor='lightblue'" onmouseout="this.bgColor=''" style="cursor:pointer">
                <td>${qa.id}</td>
                <td><c:out value="${qa.question}"/></td>
                <td><c:out value="${qa.answer}"/></td>
            </tr>
        </c:forEach>
        <tr>
            <td align="right" colspan="5">
                <a href="show?currentPage=1">首页</a>
                <a href="show?currentPage=${currentPage-1}">上一页</a>
                <a href="show?currentPage=${currentPage+1}">下一页</a>
                <a href="show?currentPage=${totalPage}">尾页</a>
            </td>
        </tr>
    </table>
</form>
</body>
</html>
|
8)自然语言处理(NLP)
采用余弦相似度计算字符串相似度的方式,将用户输入的问题和数据库中已有的问题进行匹配,本系统选择将相似度最高的三个可能答案作为最佳答案返回,输出到前端显示。
1. 理论知识
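核心公式:设 A、B 为两个文本的 n 维词频向量,则 $$\cos\theta = \frac{\sum_{i=1}^{n} A_i B_i}{\sqrt{\sum_{i=1}^{n} A_i^{2}}\;\sqrt{\sum_{i=1}^{n} B_i^{2}}}$$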
余弦值越接近1,也就是两个向量越相似,这就叫“余弦相似性”;
余弦值越接近0,也就是两个向量越不相似,也就是这两个字符串越不相似。
2. 计算过程示例
举一个例子来说明,用上述理论计算文本的相似性。为了简单起见,先从句子着手。
句子A:这只皮靴号码大了。那只号码合适。
句子B:这只皮靴号码不小,那只更合适。
怎样计算上面两句话的相似程度?
基本思路是:如果这两句话的用词越相似,它们的内容就应该越相似。因此,可以从词频入手,计算它们的相似程度。
第一步:分词
句子A:这只/皮靴/号码/大了。那只/号码/合适。
句子B:这只/皮靴/号码/不/小,那只/更/合适。
第二步:计算词频(也就是每个词语出现的频率)
句子A:这只1,皮靴1,号码2,大了1,那只1,合适1,不0,小0,更0
句子B:这只1,皮靴1,号码1,大了0,那只1,合适1,不1,小1,更1
第三步:写出词频向量
句子A:(1,1,2,1,1,1,0,0,0)
句子B:(1,1,1,0,1,1,1,1,1)
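第四步:计算cos值
$$\cos\theta = \frac{1\times1+1\times1+2\times1+1\times0+1\times1+1\times1+0\times1+0\times1+0\times1}{\sqrt{1^2+1^2+2^2+1^2+1^2+1^2}\times\sqrt{8\times1^2}} = \frac{6}{3\times\sqrt{8}} \approx 0.71$$
计算结果中夹角的余弦值约为0.71,比较接近于1,所以,上面的句子A和句子B是基本相似的。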
3. 代码编写
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
| <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-lang3</artifactId> <version>3.12.0</version> </dependency>
<dependency> <groupId>org.projectlombok</groupId> <artifactId>lombok</artifactId> <version>1.18.18</version> <scope>provided</scope> </dependency>
<dependency> <groupId>com.hankcs</groupId> <artifactId>hanlp</artifactId> <version>portable-1.8.0</version> </dependency>
<dependency> <groupId>commons-collections</groupId> <artifactId>commons-collections</artifactId> <version>3.2.2</version> </dependency>
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
| package com.zqy.util.textSimilarity;
import com.hankcs.hanlp.HanLP; import com.hankcs.hanlp.seg.common.Term;
import java.util.List; import java.util.stream.Collectors;
/**
 * Splits a sentence into {@link Word}s using HanLP's segmenter.
 */
public class Tokenizer {

    /**
     * Segments {@code sentence} with {@link HanLP#segment(String)} and converts each resulting
     * term into a {@link Word}.
     *
     * @param sentence the text to segment
     * @return one {@link Word} per HanLP term, in segmentation order
     */
    public static List<Word> segment(String sentence) {
        return HanLP.segment(sentence)
                .stream()
                .map(Tokenizer::toWord)
                .collect(Collectors.toList());
    }

    /** Builds a {@link Word} from a term's text and the string form of its nature tag. */
    private static Word toWord(Term term) {
        return new Word(term.word, term.nature.toString());
    }
}
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
| package com.zqy.util.textSimilarity;
import lombok.Data;
import java.util.Objects;
@Data public class Word implements Comparable {
private String name; private String pos;
private Float weight;
public String getName() { return name; }
public void setName(String name) { this.name = name; }
public String getPos() { return pos; }
public void setPos(String pos) { this.pos = pos; }
public Float getWeight() { return weight; }
public void setWeight(Float weight) { this.weight = weight; }
public Word(String name, String pos) { this.name = name; this.pos = pos; }
@Override public int hashCode() { return Objects.hashCode(this.name); }
@Override public boolean equals(Object obj) { if (obj == null) { return false; } if (getClass() != obj.getClass()) { return false; } final Word other = (Word) obj; return Objects.equals(this.name, other.name); }
@Override public String toString() { StringBuilder str = new StringBuilder(); if (name != null) { str.append(name); } if (pos != null) { str.append("/").append(pos); }
return str.toString(); }
@Override public int compareTo(Object o) { if (this == o) { return 0; } if (this.name == null) { return -1; } if (o == null) { return 1; } if (!(o instanceof Word)) { return 1; } String t = ((Word) o).getName(); if (t == null) { return 1; } return this.name.compareTo(t); } }
|
- 编写相似率具体实现工具类CosineSimilarity
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
| package com.zqy.util.textSimilarity;
import org.apache.commons.lang3.StringUtils; import java.math.BigDecimal; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.collections.CollectionUtils;
/**
 * Cosine similarity between two texts, based on word-frequency vectors.
 *
 * <p>All sums are computed sequentially, so results are deterministic; the word lists involved
 * are single sentences, far too small to benefit from parallel streams.
 */
public class CosineSimilarity {

    /**
     * Compares two raw texts.
     *
     * @return {@code 1.0} when both texts are blank or equal ignoring case, {@code 0.0} when
     *         exactly one is blank, otherwise the cosine similarity of their segmented words
     *         rounded to six decimals
     */
    public static double getSimilarity(String text1, String text2) {
        boolean blank1 = StringUtils.isBlank(text1);
        boolean blank2 = StringUtils.isBlank(text2);
        if (blank1 && blank2) {
            return 1.0;
        }
        if (blank1 || blank2) {
            return 0.0;
        }
        if (text1.equalsIgnoreCase(text2)) {
            return 1.0;
        }
        return getSimilarity(Tokenizer.segment(text1), Tokenizer.segment(text2));
    }

    /**
     * Compares two word lists and rounds the score to six decimal places.
     *
     * @return the rounded result of {@link #getSimilarityImpl(List, List)}
     */
    public static double getSimilarity(List<Word> words1, List<Word> words2) {
        double score = getSimilarityImpl(words1, words2);
        // Round half up to six decimals.
        score = (int) (score * 1000000 + 0.5) / (double) 1000000;
        return score;
    }

    /**
     * Computes {@code (A·B) / (|A|·|B|)} over the weight vectors of the two word lists, with
     * the quotient rounded half-up to nine decimals. Words without weights are first weighted
     * by their frequency within their own list.
     *
     * @return {@code 1.0} when both lists are null/empty, {@code 0.0} when exactly one is or
     *         when either vector has zero length, otherwise the cosine value
     */
    public static double getSimilarityImpl(List<Word> words1, List<Word> words2) {
        boolean empty1 = CollectionUtils.isEmpty(words1);
        boolean empty2 = CollectionUtils.isEmpty(words2);
        // Mirror the blank-text rules of the String overload instead of failing on get(0).
        if (empty1 && empty2) {
            return 1.0;
        }
        if (empty1 || empty2) {
            return 0.0;
        }

        taggingWeightByFrequency(words1, words2);

        Map<String, Float> weightMap1 = getFastSearchMap(words1);
        Map<String, Float> weightMap2 = getFastSearchMap(words2);

        // Every distinct word of either text is one dimension of the vector space.
        Set<String> vocabulary = new HashSet<>(weightMap1.keySet());
        vocabulary.addAll(weightMap2.keySet());

        double ab = 0.0;
        double aa = 0.0;
        double bb = 0.0;
        for (String term : vocabulary) {
            Float x1 = weightMap1.get(term);
            Float x2 = weightMap2.get(term);
            if (x1 != null && x2 != null) {
                ab += (double) x1 * x2;
            }
            if (x1 != null) {
                aa += (double) x1 * x1;
            }
            if (x2 != null) {
                bb += (double) x2 * x2;
            }
        }

        // A zero-length vector has no direction; avoid dividing by zero.
        if (aa == 0.0 || bb == 0.0) {
            return 0.0;
        }

        BigDecimal norm = BigDecimal.valueOf(Math.sqrt(aa)).multiply(BigDecimal.valueOf(Math.sqrt(bb)));
        return BigDecimal.valueOf(ab).divide(norm, 9, java.math.RoundingMode.HALF_UP).doubleValue();
    }

    /**
     * Sets each word's weight to the number of times its name occurs in its own list.
     * Nothing is changed when both lists already look weighted, judged by their first element
     * (an empty list counts as weighted).
     */
    protected static void taggingWeightByFrequency(List<Word> words1, List<Word> words2) {
        if (isTagged(words1) && isTagged(words2)) {
            return;
        }
        applyFrequencyWeights(words1);
        applyFrequencyWeights(words2);
    }

    /** Tells whether the list needs no weighting: it is empty or its first word has a weight. */
    private static boolean isTagged(List<Word> words) {
        return words.isEmpty() || words.get(0).getWeight() != null;
    }

    /** Overwrites every word's weight with the frequency of its name within {@code words}. */
    private static void applyFrequencyWeights(List<Word> words) {
        Map<String, AtomicInteger> frequency = getFrequency(words);
        for (Word word : words) {
            word.setWeight(frequency.get(word.getName()).floatValue());
        }
    }

    /** Counts how often each word name occurs in {@code words}. */
    private static Map<String, AtomicInteger> getFrequency(List<Word> words) {
        Map<String, AtomicInteger> freq = new HashMap<>();
        words.forEach(i -> freq.computeIfAbsent(i.getName(), k -> new AtomicInteger()).incrementAndGet());
        return freq;
    }

    /**
     * Debug helper: renders the frequencies, highest first, one numbered
     * {@code <tab>n、word=count} entry per line.
     *
     * @return the listing without a trailing newline; an empty string for null/empty input
     */
    private static String getWordsFrequencyString(Map<String, AtomicInteger> frequency) {
        StringBuilder str = new StringBuilder();
        if (frequency == null || frequency.isEmpty()) {
            // Nothing was appended, so there is no trailing newline to strip.
            return str.toString();
        }
        AtomicInteger rank = new AtomicInteger();
        frequency.entrySet().stream()
                .sorted((a, b) -> Integer.compare(b.getValue().get(), a.getValue().get()))
                .forEach(i -> str.append("\t").append(rank.incrementAndGet()).append("、").append(i.getKey())
                        .append("=").append(i.getValue()).append("\n"));
        str.setLength(str.length() - 1);
        return str.toString();
    }

    /**
     * Builds a name-to-weight lookup for {@code words}. Words without a weight are skipped and
     * reported on standard output.
     *
     * @return the lookup map; an empty map for a null/empty list
     */
    protected static Map<String, Float> getFastSearchMap(List<Word> words) {
        if (CollectionUtils.isEmpty(words)) {
            return Collections.emptyMap();
        }
        Map<String, Float> weightMap = new HashMap<>(words.size());
        for (Word word : words) {
            if (word.getWeight() != null) {
                weightMap.put(word.getName(), word.getWeight());
            } else {
                System.out.println("no word weight info:" + word.getName());
            }
        }
        return weightMap;
    }

}
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
| package com.zqy.util.textSimilarity;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * A {@code float} that can be updated atomically. The value is kept as its IEEE-754 bit
 * pattern inside an {@link AtomicInteger}, and every update goes through that integer.
 */
public class AtomicFloat extends Number {

    private AtomicInteger bits;

    /** Creates an instance holding {@code 0f}. */
    public AtomicFloat() {
        this(0f);
    }

    /** Creates an instance holding {@code initialValue}. */
    public AtomicFloat(float initialValue) {
        bits = new AtomicInteger(Float.floatToIntBits(initialValue));
    }

    /** Atomically adds {@code delta} and returns the new value. */
    public final float addAndGet(float delta) {
        int updated = bits.updateAndGet(raw -> Float.floatToIntBits(Float.intBitsToFloat(raw) + delta));
        return Float.intBitsToFloat(updated);
    }

    /** Atomically adds {@code delta} and returns the previous value. */
    public final float getAndAdd(float delta) {
        int previous = bits.getAndUpdate(raw -> Float.floatToIntBits(Float.intBitsToFloat(raw) + delta));
        return Float.intBitsToFloat(previous);
    }

    /** Atomically subtracts one and returns the previous value. */
    public final float getAndDecrement() {
        return getAndAdd(-1);
    }

    /** Atomically subtracts one and returns the new value. */
    public final float decrementAndGet() {
        return addAndGet(-1);
    }

    /** Atomically adds one and returns the previous value. */
    public final float getAndIncrement() {
        return getAndAdd(1);
    }

    /** Atomically adds one and returns the new value. */
    public final float incrementAndGet() {
        return addAndGet(1);
    }

    /** Atomically stores {@code newValue} and returns the previous value. */
    public final float getAndSet(float newValue) {
        int previous = bits.getAndSet(Float.floatToIntBits(newValue));
        return Float.intBitsToFloat(previous);
    }

    /**
     * Atomically stores {@code update} if the current value has the same bit pattern as
     * {@code expect}.
     *
     * @return {@code true} when the value was replaced
     */
    public final boolean compareAndSet(float expect, float update) {
        int expectedBits = Float.floatToIntBits(expect);
        int updatedBits = Float.floatToIntBits(update);
        return bits.compareAndSet(expectedBits, updatedBits);
    }

    /** Stores {@code newValue}. */
    public final void set(float newValue) {
        bits.set(Float.floatToIntBits(newValue));
    }

    /** Returns the current value. */
    public final float get() {
        return Float.intBitsToFloat(bits.get());
    }

    @Override
    public float floatValue() {
        return get();
    }

    @Override
    public double doubleValue() {
        return (double) floatValue();
    }

    @Override
    public int intValue() {
        return (int) get();
    }

    @Override
    public long longValue() {
        return (long) get();
    }

    @Override
    public String toString() {
        return Float.toString(get());
    }
}
|
(1)先分词: 这里通过采用HanLP中文自然语言处理中标准分词进行分词。
(2)统计词频: 就统计上面词出现的次数。
(3)通过每一个词出现的次数,变成一个向量,通过向量公式计算相似率。
9)运行测试,略
3、系统界面