首页 > 基础资料 博客日记
Java HttpClient爬虫请求
2023-07-24 11:15:41基础资料围观280次
本篇文章分享Java HttpClient爬虫请求,对你有帮助的话记得收藏一下,看Java资料网收获更多编程知识
本项目采用 spring-boot 构建,为 Maven 工程。
添加依赖
pom文件
<!-- Minimal Maven build descriptor for the HttpClient example classes in this document. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. -->
<groupId>org.test</groupId>
<artifactId>testDome</artifactId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<!-- Apache HttpClient: supplies the org.apache.http.* classes imported by the example classes. -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.5</version>
</dependency>
<!-- fastjson: not referenced by any of the example classes shown in this document.
     NOTE(review): this is an old 1.2.x release; check its published security advisories
     and upgrade or remove the dependency before real use. TODO confirm. -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.47</version>
</dependency>
</dependencies>
</project>
GET 无参形式
package testDemo;
import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Minimal example of a parameterless HTTP GET with Apache HttpClient.
 *
 * <p>Requests {@code http://www.baidu.com} and, when the status code is 200,
 * prints the response body and its length to standard output.
 */
public class DoGET {

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws Exception if executing the request, reading the body, or closing a resource fails
     */
    public static void main(String[] args) throws Exception {
        // Optional: build a client that does not follow redirects.
        // RequestConfig config = RequestConfig.custom().setRedirectsEnabled(false).build();
        // CloseableHttpClient httpClient = HttpClients.custom().setDefaultRequestConfig(config).build();

        // Optional: send the request through a proxy (proxyHost = proxy IP, proxyPort = port).
        // int proxyPort = 8000;
        // String proxyHost = "192.10.2.125";
        // HttpHost proxy = new HttpHost(proxyHost, proxyPort, "HTTP");

        // Create the HttpClient instance.
        CloseableHttpClient httpclient = HttpClients.createDefault();
        // TODO: 2020/4/27 support GET requests that ignore SSL certificates
        try {
            // Create the HTTP GET request.
            HttpGet get = new HttpGet("http://www.baidu.com");
            CloseableHttpResponse response = null;
            try {
                // Execute the request exactly once. Executing it a second time and
                // overwriting the reference would leave the first response unclosed.
                // With a proxy: response = httpclient.execute(proxy, get);
                response = httpclient.execute(get);
                // Only read the body when the status code is 200.
                if (response.getStatusLine().getStatusCode() == 200) {
                    // Response body decoded as UTF-8.
                    String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                    System.out.println("<<" + content + ">>");
                    System.out.println("内容长度:" + content.length());
                    // Header[] cookie = response.getHeaders("Set-Cookie");
                }
            } finally {
                if (response != null) {
                    response.close();
                }
            }
        } finally {
            // Comparable to closing the browser; runs even if the request or response.close() fails.
            httpclient.close();
        }
    }
}
GET带参请求
package testDemo;
import java.io.File;
import java.net.URI;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * GET request with query parameters.
 *
 * <p>Two ways to pass parameters:
 * <ol>
 *   <li>append them directly to the URL, e.g. {@code ?wd=java};</li>
 *   <li>set them through the URI builder: {@code setParameter("wd", "java")}.</li>
 * </ol>
 * This example uses the second way.
 */
public class DoGETParam {

    /**
     * Sends the GET request and prints the body length and the body when the status code is 200.
     *
     * @param args unused
     * @throws Exception if building the URI, executing the request, or closing a resource fails
     */
    public static void main(String[] args) throws Exception {
        // Client instance used for the single request below.
        CloseableHttpClient client = HttpClients.createDefault();

        // Build the target URI with the "wd" query parameter, then wrap it in a GET request.
        URIBuilder builder = new URIBuilder("http://www.baidu.com/s");
        builder.setParameter("wd", "java");
        URI target = builder.build();
        HttpGet request = new HttpGet(target);

        CloseableHttpResponse response = null;
        try {
            response = client.execute(request);

            // Anything other than status 200 is ignored; the finally block still runs.
            int status = response.getStatusLine().getStatusCode();
            if (status != 200) {
                return;
            }

            // Decode the body as UTF-8 and print its length first, then the body itself.
            String body = EntityUtils.toString(response.getEntity(), "UTF-8");
            System.out.println("内容长度:" + body.length());
            System.out.println("内容<<:" + body);
        } finally {
            if (response != null) {
                response.close();
            }
            client.close();
        }
    }
}
POST无参请求
package testDemo;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * POST request without a request body.
 *
 * <p>Posts to {@code http://www.oschina.net/} with a browser-like User-Agent header and
 * prints the response body; a non-200 status code is printed before the body.
 */
public class DoPOST {

    /**
     * Sends the POST request and prints the outcome to standard output.
     *
     * @param args unused
     * @throws Exception if executing the request, reading the body, or closing a resource fails
     */
    public static void main(String[] args) throws Exception {
        CloseableHttpClient client = HttpClients.createDefault();

        // Build the POST request and present it with a browser User-Agent.
        HttpPost request = new HttpPost("http://www.oschina.net/");
        request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");

        CloseableHttpResponse response = null;
        try {
            response = client.execute(request);

            final int status = response.getStatusLine().getStatusCode();
            final boolean ok = status == 200;

            // For a non-200 answer the status code is printed before the body is read.
            if (!ok) {
                System.out.println(status);
            }

            // The body is decoded as UTF-8 and printed in both cases.
            String body = EntityUtils.toString(response.getEntity(), "UTF-8");
            System.out.println(">>" + body);

            // The length is reported only for a 200 answer.
            if (ok) {
                System.out.println("内容长度:" + body.length());
            }
        } finally {
            if (response != null) {
                response.close();
            }
            client.close();
        }
    }
}
POST带参请求
package testDemo;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
/**
 * POST request carrying form parameters.
 *
 * <p>Sends two URL-encoded form fields ({@code lang} and {@code pageNum}) to
 * {@code http://www.bcia.com.cn/bcia/FAQ/search} and prints the response body and its
 * length on status 200, or the status code otherwise.
 *
 * <p>Mengtao
 */
public class DoPOSTParam {

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws Exception if building the form entity, executing the request, reading the body,
     *                   or closing a resource fails
     */
    public static void main(String[] args) throws Exception {
        // Create the HttpClient instance.
        CloseableHttpClient httpclient = HttpClients.createDefault();
        try {
            // Create the HTTP POST request.
            HttpPost httpPost = new HttpPost("http://www.bcia.com.cn/bcia/FAQ/search");

            // Two form parameters.
            List<NameValuePair> parameters = new ArrayList<NameValuePair>();
            parameters.add(new BasicNameValuePair("lang", "cn"));
            parameters.add(new BasicNameValuePair("pageNum", "1"));

            // Wrap the parameters in a form-style entity and attach it to the request.
            // NOTE(review): no charset is passed here, so the library default applies;
            // pass one explicitly if non-ASCII values are ever sent. Verify against the HttpClient docs.
            UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(parameters);
            httpPost.setEntity(formEntity);

            // Headers that make the request look like it comes from a browser.
            httpPost.setHeader("Referer", "http://www.bcia.com.cn/cjwt.html");
            httpPost.setHeader("Host", "www.bcia.com.cn");
            httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36");

            CloseableHttpResponse response = null;
            try {
                // Execute the request.
                response = httpclient.execute(httpPost);
                // Read the body only when the status code is 200.
                if (response.getStatusLine().getStatusCode() == 200) {
                    String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                    System.out.println("内容" + content);
                    System.out.println("内容长度:" + content.length());
                } else {
                    System.out.println("内容111" + response.getStatusLine().getStatusCode());
                }
            } finally {
                if (response != null) {
                    response.close();
                }
            }
        } finally {
            // Close the client on every path, including when the request or body read throws.
            httpclient.close();
        }
    }
}
文章来源:https://blog.csdn.net/qq_41369057/article/details/131222505
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若内容造成侵权/违法违规/事实不符,请联系邮箱:jacktools123@163.com进行投诉反馈,一经查实,立即删除!
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若内容造成侵权/违法违规/事实不符,请联系邮箱:jacktools123@163.com进行投诉反馈,一经查实,立即删除!
标签: