汇总:如何通过PHP进行数据采集
优采云 发布时间: 2022-12-08 19:51汇总:如何通过PHP进行数据采集
随着信息时代的到来,现在大多数人都从互联网上获取信息。如果您对有用的数据使用传统的复制和粘贴,效率将非常低。如何快速完成批量采集工作?说说PHP采集的一些事吧!
采集主要用到 file_get_contents 函数和正则表达式。
先贴一段代码
无标题文档
$val)
{
<p>
//echo "<a href=\"http://nitnews.nyist.net/".$arr[2][$id]."\">".$val."</a><br />";
echo "<a href=\"content.php?url="."http://nitnews.nyist.net/".$arr[2][$id]."\">".$val."</a><br />";
}
?>
[/php]
[php]
无标题文档
;
echo "文章内容是:".GetInfo($con,$content);
?>
</p>
这是我用来从学校新闻网站上采集内容的一个程序,原理很简单。如果需要把结果存入数据库,也很简单:在输出显示的地方改写成存入数据库的代码就可以了。
重点:匹配规则中的空格和标点一个都不能少!如果采集不到内容,请先检查您的匹配规则。
附:采集很简单,你只需要了解它的原理就可以了~~以后批量发布信息别怕~
汇总:日志采集系统
日志采集系统开发记录
在接到开发任务时,需要开发一个日志系统,主要分析谁在什么时间做了什么操作,业务是否执行成功,请求参数和返回参数是什么,使用了什么方法,这样,后台可以直接定位问题。
需要写一个 log-starter 来完成日志采集的工作,再写一个 log-center 服务来提供日志的新增和查询功能。
1.自定义一个注解@LogAnnotation
/**
 * Marks a method whose invocations should be recorded as operation-log entries.
 *
 * <p>The annotation may only be placed on methods and is retained at runtime,
 * so it can be read reflectively.
 */
@Documented
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.METHOD)
public @interface LogAnnotation {

    /** Name of the service the annotated method belongs to; empty by default. */
    String serverName() default "";

    /** Name of the functional module the operation belongs to; empty by default. */
    String module() default "";

    /** Human-readable description of the operation; empty by default. */
    String description() default "";

    /** Whether the call's parameters should be recorded; {@code true} by default. */
    boolean recordParam() default true;
}
2.配置日志启动器:
/**
 * Configuration class of the log starter.
 *
 * <p>It is conditional on {@code LogAspect} and {@code RemoteLogService} being on the
 * classpath, on the application being a web application, and on the property
 * {@code com.log.enable} being {@code true} (a missing property counts as enabled).
 * It scans the starter's packages and enables the Feign client used to ship log entries.
 */
@EnableAsync
@Configuration
@ComponentScan(basePackages = {"com.gw.log.autoconfigure","com.gw.log.feign"})
@ConditionalOnClass(value = {LogAspect.class,RemoteLogService.class})
@ConditionalOnWebApplication
@ConditionalOnProperty(name = "enable",prefix = "com.log",havingValue = "true",matchIfMissing = true)
// @EnableFeignClients points at the interface that declares the Feign client.
@EnableFeignClients(clients = com.gw.log.feign.RemoteLogService.class)
public class LogAutoConfiguration {

    /**
     * Registers the logging aspect as a bean.
     *
     * @return a new {@code LogAspect}; exactly one instance is created per call
     */
    @Bean
    public LogAspect logAspect() {
        // The previous version built a second, unused LogAspect before returning.
        return new LogAspect();
    }
}
3.写切面(提前准备好日志实体类Syslog)
@Aspect
@Component
public class LogAspect {
private static final Logger log = LoggerFactory.getLogger(LogAspect.class);
@Value("${spring.application.name}")
private String serverName;
@Resource
RemoteLogService remoteLogService;
@Around(value = "@annotation(com.gw.log.constants.LogAnnotation)")
public Object around(ProceedingJoinPoint joinPoint) throws Throwable {
// 获取当前毫秒
long beginTime = System.currentTimeMillis();
SysLog sysLog = new SysLog();
HttpServletRequest request = ((ServletRequestAttributes) RequestContextHolder.getRequestAttributes()).getRequest();
String authorization = request.getHeader("authorization");
if(authorization!=null){
TokenEntity tokenInfo = AuthUtil.getTokenInfo();
if (tokenInfo != null) {
sysLog.setUserName(tokenInfo.getUserName());
sysLog.setUserCode(tokenInfo.getUserCode());
sysLog.setPlatformCode(tokenInfo.getPlatformId().toString());
}
}
MethodSignature methodSignature = (MethodSignature) joinPoint.getSignature();
LogAnnotation logAnnotation = methodSignature.getMethod().getDeclaredAnnotation(LogAnnotation.class);
//方法名称
String name = methodSignature.getName();
sysLog.setOperationModule(logAnnotation.module());
//描述信息
sysLog.setRemark(logAnnotation.description());
//方法路径
String method_path = request.getAttribute(HandlerMapping.BEST_MATCHING_PATTERN_ATTRIBUTE).toString();
sysLog.setRequestAddress(method_path);
//请求方式
String requestMethod = request.getMethod();
sysLog.setRequestMethod(requestMethod);
//系统名称
sysLog.setSystemName(StringUtils.isNotBlank(sysLog.getSystemName()) ? sysLog.getSystemName() : serverName);
if (logAnnotation.recordParam() || true) {
// 获取参数名称
String[] paramNames = methodSignature.getParameterNames();
// 获取参数值
Object[] params = joinPoint.getArgs();
<p>
// 把参数名称和参数值组装成json格式
JSONObject paramsJson = new JSONObject(paramNames.length);
for (int i = 0; i < paramNames.length; i++) {
paramsJson.put(paramNames[i], params[i]);
}
try {
// 以json的形式记录参数
sysLog.setRequestParameters(JSONObject.toJSONString(paramsJson));
} catch (Exception e) {
log.error("记录参数失败:{}", e.getMessage());
}
}
try {
// 执行时长(毫秒)
Long time = System.currentTimeMillis() - beginTime;
// 执行原方法
Object obj = joinPoint.proceed();
//返回值
String s = JSON.toJSONString(obj);
sysLog.setReturnParameter(s);
sysLog.setStatus(Boolean.TRUE);
// 执行时长(毫秒)
sysLog.setOperationTime(time.toString());
//获取用户ip地址
sysLog.setIpAddress(getIpAddr(((ServletRequestAttributes) RequestContextHolder.getRequestAttributes()).getRequest()));
getMethod(joinPoint, sysLog);
return obj;
} catch (Exception e) {
// 方法执行失败
sysLog.setStatus(Boolean.FALSE);
// 备注记录失败原因
sysLog.setRemark(e.getMessage());
throw e;
} finally {
// 异步将Log对象发送到队
try {
String token = request.getHeader("authorization")!=null?request.getHeader("authorization").split(" ")[1]:null;
remoteLogService.saveLog(sysLog,token);
log.info("通过feign发送到log-center服务:{}", log);
} catch (Exception e2) {
e2.getMessage();
}
}
}
/**
 * Records the intercepted method on the log entry as
 * {@code <target class name>.<method name>()} and logs the entry.
 *
 * @param joinPoint the intercepted invocation
 * @param sysLog    the log entry to update
 */
private void getMethod(ProceedingJoinPoint joinPoint, SysLog sysLog) {
    MethodSignature signature = (MethodSignature) joinPoint.getSignature();
    String className = joinPoint.getTarget().getClass().getName();
    String methodName = signature.getName();
    sysLog.setOperationMethod(className + "." + methodName + "()");
    // Log the entry itself; the previous version printed the Logger object.
    log.info("---------------- {}", sysLog);
}
/**
 * Finds the start index of the {@code x}-th match of {@code target} in {@code string}.
 *
 * <p>{@code target} is compiled as a regular expression, so callers looking for a
 * literal that contains regex metacharacters should pass {@code Pattern.quote(...)}.
 *
 * @param string the text to search
 * @param target the regular expression to look for
 * @param x      1-based ordinal of the wanted match
 * @return the start index of the {@code x}-th match, or {@code -1} when {@code x} is
 *         not positive or there are fewer than {@code x} matches
 */
public static int getCharacterPosition(String string, String target, int x) {
    if (x <= 0) {
        return -1;
    }
    Matcher matcher = Pattern.compile(target).matcher(string);
    int seen = 0;
    while (matcher.find()) {
        seen++;
        if (seen == x) {
            return matcher.start();
        }
    }
    // Fewer than x matches: calling matcher.start() here would throw IllegalStateException.
    return -1;
}
/**
 * Resolves the client IP address of a request.
 *
 * <p>The headers {@code x-forwarded-for}, {@code Proxy-Client-IP} and
 * {@code WL-Proxy-Client-IP} are tried in that order; if none carries a usable value
 * the request's remote address is used, and a loopback remote address is replaced by
 * the local host's address when that can be determined. When the resulting value is a
 * comma-separated list, only its first entry is returned.
 *
 * @param request the current HTTP request
 * @return the resolved address, or {@code null} if none could be determined
 */
public static String getIpAddr(HttpServletRequest request){
    String ipAddress = request.getHeader("x-forwarded-for");
    if (isMissing(ipAddress)) {
        ipAddress = request.getHeader("Proxy-Client-IP");
    }
    if (isMissing(ipAddress)) {
        ipAddress = request.getHeader("WL-Proxy-Client-IP");
    }
    if (isMissing(ipAddress)) {
        ipAddress = request.getRemoteAddr();
        // Constant-first equals keeps this safe when the remote address is null.
        if ("127.0.0.1".equals(ipAddress) || "0:0:0:0:0:0:0:1".equals(ipAddress)) {
            // Loopback: report the address configured for this host instead.
            try {
                ipAddress = InetAddress.getLocalHost().getHostAddress();
            } catch (UnknownHostException e) {
                log.error(e.getMessage(), e);
            }
        }
    }
    // With several proxies the value is a comma-separated list whose first entry is
    // the client. Split regardless of length: "1.1.1.1,2.2.2.2" is only 15 characters.
    if (ipAddress != null) {
        int comma = ipAddress.indexOf(',');
        if (comma > 0) {
            ipAddress = ipAddress.substring(0, comma).trim();
        }
    }
    return ipAddress;
}

/** Tells whether a header value is absent, empty, or the literal "unknown" (any case). */
private static boolean isMissing(String value) {
    return value == null || value.length() == 0 || "unknown".equalsIgnoreCase(value);
}
}
</p>
这是一个环绕通知,值是切点,切点是@LogAnnotation注解所在的位置
@Around(value = "@annotation(com.gw.log.constants.LogAnnotation)")
4.配置@FeignClient
/**
 * Feign client used to send log entries to the log service.
 * Requests go to the fixed URL declared below; {@code RemoteLogServiceFallback} is
 * registered as the fallback.
 */
@FeignClient(value = "log" , url = "http://127.0.0.1:8888" , fallback = RemoteLogServiceFallback.class)
public interface RemoteLogService {
/**
 * Saves a log entry by POSTing it to {@code /syslog/createLog}.
 *
 * @param sysLog the log entry, sent as the request body
 * @param Authorization value sent in the {@code Authorization} request header
 * @return the String response of the call
 *         (NOTE(review): the earlier doc said "boolean" but the declared type is
 *         String - confirm the payload format)
 */
@PostMapping("/syslog/createLog")
String saveLog(@RequestBody SysLog sysLog, @RequestHeader("Authorization") String Authorization);
}
@FeignClient(value = "log", url = "http://127.0.0.1:8888", fallback = RemoteLogServiceFallback.class)
value 是客户端名称,可以随便写;url 是需要连接的主机地址;fallback 相当于一个熔断器:当远程调用出错时,会改为调用 RemoteLogServiceFallback 中对应的方法。
RemoteLogServiceFallback 实现了 RemoteLogService 的方法,并添加了 @Component 注解
/**
 * Fallback implementation of {@code RemoteLogService}.
 *
 * <p>{@link #saveLog} logs an error together with the stored cause and returns
 * {@code null}.
 */
@Component
public class RemoteLogServiceFallback implements RemoteLogService {

    private static final Logger LOG = LoggerFactory.getLogger(RemoteLogServiceFallback.class);

    /** Failure to report with the error log; {@code null} until {@link #setCause} is called. */
    private Throwable cause;

    /**
     * Logs that the log entry could not be inserted.
     *
     * @param sysLog        the entry that could not be saved (not used here)
     * @param Authorization the authorization header value (not used here)
     * @return always {@code null}
     */
    @Override
    public String saveLog(SysLog sysLog, String Authorization) {
        LOG.error("feign 插入日志失败", this.cause);
        return null;
    }

    /** Stores the failure to be reported by {@link #saveLog}. */
    public void setCause(Throwable cause) {
        this.cause = cause;
    }

    /** Returns the stored failure, or {@code null} if none was set. */
    public Throwable getCause() {
        return this.cause;
    }
}
日志启动器(log-starter)完成
下面配置log-center
创建一个新项目
导入 log-starter sdk
在 controller 中实现 RemoteLogService 中声明的接口(即 POST /syslog/createLog),然后启动服务。
5、如何调用
在需要采集的项目中导入log-starter sdk,导入spring-cloud-starter-openfeign依赖。在启动类中添加@EnableFeignClients 注解。
在需要采集日志的接口上添加注解@LogAnnotation(description = "logClient test", module = "test module")
启动,测试。
结束!