汇总:如何通过PHP进行数据采集

优采云 发布时间: 2022-12-08 19:51

  汇总:如何通过PHP进行数据采集

  随着信息时代的到来,现在大多数人都从互联网上获取信息。如果您对有用的数据使用传统的复制和粘贴,效率将非常低。如何快速完成批量采集工作?说说PHP采集的一些事吧!

  采集主要用到 file_get_contents 函数和正则表达式。

  先贴一段代码

  

无标题文档

$val)

{

<p>

//echo "<a href=\"http://nitnews.nyist.net/".$arr[2][$id]."\">".$val."</a><br />";

echo "<a href=\"content.php?url="."http://nitnews.nyist.net/".$arr[2][$id]."\">".$val."</a><br />";

}

?>

[/php]

[php]

无标题文档

;

echo "文章内容是:".GetInfo($con,$content);

?>

</p>

  这是我针对学校新闻网站写的一个采集程序,原理很简单。如果需要存入数据库,也很简单:在显示内容的地方换成写入数据库的代码就可以了。

  重点:匹配规则中的空格和标点一个都不能少!如果采集不到内容,请检查您的匹配规则(正则表达式)。

  附:采集很简单,你只需要了解它的原理就可以了~~以后批量发布信息别怕~

  汇总:日志采集系统

  日志采集系统开发记录

  在接到开发任务时,需要开发一个日志系统,主要分析谁在什么时间做了什么操作,业务是否执行成功,请求参数和返回参数是什么,使用了什么方法,这样,后台可以直接定位问题。

  需要写一个log-starter来负责日志采集,再写一个log-center来提供日志的新增和查询功能。

  1.自定义一个注解@LogAnnotation

  /**
   * Marks a method whose invocation should be captured as a log record.
   *
   * <p>The annotation is retained at runtime and may only be placed on methods.
   */
  @Documented
  @Retention(RetentionPolicy.RUNTIME)
  @Target(ElementType.METHOD)
  public @interface LogAnnotation {

      /** Name of the service the annotated method belongs to; empty by default. */
      String serverName() default "";

      /** Functional module the annotated method belongs to; empty by default. */
      String module() default "";

      /** Human-readable description of the operation; empty by default. */
      String description() default "";

      /** Whether the call arguments should be recorded; {@code true} by default. */
      boolean recordParam() default true;
  }

  2.配置日志启动器:

  /**
   * Auto-configuration of the log starter.
   *
   * <p>Active only in a web application, when {@code LogAspect} and {@code RemoteLogService} are
   * on the classpath, and unless {@code com.log.enable} is set to something other than
   * {@code true} (a missing property counts as enabled).
   *
   * <p>NOTE(review): {@code LogAspect} is also annotated with {@code @Component}. If it lives in
   * one of the packages scanned below, the container may end up with two aspect beans and the
   * advice would run twice — confirm its package and keep only one registration.
   */
  @EnableAsync
  @Configuration
  @ComponentScan(basePackages = {"com.gw.log.autoconfigure","com.gw.log.feign"})
  @ConditionalOnClass(value = {LogAspect.class,RemoteLogService.class})
  @ConditionalOnWebApplication
  @ConditionalOnProperty(name = "enable",prefix = "com.log",havingValue = "true",matchIfMissing = true)
  // EnableFeignClients points at the class that declares the FeignClient
  @EnableFeignClients(clients = com.gw.log.feign.RemoteLogService.class)
  public class LogAutoConfiguration {

      /**
       * Registers the logging aspect.
       *
       * @return a single new {@code LogAspect}; the original built two instances and threw the
       *         first one away
       */
      @Bean
      public LogAspect logAspect() {
          return new LogAspect();
      }
  }

  3.写切面(提前准备好日志实体类Syslog)

  @Aspect

@Component

public class LogAspect {

private static final Logger log = LoggerFactory.getLogger(LogAspect.class);

@Value("${spring.application.name}")

private String serverName;

@Resource

RemoteLogService remoteLogService;

@Around(value = "@annotation(com.gw.log.constants.LogAnnotation)")

public Object around(ProceedingJoinPoint joinPoint) throws Throwable {

// 获取当前毫秒

long beginTime = System.currentTimeMillis();

SysLog sysLog = new SysLog();

HttpServletRequest request = ((ServletRequestAttributes) RequestContextHolder.getRequestAttributes()).getRequest();

String authorization = request.getHeader("authorization");

if(authorization!=null){

TokenEntity tokenInfo = AuthUtil.getTokenInfo();

if (tokenInfo != null) {

sysLog.setUserName(tokenInfo.getUserName());

sysLog.setUserCode(tokenInfo.getUserCode());

sysLog.setPlatformCode(tokenInfo.getPlatformId().toString());

}

}

MethodSignature methodSignature = (MethodSignature) joinPoint.getSignature();

LogAnnotation logAnnotation = methodSignature.getMethod().getDeclaredAnnotation(LogAnnotation.class);

//方法名称

String name = methodSignature.getName();

sysLog.setOperationModule(logAnnotation.module());

//描述信息

sysLog.setRemark(logAnnotation.description());

//方法路径

String method_path = request.getAttribute(HandlerMapping.BEST_MATCHING_PATTERN_ATTRIBUTE).toString();

sysLog.setRequestAddress(method_path);

//请求方式

String requestMethod = request.getMethod();

sysLog.setRequestMethod(requestMethod);

//系统名称

sysLog.setSystemName(StringUtils.isNotBlank(sysLog.getSystemName()) ? sysLog.getSystemName() : serverName);

if (logAnnotation.recordParam() || true) {

// 获取参数名称

String[] paramNames = methodSignature.getParameterNames();

// 获取参数值

Object[] params = joinPoint.getArgs();

<p>

// 把参数名称和参数值组装成json格式

JSONObject paramsJson = new JSONObject(paramNames.length);

for (int i = 0; i < paramNames.length; i++) {

paramsJson.put(paramNames[i], params[i]);

}

try {

// 以json的形式记录参数

sysLog.setRequestParameters(JSONObject.toJSONString(paramsJson));

} catch (Exception e) {

log.error("记录参数失败:{}", e.getMessage());

}

}

try {

// 执行时长(毫秒)

Long time = System.currentTimeMillis() - beginTime;

// 执行原方法

Object obj = joinPoint.proceed();

//返回值

String s = JSON.toJSONString(obj);

sysLog.setReturnParameter(s);

sysLog.setStatus(Boolean.TRUE);

// 执行时长(毫秒)

sysLog.setOperationTime(time.toString());

//获取用户ip地址

sysLog.setIpAddress(getIpAddr(((ServletRequestAttributes) RequestContextHolder.getRequestAttributes()).getRequest()));

getMethod(joinPoint, sysLog);

return obj;

} catch (Exception e) {

// 方法执行失败

sysLog.setStatus(Boolean.FALSE);

// 备注记录失败原因

sysLog.setRemark(e.getMessage());

throw e;

} finally {

// 异步将Log对象发送到队

try {

String token = request.getHeader("authorization")!=null?request.getHeader("authorization").split(" ")[1]:null;

remoteLogService.saveLog(sysLog,token);

log.info("通过feign发送到log-center服务:{}", log);

} catch (Exception e2) {

e2.getMessage();

}

}

}

private void getMethod(ProceedingJoinPoint joinPoint, SysLog sysLog) {

MethodSignature signature = (MethodSignature) joinPoint.getSignature();

// 请求的方法名

String className = joinPoint.getTarget().getClass().getName();

String methodName = signature.getName();

sysLog.setOperationMethod(className + "." + methodName + "()");

log.info("---------------- " + log);

}

/**

* 获取target字符第x次出现的位置

* @param string

* @param target

* @param x

* @return

*/

public static int getCharacterPosition(String string, String target, int x) {

// 这里是获取target符号的位置

Matcher matcher = Pattern.compile(target).matcher(string);

int mIdx = 0;

while (matcher.find()) {

mIdx++;

// 当target符号第x次出现的位置

if (mIdx == x) {

break;

}

}

int start = matcher.start();

return start;

}

/**

* 获取当前网络ip

* @param request

  

* @return

*/

public static String getIpAddr(HttpServletRequest request){

String ipAddress = request.getHeader("x-forwarded-for");

if(ipAddress == null || ipAddress.length() == 0 || "unknown".equalsIgnoreCase(ipAddress)) {

ipAddress = request.getHeader("Proxy-Client-IP");

}

if(ipAddress == null || ipAddress.length() == 0 || "unknown".equalsIgnoreCase(ipAddress)) {

ipAddress = request.getHeader("WL-Proxy-Client-IP");

}

if(ipAddress == null || ipAddress.length() == 0 || "unknown".equalsIgnoreCase(ipAddress)) {

ipAddress = request.getRemoteAddr();

if(ipAddress.equals("127.0.0.1") || ipAddress.equals("0:0:0:0:0:0:0:1")){

//根据网卡取本机配置的IP

InetAddress inet=null;

try {

inet = InetAddress.getLocalHost();

} catch (UnknownHostException e) {

log.error(e.getMessage());

}

if (null != inet){

ipAddress= inet.getHostAddress();

}

}

}

//对于通过多个代理的情况,第一个IP为客户端真实IP,多个IP按照&#39;,&#39;分割

if(ipAddress!=null && ipAddress.length()>15){ //"***.***.***.***".length() = 15

if(ipAddress.indexOf(",")>0){

ipAddress = ipAddress.substring(0,ipAddress.indexOf(","));

}

}

return ipAddress;

}

}

</p>

  这是一个环绕通知,值是切点,切点是@LogAnnotation注解所在的位置

  @Around(value = "@annotation(com.gw.log.constants.LogAnnotation)")

  4.配置@FeignClient

  /**
   * Feign client used to hand log records to the log-center service at the configured URL.
   * {@code RemoteLogServiceFallback} is registered as the fallback implementation.
   */
  @FeignClient(value = "log", url = "http://127.0.0.1:8888", fallback = RemoteLogServiceFallback.class)
  public interface RemoteLogService {

      /**
       * Posts a log record to {@code /syslog/createLog}.
       *
       * @param sysLog        the record, sent as the request body
       * @param Authorization value sent in the {@code Authorization} request header
       * @return the response body as a string; its content is defined by log-center
       */
      @PostMapping(value = "/syslog/createLog")
      String saveLog(@RequestBody SysLog sysLog,
                     @RequestHeader(value = "Authorization") String Authorization);
  }

  @FeignClient(value = "log", url = "http://127.0.0.1:8888", fallback = RemoteLogServiceFallback.class)

  value是该Feign客户端的名称,可以自行指定;url是需要连接的服务地址;fallback相当于一个熔断降级处理:当远程调用出错时,会转而调用RemoteLogServiceFallback中对应的方法。

  RemoteLogServiceFallback实现了RemoteLogService接口的方法,并添加了@Component注解

  /**
   * Fallback implementation of {@code RemoteLogService}: logs an error and returns {@code null}
   * instead of a response.
   *
   * <p>NOTE(review): nothing visible here assigns {@code cause}; presumably a fallback factory or
   * other caller is expected to set it before {@code saveLog} runs — confirm, otherwise the
   * logged cause is always {@code null}.
   */
  @Component
  public class RemoteLogServiceFallback implements RemoteLogService {

      private static final Logger LOG = LoggerFactory.getLogger(RemoteLogServiceFallback.class);

      /** Failure that led to the fallback, if one was provided through {@link #setCause}. */
      private Throwable cause;

      /**
       * Logs that the log record could not be stored, together with the recorded cause.
       *
       * @param sysLog        the record that was to be saved (not used)
       * @param Authorization the authorization header value (not used)
       * @return always {@code null}
       */
      @Override
      public String saveLog(SysLog sysLog, String Authorization) {
          LOG.error("feign 插入日志失败", this.cause);
          return null;
      }

      /** @return the recorded cause, or {@code null} if none was set */
      public Throwable getCause() {
          return this.cause;
      }

      /** @param cause the failure to report on the next fallback call */
      public void setCause(Throwable cause) {
          this.cause = cause;
      }
  }

  日志启动器(log-starter)完成

  下面配置log-center

  创建一个新项目

  导入 log-starter sdk

  在controller中实现RemoteLogService声明的接口(POST /syslog/createLog),然后启动服务。

  5、如何调用

  在需要采集的项目中导入log-starter sdk,导入spring-cloud-starter-openfeign依赖。在启动类中添加@EnableFeignClients 注解。

  在需要采集日志的接口上添加注解@LogAnnotation(description = "logClient test", module = "test module")

  启动,测试。

  结束!

0 个评论

要回复文章请先登录注册


官方客服QQ群

微信人工客服

QQ人工客服


线