页面埋点

优采云 发布时间: 2020-08-24 09:07

  页面埋点

  对一个网站进行流量分析,首先要做的就是数据采集;而采集的方式大致有两种

  对于网站前端来说,数据上报一般有如下几种方式

  直接向后台发送get请求,伪装成js或者图片请求

http://click.dangdang.com/page_tracker.php?m_id=&o_id=&region_ids=&out_refer=&refer_url=&url=http://www.dangdang.com/&to_url=&type=1&visit_id=20181119161826757207396759923945024&is_first_visit=0&ctr_type=&perm_id=20181114174025913676682755771693602&udid= &res=1920,1080||1903,5211&title=当当—网上购物中心:图书、母婴、美妆、家居、数码、家电、服装、鞋包等,正品低价,*敏*感*词*&trace_id=nohead&special=guan=1;page=id:1|name:当首;&cif=&rsv1=&rsv2=&rsv3=&platform=pc&r=0.857700135627224

https://a.stat.xiaomi.com/js/mstr.js?mid=&device_id=&phpsessid=&mstuid=1536571987936_2638&muuid=&mucid=&sessionId=1690051968&step=185&new_visitor=0&mstprevpid=&mstprev_pid_loc=&prevtarget=&lastsource=&timestamp=1542615493495&ref=&domain=.mi.com&screen=1920*1080&language=zh-CN&vendor=Google%20Inc.&platform=Win32&gu=&miwd=&edm_task=&masid=&client_id=&pu=&rf=0&mutid=&muwd=&domain_id=100&pageid=81190ccc4d52f577&curl=https%3A%2F%2Fwww.mi.com%2F&xmv=1536571987936_2638_1542615493495&v=1.0.0&vuuid=7ERAQ0IQQIBIFMAV

https://warriors.jd.com/log.gif?t=exp_log.100000&m=UA-J2011-1&pin=-&uid=1368883904&sid=1368883904|19&v={"t1":"pc_homepage","t2":"basic","p0":"{\"rept\":\"impr\",\"poi\":\"head|focus|08\",\"text\":\"11.19个护感恩节\",\"url\":\"//sale.jd.com/act/1dCqk7TBj5porf8.html\",\"desc\":\"个护电器\",\"mcinfo\":\"00755652-05703860-1100950352-M#0-2-1--58--#1-tb-#300-9908298#pc-home\",\"biclk\":\"1#6328b7df38f1cf2c1fd7c296f1e920cd7b603c53-101-619081#9908298\"}","pinid":"-","je":0,"sc":"24-bit","sr":"1920x1080","ul":"zh-cn","cs":"UTF-8","dt":"京东(JD.COM)-正品低价、品质保障、配送及时、轻松购物!","hn":"www.jd.com","fl":"-","os":"win","br":"chrome","bv":"68.0.3440.106","wb":"1536298255","xb":"1542165688","yb":1542615817,"zb":19,"cb":1,"usc":"direct","ucp":"-","umd":"none","uct":"-","ct":1542615839771,"lt":0,"tad":"-","jdv":"122270672|direct|-|none|-|1542165687598","dataver":"0.1"}&ref=&rm=1542615839772

  回到数据采集端

  nginx + lua

  这种方式需要在nginx端配置日志格式;nginx接收到前端上报的日志收集请求后,会对请求进行解析,并将日志数据记录在本地磁盘;这种方式有几个明显的缺点:

  日志存储在本地磁盘,而我们做大数据离线分析时,数据通常都存储在hdfs上,所以这种方式不可避免地需要将日志再上传到hdfs;另外,因为数据是以日志文件的方式存储的,所以没办法做实时的统计分析

  后台收集

  这种方式需要开发一个日志收集服务端,提供一个http get服务;这个服务将上报的数据推送到kafka中;相比第一种方式,采用后台收集就不需要再去各个服务器上收集日志文件;数据推送到kafka,也就意味着我们可以使用storm、sparkstreaming进行实时分析;这也是目前使用最广的方式

  站点的数据采集流程【以后台收集为例】

  首先是前端的数据上报;用过友盟统计和百度统计的朋友都知道,想要使用友盟、百度的站点统计功能,首先要做的就是在站点中嵌入一段js或者html代码,大概像这个样子

  

// Tracker configuration consumed by xmst.js, which copies every key set here
// into the reported parameters. A plain object is used instead of an Array
// because the keys are names, not indexes: enumerating it with for...in can
// then never pick up enumerable additions made to Array.prototype.
var _maq = {};
_maq['_setAccount'] = 'uuid';
_maq['ppppp'] = 'ppppp';

// Inject the tracker script; `async` lets the page continue while it loads.
(function () {
    var ma = document.createElement('script');
    ma.type = 'text/javascript';
    ma.async = true;
    ma.src = "http://localhost:8089/xmst.js";

    // Insert ahead of the first <script> element found in the document
    // (presumably the one this snippet itself runs from).
    var s = document.getElementsByTagName('script')[0];
    s.parentNode.insertBefore(ma, s);
})();

  这段代码的意思,就是动态加载远程的js[http://localhost:8089/xmst.js],嵌入到需要统计服务的站点;xmst.js代码如下

  var params = {}; // reported with every request; script-level so args_build()/a_click() can use it

// Every host object is probed with `typeof` first: referencing an undeclared
// identifier directly (e.g. `if (_maq)`) throws a ReferenceError instead of
// evaluating to false, which would abort the whole tracker script.

// Data taken from the document object
if (typeof document !== 'undefined' && document) {
    params.domain = document.domain || ''; // host name of the page
    params.url = document.URL || ''; // URL of the current page
    params.title = document.title || '';
    params.referrer = document.referrer || ''; // page the visitor came from
}

// Data taken from the window object (screen information)
if (typeof window !== 'undefined' && window && window.screen) {
    params.sh = window.screen.height || 0;
    params.sw = window.screen.width || 0;
    params.cd = window.screen.colorDepth || 0;
}

// Data taken from the navigator object
if (typeof navigator !== 'undefined' && navigator) {
    params.lang = navigator.language || ''; // language reported by the browser
}

params['age'] = '111';

// Merge the configuration the embedding page supplied through _maq.
if (typeof _maq !== 'undefined' && _maq) {
    for (var key in _maq) {
        // Own properties only: _maq may be an Array, and for...in would
        // otherwise also copy enumerable additions to its prototype.
        if (Object.prototype.hasOwnProperty.call(_maq, key)) {
            params[key] = _maq[key];
        }
    }
}

/**
 * Serializes the shared `params` object into a `key=value&key=value` string.
 *
 * Keys and values are percent-encoded individually so that a value which
 * itself contains `&` or `=` (for example a page URL carrying a query string)
 * cannot be mistaken for a separator. A consumer splits on `&` and `=` first
 * and then decodes each piece.
 *
 * @returns {string} the joined pairs, or '' when `params` has no own keys
 */
function args_build() {
    var pairs = [];
    for (var key in params) {
        // Own properties only: inherited enumerable keys are not tracking data.
        if (Object.prototype.hasOwnProperty.call(params, key)) {
            pairs.push(encodeURIComponent(key) + '=' + encodeURIComponent(params[key]));
        }
    }
    return pairs.join('&');
}

/**
 * Reports a page view.
 *
 * The request is disguised as an image load: assigning `src` on a 1x1 Image
 * makes the browser issue a GET to the collector, with the serialized
 * parameters passed in the `args` query parameter.
 */
function page_load() {
    var beacon = new Image(1, 1);
    var endpoint = 'http://localhost:8089/flow/log.gif?args=';
    beacon.src = endpoint + encodeURIComponent(args_build());
}

/**
 * Reports a click event.
 *
 * The fields in `maps` are sent together with the shared page parameters, but
 * only for this one request: they are merged into a temporary copy, so
 * click-specific fields never leak into later reports.
 *
 * @param {Object} [maps] click-specific fields; own properties only are sent,
 *     and they override shared parameters of the same name
 */
function a_click(maps) {
    // The request is disguised as an image load: assigning `src` issues the GET.
    var img = new Image(1, 1);

    var base = params;
    var merged = {};
    var key;
    for (key in base) {
        if (Object.prototype.hasOwnProperty.call(base, key)) {
            merged[key] = base[key];
        }
    }
    // for...in over null/undefined is a no-op, so a missing `maps` is fine.
    for (key in maps) {
        if (Object.prototype.hasOwnProperty.call(maps, key)) {
            merged[key] = maps[key];
        }
    }

    // args_build() reads the shared `params`, so swap the merged copy in for
    // the duration of the call and always put the original back.
    var query;
    params = merged;
    try {
        query = args_build();
    } finally {
        params = base;
    }

    img.src = 'http://localhost:8089/flow/log.gif?args=' + encodeURIComponent(query);
}

// Report the page view once, as soon as this script has been loaded and run.
page_load();

  加载了这个脚本的页面会自动调用page_load()方法,这个方法会将前端采集到的数据伪装成一个长宽都为1像素的img get请求,请求明文如下

  页面浏览报文

http://localhost:8089/flow/log.gif?args=domain=localhost&url=http://localhost:8090/#&title=page test&referrer=&sh=1080&sw=1920&cd=24&lang=zh-CN&age=111&_setAccount=uuid&ppppp=ppppp

页面点击报文

http://localhost:8089/flow/log.gif?args=domain=localhost&url=http://localhost:8090/#&title=page test&referrer=&sh=1080&sw=1920&cd=24&lang=zh-CN&age=111&_setAccount=uuid&ppppp=ppppp&pageid=index.html&*敏*感*词*id=*敏*感*词*id

浏览和点击报文,区别在于pageid=index.html&*敏*感*词*id=*敏*感*词*id,*敏*感*词*id定义为页面位置【例如点击了某个链接;触发了a_click(maps)方法】;

  使用这种方式主要是为了解决跨域的问题:img请求不受同源策略的限制,而大多数情况下,统计脚本不单单为一个站点服务,各站点的域名也不可能全都一样;

  服务端接口

  http://localhost:8089/flow/log.gif?args=params

  采集端代码如下【省略push kafka过程】

  package com.fan.ga.gaserver.controller;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

import org.springframework.stereotype.Controller;

import org.springframework.web.bind.annotation.RequestMapping;

import javax.imageio.ImageIO;

import javax.servlet.http.HttpServletResponse;

import java.awt.image.BufferedImage;

import java.io.IOException;

import java.io.OutputStream;

/**
 * HTTP endpoint that receives tracking requests and answers with a 1x1 GIF.
 *
 * <p>The reported payload is only written to the application log here.
 */
@Controller
@RequestMapping("/flow")
public class LogCollector {

    Logger logger = LoggerFactory.getLogger(LogCollector.class);

    /**
     * Logs the reported payload and replies with a 1x1 GIF carrying no-cache headers.
     *
     * <p>Example request: {@code http://localhost:8089/flow/log.gif?args=asfafd}
     *
     * @param args     the reported payload; may be {@code null} (presumably when the
     *                 request carries no {@code args} parameter; confirm against the
     *                 framework's binding rules)
     * @param response the response the image is written to
     * @throws IOException if the image cannot be encoded or written
     */
    @RequestMapping(value = "log.gif")
    public void analysis(String args, HttpServletResponse response) throws IOException {
        // The payload is client-controlled: neutralise line breaks so that a
        // single request can never forge additional log records.
        String payload = args == null ? null : args.replace('\r', '_').replace('\n', '_');
        logger.info(payload);

        response.setHeader("Pragma", "No-cache");
        response.setHeader("Cache-Control", "no-cache");
        response.setDateHeader("Expires", 0);
        response.setContentType("image/gif");

        OutputStream out = response.getOutputStream();
        BufferedImage image = new BufferedImage(1, 1, BufferedImage.TYPE_INT_RGB);
        // ImageIO.write reports a missing writer through its return value, not
        // an exception; surface that instead of silently sending an empty body.
        if (!ImageIO.write(image, "gif", out)) {
            throw new IOException("No ImageIO writer available for format: gif");
        }
        out.flush();
    }
}

  站点index.html页面

  

page test

// Tracker configuration consumed by xmst.js, which copies every key set here
// into the reported parameters. A plain object is used instead of an Array
// because the keys are names, not indexes: enumerating it with for...in can
// then never pick up enumerable additions made to Array.prototype.
var _maq = {};
_maq['_setAccount'] = 'uuid';
_maq['ppppp'] = 'ppppp';

// Inject the tracker script; `async` lets the page continue while it loads.
(function () {
    var ma = document.createElement('script');
    ma.type = 'text/javascript';
    ma.async = true;
    ma.src = "http://localhost:8089/xmst.js";

    // Insert ahead of the first <script> element found in the document
    // (presumably the one this snippet itself runs from).
    var s = document.getElementsByTagName('script')[0];
    s.parentNode.insertBefore(ma, s);
})();

首页

detail

  End

0 个评论

要回复文章请先登录注册


官方客服QQ群

微信人工客服

QQ人工客服


线