一、实现多列的group by操作(方法1)。
类似如下SQL:
select PhoneId, hour, count(*)
from tianyc_test8where PhoneId in ('xx','yy','zz')group by PhoneId, hour;1. 构建测试数据
在集合tianyc_test8中,每个PhoneId在每小时有多条数据,下面统计每个PhoneId在每小时的记录数。
> db.tianyc_test8.find()
{ "_id" : ObjectId("50d0a5bdd03"), "PhoneId" : "460001201624570", "Time" : ISODate("2012-12-18T17:21:11Z") }{ "_id" : ObjectId("50d0a5bdd0c"), "PhoneId" : "460001201521818", "Time" : ISODate("2012-12-18T17:21:12Z") }{ "_id" : ObjectId("50d0a19194a"), "PhoneId" : "460001204101466", "Time" : ISODate("2012-12-18T17:23:01Z") }{ "_id" : ObjectId("50d0a5bdd11"), "PhoneId" : "460001202749862", "Time" : ISODate("2012-12-18T17:19:43Z") }{ "_id" : ObjectId("50d0a5bdd13"), "PhoneId" : "460001202749862", "Time" : ISODate("2012-12-18T17:21:13Z") }{ "_id" : ObjectId("50d0a5bdd14"), "PhoneId" : "460001202749919", "Time" : ISODate("2012-12-18T17:21:14Z") }{ "_id" : ObjectId("50d0a5bdd17"), "PhoneId" : "460001202445292", "Time" : ISODate("2012-12-18T17:21:14Z") }{ "_id" : ObjectId("50d0a5bdd20"), "PhoneId" : "460001201829623", "Time" : ISODate("2012-12-18T17:21:15Z") }{ "_id" : ObjectId("50d0a5bdd22"), "PhoneId" : "460001202441775", "Time" : ISODate("2012-12-18T17:21:15Z") }{ "_id" : ObjectId("50d0a5bdd25"), "PhoneId" : "460001202441680", "Time" : ISODate("2012-12-18T17:21:14Z") }{ "_id" : ObjectId("50d0a5bdd2a"), "PhoneId" : "460001204608783", "Time" : ISODate("2012-12-18T17:21:15Z") }{ "_id" : ObjectId("50d0a5bdd38"), "PhoneId" : "460001202747843", "Time" : ISODate("2012-12-18T17:21:18Z") }{ "_id" : ObjectId("50d0a5bdd3b"), "PhoneId" : "460001201624758", "Time" : ISODate("2012-12-18T17:21:18Z") }{ "_id" : ObjectId("50d0a5bdd3d"), "PhoneId" : "460001201930213", "Time" : ISODate("2012-12-18T17:21:19Z") }{ "_id" : ObjectId("50d0a2f1e84"), "PhoneId" : "460001203762426", "Time" : ISODate("2012-12-18T17:23:50Z") }{ "_id" : ObjectId("50d0a0f0852"), "PhoneId" : "460001201624766", "Time" : ISODate("2012-12-18T17:23:05Z") }{ "_id" : ObjectId("50d0a5bdd47"), "PhoneId" : "460001204101332", "Time" : ISODate("2012-12-18T17:20:54Z") }{ "_id" : ObjectId("50d0c302f99"), "PhoneId" : "460001201624784", "Time" : ISODate("2012-12-18T19:36:16Z") }{ "_id" : ObjectId("50d0c100588"), "PhoneId" : "460001204305245", "Time" : ISODate("2012-12-18T19:35:27Z") }{ "_id" : ObjectId("50d0c100591"), "PhoneId" : "460001204305240", "Time" : ISODate("2012-12-18T19:35:30Z") }has more2. 编写map函数,将需要groupby的两列使用“-”拼接在一起,作为key。
> var m = function(){
... var hour=String(this.Time.getHours());
... if(hour.length==1) { ... hour='0'+hour... };... key=this.PhoneId+'-'+hour;... emit(key,{count:1});... }3. 编写reduce函数,将输出结果进行拆分。
> var r = function (key,values) { ... var total = 0;... var locate = key.indexOf('-');... for (var i=0; i<values.length; i++) { ... total += values[i].count;... }... return { PhoneId:key.substr(0,locate), hour:key.substr(locate+1,2), count : total};... };4. 进行mapreduce,加入查询条件
> var res = db.tianyc_test8.mapReduce(... m,... r,... {query:{PhoneId:{$in:["460001203762426","460001201521818","460001202749919"]}},... out:{replace:'tianyc_mr_Result1'}}... )5. 查看输出结果
> db.tianyc_mr_Result1.find(){ "_id" : "460001201521818-00", "value" : { "PhoneId" : "460001201521818", "hour" : "00", "count" : 112 } }
{ "_id" : "460001201521818-01", "value" : { "PhoneId" : "460001201521818", "hour" : "01", "count" : 65 } }{ "_id" : "460001201521818-02", "value" : { "PhoneId" : "460001201521818", "hour" : "02", "count" : 52 } }{ "_id" : "460001201521818-03", "value" : { "PhoneId" : "460001201521818", "hour" : "03", "count" : 98 } }{ "_id" : "460001201521818-04", "value" : { "PhoneId" : "460001201521818", "hour" : "04", "count" : 112 } }{ "_id" : "460001201521818-05", "value" : { "PhoneId" : "460001201521818", "hour" : "05", "count" : 112 } }{ "_id" : "460001201521818-06", "value" : { "PhoneId" : "460001201521818", "hour" : "06", "count" : 111 } }{ "_id" : "460001201521818-07", "value" : { "PhoneId" : "460001201521818", "hour" : "07", "count" : 111 } }{ "_id" : "460001201521818-08", "value" : { "PhoneId" : "460001201521818", "hour" : "08", "count" : 105 } }{ "_id" : "460001201521818-09", "value" : { "PhoneId" : "460001201521818", "hour" : "09", "count" : 114 } }{ "_id" : "460001201521818-10", "value" : { "PhoneId" : "460001201521818", "hour" : "10", "count" : 107 } }{ "_id" : "460001201521818-11", "value" : { "PhoneId" : "460001201521818", "hour" : "11", "count" : 115 } }{ "_id" : "460001201521818-12", "value" : { "PhoneId" : "460001201521818", "hour" : "12", "count" : 89 } }{ "_id" : "460001201521818-13", "value" : { "PhoneId" : "460001201521818", "hour" : "13", "count" : 114 } }{ "_id" : "460001201521818-14", "value" : { "PhoneId" : "460001201521818", "hour" : "14", "count" : 114 } }{ "_id" : "460001201521818-15", "value" : { "PhoneId" : "460001201521818", "hour" : "15", "count" : 115 } }{ "_id" : "460001201521818-16", "value" : { "PhoneId" : "460001201521818", "hour" : "16", "count" : 115 } }{ "_id" : "460001201521818-17", "value" : { "PhoneId" : "460001201521818", "hour" : "17", "count" : 112 } }{ "_id" : "460001201521818-18", "value" : { "PhoneId" : "460001201521818", "hour" : "18", "count" : 115 } }{ "_id" : "460001201521818-19", "value" : { "PhoneId" : "460001201521818", "hour" : "19", "count" : 70 } }has more > it { "_id" : "460001201521818-20", "value" : { "PhoneId" : "460001201521818", "hour" : "20", "count" : 99 } }{ "_id" : "460001201521818-21", "value" : { "PhoneId" : "460001201521818", "hour" : "21", "count" : 118 } }{ "_id" : "460001201521818-22", "value" : { "PhoneId" : "460001201521818", "hour" : "22", "count" : 116 } }{ "_id" : "460001201521818-23", "value" : { "PhoneId" : "460001201521818", "hour" : "23", "count" : 112 } }{ "_id" : "460001202749919-00", "value" : { "PhoneId" : "460001202749919", "hour" : "00", "count" : 112 } }{ "_id" : "460001202749919-01", "value" : { "PhoneId" : "460001202749919", "hour" : "01", "count" : 111 } }{ "_id" : "460001202749919-02", "value" : { "PhoneId" : "460001202749919", "hour" : "02", "count" : 112 } }{ "_id" : "460001202749919-03", "value" : { "PhoneId" : "460001202749919", "hour" : "03", "count" : 46 } }{ "_id" : "460001202749919-04", "value" : { "PhoneId" : "460001202749919", "hour" : "04", "count" : 108 } }{ "_id" : "460001202749919-05", "value" : { "PhoneId" : "460001202749919", "hour" : "05", "count" : 110 } }{ "_id" : "460001202749919-06", "value" : { "PhoneId" : "460001202749919", "hour" : "06", "count" : 62 } }{ "_id" : "460001202749919-07", "value" : { "PhoneId" : "460001202749919", "hour" : "07", "count" : 108 } }{ "_id" : "460001202749919-08", "value" : { "PhoneId" : "460001202749919", "hour" : "08", "count" : 112 } }{ "_id" : "460001202749919-09", "value" : { "PhoneId" : "460001202749919", "hour" : "09", "count" : 112 } }{ "_id" : "460001202749919-10", "value" : { "PhoneId" : "460001202749919", "hour" : "10", "count" : 108 } }{ "_id" : "460001202749919-11", "value" : { "PhoneId" : "460001202749919", "hour" : "11", "count" : 112 } }{ "_id" : "460001202749919-12", "value" : { "PhoneId" : "460001202749919", "hour" : "12", "count" : 84 } }{ "_id" : "460001202749919-13", "value" : { "PhoneId" : "460001202749919", "hour" : "13", "count" : 112 } }{ "_id" : "460001202749919-14", "value" : { "PhoneId" : "460001202749919", "hour" : "14", "count" : 112 } }{ "_id" : "460001202749919-15", "value" : { "PhoneId" : "460001202749919", "hour" : "15", "count" : 112 } }has more
二、实现多列的group by操作(方法2)。
在map函数中,将key设置为内嵌的集合。此时MR过程与单个列的group by相同。
> var m = function(){
... var hour=String(this.Time.getHours());... if(hour.length==1) {hour='0'+hour};... key={PhoneId:this.PhoneId,hour:hour};... emit(key,{count:1});... }> var r = function (key,values) {... var total = 0;
... for (var i=0; i<values.length; i++) { ... total += values[i].count;... }... return {count : total};... };> var res = db.tianyc_test8.mapReduce(... m,... r,... {query:{PhoneId:{$in:["460001203762426","460001201521818","460001202749919"]}},... out:{replace:'tianyc_mr_Result1'}}... )> db.tianyc_mr_Result1.find(){ "_id" : { "PhoneId" : "460001201521818", "hour" : "00" }, "value" : { "count" : 112 } }
{ "_id" : { "PhoneId" : "460001201521818", "hour" : "01" }, "value" : { "count" : 65 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "02" }, "value" : { "count" : 52 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "03" }, "value" : { "count" : 98 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "04" }, "value" : { "count" : 112 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "05" }, "value" : { "count" : 112 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "06" }, "value" : { "count" : 111 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "07" }, "value" : { "count" : 111 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "08" }, "value" : { "count" : 105 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "09" }, "value" : { "count" : 114 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "10" }, "value" : { "count" : 107 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "11" }, "value" : { "count" : 115 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "12" }, "value" : { "count" : 89 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "13" }, "value" : { "count" : 114 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "14" }, "value" : { "count" : 114 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "15" }, "value" : { "count" : 115 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "16" }, "value" : { "count" : 115 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "17" }, "value" : { "count" : 112 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "18" }, "value" : { "count" : 115 } }{ "_id" : { "PhoneId" : "460001201521818", "hour" : "19" }, "value" : { "count" : 70 } }has more三、实现多列的group by having操作。
类似如下SQL:
select PhoneId, hour, count(*)
from tianyc_test8where PhoneId in ('xx','yy','zz')group by PhoneId, hour having count(*)>=100;1. 使用第一种group by方法进行测试。map和reduce函数不变,增加finalize函数,设置指示列get。
> var f = function(key, rvalues){
... if (rvalues.count>=100){rvalues.get='true';}... else {rvalues.get='false'};... return rvalues;... }2. 使用runCommand来执行mapredurce。
> var res = db.runCommand({
... mapreduce:'tianyc_test8',... map:m,... reduce:r,... finalize:f,... query:{PhoneId:{$in:["460001203762426","460001201521818","460001202749919"]}},... out:'tianyc_mr_Result1'... }... )3. 查询结果。
使用get列来过滤需要的数据,并隐藏该列:
> db.tianyc_mr_Result1.find({'value.get':'true'},{'value.get':0})
{ "_id" : "460001201521818-00", "value" : { "PhoneId" : "460001201521818", "hour" : "00", "count" : 112 } }
{ "_id" : "460001201521818-04", "value" : { "PhoneId" : "460001201521818", "hour" : "04", "count" : 112 } }{ "_id" : "460001201521818-05", "value" : { "PhoneId" : "460001201521818", "hour" : "05", "count" : 112 } }{ "_id" : "460001201521818-06", "value" : { "PhoneId" : "460001201521818", "hour" : "06", "count" : 111 } }{ "_id" : "460001201521818-07", "value" : { "PhoneId" : "460001201521818", "hour" : "07", "count" : 111 } }{ "_id" : "460001201521818-08", "value" : { "PhoneId" : "460001201521818", "hour" : "08", "count" : 105 } }{ "_id" : "460001201521818-09", "value" : { "PhoneId" : "460001201521818", "hour" : "09", "count" : 114 } }{ "_id" : "460001201521818-10", "value" : { "PhoneId" : "460001201521818", "hour" : "10", "count" : 107 } }{ "_id" : "460001201521818-11", "value" : { "PhoneId" : "460001201521818", "hour" : "11", "count" : 115 } }{ "_id" : "460001201521818-13", "value" : { "PhoneId" : "460001201521818", "hour" : "13", "count" : 114 } }{ "_id" : "460001201521818-14", "value" : { "PhoneId" : "460001201521818", "hour" : "14", "count" : 114 } }{ "_id" : "460001201521818-15", "value" : { "PhoneId" : "460001201521818", "hour" : "15", "count" : 115 } }{ "_id" : "460001201521818-16", "value" : { "PhoneId" : "460001201521818", "hour" : "16", "count" : 115 } }{ "_id" : "460001201521818-17", "value" : { "PhoneId" : "460001201521818", "hour" : "17", "count" : 112 } }{ "_id" : "460001201521818-18", "value" : { "PhoneId" : "460001201521818", "hour" : "18", "count" : 115 } }{ "_id" : "460001201521818-21", "value" : { "PhoneId" : "460001201521818", "hour" : "21", "count" : 118 } }{ "_id" : "460001201521818-22", "value" : { "PhoneId" : "460001201521818", "hour" : "22", "count" : 116 } }{ "_id" : "460001201521818-23", "value" : { "PhoneId" : "460001201521818", "hour" : "23", "count" : 112 } }{ "_id" : "460001202749919-00", "value" : { "PhoneId" : "460001202749919", "hour" : "00", "count" : 112 } }{ "_id" : "460001202749919-01", "value" : { "PhoneId" : "460001202749919", "hour" : "01", "count" : 111 } }has more>