MongoDB聚合篇

x33g5p2x  于2021-12-25 转载在 Go  
字(26.5k)|赞(0)|评价(0)|浏览(564)

MongoDB中聚合功能分为以下三种

  • 聚合框架agregate()
  • MapReduce
  • 聚合命令(count、distinct、group)

聚合框架agregate

使用聚合框架agregate可以通过多个操作符对文档进行处理,将前一个操作符处理后的结果传给下一个操作符,最后一个操作符处理的结果就是聚合的最后的结果

$match

筛选条件,过滤掉不满足条件的文档,可以使用常规的查询操作符,如gt、lt、$in等

  1. > db.articles.find()
  2. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "author" : "zhangsan", "title" : "Java Primer", "like" : 10 }
  3. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "author" : "zhangsan", "title" : "iOS Primer", "like" : 30 }
  4. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "author" : "zhangsan", "title" : "Android Primer", "like" : 20 }
  5. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "author" : "lisi", "title" : "Html5 Primer", "like" : 40 }
  6. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "author" : "lisi", "title" : "Go Primer", "like" : 30 }
  7. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "author" : "mengday", "title" : "Swift Primer", "like" : 8 }
  8. >
  9. // $match
  10. > db.articles.aggregate( {"$match": {"like": {"$gte" : 10} }} )
  11. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "author" : "zhangsan", "title" : "Java Primer", "like" : 10 }
  12. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "author" : "zhangsan", "title" : "iOS Primer", "like" : 30 }
  13. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "author" : "zhangsan", "title" : "Android Primer", "like" : 20 }
  14. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "author" : "lisi", "title" : "Html5 Primer", "like" : 40 }
  15. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "author" : "lisi", "title" : "Go Primer", "like" : 30 }

$project 投射

  • 用于包含、排除字段: 设置要查询或者要过滤掉的字段,0: 要过滤掉的字段,不显示,1:需要查询的字段
  • 对字段重命名,
  • 在投射中使用一些【表达式】:数学表达式、日期表达式、字符串表达式、逻辑表达式(比较表达式、布尔表达式、控制语句)
  1. > db.articles.aggregate( {"$match": {"like": {"$gte" : 10} }}, {"$project": {"_id": 0, "author":1, "title": 1}} )
  2. { "author" : "zhangsan", "title" : "Java Primer" }
  3. { "author" : "zhangsan", "title" : "iOS Primer" }
  4. { "author" : "zhangsan", "title" : "Android Primer" }
  5. { "author" : "lisi", "title" : "Html5 Primer" }
  6. { "author" : "lisi", "title" : "Go Primer" }
  7. // $project: 当字段的值是0或者1时用于过滤字段,当键是一个自定义的字符串,值为$跟上原来的字段,表示要对该字段进行重命名
  8. > db.articles.aggregate( {"$match": {"like": {"$gte" : 10} }}, {"$project": {"_id": 0, "author":1, "Book Name": "$title"}} )
  9. { "author" : "zhangsan", "Book Name" : "Java Primer" }
  10. { "author" : "zhangsan", "Book Name" : "iOS Primer" }
  11. { "author" : "zhangsan", "Book Name" : "Android Primer" }
  12. { "author" : "lisi", "Book Name" : "Html5 Primer" }
  13. { "author" : "lisi", "Book Name" : "Go Primer" }
  14. >
  15. // 通过修改字段的名称来达到生成一个字段的副本,以便在后面的操作符中使用
  16. > db.articles.aggregate( {"$match": {"like": {"$gte" : 10} }}, {"$project": {"ID":"$_id", "title": 1}} )
  17. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "title" : "Java Primer", "ID" : ObjectId("5989b692b759cf05ab1c7195") }
  18. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "title" : "iOS Primer", "ID" : ObjectId("5989b692b759cf05ab1c7196") }
  19. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "title" : "Android Primer", "ID" : ObjectId("5989b692b759cf05ab1c7197") }
  20. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "title" : "Html5 Primer", "ID" : ObjectId("5989b692b759cf05ab1c7198") }
  21. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "title" : "Go Primer", "ID" : ObjectId("5989b692b759cf05ab1c7199") }
  22. >
  23. // 算数表达式:用于对一组数字 加减乘除取余
  24. // $add: [exp1, exp2, ... expN]: 对数组中的多个元素进行相加, $fieldname: 用于来引用该字段的值
  25. // 示例是对like字段的值 + like字段的值 + 字面量1 作为 "likes"字段的值
  26. > db.articles.aggregate({"$project": {"likes": {"$add": ["$like", "$like", 1]}} })
  27. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "likes" : 21 }
  28. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "likes" : 61 }
  29. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "likes" : 41 }
  30. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "likes" : 81 }
  31. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "likes" : 61 }
  32. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "likes" : 17 }
  33. // $subtract: [exp1, exp2]: 数组中的第一个元素减去第二个元素
  34. > db.articles.aggregate({"$project": {"likes": {"$subtract": ["$like", 2]}} })
  35. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "likes" : 8 }
  36. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "likes" : 28 }
  37. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "likes" : 18 }
  38. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "likes" : 38 }
  39. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "likes" : 28 }
  40. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "likes" : 6 }
  41. // $multiply:对数组中的多个元素相乘
  42. db.articles.aggregate({"$project": {"likes": {"$multiply": ["$like", 2, 5]}} })
  43. "_id" : ObjectId("5989b692b759cf05ab1c7195"), "likes" : 100 }
  44. "_id" : ObjectId("5989b692b759cf05ab1c7196"), "likes" : 300 }
  45. "_id" : ObjectId("5989b692b759cf05ab1c7197"), "likes" : 200 }
  46. "_id" : ObjectId("5989b692b759cf05ab1c7198"), "likes" : 400 }
  47. "_id" : ObjectId("5989b692b759cf05ab1c7199"), "likes" : 300 }
  48. "_id" : ObjectId("5989b692b759cf05ab1c719a"), "likes" : 80 }
  49. // $divide: 数组中的第一个元素除以第二个元素
  50. db.articles.aggregate({"$project": {"likes": {"$divide": ["$like", 2]}} })
  51. "_id" : ObjectId("5989b692b759cf05ab1c7195"), "likes" : 5 }
  52. "_id" : ObjectId("5989b692b759cf05ab1c7196"), "likes" : 15 }
  53. "_id" : ObjectId("5989b692b759cf05ab1c7197"), "likes" : 10 }
  54. "_id" : ObjectId("5989b692b759cf05ab1c7198"), "likes" : 20 }
  55. "_id" : ObjectId("5989b692b759cf05ab1c7199"), "likes" : 15 }
  56. "_id" : ObjectId("5989b692b759cf05ab1c719a"), "likes" : 4 }
  57. // $mod: 求数组中第一个元素除以第二个元素的余数
  58. db.articles.aggregate({"$project": {"likes": {"$mod": ["$like", 3]}} })
  59. "_id" : ObjectId("5989b692b759cf05ab1c7195"), "likes" : 1 }
  60. "_id" : ObjectId("5989b692b759cf05ab1c7196"), "likes" : 0 }
  61. "_id" : ObjectId("5989b692b759cf05ab1c7197"), "likes" : 2 }
  62. "_id" : ObjectId("5989b692b759cf05ab1c7198"), "likes" : 1 }
  63. "_id" : ObjectId("5989b692b759cf05ab1c7199"), "likes" : 0 }
  64. "_id" : ObjectId("5989b692b759cf05ab1c719a"), "likes" : 2 }
  65. // $substr: [exp, startOffset, numToReturn] : 字符串截取操作
  66. > db.articles.aggregate({"$project": {"newValue": {"$substr": ["$title", 1, 2] } }})
  67. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "newValue" : "av" }
  68. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "newValue" : "OS" }
  69. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "newValue" : "nd" }
  70. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "newValue" : "tm" }
  71. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "newValue" : "o " }
  72. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "newValue" : "wi" }
  73. // $concat:[exp1, exp2, ..., expN]: 字符串操作:将数组中的多个元素拼接在一起
  74. > db.articles.aggregate({"$project": {"newValue": {"$concat": ["$title", "(", "$author", ")"] } }})
  75. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "newValue" : "Java Primer(zhangsan)" }
  76. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "newValue" : "iOS Primer(zhangsan)" }
  77. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "newValue" : "Android Primer(zhangsan)" }
  78. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "newValue" : "Html5 Primer(lisi)" }
  79. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "newValue" : "Go Primer(lisi)" }
  80. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "newValue" : "Swift Primer(mengday)" }
  81. >
  82. // $toLower : exp, 字符串转小写
  83. > db.articles.aggregate({"$project": {"newValue": {"$toLower": "$title" } }})
  84. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "newValue" : "java primer" }
  85. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "newValue" : "ios primer" }
  86. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "newValue" : "android primer" }
  87. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "newValue" : "html5 primer" }
  88. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "newValue" : "go primer" }
  89. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "newValue" : "swift primer" }
  90. // $toUpper: 字符串操作,转大写
  91. > db.articles.aggregate({"$project": {"newValue": {"$toUpper": "$author" } }})
  92. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "newValue" : "ZHANGSAN" }
  93. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "newValue" : "ZHANGSAN" }
  94. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "newValue" : "ZHANGSAN" }
  95. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "newValue" : "LISI" }
  96. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "newValue" : "LISI" }
  97. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "newValue" : "MENGDAY" }
  98. // 为所有文档新增字段
  99. > db.articles.update({}, {"$set": {"publishDate": new Date()}}, true, true)
  100. WriteResult({ "nMatched" : 6, "nUpserted" : 0, "nModified" : 6 })
  101. // 日期表达式:用于获取日期中的任意一部分,年月日时分秒 星期等
  102. // $year、$month、$dayOfMonth、$dayOfWeek、$dayOfYear、$hour、$minute、$second
  103. > db.articles.aggregate({"$project": {"month": {"$month": "$publishDate"}}})
  104. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "month" : 8 }
  105. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "month" : 8 }
  106. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "month" : 8 }
  107. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "month" : 8 }
  108. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "month" : 8 }
  109. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "month" : 8 }
  110. >
  111. > db.articles.aggregate({"$project": {"fasttime": {"$subtract": [{"$second": new Date()}, {"$second": "$publishDate"}]}}})
  112. // $cmp: [exp1, exp2]: 用于比较两个字符串,exp1 == exp2 返回 0, 小于返回一个负数,大于返回一个正数
  113. // $不能用于比较字符串
  114. > db.articles.aggregate({"$project": {"result": {"$cmp": ["$like", 20]} }})
  115. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "result" : -1 }
  116. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "result" : 1 }
  117. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "result" : 0 }
  118. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "result" : 1 }
  119. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "result" : 1 }
  120. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "result" : -1 }
  121. // $strcasecmp: 用于比较字符串,相等返回0
  122. // $cmp 和 $strcasecmp 返回的结果是 0、-1、1
  123. > db.articles.aggregate({"$project": {"result": {"$strcasecmp": ["$author", "mengday"]} }})
  124. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "result" : 1 }
  125. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "result" : 1 }
  126. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "result" : 1 }
  127. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "result" : -1 }
  128. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "result" : -1 }
  129. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "result" : 0 }
  130. >
  131. // $eq: 用于判断两个表达式是否相等
  132. // $ne: 不相等
  133. // $gt: 大于
  134. // $gte: 大于等于
  135. // $lt: 小于
  136. // $lte: 小于等于
  137. > db.articles.aggregate({"$project": {"result": {"$eq": ["$author", "mengday"]}}})
  138. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "result" : false }
  139. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "result" : false }
  140. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "result" : false }
  141. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "result" : false }
  142. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "result" : false }
  143. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "result" : true }
  144. // $and:[exp1, exp2, ..., expN] 用于连接多个条件,当所有条件为真的时候为true
  145. > db.articles.aggregate({"$project": {"result": {"$and": [{"$eq": ["$author", "mengday"]}, {"$gt": ["$like", 20]}]}}})
  146. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "result" : false }
  147. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "result" : false }
  148. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "result" : false }
  149. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "result" : false }
  150. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "result" : false }
  151. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "result" : false }
  152. // $or: [exp1, exp2, ..., expN]
  153. > db.articles.aggregate({"$project": {"result": {"$or": [{"$eq": ["$author", "mengday"]}, {"$gt": ["$like", 20]}]}}})
  154. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "result" : false }
  155. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "result" : true }
  156. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "result" : false }
  157. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "result" : true }
  158. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "result" : true }
  159. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "result" : true }
  160. >
  161. // $not: exp 用于取反操作
  162. > db.articles.aggregate({"$project": {"result": {"$not": {"$eq": ["$author", "mengday"]}}}})
  163. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "result" : true }
  164. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "result" : true }
  165. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "result" : true }
  166. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "result" : true }
  167. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "result" : true }
  168. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "result" : false }
  169. // $cond: [booleanExp, trueExp, falseExp]: 三位运算符
  170. > db.articles.aggregate({"$project": {"result": {"$cond": [ {"$eq": ["$author", "mengday"]}, "111", "222" ]}}})
  171. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "result" : "222" }
  172. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "result" : "222" }
  173. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "result" : "222" }
  174. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "result" : "222" }
  175. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "result" : "222" }
  176. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "result" : "111" }
  177. // $ifNull: [expr, replacementExpr]: 如果条件的值为null,则返回后面表达式的值,当字段不存在时字段的值也是null
  178. > db.articles.aggregate({"$project": {"result": {"$ifNull": ["$notExistFiled", "not exist is null"]}}})
  179. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "result" : "not exist is null" }
  180. { "_id" : ObjectId("5989b692b759cf05ab1c7196"), "result" : "not exist is null" }
  181. { "_id" : ObjectId("5989b692b759cf05ab1c7197"), "result" : "not exist is null" }
  182. { "_id" : ObjectId("5989b692b759cf05ab1c7198"), "result" : "not exist is null" }
  183. { "_id" : ObjectId("5989b692b759cf05ab1c7199"), "result" : "not exist is null" }
  184. { "_id" : ObjectId("5989b692b759cf05ab1c719a"), "result" : "not exist is null" }

$group: 分组

使用_id指定要分组的键,要分组的键也可以是多个,使用其他自定义的字段用于统计

  1. // 过滤 + 分组
  2. // _id:用于指定要分组的键,注意键的前面要使用$,意思是引用该字段的值
  3. // 这里的count可以是任意自定义的键,后面用于描述应该怎么统计分组后的数据,这里是分组的每个文档加1,用于统计某个作者总共发表了多少篇文章
  4. // 返回的结果: _id: 要分组的键,自定义字段:要统计的结果
  5. > db.articles.aggregate(
  6. {"$match": {"like": {"$gte" : 10} }},
  7. {"$group": {"_id": "$author", "count": {"$sum": 1}}}
  8. )
  9. { "_id" : "lisi", "count" : 2 }
  10. { "_id" : "zhangsan", "count" : 3 }
  11. >
  12. // 对多个字段进行分组
  13. > db.articles.aggregate(
  14. {"$match": {"like": {"$gte" : 10} }},
  15. {"$group": {"_id": {"author": "$author", "like": "$like"}, "count": {"$sum": 1}}}
  16. )
  17. { "_id" : { "author" : "zhangsan", "like" : 30 }, "count" : 1 }
  18. { "_id" : { "author" : "zhangsan", "like" : 20 }, "count" : 1 }
  19. { "_id" : { "author" : "lisi", "like" : 40 }, "count" : 1 }
  20. { "_id" : { "author" : "lisi", "like" : 30 }, "count" : 1 }
  21. { "_id" : { "author" : "zhangsan", "like" : 10 }, "count" : 1 }
  22. // $sum: value, 对每个文档相加value,求和
  23. // $avg: value, 求平均数
  24. > db.articles.aggregate( {"$group": {"_id": "$author", "count": {"$avg": "$like"}}} )
  25. { "_id" : "mengday", "count" : 8 }
  26. { "_id" : "lisi", "count" : 35 }
  27. { "_id" : "zhangsan", "count" : 20 }
  28. >
  29. // $max: value, 求分组中某个字段最大的值
  30. > db.articles.aggregate( {"$group": {"_id": "$author", "count": {"$max": "$like"}}} )
  31. { "_id" : "mengday", "count" : 8 }
  32. { "_id" : "lisi", "count" : 40 }
  33. { "_id" : "zhangsan", "count" : 30 }
  34. >
  35. // $min: value, 求分组中某个字段最小的值
  36. > db.articles.aggregate( {"$group": {"_id": "$author", "count": {"$min": "$like"}}} )
  37. { "_id" : "mengday", "count" : 8 }
  38. { "_id" : "lisi", "count" : 30 }
  39. { "_id" : "zhangsan", "count" : 10 }
  40. >
  41. // $first: value, 求分组中第一个值
  42. > db.articles.aggregate( {"$group": {"_id": "$author", "count": {"$first": "$like"}}} )
  43. { "_id" : "mengday", "count" : 8 }
  44. { "_id" : "lisi", "count" : 40 }
  45. { "_id" : "zhangsan", "count" : 10 }
  46. // $last: value, 求分组中第一个值
  47. > db.articles.aggregate( {"$group": {"_id": "$author", "count": {"$last": "$like"}}} )
  48. { "_id" : "mengday", "count" : 8 }
  49. { "_id" : "lisi", "count" : 30 }
  50. { "_id" : "zhangsan", "count" : 20 }
  51. // $addToSet: exp, 将分组后的每个文档指定的值放在set集合中,集合不重复,无序
  52. > db.articles.aggregate( {"$group": {"_id": "$author", "like": {"$addToSet": "$like"}}} )
  53. { "_id" : "mengday", "like" : [ 8 ] }
  54. { "_id" : "lisi", "like" : [ 30, 40 ] }
  55. { "_id" : "zhangsan", "like" : [ 30, 20, 10 ] }
  56. // $push: exp, 将分组后的每个文档指定的值放在数组中,允许重复,有序
  57. > db.articles.aggregate( {"$group": {"_id": "$author", "like": {"$push": "$like"}}} )
  58. { "_id" : "mengday", "like" : [ 8 ] }
  59. { "_id" : "lisi", "like" : [ 30, 40 ] }
  60. { "_id" : "zhangsan", "like" : [ 30, 20, 10 ] }
  61. > db.articles.findOne()
  62. {
  63. "_id" : ObjectId("5989b692b759cf05ab1c7195"),
  64. "author" : "zhangsan",
  65. "title" : "Java Primer",
  66. "like" : 10,
  67. "publishDate" : ISODate("2017-08-09T00:50:36.198Z"),
  68. "comments" : [
  69. "good",
  70. "very good"
  71. ]
  72. }
  73. // $unwind: 将数组中的每个值拆分成一个单独的文档
  74. > db.articles.aggregate( {"$unwind": "$comments"} )
  75. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "author" : "zhangsan", "title" : "Java Primer", "like" : 10, "publishDate" : ISODate("2017-08-09T00:50 :36.198Z"), "comments" : "good" }
  76. { "_id" : ObjectId("5989b692b759cf05ab1c7195"), "author" : "zhangsan", "title" : "Java Primer", "like" : 10, "publishDate" : ISODate("2017-08-09T00:50 :36.198Z"), "comments" : "very good" }
  77. >

$sort: 用于对上一次处理的结果进行排序,1:升续 -1:降续

  1. > db.articles.aggregate(
  2. {"$match": {"like": {"$gte" : 10} }},
  3. {"$group": {"_id": "$author", "count": {"$sum": 1}}},
  4. {"$sort": {"count": -1}}
  5. )
  6. { "_id" : "zhangsan", "count" : 3 }
  7. { "_id" : "lisi", "count" : 2 }

$limit: 用于条数限制

  1. > db.articles.aggregate(
  2. {"$match": {"like": {"$gte" : 10} }},
  3. {"$group": {"_id": "$author", "count": {"$sum": 1}}},
  4. {"$sort": {"count": -1}},
  5. {"$limit": 1}
  6. )
  7. { "_id" : "zhangsan", "count" : 3 }

$skip: 跳过前N条文档,和limit结合可用于分页

  1. db.articles.aggregate(
  2. {"$match": {"like": {"$gte" : 10} }},
  3. {"$group": {"_id": "$author", "count": {"$sum": 1}}},
  4. {"$sort": {"count": -1}},
  5. {"$skip": 1},
  6. {"$limit": 1}
  7. )
  8. // 以上聚合首先通过$match: 过滤掉不匹配的文档,接着讲满足条件的文档交给$group进行分组,分组后将将分组后的结果交给$sort进行排序,然后将排序后的结果交给$skip处理,跳过前1条,把剩下的文档交给$limit处理,获取最终的聚合结果。
  9. 聚合框架,就是将上一个操作符处理的结果交个下一个操作符继续处理(这就是Linux中的管道操作),可以使用任意多个操作符,同一个操作符也可以使用多次

聚合命名

count(): 求数量

  1. > db.articles.count()
  2. 6
  3. > db.articles.count({"author": "zhangsan"})
  4. 3

distinct(“filedname”)

求某个字段不同的值

  1. > db.articles.distinct("author")
  2. [ "zhangsan", "lisi", "mengday" ]

group()

// key: 用于指定要分组的键
// initial: 对于分组统计的字段设置键名和初始值
// reduce: 循环每个分组中的每个文档,一组循环完了会继续下一组,

  1. > db.articles.group({
  2. "key": "author",
  3. "initial": {"sum": 0},
  4. "reduce": function(doc, prev) {
  5. if(doc.like > 10) {
  6. prev.sum += 1;
  7. }}
  8. })
  9. [ { "sum" : 4 } ]
  10. // key: {"fieldname": true} 和 key: "filedname" 有很大的区别:
  11. // key: {"fieldname": true} 分组的结果会带有每个分组的字段,reduce每循环一组就会重新执行以下初始化initial
  12. // key: "filedname", 结果中不带分组的字段,而且好像当执行下一个分组的时候不会进行初始化,下一组的统计结果是在上一组的基础上完成的,所有组循环完了只得到最后的结果,这种方式感觉不像是平常见到的分组
  13. > db.articles.group({
  14. "key": {"author": true},
  15. "initial": {"sum": 0},
  16. "reduce": function(doc, prev) {
  17. if(doc.like > 10) {
  18. prev.sum += 1;
  19. }
  20. }
  21. })
  22. [
  23. {
  24. "author" : "zhangsan",
  25. "sum" : 2
  26. },
  27. {
  28. "author" : "lisi",
  29. "sum" : 2
  30. },
  31. {
  32. "author" : "mengday",
  33. "sum" : 0
  34. }
  35. ]
  36. // condition: 用于分组前筛选掉不满足条件的文档
  37. > db.articles.group({
  38. "key": {"author": true},
  39. "initial": {"sum": 0},
  40. "reduce": function(doc, prev) {
  41. if(doc.like > 10) {
  42. prev.sum += 1;
  43. }
  44. },
  45. "condition":{"author": {"$ne": "mengday"}}
  46. })
  47. [
  48. {
  49. "author" : "zhangsan",
  50. "sum" : 2
  51. },
  52. {
  53. "author" : "lisi",
  54. "sum" : 2
  55. }
  56. ]
  57. // finalize: 用于对分组后的结果进一步处理,每组都会调用finalize
  58. > db.articles.group({
  59. "key": {"author": true},
  60. "initial": {"sum": 0},
  61. "reduce": function(doc, prev) {
  62. if(doc.like > 10) {
  63. prev.sum += 1;
  64. }
  65. },
  66. "condition":{"author": {"$ne": "mengday"}},
  67. "finalize": function(doc){
  68. if(doc.author == "zhangsan"){
  69. delete doc
  70. }
  71. }
  72. })
  73. [
  74. {
  75. "author" : "zhangsan",
  76. "sum" : 3
  77. },
  78. {
  79. "author" : "lisi",
  80. "sum" : 2
  81. }
  82. ]

使用聚合框架aggregate({“$group: {}”}) 和 group()都可以进行分组,但group好像没有聚合框架这么灵活,如group()不能投射,分组后的结果不知道怎么过滤掉一些不满足条件的值,怎么对分组后的结果排序,分页等,这些都可以在聚合框架中使用管道操作很轻易的实现,group()适用于简单的分组,聚合框架适用于更复杂的分组处理(注意:聚合框架分组只是其中的一个管道,也就是说分组只是聚合框架的其中一个功能而不是说聚合框架就是用来分组的)

MapReduce

mapReduce强大、灵活,使用Javascript作为查询语言能够表达任意复杂的逻辑,但是也非常慢,不应该用于实施数据分析。能够在多台服务器上并行执行,当所有机器都完成是,再讲这些结果合并起来再统一聚合一下

http://www.cnblogs.com/Joe-T/p/4264910.html

相关文章