mysql:从每个组中选择n%的随机样本并更新一个字段

rur96b6h  于 2021-06-19  发布在  Mysql
关注(0)|答案(2)|浏览(313)

我有一个名为tbltaskrecord的mysql表,我必须为每个userid为每个reviewdate随机抽取10%的样本,并将auditstatus字段更新为“check”。这是表脚本和一些示例数据:

CREATE TABLE tbltaskrecord(
ReviewDate  DATE  NOT NULL
,UserId      VARCHAR(50) NOT NULL
,TaskId      VARCHAR(50) NOT NULL PRIMARY KEY
,AuditStatus VARCHAR(50)
);

INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000001',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000002',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000003',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000004',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000005',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000006',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000007',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000008',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000009',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000010',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000011',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000012',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000013',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000014',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000015',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000016',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000017',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000018',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000019',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe1','R110000020',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000021',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000022',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000023',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000024',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000025',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000026',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000027',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000028',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000029',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000030',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000031',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000032',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000033',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000034',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000035',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000036',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000037',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000038',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000039',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-19','jdoe2','R110000040',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000041',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000042',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000043',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000044',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000045',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000046',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000047',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000048',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000049',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000050',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000051',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000052',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000053',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000054',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000055',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000056',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000057',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000058',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000059',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe1','R110000060',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000061',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000062',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000063',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000064',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000065',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000066',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000067',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000068',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000069',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000070',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000071',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000072',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000073',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000074',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000075',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000076',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000077',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000078',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000079',NULL);
INSERT INTO tbltaskrecord(ReviewDate,UserId,TaskId,AuditStatus) VALUES ('2018-09-20','jdoe2','R110000080',NULL);

人口:

+-------------+-----------+-----------+-------------+
|   UserId    | 9/19/2018 | 9/20/2018 | Grand Total |
+-------------+-----------+-----------+-------------+
| jdoe1       |        20 |        20 |          40 |
| jdoe2       |        20 |        20 |          40 |
| Grand Total |        40 |        40 |          80 |
+-------------+-----------+-----------+-------------+

样品:

+-------------+-----------+-----------+-------------+
|   UserId    | 9/19/2018 | 9/20/2018 | Grand Total |
+-------------+-----------+-----------+-------------+
| jdoe1       |         2 |         2 |           4 |
| jdoe2       |         2 |         2 |           4 |
| Grand Total |         4 |         4 |           8 |
+-------------+-----------+-----------+-------------+

这就是我想要得到的:

+------------+--------+------------+-------------+
| ReviewDate | UserId |   TaskId   | AuditStatus |
+------------+--------+------------+-------------+
| 2018-09-19 | jdoe1  | R110000008 | Check       |
| 2018-09-19 | jdoe1  | R110000020 | Check       |
| 2018-09-19 | jdoe2  | R110000029 | Check       |
| 2018-09-19 | jdoe2  | R110000037 | Check       |
| 2018-09-20 | jdoe1  | R110000052 | Check       |
| 2018-09-20 | jdoe1  | R110000057 | Check       |
| 2018-09-20 | jdoe2  | R110000070 | Check       |
| 2018-09-20 | jdoe2  | R110000074 | Check       |
+------------+--------+------------+-------------+

以下是我尝试选择的:

select * from tbltaskrecord WHERE RAND() < .10

但在4次试验中,它给出了5行、6行、9行和8行。这是最后一次试验的结果。jdoe1只有2个,其中没有一个是2018-09-19。

+------------+--------+-------------+-------------+
| ReviewDate | UserId |   TaskId    | AuditStatus |
+------------+--------+-------------+-------------+
| 2018-09-20 |  jdoe1 |  R110000043 |  NULL       |
| 2018-09-20 |  jdoe1 |  R110000052 |  NULL       |
| 2018-09-19 |  jdoe2 |  R110000022 |  NULL       |
| 2018-09-19 |  jdoe2 |  R110000028 |  NULL       |
| 2018-09-19 |  jdoe2 |  R110000031 |  NULL       |
| 2018-09-20 |  jdoe2 |  R110000062 |  NULL       |
| 2018-09-20 |  jdoe2 |  R110000064 |  NULL       |
| 2018-09-20 |  jdoe2 |  R110000080 |  NULL       |
+------------+--------+-------------+-------------+

我该怎么做?

muk1a3rh

muk1a3rh1#

这是最后一个存储过程。

CREATE PROCEDURE `spPickSample`(IN `pReviewDate` VARCHAR(50))
BEGIN
  UPDATE tbltaskrecord JOIN (SELECT a.ReviewDate 
       , a.UserId 
       , a.TaskId     
       , a.AuditStatus
    FROM 
       ( SELECT x.*
              , CASE WHEN @prev = userid THEN @i:=@i+1 ELSE @i:=1 END i
              , @prev := userid
           FROM
              ( SELECT *
                  FROM tbltaskrecord
                   WHERE ReviewDate = pReviewDate
                 ORDER
                    BY userid,RAND()
              ) x -- my understanding is that this bit shouldn't work. But it does.
              , (SELECT @prev:=null,@i:=0) vars
          ORDER
             BY userid
              , i
       ) a
    JOIN
       (SELECT userid, COUNT(*)/10 pct FROM tbltaskrecord WHERE ReviewDate = pReviewDate GROUP BY userid) b
      ON b.userid = a.userid
     AND b.pct >= a.i) q ON tbltaskrecord.TaskId=q.TaskId SET tbltaskrecord.AuditStatus='Check';
END
tyg4sfes

tyg4sfes2#

这个解决方案是摇摇欲坠的边缘是一个'黑客'-但它超出了我的薪酬等级说,它是否真的越过这条线。。。
不管是好是坏,这里有一个主意。。。

SELECT a.ReviewDate 
     , a.UserId 
     , a.TaskId     
     , a.AuditStatus
  FROM 
     ( SELECT x.*
            , CASE WHEN @prev = userid THEN @i:=@i+1 ELSE @i:=1 END i
            , @prev := userid
         FROM
            ( SELECT *
                FROM tbltaskrecord 
               ORDER
                  BY userid,RAND()
            ) x -- my understanding is that this bit shouldn't work. But it does.
            , (SELECT @prev:=null,@i:=0) vars
        ORDER
           BY userid
            , i
     ) a
  JOIN
     (SELECT userid, COUNT(*)/10 pct FROM tbltaskrecord GROUP BY userid) b
    ON b.userid = a.userid
   AND b.pct >= a.i;

+------------+--------+------------+-------------+
| ReviewDate | UserId | TaskId     | AuditStatus |
+------------+--------+------------+-------------+
| 2018-09-20 | jdoe1  | R110000046 | NULL        |
| 2018-09-20 | jdoe1  | R110000042 | NULL        |
| 2018-09-19 | jdoe1  | R110000012 | NULL        |
| 2018-09-19 | jdoe1  | R110000016 | NULL        |
| 2018-09-20 | jdoe2  | R110000077 | NULL        |
| 2018-09-19 | jdoe2  | R110000034 | NULL        |
| 2018-09-19 | jdoe2  | R110000022 | NULL        |
| 2018-09-19 | jdoe2  | R110000026 | NULL        |
+------------+--------+------------+-------------+
8 rows in set (0.01 sec)

sqlfiddle公司http://sqlfiddle.com/#!9/bd3256/1号

相关问题