mysql 如何在shell脚本中比较并获取两个sQL文件中的不同行

由于尝试识别数据库中的差异与识别基本文件中的差异并不完全相同（* 记录可能不在相同的序列中，但在确定差异时没有相关性 ），我想我将尝试一下逻辑，比较两个定义格式的CSV（ 可以很容易地适应真实的生活中的任何格式 *）。为了实现这一点，我剥离了所识别的每一行输入的最后一个字符，因为它们可以变化，而它们应该与比较结果没有关系。
如果您注意到，我还定义了两个变量“COMPARISON_START_FIELD”和“COMPARISON_END_FIELD"，它们允许您定义与“near-match”条件评估相关的字段，显示可能发生更改/修改/篡改的位置。可以设想修改这些引用，以便与应该考虑用于该确定的字段位置的指定数组一起工作。
还要注意，可以修改“${DBG}”值，以消除阶段I到III显示的输出。也可以将其更改为命令行参数标志，以便进行编程控制。
显然，您可以修改语句以获得您选择的任何格式的输出。

#!/bin/sh

DBG=1
COMPARISON_START_FIELD=2
COMPARISON_END_FIELD=4

#QUESTION:  https://stackoverflow.com/questions/74803126/how-to-compare-and-get-diffrent-rows-at-two-sql-files-in-shell-script
#REFERENCE:  https://www.sample-videos.com/download-sample-sql.php   (for sample load files for 10/100/1000 etc. records)

FILE1="./test_92__input_1.txt"
cat >"${FILE1}" <<"EnDoFiNpUt"
(1, 'rogers63', 'david', 'john', 'Female', 'e6a33eee180b07e563d74fee8c2c66b8', 1),
(2, 'mike28', 'rogers', 'paul', 'Male', '2e7dc6b8a1598f4f75c3eaa47958ee2f', 1),
(3, 'rivera92', 'david', 'john', 'Male', '1c3a8e03f448d211904161a6f5849b68', 1),
(4, 'ross95', 'maria', 'sanders', 'Male', '62f0a68a4179c5cdd997189760cbcf18', 1),
(5, 'paul85', 'morris', 'miller', 'Female', '61bd060b07bddfecccea56a82b850ecf', 1),
(6, 'smith34', 'daniel', 'michael', 'Female', '7055b3d9f5cb2829c26cd7e0e601cde5', 1),
(8, 'daniel53', 'mark', 'mike', 'Male', '299cbf7171ad1b2967408ed200b4e26c', 1),
(9, 'brooks80', 'morgan', 'maria', 'Female', 'aa736a35dc15934d67c0a999dccff8f6', 1),
(10, 'morgan65', 'paul', 'miller', 'Female', 'a28dca31f5aa5792e1cefd1dfd098569', 1);
EnDoFiNpUt

FILE2="./test_92__input_2.txt"
cat >"${FILE2}" <<"EnDoFiNpUt"
(1, 'rogers63', 'david', 'john', 'Female', 'e6a33eee180b07e563d74fee8c2c66b8', 1),
(3, 'rivera92', 'daniella', 'john', 'Female', '1c3a8e03f448d211904161a6f5849b68', 1),
(4, 'ross95', 'maria', 'sanders', 'Male', '62f0a68a4179c5cdd997189760cbcf18', 1),
(5, 'paul85', 'morris', 'miller', 'Female', '61bd060b07bddfecccea56a82b850ecf', 1),
(7, 'james84', 'sanders', 'paul', 'Female', 'b7f72d6eb92b45458020748c8d1a3573', 1),
(8, 'daniel53', 'mark', 'mike', 'Male', '299cbf7171ad1b2967408ed200b4e26c', 1),
(9, 'brooks80', 'morgan', 'maria', 'Female', 'aa736a35dc15934d67c0a999dccff8f6', 1),
(10, 'morgan65', 'paul', 'miller', 'Female', 'a28dca31f5aa5792e1cefd1dfd098569', 1),
(11, 'scorpio17', 'regis', 'philbin', 'male', '5aa5792e1a28dca31fcefd1dfd098569', 1);
EnDoFiNpUt

ls -l ${FILE1} ${FILE2}
wc -l ${FILE1} ${FILE2}

###
### Method #2 - Comparison of arrays
###
awk \
    -v dbg="${DBG}"                 \
    -v f1="${FILE1}"                \
    -v f2="${FILE2}"                \
    -v compStart="${COMPARISON_START_FIELD}"    \
    -v compEnd="${COMPARISON_END_FIELD}"        \
    'BEGIN{
    print "\nPHASE I - Loading FILE1 into array ..."
    ###
    ### Load up the first file into arrays
    ###
    index1f=0 ;
    index1d=0 ;
    split( "", sql1full ) ;     # set to empty array

    if( dbg == 1 ){ print f1 ; } ;
    while( getline < f1 ){
        index1f++ ;
        i=index1f ;
        sql1full[i,1]=substr( $0, 1, length($0)-1 ) ;
        sql1full[i,2]=0 ;
        if( dbg == 1 ){ printf("\t\t Line %s ...\n", i ) ; } ;
    } ;
    printf("\t File 1 captured as array (size= %s) ...\n", index1f ) ;

    index2f=0 ;
    index2d=0 ;
    split( "", sql2full ) ;     # set to empty array
    split( "", components ) ;   # RESET to empty array
    split( "", sql2details ) ;  # set to empty array

    print "\nPHASE II - Loading FILE2 into array "
    print "    AND    First pair matching"
    print "    AND    Identifying unique items in FILE2 ..."

    indexI=0
    split( "", matched ) ;      # set to empty array
}{
    index2f++ ;
    j=index2f ;
    if( dbg == 1 ){ printf("\t ================ j=%s ================\n", j ) ; } ;

    sql2full[j,1]=substr( $0, 1, length($0)-1 ) ;
    sql2full[j,2]=0 ;

    nom=1 ;
    for( i=1 ; i <= index1f ; i++ ){
        ### Legend:
        ### 0 = not classified
        ### 1 = only copy
        ### 2 = duplicate (matched)
        ### 3 = partial match
        if( sql1full[i,2] == 2 ){
            if( dbg == 1 ){
                printf("\t\t SKIPPED| sql1full[%s,1] already matched on previous pass ...\n", i ) ;
            } ;
        }else{
            if( sql2full[j,1] == sql1full[i,1] ){
                nom=0
                sql1full[i,2]=2 ;
                sql2full[j,2]=2 ;
                indexI++ ;
                matched[indexI,1]=i ;
                matched[indexI,2]=j ;
                if( dbg == 1 ){
                    printf("\t MATCHED| sql2full[%s,1] same as sql1full[%s,1] ...\n", j, i ) ;
                } ;
            } ;
        } ;

    } ;
    if( nom == 1 ){
        if( dbg == 1 ){
            printf("\t UNIQUE| sql2full[%s,1] has no match ...\n", j ) ;
        } ;
        sql2full[j,2]=1 ;
    } ;
}END{
    print "\nPHASE III - Identifying unique items in FILE1 ..."

    for( i=1 ; i <= index1f ; i++ ){
        if( dbg == 1 ){ printf("\t ================ i=%s ================\n", i ) ; } ;
        ### Legend:
        ### 0 = not classified
        ### 1 = only copy
        ### 2 = duplicate (matched)
        ### 3 = partial match

        if( sql1full[i,2] == 2 ){
            if( dbg == 1 ){
                printf("\t\t SKIPPED| sql1full[%s,1] already matched on previous pass ...\n", i ) ;
            } ;
        }else{
            nom=1 ;
            for( j=1 ; j <= index2f ; j++ ){
                if( sql2full[j,2] != 2 ){
                    if( sql2full[j,1] == sql1full[i,1] ){
                        nom=0 ;
                        sql1full[i,2]=2 ;
                        sql2full[j,2]=2 ;
                        indexI++ ;
                        matched[indexI,1]=i ;
                        matched[indexI,2]=j ;
                        if( dbg == 1 ){
                            printf("\t MATCHED| sql1full[%s,1] same as sql2full[%s,1] ...\n", i, j ) ;
                        } ;
                    } ;
                } ;
            } ;
            if( nom == 1 ){
                sql1full[i,2]=1 ;
                if( dbg == 1 ){
                    printf("\t UNIQUE| sql1full[%s,1] has no match ...\n", i ) ;
                } ;
            } ;
        } ;
    } ;

    print ""
    print "\nPHASE IV - Identifying Lines which are partial matches ..."

    indexK=0 ;
    split("", near) ;

    for( i=1 ; i <= index1f ; i++ ){
        if( sql1full[i,2] == 1 ){
            if( dbg == 1 ){
                printf("\t Examining sql1full[%s,1] ...\n", i ) ;
            } ;

            n=index( sql1full[i,1], "(" ) ;
            rem=substr( sql1full[i,1], n+1 ) ;

            n=index( rem, ")" ) ;
            rem=substr( rem, 1, n-1 ) ;

            num1=split( rem, sql1details ) ;
            if( dbg == 1 ){
                printf("\t\t num1 = %s ...\n", num1 ) ;
            } ;

            for( j=1 ; j <= index2f ; j++ ){
                if( sql2full[j,2] == 1 ){
                    if( dbg == 1 ){
                        printf("\t\t Examining sql2full[%s,1] ...\n", j ) ;
                    } ;

                    n=index( sql2full[j,1], "(" ) ;
                    rem=substr( sql2full[j,1], n+1 ) ;

                    n=index( rem, ")" ) ;
                    rem=substr( rem, 1, n-1 ) ;

                    num2=split( rem, sql2details ) ;
                    if( dbg == 1 ){
                        printf("\t\t num2 = %s ...\n", num2 ) ;
                    } ;

                    if( num1 == num2 ){
                        pm=0
                        for( f1=compStart ; f1 <= compEnd ; f1++ ){
                            if( sql1details[f1] == sql2details[f1] ){
                                pm=pm+1 ;
                                if( dbg == 1 ){
                                    printf("\t\t\t %s|  %s == %s ...\n", f1, sql1details[f1], sql2details[f1] ) ;
                                } ;
                            }else{
                                if( dbg == 1 ){
                                    printf("\t\t\t %s|  %s != %s ...\n", f1, sql1details[f1], sql2details[f1] ) ;
                                } ;
                            } ;
                        } ;
                        if( pm != 0 ){
                            indexK++ ;
                            sql1full[i,2]=3 ;
                            sql2full[j,2]=3 ;
                            near[indexK,1]=i ;
                            near[indexK,2]=j ;
                            if( dbg == 1 ){
                                printf("\t\t\t PARTIAL| sql1full[%s,1] and sql2full[%s,1] share %s fields ...\n", i, j, pm ) ;
                            } ;
                        } ;
                    }else{
                        if( dbg == 1 ){
                            printf("\t\t\t NO_MATCH| sql1full[%s,1] and sql2full[%s,1] have different field counts ...\n", i, j ) ;
                        } ;
                        
                    } ;
                } ;
            } ;
        } ;
    } ;

    print "\nPHASE IV - Report SHARED for each file ..."
    for( i=1 ; i <= indexI ; i++ ){
        printf("SHARED 1|%03d|%s\n", matched[i,1], sql1full[matched[i,1],1] ) ;
        printf("SHARED 2|%03d|%s\n", matched[i,2], sql2full[matched[i,2],1] ) ;
        print ""
    } ;
    #for( i=1 ; i <= index1f ; i++ ){
    #   if( sql1full[i,2] == 2 ){
    #       printf("SHARED 1|%03d|%s\n", i, sql1full[i,1] ) ;
    #   } ;
    #} ;
    #print ""
    #for( j=1 ; j <= index2f ; j++ ){
    #   if( sql2full[j,2] == 2 ){
    #       printf("SHARED 2|%03d|%s\n", j, sql2full[j,1] ) ;
    #   } ;
    #} ;

    print ""
    print "\nPHASE  V - Report items UNIQUE to each file ..."
    for( i=1 ; i <= index1f ; i++ ){
        if( sql1full[i,2] == 1 ){
            printf("UNIQUE 1|%03d|%s\n", i, sql1full[i,1] ) ;
        } ;
    } ;
    print ""
    for( j=1 ; j <= index2f ; j++ ){
        if( sql2full[j,2] == 1 ){
            printf("UNIQUE 2|%03d|%s\n", j, sql2full[j,1] ) ;
        } ;
    } ;

    print ""
    print "\nPHASE  VI - Report items which have shared data fields ..."
    for( k=1 ; k <= indexK ; k++ ){
        printf("PARTIAL 1|%03d|%s\n", near[k,1], sql1full[near[k,1],1] ) ;
        printf("PARTIAL 2|%03d|%s\n", near[k,2], sql2full[near[k,2],1] ) ;
        print ""
    } ;

}' "${FILE2}"
    
exit

会话输出如下所示：

-rw-rw-r-- 1 ericthered ericthered 746 Dec 29 22:48 ./test_92__input_1.txt
-rw-rw-r-- 1 ericthered ericthered 755 Dec 29 22:48 ./test_92__input_2.txt
   9 ./test_92__input_1.txt
   9 ./test_92__input_2.txt
  18 total

PHASE I - Loading FILE1 into array ...
./test_92__input_1.txt
         Line 1 ...
         Line 2 ...
         Line 3 ...
         Line 4 ...
         Line 5 ...
         Line 6 ...
         Line 7 ...
         Line 8 ...
         Line 9 ...
     File 1 captured as array (size= 9) ...

PHASE II - Loading FILE2 into array 
    AND    First pair matching
    AND    Identifying unique items in FILE2 ...
     ================ j=1 ================
     MATCHED| sql2full[1,1] same as sql1full[1,1] ...
     ================ j=2 ================
         SKIPPED| sql1full[1,1] already matched on previous pass ...
     UNIQUE| sql2full[2,1] has no match ...
     ================ j=3 ================
         SKIPPED| sql1full[1,1] already matched on previous pass ...
     MATCHED| sql2full[3,1] same as sql1full[4,1] ...
     ================ j=4 ================
         SKIPPED| sql1full[1,1] already matched on previous pass ...
         SKIPPED| sql1full[4,1] already matched on previous pass ...
     MATCHED| sql2full[4,1] same as sql1full[5,1] ...
     ================ j=5 ================
         SKIPPED| sql1full[1,1] already matched on previous pass ...
         SKIPPED| sql1full[4,1] already matched on previous pass ...
         SKIPPED| sql1full[5,1] already matched on previous pass ...
     UNIQUE| sql2full[5,1] has no match ...
     ================ j=6 ================
         SKIPPED| sql1full[1,1] already matched on previous pass ...
         SKIPPED| sql1full[4,1] already matched on previous pass ...
         SKIPPED| sql1full[5,1] already matched on previous pass ...
     MATCHED| sql2full[6,1] same as sql1full[7,1] ...
     ================ j=7 ================
         SKIPPED| sql1full[1,1] already matched on previous pass ...
         SKIPPED| sql1full[4,1] already matched on previous pass ...
         SKIPPED| sql1full[5,1] already matched on previous pass ...
         SKIPPED| sql1full[7,1] already matched on previous pass ...
     MATCHED| sql2full[7,1] same as sql1full[8,1] ...
     ================ j=8 ================
         SKIPPED| sql1full[1,1] already matched on previous pass ...
         SKIPPED| sql1full[4,1] already matched on previous pass ...
         SKIPPED| sql1full[5,1] already matched on previous pass ...
         SKIPPED| sql1full[7,1] already matched on previous pass ...
         SKIPPED| sql1full[8,1] already matched on previous pass ...
     MATCHED| sql2full[8,1] same as sql1full[9,1] ...
     ================ j=9 ================
         SKIPPED| sql1full[1,1] already matched on previous pass ...
         SKIPPED| sql1full[4,1] already matched on previous pass ...
         SKIPPED| sql1full[5,1] already matched on previous pass ...
         SKIPPED| sql1full[7,1] already matched on previous pass ...
         SKIPPED| sql1full[8,1] already matched on previous pass ...
         SKIPPED| sql1full[9,1] already matched on previous pass ...
     UNIQUE| sql2full[9,1] has no match ...

PHASE III - Identifying unique items in FILE1 ...
     ================ i=1 ================
         SKIPPED| sql1full[1,1] already matched on previous pass ...
     ================ i=2 ================
     UNIQUE| sql1full[2,1] has no match ...
     ================ i=3 ================
     UNIQUE| sql1full[3,1] has no match ...
     ================ i=4 ================
         SKIPPED| sql1full[4,1] already matched on previous pass ...
     ================ i=5 ================
         SKIPPED| sql1full[5,1] already matched on previous pass ...
     ================ i=6 ================
     UNIQUE| sql1full[6,1] has no match ...
     ================ i=7 ================
         SKIPPED| sql1full[7,1] already matched on previous pass ...
     ================ i=8 ================
         SKIPPED| sql1full[8,1] already matched on previous pass ...
     ================ i=9 ================
         SKIPPED| sql1full[9,1] already matched on previous pass ...

PHASE IV - Identifying Lines which are partial matches ...
     Examining sql1full[2,1] ...
         num1 = 7 ...
         Examining sql2full[2,1] ...
         num2 = 7 ...
             2|  'mike28', != 'rivera92', ...
             3|  'rogers', != 'daniella', ...
             4|  'paul', != 'john', ...
         Examining sql2full[5,1] ...
         num2 = 7 ...
             2|  'mike28', != 'james84', ...
             3|  'rogers', != 'sanders', ...
             4|  'paul', == 'paul', ...
             PARTIAL| sql1full[2,1] and sql2full[5,1] share 1 fields ...
         Examining sql2full[9,1] ...
         num2 = 7 ...
             2|  'mike28', != 'scorpio17', ...
             3|  'rogers', != 'regis', ...
             4|  'paul', != 'philbin', ...
     Examining sql1full[3,1] ...
         num1 = 7 ...
         Examining sql2full[2,1] ...
         num2 = 7 ...
             2|  'rivera92', == 'rivera92', ...
             3|  'david', != 'daniella', ...
             4|  'john', == 'john', ...
             PARTIAL| sql1full[3,1] and sql2full[2,1] share 2 fields ...
         Examining sql2full[9,1] ...
         num2 = 7 ...
             2|  'rivera92', != 'scorpio17', ...
             3|  'david', != 'regis', ...
             4|  'john', != 'philbin', ...
     Examining sql1full[6,1] ...
         num1 = 7 ...
         Examining sql2full[9,1] ...
         num2 = 7 ...
             2|  'smith34', != 'scorpio17', ...
             3|  'daniel', != 'regis', ...
             4|  'michael', != 'philbin', ...

PHASE IV - Report of matched SHARED items for each file ...
SHARED 1|001|(1, 'rogers63', 'david', 'john', 'Female', 'e6a33eee180b07e563d74fee8c2c66b8', 1)
SHARED 2|001|(1, 'rogers63', 'david', 'john', 'Female', 'e6a33eee180b07e563d74fee8c2c66b8', 1)

SHARED 1|004|(4, 'ross95', 'maria', 'sanders', 'Male', '62f0a68a4179c5cdd997189760cbcf18', 1)
SHARED 2|003|(4, 'ross95', 'maria', 'sanders', 'Male', '62f0a68a4179c5cdd997189760cbcf18', 1)

SHARED 1|005|(5, 'paul85', 'morris', 'miller', 'Female', '61bd060b07bddfecccea56a82b850ecf', 1)
SHARED 2|004|(5, 'paul85', 'morris', 'miller', 'Female', '61bd060b07bddfecccea56a82b850ecf', 1)

SHARED 1|007|(8, 'daniel53', 'mark', 'mike', 'Male', '299cbf7171ad1b2967408ed200b4e26c', 1)
SHARED 2|006|(8, 'daniel53', 'mark', 'mike', 'Male', '299cbf7171ad1b2967408ed200b4e26c', 1)

SHARED 1|008|(9, 'brooks80', 'morgan', 'maria', 'Female', 'aa736a35dc15934d67c0a999dccff8f6', 1)
SHARED 2|007|(9, 'brooks80', 'morgan', 'maria', 'Female', 'aa736a35dc15934d67c0a999dccff8f6', 1)

SHARED 1|009|(10, 'morgan65', 'paul', 'miller', 'Female', 'a28dca31f5aa5792e1cefd1dfd098569', 1)
SHARED 2|008|(10, 'morgan65', 'paul', 'miller', 'Female', 'a28dca31f5aa5792e1cefd1dfd098569', 1)


PHASE  V - Report items UNIQUE to each file ...
UNIQUE 1|006|(6, 'smith34', 'daniel', 'michael', 'Female', '7055b3d9f5cb2829c26cd7e0e601cde5', 1)

UNIQUE 2|009|(11, 'scorpio17', 'regis', 'philbin', 'male', '5aa5792e1a28dca31fcefd1dfd098569', 1)

PHASE  VI - Report items which have selected SHARED FIELDS ...
PARTIAL 1|002|(2, 'mike28', 'rogers', 'paul', 'Male', '2e7dc6b8a1598f4f75c3eaa47958ee2f', 1)
PARTIAL 2|005|(7, 'james84', 'sanders', 'paul', 'Female', 'b7f72d6eb92b45458020748c8d1a3573', 1)

PARTIAL 1|003|(3, 'rivera92', 'david', 'john', 'Male', '1c3a8e03f448d211904161a6f5849b68', 1)
PARTIAL 2|002|(3, 'rivera92', 'daniella', 'john', 'Female', '1c3a8e03f448d211904161a6f5849b68', 1)

希望有帮助。：-）

mysql 如何在shell脚本中比较并获取两个sQL文件中的不同行

1条答案

相关问题

热门标签

最新问答