如何从命令行简化aws DynamoDB查询JSON输出?

gwo2fgha  于 2022-12-24  发布在  其他
关注(0)|答案(6)|浏览(100)

我正在使用The AWS Command Line Interface for DynamoDB
当我们查询一个项目时,我们会得到一个非常详细的JSON输出,如下所示(它是从get-item构建的,为了几乎详尽(NULL类型被省略)aws command line help

{
    "Count": 1, 
    "Items": [
        {
            "Id": {
                "S": "app1"
            }, 
            "Parameters": {
                "M": {
                    "nfs": {
                        "M": {
                            "IP" : {
                                "S" : "172.16.0.178"
                            }, 
                            "defaultPath": {
                                "S": "/mnt/ebs/"
                            },
                            "key": {
                                "B": "dGhpcyB0ZXh0IGlzIGJhc2U2NC1lbmNvZGVk"
                            },
                            "activated": {
                                "BOOL": true 
                            }
                        }
                    },
                    "ws" : {
                        "M" : {
                            "number" : {
                                "N" : "5"
                            },
                            "values" : {
                                "L" : [
                                    { "S" : "12253456346346"},
                                    { "S" : "23452353463464"},
                                    { "S" : "23523453461232"},
                                    { "S" : "34645745675675"},
                                    { "S" : "46456745757575"}
                                ]
                            }
                        } 
                    }
                }
            },
            "Oldtypes": {
                "typeSS" : {"SS" : ["foo", "bar", "baz"]},
                "typeNS" : {"NS" : ["0", "1", "2", "3", "4", "5"]},
                "typeBS" : {"BS" : ["VGVybWluYXRvcgo=", "VGVybWluYXRvciAyOiBKdWRnbWVudCBEYXkK", "VGVybWluYXRvciAzOiBSaXNlIG9mIHRoZSBNYWNoaW5lcwo=", "VGVybWluYXRvciA0OiBTYWx2YXRpb24K","VGVybWluYXRvciA1OiBHZW5lc2lzCg=="]}
            }
        }
    ], 
    "ScannedCount": 1, 
    "ConsumedCapacity": null
}

有没有什么方法可以为Items器件获得更简单的输出?

{
    "ConsumedCapacity": null,
    "Count": 1,
    "Items": [
        {
            "Id": "app1",
            "Parameters": {
                "nfs": {
                    "IP": "172.16.0.178",
                    "activated": true,
                    "defaultPath": "/mnt/ebs/",
                    "key": "dGhpcyB0ZXh0IGlzIGJhc2U2NC1lbmNvZGVk"
                },
                "ws": {
                    "number": 5,
                    "values": ["12253456346346","23452353463464","23523453461232","34645745675675","46456745757575"]
                }
            },
            "Oldtypes": {
                "typeBS": ["VGVybWluYXRvcgo=", "VGVybWluYXRvciAyOiBKdWRnbWVudCBEYXkK", "VGVybWluYXRvciAzOiBSaXNlIG9mIHRoZSBNYWNoaW5lcwo=", "VGVybWluYXRvciA0OiBTYWx2YXRpb24K", "VGVybWluYXRvciA1OiBHZW5lc2lzCg=="],
                "typeNS": [0, 1, 2, 3, 4, 5],
                "typeSS": ["foo","bar","baz"]
            }
        }
    ],
    "ScannedCount": 1
}

dynamodb - AWS CLI 1.7.10 documentation中没有任何帮助。
我们必须从命令行得到结果。如果需要的话,我愿意使用其他命令行工具,如jq,但这样的jqMap对我来说似乎很复杂。

更新1:基于jq的解决方案(借助DanielH的答案)

使用jq很容易,但不太漂亮,您可以执行以下操作:

$> aws dynamodb query --table-name ConfigCatalog --key-conditions '{ "Id" : {"AttributeValueList": [{"S":"app1"}], "ComparisonOperator": "EQ"}}' | jq -r '.Items[0].Parameters.M."nfs#IP".S'

结果将为:172.16.0.178
jq-r选项提供原始输出。

更新2:基于jq的解决方案(在@jeff-mercado的帮助下)

以下是Jeff Mercado jq函数的更新和注解版本,用于解组DynamoDB输出。它将给予预期输出:

$> cat unmarshal_dynamodb.jq
def unmarshal_dynamodb:
  # DynamoDB string type
  (objects | .S)

  # DynamoDB blob type
  // (objects | .B)

  # DynamoDB number type
  // (objects | .N | strings | tonumber)

  # DynamoDB boolean type
  // (objects | .BOOL)

  # DynamoDB map type, recursion on each item
  // (objects | .M | objects | with_entries(.value |= unmarshal_dynamodb))

  # DynamoDB list type, recursion on each item
  // (objects | .L | arrays | map(unmarshal_dynamodb))

  # DynamoDB typed list type SS, string set
  // (objects | .SS | arrays | map(unmarshal_dynamodb))

  # DynamoDB typed list type NS, number set
  // (objects | .NS | arrays | map(tonumber))

  # DynamoDB typed list type BS, blob set
  // (objects | .BS | arrays | map(unmarshal_dynamodb))

  # managing others DynamoDB output entries: "Count", "Items", "ScannedCount" and "ConsumedCapcity"
  // (objects | with_entries(.value |= unmarshal_dynamodb))
  // (arrays | map(unmarshal_dynamodb))

  # leaves values
  // .
  ;
unmarshal_dynamodb

如果您将DynamoDB查询输出保存到一个文件中,比如ddb-query-result.json,您可以执行以获得所需的结果:

$> jq -f unmarshal_dynamodb.jq ddb-query-result.json
2ic8powd

2ic8powd1#

你可以用一个精心设计的函数递归地解码这些值。看起来键的名称对应于一个类型:

S -> string
N -> number
M -> map

如果可能的话,处理你想要解码的每一个案例,否则过滤掉它。你可以利用各种类型过滤器和替代运算符来完成这一任务。
x一个一个一个一个x一个一个二个一个x一个一个三个一个

7nbnzgx9

7nbnzgx92#

另一种实现post目标的方法是使用node.js扩展,如node-dynamodbdynamodb-marshaler,并构建一个node命令行工具。
使用commander package构建node.js命令行应用程序的有趣教程:Creating Your First Node.js Command-line Application
下面是一个快速而简单的oneliner,它从stdin读取一条记录,并以简化的形式打印出来:

node -e 'console.log(JSON.stringify(require("aws-sdk").DynamoDB.Converter.unmarshall(JSON.parse(require("fs").readFileSync(0, "utf-8")))))'
sbtkgmzw

sbtkgmzw3#

下面是可以处理空值的jq解决方案的更新版本。

$> cat unmarshal_dynamodb.jq
def unmarshal_dynamodb:
  # null
  walk( if type == "object" and .NULL then . |= null else . end ) |

  # DynamoDB string type
  (objects | .S)

  # DynamoDB blob type
  // (objects | .B)

  # DynamoDB number type
  // (objects | .N | strings | tonumber)

  # DynamoDB boolean type
  // (objects | .BOOL)

  # DynamoDB map type, recursion on each item
  // (objects | .M | objects | with_entries(.value |= unmarshal_dynamodb))

  # DynamoDB list type, recursion on each item
  // (objects | .L | arrays | map(unmarshal_dynamodb))

  # DynamoDB typed list type SS, string set
  // (objects | .SS | arrays | map(unmarshal_dynamodb))

  # DynamoDB typed list type NS, number set
  // (objects | .NS | arrays | map(tonumber))

  # DynamoDB typed list type BS, blob set
  // (objects | .BS | arrays | map(unmarshal_dynamodb))

  # managing others DynamoDB output entries: "Count", "Items", "ScannedCount" and "ConsumedCapcity"
  // (objects | with_entries(.value |= unmarshal_dynamodb))
  // (arrays | map(unmarshal_dynamodb))

  # leaves values
  // .
  ;
unmarshal_dynamodb
$> jq -f unmarshal_dynamodb.jq ddb-query-result.json

原始版本归功于@jeff-mercado和@herve。

mrwjdhj3

mrwjdhj34#

据我所知,没有其他输出像您发布的“冗长”的输出。因此,我认为,您不能避免中间工具,如jqsed
在这篇文章中有几个转换原始发电机数据的建议:
从DynamoDB导出数据
也许您可以将其中一个脚本与jqsed结合使用

nhaq1z21

nhaq1z215#

这是另一种方法。这可能有点残酷,但它展示了基本思想。

def unwanted:    ["B","BOOL","M","S","L","BS","SS"];
def fixpath(p):  [ p[] | select( unwanted[[.]]==[] ) ];
def fixnum(p;v):
    if   p[-2]=="NS" then [p[:-2]+p[-1:],(v|tonumber)]
    elif p[-1]=="N" then [p[:-1], (v|tonumber)]
    else [p,v] end;

reduce (tostream|select(length==2)) as [$p,$v] (
    {}
  ; fixnum(fixpath($p);$v) as [$fp,$fv]      
  | setpath($fp;$fv)
)

在线试用!
样品运行(假设过滤器在filter.jq中,数据在data.json中)

$ jq -M -f filter.jq data.json
{
  "ConsumedCapacity": null,
  "Count": 1,
  "Items": [
    {
      "Id": "app1",
      "Oldtypes": {
        "typeBS": [
          "VGVybWluYXRvcgo=",
          "VGVybWluYXRvciAyOiBKdWRnbWVudCBEYXkK",
          "VGVybWluYXRvciAzOiBSaXNlIG9mIHRoZSBNYWNoaW5lcwo=",
          "VGVybWluYXRvciA0OiBTYWx2YXRpb24K",
          "VGVybWluYXRvciA1OiBHZW5lc2lzCg=="
        ],
        "typeNS": [
          0,
          1,
          2,
          3,
          4,
          5
        ],
        "typeSS": [
          "foo",
          "bar",
          "baz"
        ]
      },
      "Parameters": {
        "nfs": {
          "IP": "172.16.0.178",
          "activated": true,
          "defaultPath": "/mnt/ebs/",
          "key": "dGhpcyB0ZXh0IGlzIGJhc2U2NC1lbmNvZGVk"
        },
        "ws": {
          "number": 5,
          "values": [
            "12253456346346",
            "23452353463464",
            "23523453461232",
            "34645745675675",
            "46456745757575"
          ]
        }
      }
    }
  ],
  "ScannedCount": 1
}
41zrol4v

41zrol4v6#

下面是node中的一个脚本。
我将文件命名为reformat.js,但您可以随意命名它

'use strict';

/**
 * This script will parse the AWS dynamo CLI JSON response into JS.
 * This parses out the type keys in the objects.
 */

const fs = require('fs');

const rawData = fs.readFileSync('response.json'); // Import the raw response from the dynamoDB CLI query
const response = JSON.parse(rawData); // Parse to JS to make it easier to work with.

function shallowFormatData(data){
  // Loop through the object and replace the Type key with the value.
  for(const key in data){
    const innerRawObject = data[key]
    const innerKeys = Object.keys(innerRawObject)
    innerKeys.forEach(innerKey => {
      const innerFormattedObject = innerRawObject[innerKey]
      if(typeof innerFormattedObject == 'object'){
        data[key] = shallowFormatData(innerFormattedObject) // Recursively call formatData if there are nested objects
      }else{
        // Null items come back with a type of "NULL" and value of true. we want to set the value to null if the type is "NULL"
        data[key] = innerKey == 'NULL' ? null : innerFormattedObject
      }
    })
  }
  return data
}

// this only gets the Items and not the meta data.
const result = response.Items.map(item => {
  return shallowFormatData(item)
})

console.dir(result, {'maxArrayLength': null}); // There is a default limit on how big a console.log can be, this removes that limit.

步骤1)通过CLI运行dynamoDB查询并将其保存到JSON文件中。要保存来自CLI的响应,只需添加> somefile.json

// Example: Run in CLI

$ aws dynamodb query --table-name stage_requests-service_FoxEvents \
 --key-condition-expression "PK = :v1" \
 --expression-attribute-values file://expression-attributes.json > response.json

expression-attributes.json

{
  ":v1": {"S": "SOMEVAL"}
}

如果您需要有关如何查询DynamoDB的更多信息,请查看文档www.example.com中的这些示例https://docs.aws.amazon.com/cli/latest/reference/dynamodb/query.html#examples
现在您已经有了需要重新格式化的数据的JSON文件,请从终端运行format.js脚本
步骤2)

// Run this in your terminal
$ node reformat.js > formatted.js

如果您想要JSON对象输出,只需在脚本末尾的console.dir中放入JSON.stringify(result),就应该有一个干净的JS对象输出

相关问题