#!/usr/bin/env bash
#
# Export a MySQL (RDS) table to CSV in fixed-size LIMIT/OFFSET batches, split
# every batch into chunks of at most 64999 lines under ./parts, and write one
# COPY statement per chunk to import.sql.
#
# Usage: replace every <placeholder> value below, then run the script.
# Requires: the mysql command-line client, GNU sed (\t in patterns), GNU split
# (--additional-suffix) and GNU sort (-V).
set -euo pipefail

# Connection settings and query pieces -- replace the <placeholders>.
# NOTE: a password passed with -p is visible in the process list; prefer a
# MySQL option file (e.g. ~/.my.cnf) when that matters.
DB_HOST='<rdshostname>'
DB_USER='<username>'
DB_PASSWORD='<password>'
DB_NAME='<db>'
TABLE='<table>'
ORDER_COLUMN='<timestamp>'

# Maximum number of rows to export/total rows in table, set a bit higher if
# live data being written
MAX=500000000
# Size of each export batch
STEP=1000000

mkdir -p parts

for (( c = 0; c <= MAX; c += STEP )); do
  # One intermediate CSV per batch, named after the batch's starting offset.
  FILE_NAME="export${c}"

  # mysql emits tab-separated rows when its output is piped; sed turns each row
  # into a fully quoted CSV line. Embedded double quotes are doubled first so
  # that they cannot terminate a field early.
  mysql --port 3306 --protocol=TCP -h "${DB_HOST}" -u "${DB_USER}" \
      -p"${DB_PASSWORD}" --quick --database="${DB_NAME}" \
      -e "select column1, column2, column3 from ${TABLE} order by ${ORDER_COLUMN} ASC limit ${STEP} offset ${c}" \
    | sed 's/"/""/g;s/\t/","/g;s/^/"/;s/$/"/' \
    > "${FILE_NAME}.csv"

  # An empty file means this offset is past the last row (mysql is expected to
  # print nothing for an empty result set), so later offsets are pointless.
  if [[ ! -s "${FILE_NAME}.csv" ]]; then
    rm -f -- "${FILE_NAME}.csv"
    break
  fi

  # split down in to chunks under questdbs 65k line limit
  split -d -l 64999 --additional-suffix=.csv "${FILE_NAME}.csv" "./parts/${FILE_NAME}"
done

# print out import statements to a file
# The file is rewritten rather than appended to: every entry of ./parts is
# listed on each run, so appending would only duplicate statements.
# sort -V orders the names in natural (version) order, as `ls -v` would.
shopt -s nullglob
parts=(./parts/*)
: > import.sql
if (( ${#parts[@]} > 0 )); then
  printf '%s\n' "${parts[@]##*/}" | sort -V | while IFS= read -r part; do
    printf "COPY reading from '%s';\n" "${part}"
  done >> import.sql
fi
另一种稍微不同的方法可能会更快,具体取决于您已建立的索引,该方法是按月逐个查看数据:
#!/usr/bin/env bash
#
# Export a MySQL (RDS) table to CSV one calendar month at a time, split every
# monthly file into chunks of at most 64999 lines under ./parts, and write one
# COPY statement per chunk to import.sql.
#
# Usage: replace every <placeholder> value below, then run the script.
# Requires: the mysql command-line client, GNU sed (\t in patterns), GNU split
# (--additional-suffix) and GNU sort (-V).
set -euo pipefail

# Connection settings and query pieces -- replace the <placeholders>.
# NOTE: a password passed with -p is visible in the process list; prefer a
# MySQL option file (e.g. ~/.my.cnf) when that matters.
DB_HOST='<rdshost>'
DB_USER='app'
DB_PASSWORD='<password>'
DB_NAME='<database>'
TABLE='<table>'
COLUMN_1='<column1>'
COLUMN_2='<column2>'
DATE_COLUMN='<dateColumn>'

# First and last calendar year to export (both inclusive).
START_YEAR=2020
END_YEAR=2022

mkdir -p parts

for (( YEAR = START_YEAR; YEAR <= END_YEAR; YEAR++ )); do
  for (( MONTH = 1; MONTH <= 12; MONTH++ )); do
    # Exclusive upper bound of the window: the first day of the following
    # month, rolling over into January of the next year after December.
    if (( MONTH < 12 )); then
      NEXT_MONTH=$(( MONTH + 1 ))
      NEXT_YEAR=${YEAR}
    else
      NEXT_MONTH=1
      NEXT_YEAR=$(( YEAR + 1 ))
    fi

    FILE_NAME="export-${YEAR}-${MONTH}-to-${NEXT_YEAR}-${NEXT_MONTH}"

    # Half-open window [first of this month, first of next month). Both bounds
    # reference the same table-qualified date column. The date is exported as
    # the rounded UNIX timestamp multiplied by 1000000.
    # mysql emits tab-separated rows when its output is piped; sed turns each
    # row into a fully quoted CSV line, doubling embedded double quotes first.
    mysql --port 3306 --protocol=TCP -h "${DB_HOST}" -u "${DB_USER}" \
        -p"${DB_PASSWORD}" --quick --database="${DB_NAME}" \
        -e "select ${COLUMN_1}, ${COLUMN_2}, round(UNIX_TIMESTAMP(${DATE_COLUMN})) * 1000000 as date from ${TABLE} where ${TABLE}.${DATE_COLUMN} >= '${YEAR}-${MONTH}-01 00:00:00' and ${TABLE}.${DATE_COLUMN} < '${NEXT_YEAR}-${NEXT_MONTH}-01 00:00:00' order by ${TABLE}.${DATE_COLUMN} ASC" \
      | sed 's/"/""/g;s/\t/","/g;s/^/"/;s/$/"/' \
      > "${FILE_NAME}.csv"

    # split down in to chunks under questdbs 65k line limit
    split -d -l 64999 --additional-suffix=.csv "${FILE_NAME}.csv" "./parts/${FILE_NAME}"
  done
done

# print out import statements to a file
# The file is rewritten rather than appended to: every entry of ./parts is
# listed on each run, so appending would only duplicate statements.
# sort -V orders the names in natural (version) order, as `ls -v` would.
shopt -s nullglob
parts=(./parts/*)
: > import.sql
if (( ${#parts[@]} > 0 )); then
  printf '%s\n' "${parts[@]##*/}" | sort -V | while IFS= read -r part; do
    printf "COPY reading from '%s';\n" "${part}"
  done >> import.sql
fi
您可以使用 SELECT ... INTO OUTFILE 语法将数据导出到服务器上的一个文件中。然后,您可以使用 mysql 命令行客户端连接到 RDS 实例并从服务器检索该文件。唯一的小问题是,除非 RDS 实例位于 VPC 中,否则 mysql 无法直接连接到它;因此,如果实例不在 VPC 中,您需要先连接到堡垒主机,再从那里连接到 RDS 实例。SELECT * FROM mydb.mytable INTO OUTFILE '/tmp/mytable.csv' FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY '\n'; 然后,您可以从服务器获取文件:mysql -uusername -p -hmyrds.rds.amazonaws.com -P3306 当 mysql 命令行客户端出现提示符时,您可以使用 SELECT 命令检索文件:SELECT LOAD_FILE('/tmp/mytable.csv'); 然后,可以使用以下命令将输出写入文件:SELECT LOAD_FILE('/tmp/mytable.csv') INTO OUTFILE '/tmp/mytable_out.csv'; 然后您可以使用 mysql 命令行客户端连接到 QuestDB 实例并加载数据。如果您想检索特定列,则可以在 RDS 服务器上创建文件时在 SELECT 命令中指定列名:SELECT column1, column2, column3 FROM mydb.mytable INTO OUTFILE '/tmp/mytable.csv' FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY '\n';
3条答案
按热度按时间uidvcgyl1#
终于在这一点的帮助下想通了:Exporting a table from Amazon RDS into a CSV file
只要您有某种顺序列(例如,自动递增的整数PK或日期列),此解决方案就能很好地工作。如果您有大量数据,请确保您的日期列已编制索引!
另一种稍微不同的方法可能会更快,具体取决于您已建立的索引,该方法是按月逐个查看数据:
上述脚本将输出一个 import.sql 文件,其中包含导入数据所需的所有 SQL 语句。请参阅:https://questdb.io/docs/guides/importing-data/
lx0bsm1f2#
编辑:此解决方案仅在导出整个表时有效,在导出特定列时无效
您可以尝试使用 mysqldump,并通过额外的参数将输出转换为 CSV。AWS 文档说明了如何在 RDS 上使用 mysqldump;您还可以参考这个 Stack Overflow 问题,了解如何使用额外参数转换为 CSV。
我在这里引用的相关部分,从最后一个链接(因为有很多答案和评论)
xeufq47z3#
您可以使用 SELECT ... INTO OUTFILE 语法将数据导出到服务器上的一个文件中。然后,您可以使用 mysql 命令行客户端连接到 RDS 实例并从服务器检索该文件。唯一的小问题是,除非 RDS 实例位于 VPC 中,否则 mysql 无法直接连接到它;因此,如果实例不在 VPC 中,您需要先连接到堡垒主机,再从那里连接到 RDS 实例。
SELECT * FROM mydb.mytable INTO OUTFILE '/tmp/mytable.csv' FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY '\n';
然后,您可以从服务器获取文件:mysql -uusername -p -hmyrds.rds.amazonaws.com -P3306
当 mysql 命令行客户端出现提示符时,您可以使用 SELECT 命令检索文件:SELECT LOAD_FILE('/tmp/mytable.csv');
然后,可以使用以下命令将输出通过管道传输到文件:SELECT LOAD_FILE('/tmp/mytable.csv') INTO OUTFILE '/tmp/mytable_out.csv';
然后您可以使用 mysql 命令行客户端连接到 QuestDB 实例并加载数据。如果您想检索特定列,则可以在 RDS 服务器上创建文件时在 SELECT 命令中指定列名:
SELECT column1, column2, column3 FROM mydb.mytable INTO OUTFILE '/tmp/mytable.csv' FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY '\n';