Environment:
Hardware:
8-core, 32 GB guest OS on VMware
1 TB of data on vSAN
Software:
OS: CentOS 7 64-bit
JDK version: 11.0.7
PostgreSQL version: 13
TimescaleDB version: 2.3.0
PostgreSQL JDBC version: 42.2.18
I have already run timescaledb-tune to optimize postgresql.conf.
My data set is very large: the old table contains 832,818,510 records covering 7 days. I followed TimescaleDB's data migration instructions.
My migration steps were:
CREATE TABLE test_lp (LIKE lp_interval INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES);
SELECT create_hypertable('test_lp', 'read_time', chunk_time_interval => INTERVAL '1 hour');
INSERT INTO test_lp SELECT * FROM lp_interval;
The table schema is:
create table test_lp
(
meterno varchar(11) not null,
reading_id varchar(60) not null,
read_time timestamp not null,
version bigint,
ami_record_num bigint,
flags bigint,
fail_code bigint,
value double precision,
validation_status varchar(255),
custno varchar(11) not null,
insert_date timestamp not null,
constraint test_lp_pkey
primary key (custno, meterno, reading_id, read_time)
);
alter table test_lp
owner to appuser;
create index test_lp_read_time_idx
on test_lp (read_time desc);
The overall average insert speed of the migration was about 50,000 records/second, which looks fine.
Then I ran my simple Java program on the same machine where TimescaleDB is installed. The Java program uses a single connection, inserts more data into the new table test_lp, and performs a commit for every 1,000 inserted records.
After measuring, the Java program's average insert speed is only about 530 records per second.
I also tried clearing out the table test_lp and re-running the Java program; the insert speed was still as slow as above.
Why is the Java insert speed so slow? Am I missing something?
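For reference, here is a minimal sketch of the kind of insert loop described above. The actual program is not shown in the question; this sketch assumes standard JDBC batching (addBatch/executeBatch) with a commit every 1,000 rows. The connection URL, credentials, and synthetic data are placeholders, while the columns follow the test_lp schema.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.time.LocalDateTime;

// Hypothetical reconstruction of the loader: one connection,
// batched single-row INSERTs, commit every 1,000 rows.
public class TestLpLoader {
    private static final int COMMIT_EVERY = 1_000;

    public static void main(String[] args) throws SQLException {
        String url = "jdbc:postgresql://localhost:5432/mydb"; // placeholder database
        try (Connection conn = DriverManager.getConnection(url, "appuser", "password")) {
            conn.setAutoCommit(false);
            String sql = "INSERT INTO test_lp (meterno, reading_id, read_time, version, "
                       + "ami_record_num, flags, fail_code, value, validation_status, "
                       + "custno, insert_date) VALUES (?,?,?,?,?,?,?,?,?,?,?)";
            try (PreparedStatement ps = conn.prepareStatement(sql)) {
                LocalDateTime now = LocalDateTime.now();
                for (long i = 1; i <= 100_000; i++) {          // synthetic rows for illustration
                    ps.setString(1, "M" + (i % 1000));
                    ps.setString(2, "R" + i);                  // unique reading_id keeps the PK happy
                    ps.setTimestamp(3, Timestamp.valueOf(now.plusSeconds(i)));
                    ps.setLong(4, 1);
                    ps.setLong(5, i);
                    ps.setLong(6, 0);
                    ps.setLong(7, 0);
                    ps.setDouble(8, Math.random());
                    ps.setString(9, "VALID");
                    ps.setString(10, "C" + (i % 1000));
                    ps.setTimestamp(11, Timestamp.valueOf(now));
                    ps.addBatch();
                    if (i % COMMIT_EVERY == 0) {
                        ps.executeBatch();  // flush the batch to the server
                        conn.commit();      // commit every 1,000 rows, as described
                    }
                }
                ps.executeBatch();          // flush any remaining rows
                conn.commit();
            }
        }
    }
}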
Below is my postgresql.conf, as reported by running show all in psql:
allow_system_table_mods = off
application_name = PostgreSQL JDBC Driver
archive_cleanup_command =
archive_command = (disabled)
archive_mode = off
archive_timeout = 0
array_nulls = on
authentication_timeout = 1min
autovacuum = on
autovacuum_analyze_scale_factor = 0.1
autovacuum_analyze_threshold = 50
autovacuum_freeze_max_age = 200000000
autovacuum_max_workers = 10
autovacuum_multixact_freeze_max_age = 400000000
autovacuum_naptime = 10s
autovacuum_vacuum_cost_delay = 2ms
autovacuum_vacuum_cost_limit = -1
autovacuum_vacuum_insert_scale_factor = 0.2
autovacuum_vacuum_insert_threshold = 1000
autovacuum_vacuum_scale_factor = 0.2
autovacuum_vacuum_threshold = 50
autovacuum_work_mem = -1
backend_flush_after = 0
backslash_quote = safe_encoding
backtrace_functions =
bgwriter_delay = 200ms
bgwriter_flush_after = 0
bgwriter_lru_maxpages = 100
bgwriter_lru_multiplier = 2
block_size = 8192
bonjour = off
bonjour_name =
bytea_output = hex
check_function_bodies = on
checkpoint_completion_target = 0.9
checkpoint_flush_after = 256kB
checkpoint_timeout = 15min
checkpoint_warning = 30s
client_encoding = UTF8
client_min_messages = notice
cluster_name =
commit_delay = 0
commit_siblings = 5
constraint_exclusion = partition
cpu_index_tuple_cost = 0.005
cpu_operator_cost = 0.0025
cpu_tuple_cost = 0.01
cursor_tuple_fraction = 0.1
data_checksums = off
data_directory_mode = 0700
data_sync_retry = off
DateStyle = ISO, YMD
db_user_namespace = off
deadlock_timeout = 1s
debug_assertions = off
debug_pretty_print = on
debug_print_parse = off
debug_print_plan = off
debug_print_rewritten = off
default_statistics_target = 500
default_table_access_method = heap
default_tablespace =
default_text_search_config = pg_catalog.simple
default_transaction_deferrable = off
default_transaction_isolation = read committed
default_transaction_read_only = off
dynamic_shared_memory_type = posix
effective_cache_size = 22GB
effective_io_concurrency = 200
enable_bitmapscan = on
enable_gathermerge = on
enable_hashagg = on
enable_hashjoin = on
enable_incremental_sort = on
enable_indexonlyscan = on
enable_indexscan = on
enable_material = on
enable_mergejoin = on
enable_nestloop = on
enable_parallel_append = on
enable_parallel_hash = on
enable_partition_pruning = on
enable_partitionwise_aggregate = on
enable_partitionwise_join = on
enable_seqscan = on
enable_sort = on
enable_tidscan = on
escape_string_warning = on
event_source = PostgreSQL
exit_on_error = off
extra_float_digits = 3
force_parallel_mode = off
from_collapse_limit = 8
fsync = on
full_page_writes = on
geqo = on
geqo_effort = 5
geqo_generations = 0
geqo_pool_size = 0
geqo_seed = 0
geqo_selection_bias = 2
geqo_threshold = 12
gin_fuzzy_search_limit = 0
gin_pending_list_limit = 4MB
hash_mem_multiplier = 1
hot_standby = on
hot_standby_feedback = off
huge_pages = try
idle_in_transaction_session_timeout = 0
ignore_checksum_failure = off
ignore_invalid_pages = off
ignore_system_indexes = off
integer_datetimes = on
IntervalStyle = postgres
jit = on
jit_above_cost = 100000
jit_debugging_support = off
jit_dump_bitcode = off
jit_expressions = on
jit_inline_above_cost = 500000
jit_optimize_above_cost = 500000
jit_profiling_support = off
jit_tuple_deforming = on
join_collapse_limit = 8
krb_caseins_users = off
lc_collate = zh_TW.UTF-8
lc_ctype = zh_TW.UTF-8
lc_messages = zh_TW.UTF-8
lc_monetary = zh_TW.UTF-8
lc_numeric = zh_TW.UTF-8
lc_time = zh_TW.UTF-8
listen_addresses = *
lo_compat_privileges = off
local_preload_libraries =
lock_timeout = 0
log_autovacuum_min_duration = -1
log_checkpoints = off
log_connections = off
log_destination = stderr
log_disconnections = off
log_duration = off
log_error_verbosity = default
log_executor_stats = off
log_file_mode = 0600
log_hostname = off
log_line_prefix = %m [%p]
log_lock_waits = off
log_min_duration_sample = -1
log_min_duration_statement = -1
log_min_error_statement = error
log_min_messages = warning
log_parameter_max_length = -1
log_parameter_max_length_on_error = 0
log_parser_stats = off
log_planner_stats = off
log_replication_commands = off
log_rotation_age = 1d
log_rotation_size = 0
log_statement = none
log_statement_sample_rate = 1
log_statement_stats = off
log_temp_files = -1
log_timezone = Asia/Taipei
log_transaction_sample_rate = 0
log_truncate_on_rotation = on
logging_collector = on
logical_decoding_work_mem = 64MB
maintenance_io_concurrency = 10
maintenance_work_mem = 420MB
max_connections = 100
max_files_per_process = 1000
max_function_args = 100
max_identifier_length = 63
max_index_keys = 32
max_locks_per_transaction = 256
max_logical_replication_workers = 4
max_parallel_maintenance_workers = 4
max_parallel_workers = 8
max_parallel_workers_per_gather = 4
max_pred_locks_per_page = 2
max_pred_locks_per_relation = -2
max_pred_locks_per_transaction = 64
max_prepared_transactions = 0
max_replication_slots = 10
max_slot_wal_keep_size = -1
max_stack_depth = 2MB
max_standby_archive_delay = 30s
max_standby_streaming_delay = 30s
max_sync_workers_per_subscription = 2
max_wal_senders = 0
max_wal_size = 32GB
max_worker_processes = 19
min_parallel_index_scan_size = 512kB
min_parallel_table_scan_size = 8MB
min_wal_size = 16GB
old_snapshot_threshold = -1
operator_precedence_warning = off
parallel_leader_participation = on
parallel_setup_cost = 1000
parallel_tuple_cost = 0.1
password_encryption = scram-sha-256
pg_stat_statements.max = 5000
pg_stat_statements.save = on
pg_stat_statements.track = top
pg_stat_statements.track_planning = off
pg_stat_statements.track_utility = on
plan_cache_mode = auto
port = 5432
post_auth_delay = 0
pre_auth_delay = 0
primary_slot_name =
promote_trigger_file =
quote_all_identifiers = off
random_page_cost = 1.1
recovery_end_command =
recovery_min_apply_delay = 0
recovery_target =
recovery_target_action = pause
recovery_target_inclusive = on
recovery_target_lsn =
recovery_target_name =
recovery_target_time =
recovery_target_timeline = latest
recovery_target_xid =
restart_after_crash = on
restore_command =
row_security = on
search_path = public
segment_size = 1GB
seq_page_cost = 1
server_encoding = UTF8
server_version = 13.3
server_version_num = 130003
session_replication_role = origin
shared_buffers = 8GB
shared_memory_type = mmap
ssl = off
ssl_ca_file =
ssl_cert_file = server.crt
ssl_crl_file =
ssl_key_file = server.key
ssl_library = OpenSSL
ssl_passphrase_command_supports_reload = off
ssl_prefer_server_ciphers = on
standard_conforming_strings = on
statement_timeout = 0
superuser_reserved_connections = 3
synchronize_seqscans = on
synchronous_commit = off
synchronous_standby_names =
syslog_facility = local0
syslog_ident = postgres
syslog_sequence_numbers = on
syslog_split_messages = on
tcp_keepalives_count = 9
tcp_keepalives_idle = 7200
tcp_keepalives_interval = 75
tcp_user_timeout = 0
temp_buffers = 8MB
temp_file_limit = -1
temp_tablespaces =
timescaledb.disable_load = off
timescaledb.enable_2pc = on
timescaledb.enable_async_append = on
timescaledb.enable_cagg_reorder_groupby = on
timescaledb.enable_chunk_append = on
timescaledb.enable_client_ddl_on_data_nodes = off
timescaledb.enable_connection_binary_data = on
timescaledb.enable_constraint_aware_append = on
timescaledb.enable_constraint_exclusion = on
timescaledb.enable_optimizations = on
timescaledb.enable_ordered_append = on
timescaledb.enable_parallel_chunk_append = on
timescaledb.enable_per_data_node_queries = on
timescaledb.enable_qual_propagation = on
timescaledb.enable_remote_explain = off
timescaledb.enable_runtime_exclusion = on
timescaledb.enable_skipscan = on
timescaledb.enable_transparent_decompression = on
timescaledb.last_tuned = 2021-07-05T14:31:04+08:00
timescaledb.last_tuned_version = 0.11.0
timescaledb.license = timescale
timescaledb.max_background_workers = 8
timescaledb.max_cached_chunks_per_hypertable = 100
timescaledb.max_insert_batch_size = 1000
timescaledb.max_open_chunks_per_insert = 1342
timescaledb.remote_data_fetcher = cursor
timescaledb.restoring = off
timescaledb.telemetry_level = basic
timescaledb_telemetry.cloud =
TimeZone = UTC
timezone_abbreviations = Default
trace_notify = off
trace_recovery_messages = log
trace_sort = off
track_activities = on
track_activity_query_size = 1kB
track_commit_timestamp = off
track_counts = on
track_functions = pl
track_io_timing = on
transaction_deferrable = off
transaction_isolation = read committed
transaction_read_only = off
transform_null_equals = off
unix_socket_group =
unix_socket_permissions = 0777
update_process_title = on
vacuum_cleanup_index_scale_factor = 0.1
vacuum_cost_delay = 0
vacuum_cost_limit = 200
vacuum_cost_page_dirty = 20
vacuum_cost_page_hit = 1
vacuum_cost_page_miss = 10
vacuum_defer_cleanup_age = 0
vacuum_freeze_min_age = 50000000
vacuum_freeze_table_age = 150000000
vacuum_multixact_freeze_min_age = 5000000
vacuum_multixact_freeze_table_age = 150000000
wal_block_size = 8192
wal_buffers = 16MB
wal_compression = on
wal_consistency_checking =
wal_init_zero = on
wal_keep_size = 0
wal_level = replica
wal_log_hints = off
wal_receiver_create_temp_slot = off
wal_receiver_status_interval = 10s
wal_receiver_timeout = 1min
wal_recycle = on
wal_retrieve_retry_interval = 5s
wal_segment_size = 16MB
wal_sender_timeout = 1min
wal_skip_threshold = 2MB
wal_sync_method = fdatasync
wal_writer_delay = 200ms
wal_writer_flush_after = 1MB
work_mem = 32MB
xmlbinary = base64
xmloption = content
zero_damaged_pages = off
2 Answers

Answer 1 (zte4gxcn):
As I recall, the PostgreSQL JDBC driver has a special flag that you need to add to the database connection string to make it rewrite batched inserts into multi-row inserts instead of batches of single-row inserts. This change can speed up inserts into TimescaleDB/Postgres by 10-20x (at least).
The parameter is called reWriteBatchedInserts; you can simply add
reWriteBatchedInserts=true
to the connection string, and some drivers also let you set it programmatically. I think https://vladmihalcea.com/postgresql-multi-row-insert-rewritebatchedinserts-property/ has more information. It is a slightly odd thing where you essentially just cast an incantation and suddenly everything works... Hope this helps!
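For illustration, a minimal sketch (not part of the original answer) of the two ways to enable the rewrite, either as a URL parameter or via connection properties; the pgjdbc property name is reWriteBatchedInserts, and the database name and credentials below are placeholders:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.Properties;

// Sketch: enabling batched-insert rewriting in the PostgreSQL JDBC driver.
public class RewriteBatchedInsertsExample {
    public static void main(String[] args) throws SQLException {
        // Option 1: as a URL parameter
        // jdbc:postgresql://localhost:5432/mydb?reWriteBatchedInserts=true

        // Option 2: programmatically, via connection properties
        Properties props = new Properties();
        props.setProperty("user", "appuser");
        props.setProperty("password", "password");
        props.setProperty("reWriteBatchedInserts", "true");

        try (Connection conn = DriverManager.getConnection(
                "jdbc:postgresql://localhost:5432/mydb", props)) {
            conn.setAutoCommit(false);
            // ... same addBatch()/executeBatch() loop as before; the driver now
            // rewrites each batch into multi-row INSERT statements on the wire.
        }
    }
}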
Answer 2 (oknwwptz):
Is the data loader running on the same network? Have you considered network latency?
I suggest you try the TSBS benchmark tool on that machine to find out whether the problem lies in the Java client or in the machine itself: https://github.com/timescale/tsbs