geopandas读取geojson文件并写入Spatialite数据库

关于geojson文件的生成见:https://coloraven.github.io/2023/12/GEOJSON文件的生成 部分参考:https://www.giacomodebidda.com/posts/export-a-geodataframe-to-spatialite/

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import sqlite3
import pandas as pd
import geopandas as gpd
import uuid

# %%
pd.options.display.max_rows = 10
gdf = gpd.read_file('world.geojson') # `world.geojson`(大小:`23+MB`)生成自`https://geojson-maps.ash.ms`,选择`High resolution`加`All`。
# gdf.crs
# 为每条记录生成一个唯一的 UUID,便于后面更新时找到关联点
gdf['uuid'] = gdf.apply(lambda _: "".join(str(uuid.uuid4()).split('-')), axis=1)
# gdf.head()
# 设置坐标系
# gdf.crs = {'init': 'epsg:3003', 'no_defs': True}
# 绘图
# gdf.plot(figsize=(8, 8), facecolor='w', edgecolor='k')

# %%
DB_PATH = "test_save.db"
# 先创建普通df
df = gdf.drop(columns=['geometry'])
# 在数据库中创建普通表
with sqlite3.connect(DB_PATH) as conn:
    df.to_sql("world_geo", conn, if_exists='replace', index=False)
    # 新增地理列
    conn.enable_load_extension(True)
    conn.load_extension("mod_spatialite")
    conn.execute("SELECT InitSpatialMetaData(1);")
    conn.execute(
        """
        SELECT AddGeometryColumn('world_geo', 'geometry', 4326, 'GEOMETRY', 2);
        """
    )

# %%
with sqlite3.connect(DB_PATH) as conn:
    conn.enable_load_extension(True)
    conn.load_extension("mod_spatialite")
    # cur = conn.cursor()
    # for index, row in gdf.iterrows():
    #     wkt = row['geometry'].wkt
    #     cur.execute("UPDATE world_geo SET geometry = GeomFromText(?, 4326) WHERE uuid = ?", (wkt, row['uuid']))
    # if index <1:
    #     print('geometry',type(row['geometry']))
    #     print("wkt",type(wkt))
    # 准备包含所有更新的数据的列表
    data_to_update = [(row['geometry'].wkt, row['uuid']) for _, row in gdf.iterrows()]
    # 使用 executemany 一次性执行所有更新
    conn.executemany("UPDATE world_geo SET geometry = GeomFromText(?, 4326) WHERE uuid = ?", data_to_update)
    conn.commit()
    # 写入完毕创建空间索引
    conn.execute("SELECT CreateSpatialIndex('your_table', 'geometry_column');")
    conn.commit()
conn.close()
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import sqlite3,time
DB_PATH = "test_save.db"
with sqlite3.connect(DB_PATH) as conn:
    conn.enable_load_extension(True)
    conn.load_extension("mod_spatialite")
    lat, lon = 42.58220940617247, 92.5976562500021
    # SQL query to find the feature containing the point
    sql_query = f"""
    SELECT name_zh
    FROM world_geo
    WHERE ST_Contains(geometry, MakePoint({lon}, {lat}, 4326))
    """
    # Execute the query
    start = time.time()
    for i in range(1000):
        cur = conn.cursor()
        cur.execute(sql_query)
        result = cur.fetchone()
    print((time.time() - start)/1000)
# 索引前0.01720004343986511
# 索引后0.017798675537109374
# Close the connection
conn.close()
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
package main

import (
	"database/sql"
	"fmt"
	"log"
	"sync"
	"time"

	"github.com/mattn/go-sqlite3"
)

func main() {
	sql.Register("sqlite3_with_spatialite",
		&sqlite3.SQLiteDriver{
			Extensions: []string{"mod_spatialite"},
		})
	// os.Remove("foo.db")
	db, err := sql.Open("sqlite3_with_spatialite", "test_save.db")
	if err != nil {
		log.Panic(err)
	}
	defer db.Close()
	db.SetMaxOpenConns(100)
	db.Exec("PRAGMA journal_mode=WAL;")

	// q = "SELECT spatialite_version();"
	lat, lon := 92.5976562500021, 42.58220940617247
	start := time.Now()
	var wg sync.WaitGroup
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			var r interface{}
			q := "SELECT name_zh FROM world_geo WHERE ST_Contains(geometry, MakePoint(?,?, 4326));"
			db.QueryRow(q, lat, lon).Scan(&r)
			fmt.Println(r)
		}()
	}
	wg.Wait()
	fmt.Println(time.Now().Sub(start))
}