1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
from faker import Faker
import random
import uuid
from datetime import datetime, timedelta
import pandas as pd
# Initialize the Faker library with the Chinese (zh_CN) locale
fake = Faker('zh_CN')

# Sizes of the synthetic data set
n_users = 100  # Number of usernames to draw
n_departments = 5  # Number of departments to draw
n_records = 1000000  # Number of records to generate

# Build the pools of usernames and departments.
# NOTE(review): these draws do not go through `fake.unique`, so a name or a
# company could in principle repeat; a repeated username simply shares one
# entry in `username_to_ids` below.
unique_users = [fake.name() for _ in range(n_users)]
departments = [fake.company() for _ in range(n_departments)]

# Give every username between one and three user_ids. The ids come from
# `fake.unique`, so no id is ever handed out twice across users.
username_to_ids = {
    username: [
        fake.unique.random_number(digits=4)
        for _ in range(random.randint(1, 3))
    ]
    for username in unique_users
}

# Map each user_id to a single department. Only the id lists are needed here,
# so iterate over the values rather than the (username, ids) pairs.
user_id_to_department = {}
for ids in username_to_ids.values():
    for user_id in ids:
        user_id_to_department[user_id] = random.choice(departments)
# Generate the random operation-log records.
# Loop invariants are built once here instead of once per record.
operation_types = [
    "登录", "登出", "添加", "删除", "编辑", "查看", "上传", "下载", "分享",
    "评论", "点赞", "收藏", "回复", "更新", "搜索", "排序", "过滤", "分配",
    "创建", "发布", "撤销", "审核", "激活", "禁用", "备份", "恢复", "重置",
]
window_start = datetime(2023, 1, 1)
window_seconds = 86400 * 30  # 30 days expressed in seconds

random_data = []
for _ in range(n_records):
    # Pick a user, then one of that user's ids; the department is fixed per id.
    username = random.choice(unique_users)
    user_id = random.choice(username_to_ids[username])
    department = user_id_to_department[user_id]
    operation_type = random.choice(operation_types)
    operation_ip = fake.ipv4_private()
    # randint is inclusive on both ends, so the offset ranges over
    # 0..window_seconds seconds after window_start.
    operation_time = window_start + timedelta(
        seconds=random.randint(0, window_seconds)
    )
    # Field order must match the column order used when the DataFrame is built.
    random_data.append([
        uuid.uuid4(),
        operation_ip,
        username,
        user_id,
        operation_type,
        department,
        operation_time,
    ])
# Wrap the generated rows in a DataFrame; the column order mirrors the order
# in which each row's fields were appended.
record_columns = [
    "uuid",
    "operation_ip",
    "username",
    "user_id",
    "operation_type",
    "department",
    "operation_time",
]
df_random = pd.DataFrame(random_data, columns=record_columns)

# Append every generated row to the `test` table.
# NOTE(review): `con` is defined outside this chunk -- presumably a DuckDB
# connection, which would resolve `df_random` in the SQL by looking up the
# Python variable of that name; confirm, and keep the variable name in sync
# with the query text.
con.sql("INSERT INTO test SELECT * FROM df_random")
|