digital-embryo/embryo-backend/Data/FakeEmbryo.py
2025-07-26 11:41:02 +08:00

81 lines
2.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import numpy as np
import pandas as pd
import anndata as ad
import os
# Generates synthetic ("fake") embryo datasets: for every configured stage,
# cells are placed on nested ellipsoidal shells plus a notochord-like column,
# a smooth spatial expression pattern is simulated for each gene, and the
# result is written as ``<stage>.h5ad`` next to this script.

np.random.seed(42)  # fixed seed so repeated runs generate the same data

# Parameter configuration
# (stage name, requested total number of cells) for each file to generate.
stages = [("CS7", 500), ("CS8", 1000), ("CS9", 1500)]
genes = ["SOX2", "NANOG", "T", "POU5F1", "OTX2", "ZIC2", "FOXA2", "LEFTY1"]
# Layer name -> radial scale applied to the ellipsoid semi-axes.
layers = {
    "Ectoderm": 1.0,   # outer layer
    "Mesoderm": 0.85,  # middle layer
    "Endoderm": 0.7,   # inner layer
}
notochord_ratio = 0.05  # fraction of all cells in the notochord / primitive streak
# Output directory: the folder that contains this script.
script_path = os.path.dirname(os.path.realpath(__file__))


def layer_cell_counts(n_cells, n_layers):
    """Split ``n_cells`` across ``n_layers`` as evenly as possible.

    The first ``n_cells % n_layers`` layers receive one extra cell, so the
    returned counts always sum to ``n_cells``. (Plain floor division would
    drop the remainder and produce fewer cells than requested.)
    """
    base, extra = divmod(n_cells, n_layers)
    return [base + 1 if k < extra else base for k in range(n_layers)]


def build_cells(stage, total_cells, axes=(10, 8, 5)):
    """Create positions, cell types and ids for one stage.

    Args:
        stage: Stage label; used as the prefix of every cell id.
        total_cells: Total number of cells to generate (layers + notochord).
        axes: Semi-axes ``(a, b, c)`` of the outermost ellipsoid.

    Returns:
        ``(positions, cell_types, ids)`` where ``positions`` is a float array
        of shape ``(total_cells, 3)`` and the other two are lists of the same
        length, ordered layer by layer with the notochord cells last.
    """
    a, b, c = axes
    n_noto = int(total_cells * notochord_ratio)
    counts = layer_cell_counts(total_cells - n_noto, len(layers))

    coord_blocks = []
    cell_types = []
    ids = []
    for (layer_name, scale), n in zip(layers.items(), counts):
        # NOTE: phi is drawn uniformly on [0, pi], so points are denser near
        # the poles than an area-uniform sampling would give; kept as is.
        phi = np.random.uniform(0, np.pi, n)
        theta = np.random.uniform(0, 2 * np.pi, n)
        coord_blocks.append(np.column_stack((
            a * scale * np.sin(phi) * np.cos(theta),
            b * scale * np.sin(phi) * np.sin(theta),
            c * scale * np.cos(phi),
        )))
        cell_types.extend([layer_name] * n)
        # The numeric id suffix is a running index over all cells of the stage.
        start = len(ids)
        ids.extend(f"{stage}_{layer_name}_{k}" for k in range(start, start + n))

    # Notochord / primitive-streak-like structure: a thin column around the
    # Z axis (Gaussian scatter in x and y, evenly spaced z from -3 to 3).
    noto_x = np.random.normal(0, 0.5, n_noto)
    noto_y = np.random.normal(0, 0.5, n_noto)
    noto_z = np.linspace(-3, 3, n_noto)
    coord_blocks.append(np.column_stack((noto_x, noto_y, noto_z)))
    cell_types.extend(["Notochord"] * n_noto)
    start = len(ids)
    ids.extend(f"{stage}_Noto_{k}" for k in range(start, start + n_noto))

    return np.vstack(coord_blocks), cell_types, ids


def build_expression_matrix(positions, gene_names):
    """Simulate a ``(cells, genes)`` expression matrix.

    Each gene gets a Gaussian bump with a random centre and width evaluated
    at every cell position, plus a little uniform noise.
    """
    columns = []
    for _ in gene_names:
        center = np.random.randn(3) * 2
        spread = np.random.uniform(5, 12)
        sq_dist = np.sum((positions - center) ** 2, axis=1)
        expr = np.exp(-sq_dist / (2 * spread ** 2))
        expr += 0.05 * np.random.rand(len(positions))  # add a little noise
        columns.append(expr)
    return np.vstack(columns).T  # shape (cells, genes)


def write_stage(stage, total_cells):
    """Generate one stage and save it as ``<stage>.h5ad`` in ``script_path``."""
    positions, cell_types, ids = build_cells(stage, total_cells)
    X = build_expression_matrix(positions, genes)

    # Build the AnnData object.
    obs = pd.DataFrame(
        {
            "cell_id": ids,
            "cell_type": cell_types,
            "stage": stage,
        },
        index=ids,
    )
    var = pd.DataFrame(index=genes)
    obsm = {"spatial": positions}
    adata = ad.AnnData(X=X, obs=obs, var=var, obsm=obsm)
    adata.write_h5ad(os.path.join(script_path, f"{stage}.h5ad"))
    print(f"Saved: {stage}.h5ad with {len(ids)} cells.")


def main():
    """Generate and save every configured stage."""
    for stage, total_cells in stages:
        write_stage(stage, total_cells)


if __name__ == "__main__":
    main()