87 lines
2.7 KiB
Python
87 lines
2.7 KiB
Python
import numpy as np
|
||
import pandas as pd
|
||
import anndata as ad
|
||
import os
|
||
|
||
np.random.seed(42)
|
||
|
||
# 参数配置
|
||
stages = [("CS7", 500), ("CS7.5", 750), ("CS8", 1000), ("CS9", 1500)]
|
||
genes = ["SOX2", "NANOG", "T", "POU5F1", "OTX2", "ZIC2", "FOXA2", "LEFTY1"]
|
||
layers = {
|
||
"Ectoderm": 0.1, # 外层
|
||
"Mesoderm": 0.085, # 中层
|
||
"Endoderm": 0.07, # 内层
|
||
}
|
||
notochord_ratio = 0.05 # 脊索/原条占总细胞数的比例
|
||
|
||
# 保存路径
|
||
script_path = os.path.dirname(os.path.realpath(__file__))
|
||
|
||
for stage, total_cells in stages:
|
||
n_noto = int(total_cells * notochord_ratio)
|
||
n_layer = total_cells - n_noto
|
||
num_layers = len(layers)
|
||
# 使用 Dirichlet 生成随机且不相等的概率,再用多项分布采样;若计数仍相等则重采样
|
||
for _ in range(100):
|
||
probs = np.random.dirichlet(np.ones(num_layers))
|
||
n_per_layer = np.random.multinomial(n_layer, probs)
|
||
if len(set(n_per_layer)) == num_layers:
|
||
break
|
||
|
||
positions = []
|
||
cell_types = []
|
||
ids = []
|
||
|
||
# 椭球比例参数
|
||
a, b, c = 1, 0.8, 0.5
|
||
|
||
for i, (layer_name, scale) in enumerate(layers.items()):
|
||
N = n_per_layer[i]
|
||
phi = np.random.uniform(0, np.pi, N)
|
||
theta = np.random.uniform(0, 2*np.pi, N)
|
||
r = scale # 层的半径比例
|
||
|
||
x = a * r * np.sin(phi) * np.cos(theta)
|
||
y = b * r * np.sin(phi) * np.sin(theta)
|
||
z = c * r * np.cos(phi)
|
||
|
||
for xi, yi, zi in zip(x, y, z):
|
||
positions.append([xi, yi, zi])
|
||
cell_types.append(layer_name)
|
||
ids.append(f"{stage}_{layer_name}_{len(ids)}")
|
||
|
||
# 添加脊索/原条样结构(Z轴中间偏下的细胞)
|
||
x = np.random.normal(0, 0.01, n_noto)
|
||
y = np.random.normal(0, 0.01, n_noto)
|
||
z = np.linspace(-0.1, 0.1, n_noto)
|
||
|
||
for xi, yi, zi in zip(x, y, z):
|
||
positions.append([xi, yi, zi])
|
||
cell_types.append("Notochord")
|
||
ids.append(f"{stage}_Noto_{len(ids)}")
|
||
|
||
# 构造表达矩阵
|
||
pos_arr = np.array(positions)
|
||
X_data = []
|
||
for gene in genes:
|
||
center = np.random.randn(3) * 2
|
||
spread = np.random.uniform(5, 12)
|
||
expr = np.exp(-np.sum((pos_arr - center)**2, axis=1) / (2 * spread**2))
|
||
expr += 0.05 * np.random.rand(len(pos_arr)) # 加一点噪声
|
||
X_data.append(expr)
|
||
X = np.vstack(X_data).T # shape (cells, genes)
|
||
|
||
# 构造 AnnData
|
||
obs = pd.DataFrame({
|
||
"cell_id": ids,
|
||
"cell_type": cell_types,
|
||
"stage": stage
|
||
}, index=ids)
|
||
var = pd.DataFrame(index=genes)
|
||
obsm = {"spatial": pos_arr}
|
||
|
||
adata = ad.AnnData(X=X, obs=obs, var=var, obsm=obsm)
|
||
adata.write_h5ad(os.path.join(script_path, f"{stage}.h5ad"))
|
||
print(f"Saved: {stage}.h5ad with {len(ids)} cells.")
|