# Import libraries
import math
import numpy as np
from dataclasses import dataclass, field
from typing import List, Tuple
from pprint import pprint
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
Synthetic Clinical Trial for Down’s Syndrome
Given measured device performace metrics (such as sensitivity, selectivity, storage and sample processing variations), estimate the ROC curve. In other given some device variation data, generate the AUC value that would be achieved by the test in clinic?
In other words, simulate an observational clinical trial for Down’s syndrome.
Inputs:
Device Caliberation Curve with Error Bars, Storage and processing variation data with Error Bars
Outputs: ROC Curve and AUC value
Plan
Here is some sample code that we would eventually wish to write:
= device_model()
theoratical_max_roc, theoratical_max_auc
= device_model(input_file="./raw_data.csv")
roc, auc
plot_roc(roc)print("AUC based on current device = ", auc)
= detection_rate(roc, fpr=0.05)
dr print("Detection rate achieved at 5% FPR is ", dr)
Proposed csv format for Device Caliberation Curve
concentration,mean,std
-8.32,23.33,3.1
...
Proposed csv format for Storage and Processing Variation data
marker,elapsed_days,mean,std
bhcg,8,23.33,3.1
bhcg,10,24.53,4.2
...
Process
- ✅ Produce a model of population distribution for down syndrome
- Define probability distribution of samples
- Generate lots of samples based on the probability distribution (Age, b-hcg, papp-a, NT scan)
- Model and simulate storage and processing errors
- Model and simulate sensitivity and selectivity of our sensor
- ✅ Define a diagnostic algorithm, generate RUC curve and AUC value
Example 1. P(H) = 0.5, P(T) = 0.5 : Probability Distribution : Normal Distribution ( Average(X), SD(X) ) / Log Normal Distribution ( Average(log(X)), SD(log(X) ) 2. Random Sampling (take my distribution) -> then produce a random sample
Note
MoM: Multiples of Median is used as a gestational age normalized form of the raw values from different markers.
Modeling of population distribution for down syndrome
# Some helper lambdas to make switching bases easier to log
# Log base 10
# log = np.log10
# inv_log = lambda x: np.pow(10, x)
# Natural Logs
= np.log
log = np.exp inv_log
@dataclass
class Marker:
"""This class is used to define a particular marker. Example, b-hCG."""
str # Name of the marker, example: b-hCG
name: float # median MoM for down syndrome patients
median_mom_down: float # median MoM for control patients
median_mom_control: float = 0.0 # log of the sd of the marker's MoM values in Down Syndrome cases
log_sd_down: float = 0.0 # log of the sd of the marker's MoM values in Control cases log_sd_control:
@dataclass
class PopulationConfig:
"""
Maternal Age is Log Normally distributed.
"""
float = 27.0 # Mean/Average of maternal age in years
maternal_age_mean: float = 5.5 # SD of maternal age in years
maternal_age_sd:
float = 1/700 # Prevalence of Down syndrome in the population down_syndrome_prevalence:
# Create the configuration for Population with default values
= PopulationConfig()
pop_config pprint(pop_config)
PopulationConfig(maternal_age_mean=27.0,
maternal_age_sd=5.5,
down_syndrome_prevalence=0.0014285714285714286)
# Define the list of markers
list[Marker] = [
markers: ="Free B-hCG", median_mom_down=1.70, median_mom_control=1.01, log_sd_down=0.28, log_sd_control=0.27),
Marker(name="PAPP-A", median_mom_down=0.49, median_mom_control=1.00, log_sd_down=0.31, log_sd_control=0.25),
Marker(name="NT", median_mom_down=1.74, median_mom_control=1.01, log_sd_down=0.23, log_sd_control=0.13),
Marker(name
]
markers
[Marker(name='Free B-hCG', median_mom_down=1.7, median_mom_control=1.01, log_sd_down=0.28, log_sd_control=0.27),
Marker(name='PAPP-A', median_mom_down=0.49, median_mom_control=1.0, log_sd_down=0.31, log_sd_control=0.25),
Marker(name='NT', median_mom_down=1.74, median_mom_control=1.01, log_sd_down=0.23, log_sd_control=0.13)]
Generate lots of samples based on the probability distribution
int = 1_000_000 # Number of samples to be generated from the distribution sample_size:
# If needed, we can make make the random functions behave the same every time by choosing a seed value
42) np.random.seed(
# Randomly (normally) assign down syndrome to patients based on down syndrome prevalence (usually 1/700)
= np.random.random(sample_size) < pop_config.down_syndrome_prevalence
has_down has_down
array([False, False, False, ..., False, False, False])
= (np.sum(has_down) / sample_size)
sampled_prevalence print(f"We get the prevalence of {sampled_prevalence*100:.2f}% which is similar to the expected value of {1/7:.2f}%")
assert np.allclose(sampled_prevalence, pop_config.down_syndrome_prevalence, atol=1/2000), "Sampled prevalence should be similar to expected prevalence"
We get the prevalence of 0.14% which is similar to the expected value of 0.14%
= lambda v, tol: ((v - tol) * 100, (v + tol) * 100)
min_max
0.0014, 1/2000) min_max(
(0.09, 0.19)
Let X be a random variable with a log normal distribution \(N(\mu_X, \sigma^2_X\)). Then the \(ln(X)\) has the mean \(\mu\) and variance \(\sigma^2\).
\[ \begin{align} \mu &= ln({\frac{\mu^2_X}{\sqrt{\mu^2_X + \sigma^2_X}}}) \\ \sigma^2 &= ln(1 + \frac{\sigma^2_X}{\mu^2_X}) \end{align} \]
= log(pop_config.maternal_age_mean**2 / (np.sqrt(pop_config.maternal_age_mean**2 + pop_config.maternal_age_sd**2)))
mu = log(1 + (pop_config.maternal_age_sd ** 2 / pop_config.maternal_age_mean ** 2))
sig2 = np.sqrt(sig2)
sig
= np.random.normal(
log_maternal_ages
mu,
sig,
sample_size
)
# maternal_ages = np.random.normal(
# pop_config.maternal_age_mean,
# pop_config.maternal_age_sd,
# sample_size
# )
= 15, 50
min_age, max_age = np.clip(log_maternal_ages, np.log(min_age), np.log(max_age))
log_maternal_ages # np.clip(maternal_ages, min_age, max_age, out=maternal_ages)
print(
mu, sig )
3.275508180045729 0.20163673255932293
def summary_stats(data: np.ndarray):
print(f"X ~ N(μ, σ^2): N({np.mean(data):.4f}, {np.std(data):.4f})")
print(f"Range: {np.min(data):.4f} ≤ X ≤ {np.max(data):.4f}")
print(f"median (M): {np.median(data):.4f}\n")
inverse of log is exponential
= np.exp(log_maternal_ages)
maternal_age # maternal_age = np.clip(inv_log(log_maternal_ages), 15, 50)
# print(maternal_age)
=50) plt.hist(maternal_age, bins
(array([ 4977., 3983., 6428., 9447., 13570., 17967., 23008., 28621.,
33974., 39115., 43345., 47708., 50206., 52588., 53289., 53121.,
52011., 50169., 48005., 44773., 41435., 37650., 33731., 30576.,
26718., 23731., 20526., 17862., 15216., 13019., 10906., 9264.,
7713., 6455., 5467., 4436., 3617., 3026., 2345., 1889.,
1674., 1268., 1054., 826., 658., 540., 404., 376.,
267., 1046.]),
array([15. , 15.7, 16.4, 17.1, 17.8, 18.5, 19.2, 19.9, 20.6, 21.3, 22. ,
22.7, 23.4, 24.1, 24.8, 25.5, 26.2, 26.9, 27.6, 28.3, 29. , 29.7,
30.4, 31.1, 31.8, 32.5, 33.2, 33.9, 34.6, 35.3, 36. , 36.7, 37.4,
38.1, 38.8, 39.5, 40.2, 40.9, 41.6, 42.3, 43. , 43.7, 44.4, 45.1,
45.8, 46.5, 47.2, 47.9, 48.6, 49.3, 50. ]),
<BarContainer object of 50 artists>)
summary_stats(log_maternal_ages)# summary_stats(np.exp(log_maternal_ages))
summary_stats(inv_log(log_maternal_ages))
X ~ N(μ, σ^2): N(3.2752, 0.2013)
Range: 2.7081 ≤ X ≤ 3.9120
median (M): 3.2752
X ~ N(μ, σ^2): N(26.9903, 5.4890)
Range: 15.0000 ≤ X ≤ 50.0000
median (M): 26.4473
Log normal distribution is defined as: \[ Y = ln(X), Y \sim N(\mu, \sigma) \]
In other words, where the log of the random variable Y is normally distributed.
For a log normally distributed random variable X. The median for X is just the exponential of its mean. \[ M = e^\mu => ln(M) = \mu \]
# Example
= log(0.49)
avg avg
np.float64(-0.7133498878774648)
# Calculate mean of log10 values for all markers for both down's samples and healthy samples
= log([m.median_mom_down for m in markers]) # Mean for all markers for down's patients
mean_down # For example:
# Median MoM value of Papp-a for Down's is 0.49
# Given the above relationship between mean and median for a log-normally distributed variable,
# we can state that the Mean of Papp-a MoM values for Down's is log10(0.49) = -0.31
= log([m.median_mom_control for m in markers]) # Mean for all markers for healthy patients
mean_control
mean_down, mean_control
(array([ 0.53062825, -0.71334989, 0.55388511]),
array([0.00995033, 0. , 0.00995033]))
Covariance Matrices
Assuming that \(X_i\) for all \(i=0...n\) are independent random variables, the \(Cov(X_i, Y_j) = 0\) and hence we get the following diagnol form containing only the variances.
\[ Cov(X, X) = \begin{bmatrix} Var(X_1) & 0 & 0\\ 0 & Var(X_2) & 0\\ 0 & 0 & Var(X_3) \end{bmatrix} = \begin{bmatrix} \sigma^2(X_1) & 0 & 0\\ 0 & \sigma^2(X_2) & 0\\ 0 & 0 & \sigma^2(X_3) \end{bmatrix} \]
The covariance matrix is a diagonal matrix containing only the variances of each of the markers.
Concretely, \[ Cov(X, X) = \begin{bmatrix} Var(Pappa) & 0 & 0\\ 0 & Var(\beta hCG) & 0\\ 0 & 0 & Var(NT) \end{bmatrix} = \begin{bmatrix} \sigma^2(Pappa) & 0 & 0\\ 0 & \sigma^2(\beta hCG) & 0\\ 0 & 0 & \sigma^2(NT) \end{bmatrix} \]
= np.diag([m.log_sd_down**2 for m in markers])
variance_matrix_down = np.diag([m.log_sd_control**2 for m in markers])
variance_matrix_control
variance_matrix_down, variance_matrix_control
(array([[0.0784, 0. , 0. ],
[0. , 0.0961, 0. ],
[0. , 0. , 0.0529]]),
array([[0.0729, 0. , 0. ],
[0. , 0.0625, 0. ],
[0. , 0. , 0.0169]]))
= np.sqrt(variance_matrix_down)
sd_matrix_down = np.sqrt(variance_matrix_control)
sd_matrix_control
sd_matrix_down, sd_matrix_control
(array([[0.28, 0. , 0. ],
[0. , 0.31, 0. ],
[0. , 0. , 0.23]]),
array([[0.27, 0. , 0. ],
[0. , 0.25, 0. ],
[0. , 0. , 0.13]]))
sd_matrix_down.shape, variance_matrix_down.shape
((3, 3), (3, 3))
0.0784) np.sqrt(
np.float64(0.27999999999999997)
Correlation Matrices
for m in markers] [m.name
['Free B-hCG', 'PAPP-A', 'NT']
\[ \begin{bmatrix} Corr(\text{Free B-hCG, Free B-hCG}) & Corr(\text{Free B-hCG, PAPP-A}) & Corr(\text{Free B-hCG, NT}) \\ Corr(\text{PAPP-A, Free B-hCG}) & Corr(\text{PAPP-A, PAPP-A}) & Corr(\text{PAPP-A, NT}) \\ Corr(\text{NT, Free B-hCG}) & Corr(\text{NT, PAPP-A}) & Corr(\text{NT, NT}) \\ \end{bmatrix} \]
Note: Correlations of markers with NT is assumed to be zero as per the paper (table 3)
# Correlation between different markers for Down's samples
= np.array(
correlation_matrix_down 1., 0.191, 0.],
[[0.191, 1., 0.],
[0., 0., 1.]]
[ )
# Correlation between different markers for Healthy samples
= np.array(
correlation_matrix_control 1., 0.186, 0.],
[[0.186, 1., 0.],
[0., 0., 1.]]
[ )
Let Markers be denoted by \(M\), \[ Cov(M, M) = \sigma(M) Corr(M, M) \sigma(M) \]
# Covariance matrix of all markers for Down's patients
= sd_matrix_down @ correlation_matrix_down @ sd_matrix_down
cov_down cov_down
array([[0.0784 , 0.0165788, 0. ],
[0.0165788, 0.0961 , 0. ],
[0. , 0. , 0.0529 ]])
# Covariance matrix of all markers for Healthy patients
= sd_matrix_control @ correlation_matrix_control @ sd_matrix_control
cov_control cov_control
array([[0.0729 , 0.012555, 0. ],
[0.012555, 0.0625 , 0. ],
[0. , 0. , 0.0169 ]])
Generate Marker Values for all patient samples
# Sample the marker values for each of the down's patients
= np.random.multivariate_normal(
log_marker_values_down # Mean of all markers of all markers for Down's patients
mean_down, # Covariance matrix of all markers for Down's patients
cov_down, sum(has_down) # We want to sample these marker values for ALL the down's samples only
np.
)
#, log_marker_values_down log_marker_values_down.shape
(1436, 3)
# Sample the marker values for each of the healthy patients
= np.random.multivariate_normal(
log_marker_values_control # Mean of all markers of all markers for healthy patients
mean_control, # Covariance matrix of all markers for Healthy patients
cov_control, sum(~has_down) # We want to sample these marker values for ALL the healthy samples only
np.
)
#, log_marker_values_control log_marker_values_control.shape
(998564, 3)
Now, lets put all the marker values together into a single big matrix
= np.zeros((sample_size, len(markers)))
log_marker_values log_marker_values.shape
(1000000, 3)
= log_marker_values_down
log_marker_values[has_down] ~has_down] = log_marker_values_control log_marker_values[
# log_marker_values
Convert all the marker values from log(MoM) to MoM values
# marker_values = np.exp(log_marker_values)
= inv_log(log_marker_values)
marker_values # marker_values
def plot_scatters(df, markers):
= plt.subplots(1, 3, figsize=(20, 6))
fig, axes
= [('Free B-hCG', 'PAPP-A'), ('Free B-hCG', 'NT'), ('PAPP-A', 'NT')]
pairs
for i, (x, y) in enumerate(pairs):
# Plot healthy cases with low opacity
=df[~df['Down Syndrome']], x=x, y=y, color='cyan',
sns.scatterplot(data=0.01, ax=axes[i], label='Healthy')
alpha
# Plot Down syndrome cases with higher opacity
=df[df['Down Syndrome']], x=x, y=y, color='red',
sns.scatterplot(data=0.8, ax=axes[i], label='Down Syndrome')
alpha
f'{x} vs {y}')
axes[i].set_title(f'{x} MoM')
axes[i].set_xlabel(f'{y} MoM')
axes[i].set_ylabel(
plt.tight_layout()
plt.show()
def plot_violins(df, markers):
= plt.subplots(1, 3, figsize=(20, 6))
fig, axes = axes.flatten()
axes
for i, column in enumerate(markers):# + ['Maternal Age']):
=df, x='Down Syndrome', y=column, ax=axes[i])
sns.violinplot(dataf'Violin Plot of {column}')
axes[i].set_title('MoM' if column != 'Maternal Age' else 'Age')
axes[i].set_ylabel(
plt.tight_layout() plt.show()
# Convert your data to a pandas DataFrame for easier plotting
= pd.DataFrame(marker_values, columns=[m.name for m in markers])
df 'Maternal Age'] = maternal_age
df['Down Syndrome'] = has_down
df[
# Call the plotting functions
= [m.name for m in markers]
marker_names
plot_scatters(df, marker_names)# plot_violins(df, marker_names)
def plot_2d_distributions(df, markers):
= plt.subplots(1, 3, figsize=(20, 6))
fig, axes = [('Free B-hCG', 'PAPP-A'), ('Free B-hCG', 'NT'), ('PAPP-A', 'NT')]
pairs
for i, (x, y) in enumerate(pairs):
= axes[i]
ax
# Separate data for healthy and Down syndrome cases
= df.loc[~df['Down Syndrome'], x]
healthy_x = df.loc[~df['Down Syndrome'], y]
healthy_y = df.loc[df['Down Syndrome'], x]
down_x = df.loc[df['Down Syndrome'], y]
down_y
# Set the range for histogram
= (0, df[x].quantile(0.99))
x_range = (0, df[y].quantile(0.99))
y_range
# Create 2D histograms
= np.histogram2d(healthy_x, healthy_y, bins=50, range=[x_range, y_range])
healthy_hist, xedges, yedges = np.histogram2d(down_x, down_y, bins=[xedges, yedges])
down_hist, _, _
# Normalize histograms
= healthy_hist / healthy_hist.max()
healthy_hist = down_hist / down_hist.max()
down_hist
# Plot contours for healthy cases
ax.contour(
healthy_hist.T, =[xedges[0], xedges[-1], yedges[0], yedges[-1]],
extent=3, colors='blue', alpha=0.1)
levels
# Plot contours for Down syndrome cases
ax.contour(
down_hist.T, =[xedges[0], xedges[-1], yedges[0], yedges[-1]],
extent=3, colors='red', alpha=0.2)
levels
# Plot scatter for both groups
='lightgreen', s=1, alpha=0.05, label='Healthy')
ax.scatter(healthy_x, healthy_y, c='red', s=10, alpha=1, label='Down Syndrome')
ax.scatter(down_x, down_y, c
f'{x} vs {y}')
ax.set_title(f'{x} MoM')
ax.set_xlabel(f'{y} MoM')
ax.set_ylabel(
ax.legend()
# Set axis limits
ax.set_xlim(x_range)
ax.set_ylim(y_range)
plt.tight_layout() plt.show()
plot_2d_distributions(df, marker_names)
import plotly.graph_objects as go
import plotly.io as pio
def create_interactive_plot(marker_values, maternal_age, is_down, markers, z_axis='maternal_age'):
# pio.renderers.default = "browser"
# Separate Down syndrome and control cases
= np.where(is_down)[0]
down_indices = np.where(~is_down)[0]
control_indices
# Determine z-axis values
if z_axis == 'maternal_age':
= maternal_age
z_values = 'Maternal Age'
z_axis_title elif z_axis == 'NT':
= marker_values[:, 2]
z_values = markers[2].name
z_axis_title else:
raise ValueError("z_axis must be either 'maternal_age' or 'NT'")
# Create scatter plots for Down syndrome and control cases separately
= go.Scatter3d(
scatter_down =marker_values[down_indices, 0],
x=marker_values[down_indices, 1],
y=z_values[down_indices],
z='markers',
mode=dict(
marker=5,
size='red',
color='square',
symbol
),='Down Syndrome',
name=[f"Age: {age:.1f}, {z_axis_title}: {z:.2f}, Down Syndrome: True"
textfor age, z in zip(maternal_age[down_indices], z_values[down_indices])],
="text"
hoverinfo
)
= go.Scatter3d(
scatter_control =marker_values[control_indices, 0],
x=marker_values[control_indices, 1],
y=z_values[control_indices],
z='markers',
mode=dict(
marker=3,
size='lightgreen',
color=0.01,
opacity
),='Control',
name=[f"Age: {age:.1f}, {z_axis_title}: {z:.2f}, Down Syndrome: False"
textfor age, z in zip(maternal_age[control_indices], z_values[control_indices])],
="text"
hoverinfo
)
# Create the layout
= go.Layout(
layout =dict(
scene=markers[0].name,
xaxis_title=markers[1].name,
yaxis_title=z_axis_title,
zaxis_title
),="Down Syndrome Screening Markers",
title=900,
width=700,
height=dict(
legend="top",
yanchor=0.99,
y="left",
xanchor=0.01
x
)
)
# Create the figure and show it
= go.Figure(data=[scatter_control, scatter_down], layout=layout)
fig
fig.show()
# create_interactive_plot(marker_values, maternal_age, has_down, markers, z_axis='maternal_age')
# create_interactive_plot(marker_values, maternal_age, has_down, markers, z_axis='NT')
Diagnostic algorithm, generate RUC curve and AUC value
- Based on likelihood estimation method as described in the paper
- Estimate age based risk
= 1 / (1 + np.exp(16.2 - 0.286 * maternal_age)) # a-priori probability of down's syndrome based only on maternal age
age_risk_prob
# plt.scatter(maternal_age, age_risk_prob, c='red', s=3, alpha=0.1)
summary_stats(age_risk_prob)
X ~ N(μ, σ^2): N(0.0011, 0.0058)
Range: 0.0000 ≤ X ≤ 0.1301
median (M): 0.0002
def maternal_age_risk(age: float) -> float:
"""Calculate risk of Down syndrome based on maternal age."""
return 1 / (1 + np.exp(16.2 - 0.286 * age))
print(f"{maternal_age_risk(30)*100:.2f}%")
0.05%
from scipy.stats import norm, multivariate_normal
from sklearn.metrics import roc_curve, auc
= np.log(marker_values)
log_mom_values
= np.min(log_mom_values, axis=0), np.max(log_mom_values, axis=0)
min_val, max_val
= 100
num_sections
= np.linspace(min_val, max_val, num_sections+1)
sections
= np.ones(log_mom_values.shape[0])
lrs
for i in range(num_sections):
= sections[i]
lower = sections[i+1]
upper
= multivariate_normal.cdf(upper, mean_control, cov_control) - multivariate_normal.cdf(lower, mean_control, cov_control)
prob_control = multivariate_normal.cdf(upper, mean_down, cov_down) - multivariate_normal.cdf(lower, mean_down, cov_down)
prob_down
= prob_down / prob_control
section_lr
= np.all((log_mom_values >= lower) & (log_mom_values < upper), axis=1)
in_section = section_lr
lrs[in_section]
# age_risk = maternal_age_risk(maternal_age)
= age_risk_prob * lrs
combined_risk = combined_risk / (combined_risk + (1 - age_risk_prob))*100
risks
# np.sum(risks[df["Down Syndrome"]] > 0.0014) / len(risks[df["Down Syndrome"]])
# Calculate ROC and AUC
= roc_curve(has_down, lrs)
fpr, tpr, thresholds = auc(fpr, tpr)
roc_auc
print(f"AUC: {roc_auc:.3f}")
# Find detection rate at 5% FPR
= np.argmin(np.abs(fpr - 0.05))
idx = tpr[idx]
dr_at_5_fpr print(f"Detection rate at 5% FPR: {dr_at_5_fpr:.2%}")
AUC: 0.500
Detection rate at 5% FPR: 0.00%
# # Plot ROC curve
# plt.figure(figsize=(8, 6))
# plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
# plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('Receiver Operating Characteristic (ROC) Curve')
# plt.legend(loc="lower right")
# plt.show()
for i, marker in enumerate(markers):
print(f"{marker.name}:")
print(f" Control - Median: {marker.median_mom_control:.4f}, Log SD: {marker.log_sd_control:.4f}")
print(f" Down Syndrome - Median: {marker.median_mom_down:.4f}, Log SD: {marker.log_sd_down:.4f}")
# Check actual distributions in your data
= marker_values[~has_down, i]
control_values = marker_values[has_down, i]
down_values
print(f" Actual Control - Median: {np.median(control_values):.4f}, Log SD: {np.std(np.log10(control_values)):.4f}")
print(f" Actual Down - Median: {np.median(down_values):.4f}, Log SD: {np.std(np.log10(down_values)):.4f}")
print()
Free B-hCG:
Control - Median: 1.0100, Log SD: 0.2700
Down Syndrome - Median: 1.7000, Log SD: 0.2800
Actual Control - Median: 1.0100, Log SD: 0.1173
Actual Down - Median: 1.7095, Log SD: 0.1214
PAPP-A:
Control - Median: 1.0000, Log SD: 0.2500
Down Syndrome - Median: 0.4900, Log SD: 0.3100
Actual Control - Median: 0.9998, Log SD: 0.1086
Actual Down - Median: 0.4914, Log SD: 0.1406
NT:
Control - Median: 1.0100, Log SD: 0.1300
Down Syndrome - Median: 1.7400, Log SD: 0.2300
Actual Control - Median: 1.0102, Log SD: 0.0564
Actual Down - Median: 1.7403, Log SD: 0.0992
def simple_lr(mom_values, markers):
= np.log10(mom_values)
log_mom_values = np.ones(mom_values.shape[0])
lrs
for i, marker in enumerate(markers):
= np.log10(marker.median_mom_control)
mean_control = np.log10(marker.median_mom_down)
mean_down
= norm.pdf(log_mom_values[:, i], mean_control, marker.log_sd_control)
pdf_control = norm.pdf(log_mom_values[:, i], mean_down, marker.log_sd_down)
pdf_down
*= pdf_down / pdf_control
lrs
return lrs
# Test this simple LR
= simple_lr(marker_values, markers)
simple_risks = roc_curve(has_down, simple_risks)
fpr, tpr, _ = auc(fpr, tpr)
simple_auc print(f"AUC with simple LR: {simple_auc:.3f}")
AUC with simple LR: 0.999
=(10, 6))
plt.figure(figsize~has_down]), bins=50, alpha=0.3, color="green", label='Control', density=True)
plt.hist(np.log10(simple_risks[=50, alpha=0.6, color="red", label='Down Syndrome', density=True)
plt.hist(np.log10(simple_risks[has_down]), bins'Log10 Likelihood Ratio')
plt.xlabel('Count')
plt.ylabel(
plt.legend()'Distribution of Log Likelihood Ratios')
plt.title( plt.show()
Model of the device
- For each MoM value of the patient, generate the raw value that would be measured by the device (example: ng/ml)
- Calculate median based on our device generated raw value
- Estimate MoM values based on our device’s raw values and gestational age dependent median values