import umap
import umap.plot
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from sklearn.metrics import calinski_harabasz_score as CH
from sklearn.metrics import silhouette_score as ASW
from scipy.optimize import brentq
from scipy.interpolate import make_interp_spline
from sklearn.linear_model import LinearRegression as LinReg
# initialize colors
from pride_colors import pride_colors_matplotlib
from matplotlib import cm
BI = pride_colors_matplotlib(flag='bi')
BP = cm.get_cmap("PuRd")
NW_purple = '#4E2A84'
# check installation - use the demo example
digits = load_digits()
mapper = umap.UMAP(n_neighbors=15, spread=0.5).fit(digits.data)
umap.plot.points(mapper, labels=digits.target)
<AxesSubplot:>
# try it another way
Y = umap.UMAP(min_dist=0.5, spread=2).fit_transform(digits.data)
print("Input dimensions:", digits.data.shape)
print("Output dimensions:", Y.shape)
plt.scatter(Y[:,0], Y[:,1], c=digits.target, s=0.1, cmap='Dark2')
Input dimensions: (1797, 64) Output dimensions: (1797, 2)
<matplotlib.collections.PathCollection at 0x14be635b0>
maybe try setting the number of neighbors to $n / 10$, seeing as the closest points in $\mathbb{R}^n$ could be similar digits..?
print(f"n = {Y.shape[0]}")
nn = digits.data.shape[0] // 10
print(f"Number of Neighbors: {nn}")
n = 1797 Number of Neighbors: 179
fit = umap.UMAP(min_dist=0.5, spread=2, n_neighbors=nn)
Y = fit.fit_transform(digits.data)
print("Input dimensions:", digits.data.shape)
print("Output dimensions:", Y.shape)
plt.scatter(Y[:,0], Y[:,1], c=digits.target, s=0.1, cmap='Dark2')
Input dimensions: (1797, 64) Output dimensions: (1797, 2)
<matplotlib.collections.PathCollection at 0x14bb66fe0>
...I think that might be worse, actually
Starting off with two 2D clusters: $N((5,5),0.5)$ and $N((1,1), 0.5)$
# Create two 2D clusters
X1 = np.random.default_rng(2022).multivariate_normal(mean=[5, 5], cov=(0.5 * np.eye(2)), size=20)
X2 = np.random.default_rng(2022).multivariate_normal(mean=[1, 1], cov=(0.5 * np.eye(2)), size=20)
X = np.vstack((X1, X2))
Y = umap.UMAP().fit_transform(X)
plt.scatter(Y[:,0], Y[:,1], c='m')
<matplotlib.collections.PathCollection at 0x14c503be0>
Plotting $N(\vec{O}, 1)$, $N(t e_1,1)$, and $N(s e_2,1)$, below and then the UMAP result:
D = 15
t = 5
s = 5
np.identity(D)
X1 = np.random.default_rng(20).multivariate_normal(mean=np.zeros(D), cov=(1 * np.eye(D)), size=50)
X2 = np.random.default_rng(20).multivariate_normal(mean= t * np.identity(D)[0], cov=(1 * np.eye(D)), size=50)
X3 = np.random.default_rng(20).multivariate_normal(mean= s * np.identity(D)[1], cov=(1 * np.eye(D)), size=50)
X = np.vstack((X1, X2, X3))
plt.axvline(x=0, c='k', linewidth=1)
plt.axhline(y=0, c='k', linewidth=1)
plt.scatter(X[:,0], X[:,1],
c=np.hstack((np.repeat(0,50), np.repeat(1,50), np.repeat(2,50))),
cmap='cool')
<matplotlib.collections.PathCollection at 0x1555d36d0>
Y = umap.UMAP().fit_transform(X)
plt.scatter(Y[:,0], Y[:,1], c='m')
<matplotlib.collections.PathCollection at 0x153fbb7f0>
The clusters become less distinct as the dimension increase â they become ambiguous around $D=20$ â maybe because the points are "closer" in some sense when you look at them from higher and higher dimensions?
D = 1000
t = 28
s = 28
size = 300
np.identity(D)
X1 = np.random.default_rng(20).multivariate_normal(mean=np.zeros(D), cov=(1 * np.eye(D)), size=size)
X2 = np.random.default_rng(20).multivariate_normal(mean= t * np.identity(D)[0], cov=(1 * np.eye(D)), size=size)
X3 = np.random.default_rng(20).multivariate_normal(mean= s * np.identity(D)[1], cov=(1 * np.eye(D)), size=size)
X = np.vstack((X1, X2, X3))
Y = umap.UMAP().fit_transform(X)
plt.scatter(Y[:,0], Y[:,1], c='m')
<matplotlib.collections.PathCollection at 0x153ed3220>
We vary the distances between two clusters to find the threshold UMAP can detect the difference. This varies per dimension.
Dimension: 100, varying the centroids from 0 to 14 units apart
# D = 100, t=1 to t=15
_, axes = plt.subplots(3, 5, figsize=(15, 7))
labels = np.hstack((np.repeat(0, size), np.repeat(1, size)))
scores = np.zeros(15)
for t in range(15):
D = 100
size = 50
np.identity(D)
X1 = np.random.default_rng(20).multivariate_normal(mean=np.zeros(D), cov=(1 * np.eye(D)), size=size)
X2 = np.random.default_rng(20).multivariate_normal(mean= t * np.identity(D)[0], cov=(1 * np.eye(D)), size=size)
X = np.vstack((X1, X2))
Y = umap.UMAP().fit_transform(X)
axes[t // 5, t % 5].scatter(Y[:,0], Y[:,1], c='#9B4F96', s=1)
scores[t] = ASW(Y, labels)
plt.axhline(y=0.5)
t = np.argmax(scores > 0.5)
plt.axvline(x=t)
plt.plot(scores, c='#c41230')
[<matplotlib.lines.Line2D at 0x166f0b370>]
_, axes = plt.subplots(4, 5, figsize=(15, 7))
labels = np.hstack((np.repeat(0, size), np.repeat(1, size)))
scores = np.zeros(20)
for t in range(10, 30):
D = 300
size = 50
np.identity(D)
X1 = np.random.default_rng(20).multivariate_normal(mean=np.zeros(D), cov=(1 * np.eye(D)), size=size)
X2 = np.random.default_rng(20).multivariate_normal(mean= t * np.identity(D)[0], cov=(1 * np.eye(D)), size=size)
X = np.vstack((X1, X2))
Y = umap.UMAP().fit_transform(X)
axes[(t-10) // 5, (t-10) % 5].scatter(Y[:,0], Y[:,1], c='#9B4F96', s=1)
scores[t-10] = ASW(Y, labels)
plt.axhline(y=0.5)
t = np.argmax(scores > 0.5)
plt.xlim(10, 30)
plt.plot(np.arange(10, 30), scores, c='#c41230')
0
[<matplotlib.lines.Line2D at 0x167e9ef50>]
To quantify how well the dimensionality algorithm represented the clusters, we use the Calinski-Harabasz variance ratio, which compares the ratio between within-cluster and between-cluster variance: $$ \renewcommand{\Ck}{\mathcal{C}_K} \renewcommand{\TCk}{\mathbf{T} _ {\mathcal{C}_K}} \renewcommand{\WCk}{\mathbf{W} _ {\mathcal{C}_K}} \renewcommand{\x}{\mathbf{x}} \renewcommand{\xbar}{\bar{\x}} $$
$$CH(\Ck) = \frac{\mathrm{trace}(\TCk)}{\mathrm{trace}(\WCk)} \times \frac{n-K}{K-1}$$where between-cluster variance $\TCk$ is $$\TCk = \sum_{j=1}^K n_j (\xbar_j - \xbar) (\xbar_j - \xbar)^T$$ and within-cluster variance $\WCk$ is $$\WCk = \sum_{j=1}^K \sum_{c(i)=j} (\x_i - \xbar_j) (\x_i - \xbar_j)^T$$
This metric is adapted from the objective function for k-means. It assumes a pre-set number of clusters that should be spherical. This doesn't suit all clustering scenarios, but our setup uses Gaussian clusters with a spherical variance matrix, so it's suitable our task. Also, the algorithm has fast implementations, which is suited to our high number of metric calculations.
For example:
X1 = np.random.default_rng(100).multivariate_normal(
mean=np.zeros(2), cov=(np.eye(2)), size=15)
X2 = np.random.default_rng(20).multivariate_normal(
mean=np.array([2, 2]), cov=(np.eye(2)), size=15)
X = np.vstack((X1, X2))
labels = np.hstack((np.repeat(0, 15), np.repeat(1, 15)))
plt.scatter(X[:,0], X[:,1], c=labels)
plt.show()
print(CH(X, labels))
X1 = np.random.default_rng(100).multivariate_normal(
mean=np.zeros(2), cov=(np.eye(2)), size=15)
X2 = np.random.default_rng(20).multivariate_normal(
mean=np.array([5, 2]), cov=(np.eye(2)), size=15)
X = np.vstack((X1, X2))
labels = np.hstack((np.repeat(0, 15), np.repeat(1, 15)))
plt.scatter(X[:, 0], X[:, 1], c=labels)
plt.show()
print(CH(X, labels))
17.50122328782498
79.34075386925672
Larger values of the CH ratio point to better clusterings â clusters that are more internally homogenous and different from each other. The problem here is that it isn't standardized to a certain range, which can make comparisons difficult. Next, we'll try ASW (average sillhoette width):
X1 = np.random.default_rng(100).multivariate_normal(
mean=np.zeros(2), cov=(np.eye(2)), size=15)
X2 = np.random.default_rng(20).multivariate_normal(
mean=np.array([2, 2]), cov=(np.eye(2)), size=15)
X = np.vstack((X1, X2))
labels = np.hstack((np.repeat(0, 15), np.repeat(1, 15)))
plt.scatter(X[:, 0], X[:, 1], c=labels)
plt.show()
print(ASW(X, labels))
X1 = np.random.default_rng(100).multivariate_normal(
mean=np.zeros(2), cov=(np.eye(2)), size=15)
X2 = np.random.default_rng(20).multivariate_normal(
mean=np.array([5, 2]), cov=(np.eye(2)), size=15)
X = np.vstack((X1, X2))
labels = np.hstack((np.repeat(0, 15), np.repeat(1, 15)))
plt.scatter(X[:, 0], X[:, 1], c=labels)
plt.show()
print(ASW(X, labels))
0.2877463490205072
0.6198791379016718
def cluster_test(t=1, s=None, D=1000, size=100):
# The first cluster is at (0, 0, ...)
X1 = np.random.default_rng(20).multivariate_normal(mean=np.zeros(D), cov=(1 * np.eye(D)), size=size)
X2 = np.random.default_rng(20).multivariate_normal(mean= t * np.identity(D)[0], cov=(1 * np.eye(D)), size=size)
if s is not None:
print(f"t = {t}, s = {s}")
X3 = np.random.default_rng(20).multivariate_normal(mean= s * np.identity(D)[1], cov=(1 * np.eye(D)), size=size)
X = np.vstack((X1, X2, X3))
labels = np.hstack((np.repeat(0, size), np.repeat(1, size), np.repeat(2, size)))
else:
X = np.vstack((X1, X2))
labels = np.hstack((np.repeat(0, size), np.repeat(1, size)))
# fix seed for reproducibility
Y = umap.UMAP(random_state=2022).fit_transform(X)
score = ASW(Y, labels)
return (Y, labels, score)
def plot_UMAP(Y, labels=None):
if labels is not None:
plt.scatter(Y[:,0], Y[:,1], c=labels)
else:
plt.scatter(Y[:,0], Y[:,1])
plt.show()
def scores_plot(scores, t_range):
plt.figure(figsize=(6, 5), dpi=80)
plt.ylim(-1, 1)
plt.axhline(y=0.5, linestyle='--', c='#4d5051')
plt.plot(t_range, scores, c='#c41230')
# plt.axhline(y=0.5)
plt.title("ASW Score on UMAP Output by Cluster Distance")
plt.xlabel("Cluster Distance")
plt.ylabel("Average Silhouette Width")
plt.show()
return scores
from matplotlib.cm import ScalarMappable
def cluster_tests(t_start=0, t_stop=0, t_step=1, s=None, D=1000, size=100, stop_early=False, plot_scores=False, plotY=None):
t_range = np.linspace(t_start, t_stop, round((t_stop - t_start)/t_step + 1))
# print(t_range)
scores = np.zeros(t_range.size)
over_threshold = None
if plotY is not None:
r, c = plotY
if r * c != t_range.size:
raise Exception("Subplot dimensions do not match number of elements")
fig, axes = plt.subplots(r, c, figsize=(20, 10))
for i, t in enumerate(t_range):
Y, labels, score = cluster_test(t, s, D=D, size=size)
if plotY is not None:
if c == 1 or r == 1:
axes[i].scatter(Y[:,0], Y[:,1], c='m')
else:
color = BI((score + 0.3)/2)
axes[i // c, i % c].scatter(Y[:,0], Y[:,1], color=color)
axes[i // c, i % c].set_title(f"t = {t}")
scores[i] = score
# print(score)
if over_threshold is None and score >= 0.5:
over_threshold = t
if stop_early:
if plot_scores:
scores_plot(scores[:i+1], t_range[:i+1])
return (over_threshold, scores[:i+1])
if plotY:
fig.suptitle("UMAP Clusterings By Distance t", fontsize=16)
plt.subplots_adjust(wspace=0.25, hspace=0.25)
fig.colorbar(cm.ScalarMappable(norm=mpl.colors.Normalize(-0.3, 1), cmap=BI), ax=axes, location='right')
plt.show()
if plot_scores:
plt.clf()
scores_plot(scores, t_range)
# threshold = t_range[np.argmax(scores > 0.5)]
return (over_threshold, scores)
# cluster_tests(t_start=5, t_stop=15, D=300, plot=True, stop_early=True)
cluster_tests(t_start=5, t_stop=16, D=100, size=30, plot_scores=True)
<Figure size 432x288 with 0 Axes>
(8.0, array([-0.00818234, 0.2280554 , 0.40794522, 0.55711216, 0.66200715, 0.63079417, 0.60013908, 0.63225007, 0.75171405, 0.89417207, 0.95501864, 0.94802678]))
Let's try using some kind of optimization algorithm instead of just iterating up from 0 with increments of 0.1 - it would be accurate and faster way to find the score that means the ASW cluster threshold of 0.5.
def find_threshold(choose_t=True, choose_s=False, set_t = None, D=1000, size=100, asw=0.5, upper_bound=1e2):
lower_bound = 0
# define lambda function that's zero when it reaches the threshold of 0.5
if choose_s and choose_t: # change both s and t (3 clusters)
f = lambda t : cluster_test(t=t, s=t, D=D, size=size)[2] - asw
elif choose_s and not choose_t and set_t is not None: # set t, change s (3 clusters)
f = lambda t : cluster_test(t=set_t, s=t, D=D, size=size)[2] - asw
else: # just change t (2 clusters)
f = lambda t : cluster_test(t=t, s=None, D=D, size=size)[2] - asw
# call optimizing algorithm, w/ margin of error 0.1
return brentq(f, lower_bound, upper_bound, xtol=0.1)
Now let's try graphing the relationship between dimension and minimum centroid distance to be detected by UMAP (AWS=0.5).
dim_range = np.arange(2, 100, 1)
threshes = np.zeros(dim_range.size)
for i, d in enumerate(dim_range):
# thresh, scores = cluster_tests(0, 1e6, t_step=0.1, D=d, stop_early=True)
thresh = find_threshold(D=d)
# print(thresh,scores)
# if d % 100 == 0:
threshes[i] = thresh
print(f"Dimension {d}, threshold {thresh}...")
np.savetxt("thresholds.txt", (dim_range, threshes))
plt.scatter(dim_range, threshes, color='#4E2A84')
plt.xlabel("Dimension")
plt.ylabel("Detection Threshold")
plt.savefig("UMAP_dim.png")
np.savetxt("thresholds.txt", (dim_range, threshes))
Dimension 2, threshold 3.1265207334625633... Dimension 3, threshold 2.6295270132991027... Dimension 4, threshold 3.4035902648210734... Dimension 5, threshold 2.9229195183061556... Dimension 6, threshold 2.9636389684530475... Dimension 7, threshold 3.1100451828635576... Dimension 8, threshold 3.490320078363497... Dimension 9, threshold 3.248156200063308... Dimension 10, threshold 3.5655315446266767... Dimension 11, threshold 3.5842306314607977... Dimension 12, threshold 3.8577213307152745... Dimension 13, threshold 3.8701283838357714... Dimension 14, threshold 3.800407015607024... Dimension 15, threshold 4.034192213658093... Dimension 16, threshold 4.082854794260189... Dimension 17, threshold 4.30427428026175... Dimension 18, threshold 4.155591559809225... Dimension 19, threshold 4.7761238099107635... Dimension 20, threshold 4.597282906931779... Dimension 21, threshold 4.666666355861286... Dimension 22, threshold 4.843204769678859... Dimension 23, threshold 4.922500901316063... Dimension 24, threshold 5.0199399835592144... Dimension 25, threshold 5.185372974088914... Dimension 26, threshold 5.259833544312192... Dimension 27, threshold 5.178405250419456... Dimension 28, threshold 5.078383477796321... Dimension 29, threshold 5.550726528995924... Dimension 30, threshold 5.441956647336653... Dimension 31, threshold 5.432437251867018... Dimension 32, threshold 5.721047830496132... Dimension 33, threshold 5.732181222199331... Dimension 34, threshold 5.749470993732638... Dimension 35, threshold 6.07077513519572... Dimension 36, threshold 6.158776973349157... Dimension 37, threshold 5.94379212258021... Dimension 38, threshold 6.354073786186122... Dimension 39, threshold 6.203745278873132... Dimension 40, threshold 6.380920487594623... Dimension 41, threshold 6.780445998311652... Dimension 42, threshold 6.427601291894331... Dimension 43, threshold 6.681491972553136... Dimension 44, threshold 6.658357159683057... Dimension 45, threshold 7.216166650055531... Dimension 46, threshold 6.413863922295413... Dimension 47, threshold 6.779404692556416... Dimension 48, threshold 6.336033870193989... Dimension 49, threshold 5.826724968690502... Dimension 50, threshold 5.651743926252781... Dimension 51, threshold 6.675200566583079... Dimension 52, threshold 6.076944333028447... Dimension 53, threshold 6.730664183840391... Dimension 54, threshold 6.8158226545937675... Dimension 55, threshold 6.144927432095878... Dimension 56, threshold 6.153320573148227... Dimension 57, threshold 6.717420404291396... Dimension 58, threshold 6.292580042180749... Dimension 59, threshold 6.295190564402301... Dimension 60, threshold 6.168377505129522... Dimension 61, threshold 6.801032113958849... Dimension 62, threshold 6.224189027159753... Dimension 63, threshold 7.958453268539338... Dimension 64, threshold 6.059972583504039... Dimension 65, threshold 6.369032573822391... Dimension 66, threshold 6.47796127813199... Dimension 67, threshold 6.721653418236161... Dimension 68, threshold 6.845212584849185... Dimension 69, threshold 6.852947703051186... Dimension 70, threshold 6.689664903506223... Dimension 71, threshold 6.494278371190816... Dimension 72, threshold 7.44470339151967... Dimension 73, threshold 6.53776452808791... Dimension 74, threshold 6.052515953197033... Dimension 75, threshold 7.699487938075816... Dimension 76, threshold 6.73236499391945... Dimension 77, threshold 7.264352563055844... Dimension 78, threshold 6.560700167181189... Dimension 79, threshold 6.121844292080114... Dimension 80, threshold 6.90766308666687... Dimension 81, threshold 6.55718940603906... Dimension 82, threshold 6.440435620436755... Dimension 83, threshold 6.1025606168642... Dimension 84, threshold 10.314236199737302... Dimension 85, threshold 6.869063496884358... Dimension 86, threshold 6.805714050622864... Dimension 87, threshold 7.394847753450263... Dimension 88, threshold 6.898071873747576... Dimension 89, threshold 7.430883428521396... Dimension 90, threshold 7.2987342438842795... Dimension 91, threshold 6.481847290238232... Dimension 92, threshold 6.856972613392357... Dimension 93, threshold 6.244074700947295... Dimension 94, threshold 6.586092448791222... Dimension 95, threshold 6.334164506461391... Dimension 96, threshold 6.699660886036713... Dimension 97, threshold 7.0677601174430045... Dimension 98, threshold 6.8081245914475765... Dimension 99, threshold 6.887053274850261...
Replotting the graph but with different sizing, aesthetics, etc
Td, Tt = dim_range, threshes
# Td, Tt = np.genfromtxt("thresholds copy.txt")
plt.figure(figsize=(5, 5), dpi=100)
plt.scatter(Td, Tt, color='pink', s=15)
# plt.plot(Td, Tt, color='#4E2A84', linewidth=0.1)
plt.xlabel("Dimension")
plt.ylabel("Detection Threshold")
plt.title("UMAP Minimum Detected Cluster Distance \n by Input Dimension")
Text(0.5, 1.0, 'UMAP Minimum Detected Cluster Distance \n by Input Dimension')
The three clusters are centered around $(0, 0, ...)$, ($0, t, 0, ...)$, and $(s, 0, 0, ...)$. First, we'll set $t = s$ and vary them together to see when the UMAP output $Y$ reaches the minimum ASW score of 0.5
dim_range = np.arange(3, 1000, 10)
threshes = np.zeros(dim_range.size)
for i, d in enumerate(dim_range):
thresh = find_threshold(choose_t=True, choose_s=True, D=d)
# print(thresh,scores)
# if d % 100 == 0:
threshes[i] = thresh
print(f"Dimension {d}, threshold {thresh}...")
np.savetxt("thresholds3_1_1k_10.txt", (dim_range, threshes))
t = 0.0, s = 0.0
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn( /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn(
t = 100.0, s = 100.0 t = 55.64256241120091, s = 55.64256241120091 t = 27.821281205600457, s = 27.821281205600457 t = 13.910640602800228, s = 13.910640602800228 t = 6.955320301400114, s = 6.955320301400114 t = 3.477660150700057, s = 3.477660150700057 t = 2.744076981902132, s = 2.744076981902132 t = 3.0438710546282017, s = 3.0438710546282017 t = 3.093871054628203, s = 3.093871054628203 Dimension 3, threshold 3.0438710546282017... t = 0.0, s = 0.0
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn( /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn(
t = 100.0, s = 100.0 t = 53.94629820042008, s = 53.94629820042008 t = 26.97314910021004, s = 26.97314910021004 t = 13.48657455010502, s = 13.48657455010502 t = 6.74328727505251, s = 6.74328727505251 t = 3.371643637526255, s = 3.371643637526255 t = 4.599796443980791, s = 4.599796443980791 t = 4.092153463188384, s = 4.092153463188384 t = 3.911412226945234, s = 3.911412226945234 t = 3.8614122269452325, s = 3.8614122269452325 Dimension 13, threshold 3.911412226945234... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.0888139088084, s = 54.0888139088084 t = 27.0444069544042, s = 27.0444069544042 t = 13.5222034772021, s = 13.5222034772021 t = 6.76110173860105, s = 6.76110173860105 t = 3.380550869300525, s = 3.380550869300525 t = 5.310001513918561, s = 5.310001513918561 t = 4.588504478480701, s = 4.588504478480701 t = 4.878821495927598, s = 4.878821495927598 t = 4.9288214959276, s = 4.9288214959276 Dimension 23, threshold 4.9288214959276... t = 0.0, s = 0.0
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn( /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn(
t = 100.0, s = 100.0 t = 53.66179850448236, s = 53.66179850448236 t = 26.83089925224118, s = 26.83089925224118 t = 13.41544962612059, s = 13.41544962612059 t = 6.707724813060295, s = 6.707724813060295 t = 3.3538624065301477, s = 3.3538624065301477 t = 5.742007419616747, s = 5.742007419616747 t = 5.938240789385703, s = 5.938240789385703 t = 5.8882407893857005, s = 5.8882407893857005 t = 5.815124104501224, s = 5.815124104501224 Dimension 33, threshold 5.815124104501224... t = 0.0, s = 0.0
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn( /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn( /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn(
t = 100.0, s = 100.0 t = 54.09458303492709, s = 54.09458303492709 t = 27.047291517463545, s = 27.047291517463545 t = 13.523645758731773, s = 13.523645758731773 t = 6.761822879365886, s = 6.761822879365886 t = 6.221467291357456, s = 6.221467291357456 t = 6.53099190888612, s = 6.53099190888612 t = 6.580991908886123, s = 6.580991908886123 Dimension 43, threshold 6.53099190888612... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.02836890156036, s = 53.02836890156036 t = 26.51418445078018, s = 26.51418445078018 t = 13.25709222539009, s = 13.25709222539009 t = 6.628546112695045, s = 6.628546112695045 t = 6.96089873803055, s = 6.96089873803055 t = 6.820453872895585, s = 6.820453872895585 t = 6.768450888583956, s = 6.768450888583956 Dimension 53, threshold 6.768450888583956... t = 0.0, s = 0.0
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn( /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn(
t = 100.0, s = 100.0 t = 54.147104772125886, s = 54.147104772125886 t = 27.073552386062943, s = 27.073552386062943 t = 13.536776193031471, s = 13.536776193031471 t = 6.768388096515736, s = 6.768388096515736 t = 8.829892970524188, s = 8.829892970524188 t = 8.603096198899742, s = 8.603096198899742 t = 8.678726762851277, s = 8.678726762851277 t = 8.72872676285128, s = 8.72872676285128 Dimension 63, threshold 8.678726762851277... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.3704326114102, s = 54.3704326114102 t = 27.1852163057051, s = 27.1852163057051 t = 13.59260815285255, s = 13.59260815285255 t = 6.796304076426275, s = 6.796304076426275 t = 10.515203884239, s = 10.515203884239 t = 8.344908942521242, s = 8.344908942521242 t = 8.162176206980474, s = 8.162176206980474 t = 7.479240141703375, s = 7.479240141703375 t = 8.009407728464867, s = 8.009407728464867 t = 7.959407728464863, s = 7.959407728464863 t = 7.71932393508412, s = 7.71932393508412 t = 7.90940772846486, s = 7.90940772846486 Dimension 73, threshold 7.90940772846486... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.59525739276722, s = 53.59525739276722 t = 26.79762869638361, s = 26.79762869638361 t = 13.398814348191806, s = 13.398814348191806 t = 6.699407174095903, s = 6.699407174095903 t = 10.267967743125165, s = 10.267967743125165 t = 9.757012252568291, s = 9.757012252568291 t = 8.228209713332097, s = 8.228209713332097 t = 8.427022323866291, s = 8.427022323866291 t = 8.377022323866287, s = 8.377022323866287 t = 8.278209713332101, s = 8.278209713332101 Dimension 83, threshold 8.228209713332097... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.48532232281583, s = 53.48532232281583 t = 26.742661161407916, s = 26.742661161407916 t = 13.371330580703958, s = 13.371330580703958 t = 6.685665290351979, s = 6.685665290351979 t = 10.273062710561316, s = 10.273062710561316 t = 9.82525723381768, s = 9.82525723381768 t = 8.255461262084829, s = 8.255461262084829 t = 8.946931925472835, s = 8.946931925472835 t = 8.723377632829026, s = 8.723377632829026 t = 8.520248799414144, s = 8.520248799414144 t = 8.659736376891978, s = 8.659736376891978 Dimension 93, threshold 8.723377632829026... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.67165762731317, s = 53.67165762731317 t = 26.835828813656583, s = 26.835828813656583 t = 13.417914406828292, s = 13.417914406828292 t = 6.708957203414146, s = 6.708957203414146 t = 10.663587058330572, s = 10.663587058330572 t = 10.281702109414496, s = 10.281702109414496 t = 8.49532965641432, s = 8.49532965641432 t = 10.11320021600469, s = 10.11320021600469 t = 9.304264936209506, s = 9.304264936209506 t = 9.478659564626259, s = 9.478659564626259 t = 9.428659564626255, s = 9.428659564626255 Dimension 103, threshold 9.478659564626259... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.61915509679387, s = 54.61915509679387 t = 27.309577548396934, s = 27.309577548396934 t = 13.654788774198467, s = 13.654788774198467 t = 6.8273943870992335, s = 6.8273943870992335 t = 11.307856521747947, s = 11.307856521747947 t = 10.644948161127177, s = 10.644948161127177 t = 8.736171274113206, s = 8.736171274113206 t = 10.477950374903426, s = 10.477950374903426 t = 9.607060824508316, s = 9.607060824508316 t = 9.77721807031543, s = 9.77721807031543 t = 10.127584222609428, s = 10.127584222609428 t = 9.857688585678789, s = 9.857688585678789 Dimension 113, threshold 9.77721807031543... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.21694670136135, s = 54.21694670136135 t = 27.108473350680676, s = 27.108473350680676 t = 13.554236675340338, s = 13.554236675340338 t = 11.44910362078458, s = 11.44910362078458 t = 6.761241370037824, s = 6.761241370037824 t = 11.040428463411493, s = 11.040428463411493 t = 10.896990073546217, s = 10.896990073546217 t = 8.829115721792022, s = 8.829115721792022 t = 10.638375439282708, s = 10.638375439282708 t = 10.429232826741263, s = 10.429232826741263 t = 10.479232826741267, s = 10.479232826741267 Dimension 123, threshold 10.429232826741263... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.71152515530647, s = 53.71152515530647 t = 26.855762577653234, s = 26.855762577653234 t = 13.427881288826617, s = 13.427881288826617 t = 12.696946824348709, s = 12.696946824348709 t = 6.3484734121743545, s = 6.3484734121743545 t = 12.424651863413773, s = 12.424651863413773 t = 9.386562637794064, s = 9.386562637794064 t = 12.285869575391038, s = 12.285869575391038 t = 10.836216106592552, s = 10.836216106592552 t = 11.183246574992724, s = 11.183246574992724 t = 10.886216106592556, s = 10.886216106592556 Dimension 133, threshold 10.836216106592552... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.37256380462947, s = 53.37256380462947 t = 26.686281902314736, s = 26.686281902314736 t = 13.343140951157368, s = 13.343140951157368 t = 11.04540980840135, s = 11.04540980840135 t = 10.982248437517494, s = 10.982248437517494 t = 5.491124218758747, s = 5.491124218758747 t = 10.93224843751749, s = 10.93224843751749 Dimension 143, threshold 10.93224843751749... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.88666005219748, s = 53.88666005219748 t = 26.94333002609874, s = 26.94333002609874 t = 13.47166501304937, s = 13.47166501304937 t = 11.37350601043601, s = 11.37350601043601 t = 11.595098447888057, s = 11.595098447888057 t = 11.455454641342184, s = 11.455454641342184 t = 11.52527654461512, s = 11.52527654461512 Dimension 153, threshold 11.455454641342184... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 55.10429803410966, s = 55.10429803410966 t = 27.55214901705483, s = 27.55214901705483 t = 13.776074508527415, s = 13.776074508527415 t = 12.235294735699231, s = 12.235294735699231 t = 10.206360565760043, s = 10.206360565760043 t = 12.125248913972763, s = 12.125248913972763 t = 11.165804739866402, s = 11.165804739866402 t = 11.717934994459338, s = 11.717934994459338 t = 11.931027500938553, s = 11.931027500938553 t = 11.99518940058861, s = 11.99518940058861 Dimension 163, threshold 11.99518940058861... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.1343517652789, s = 54.1343517652789 t = 27.06717588263945, s = 27.06717588263945 t = 13.533587941319725, s = 13.533587941319725 t = 11.5027748934829, s = 11.5027748934829 t = 12.518903585958247, s = 12.518903585958247 t = 12.010839239720575, s = 12.010839239720575 t = 12.239921670123055, s = 12.239921670123055 t = 12.33838764293048, s = 12.33838764293048 Dimension 173, threshold 12.33838764293048... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.819400349211264, s = 53.819400349211264 t = 26.909700174605632, s = 26.909700174605632 t = 13.454850087302816, s = 13.454850087302816 t = 11.958610349491682, s = 11.958610349491682 t = 12.781566775811543, s = 12.781566775811543 t = 12.870742762218612, s = 12.870742762218612 Dimension 183, threshold 12.870742762218612... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.232575861833425, s = 53.232575861833425 t = 26.616287930916712, s = 26.616287930916712 t = 13.308143965458356, s = 13.308143965458356 t = 12.481327895195895, s = 12.481327895195895 t = 12.738761711369557, s = 12.738761711369557 t = 13.048826120790324, s = 13.048826120790324 t = 12.89379391607994, s = 12.89379391607994 t = 12.943793916079946, s = 12.943793916079946 Dimension 193, threshold 12.943793916079946... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.7056650348551, s = 53.7056650348551 t = 26.85283251742755, s = 26.85283251742755 t = 13.426416258713775, s = 13.426416258713775 t = 11.91905264778443, s = 11.91905264778443 t = 12.875351570379948, s = 12.875351570379948 t = 12.715501707984341, s = 12.715501707984341 t = 12.80155330296976, s = 12.80155330296976 Dimension 203, threshold 12.80155330296976... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.5325120280489, s = 53.5325120280489 t = 26.76625601402445, s = 26.76625601402445 t = 13.383128007012225, s = 13.383128007012225 t = 14.340455308628, s = 14.340455308628 t = 13.648832938600547, s = 13.648832938600547 t = 13.698832938600553, s = 13.698832938600553 Dimension 213, threshold 13.648832938600547... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.78824966704024, s = 53.78824966704024 t = 26.89412483352012, s = 26.89412483352012 t = 13.44706241676006, s = 13.44706241676006 t = 14.351642639134727, s = 14.351642639134727 t = 13.793014988064195, s = 13.793014988064195 t = 13.930211413439535, s = 13.930211413439535 t = 13.867377639556992, s = 13.867377639556992 Dimension 223, threshold 13.867377639556992... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.36697809022346, s = 54.36697809022346 t = 27.18348904511173, s = 27.18348904511173 t = 13.591744522555866, s = 13.591744522555866 t = 14.36840645569171, s = 14.36840645569171 t = 13.978812477124503, s = 13.978812477124503 t = 14.196160266042039, s = 14.196160266042039 t = 14.087486371583271, s = 14.087486371583271 t = 14.146160266042033, s = 14.146160266042033 Dimension 233, threshold 14.146160266042033... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.44166540566673, s = 53.44166540566673 t = 26.720832702833366, s = 26.720832702833366 t = 13.360416351416683, s = 13.360416351416683 t = 14.913245595726776, s = 14.913245595726776 t = 14.150686801272206, s = 14.150686801272206 t = 14.29148691613756, s = 14.29148691613756 t = 14.602366255932168, s = 14.602366255932168 t = 14.455581860256215, s = 14.455581860256215 t = 14.348035795969725, s = 14.348035795969725 t = 14.40180882811297, s = 14.40180882811297 Dimension 243, threshold 14.40180882811297... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.52946119656321, s = 53.52946119656321 t = 26.764730598281606, s = 26.764730598281606 t = 13.382365299140803, s = 13.382365299140803 t = 15.14193713735353, s = 15.14193713735353 t = 14.330038885388344, s = 14.330038885388344 t = 14.50582249106012, s = 14.50582249106012 t = 14.455822491060115, s = 14.455822491060115 Dimension 253, threshold 14.50582249106012... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.12302971198864, s = 53.12302971198864 t = 26.56151485599432, s = 26.56151485599432 t = 13.28075742799716, s = 13.28075742799716 t = 19.04109696302779, s = 19.04109696302779 t = 17.957456326072418, s = 17.957456326072418 t = 15.619106877034788, s = 15.619106877034788 t = 14.449932152515974, s = 14.449932152515974 t = 14.932264862715815, s = 14.932264862715815 t = 14.982264862715821, s = 14.982264862715821 t = 15.300685869875306, s = 15.300685869875306 t = 15.459896373455047, s = 15.459896373455047 t = 15.409896373455041, s = 15.409896373455041 t = 15.359896373455035, s = 15.359896373455035 Dimension 263, threshold 15.300685869875306... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.2180695204845, s = 53.2180695204845 t = 26.60903476024225, s = 26.60903476024225 t = 13.304517380121125, s = 13.304517380121125 t = 20.429548796968344, s = 20.429548796968344 t = 19.539226540162467, s = 19.539226540162467 t = 16.421871960141797, s = 16.421871960141797 t = 14.86319467013146, s = 14.86319467013146 t = 15.257334961584304, s = 15.257334961584304 t = 16.029966075000726, s = 16.029966075000726 t = 15.439719490729672, s = 15.439719490729672 t = 15.389719490729666, s = 15.389719490729666 Dimension 273, threshold 15.439719490729672... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.00001908833835, s = 53.00001908833835 t = 26.500009544169174, s = 26.500009544169174 t = 13.250004772084587, s = 13.250004772084587 t = 20.376471006372917, s = 20.376471006372917 t = 19.38707061373536, s = 19.38707061373536 t = 16.318537692909974, s = 16.318537692909974 t = 14.78427123249728, s = 14.78427123249728 t = 15.980805761649101, s = 15.980805761649101 t = 15.580138417388342, s = 15.580138417388342 t = 15.630138417388348, s = 15.630138417388348 t = 15.805472089518725, s = 15.805472089518725 t = 15.755472089518717, s = 15.755472089518717 t = 15.692805253453532, s = 15.692805253453532 Dimension 283, threshold 15.755472089518717... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.09671225515189, s = 53.09671225515189 t = 26.548356127575946, s = 26.548356127575946 t = 13.274178063787973, s = 13.274178063787973 t = 20.429689702413683, s = 20.429689702413683 t = 19.026066800221095, s = 19.026066800221095 t = 16.150122432004533, s = 16.150122432004533 t = 15.460206591058096, s = 15.460206591058096 t = 15.774637660872285, s = 15.774637660872285 t = 15.962380046438408, s = 15.962380046438408 t = 16.05625123922147, s = 16.05625123922147 Dimension 293, threshold 15.962380046438408... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.13891250329352, s = 53.13891250329352 t = 26.56945625164676, s = 26.56945625164676 t = 13.28472812582338, s = 13.28472812582338 t = 20.531671562791534, s = 20.531671562791534 t = 19.511632719102916, s = 19.511632719102916 t = 16.39818042246315, s = 16.39818042246315 t = 16.12721461722099, s = 16.12721461722099 t = 16.26020561822624, s = 16.26020561822624 t = 16.177214617220997, s = 16.177214617220997 Dimension 303, threshold 16.26020561822624... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.78656466242507, s = 53.78656466242507 t = 26.893282331212536, s = 26.893282331212536 t = 13.446641165606268, s = 13.446641165606268 t = 20.778059915947964, s = 20.778059915947964 t = 18.996485233035457, s = 18.996485233035457 t = 16.22156319932086, s = 16.22156319932086 t = 16.27156319932087, s = 16.27156319932087 Dimension 313, threshold 16.22156319932086... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.1281553137079, s = 53.1281553137079 t = 26.56407765685395, s = 26.56407765685395 t = 13.282038828426975, s = 13.282038828426975 t = 20.36954085032763, s = 20.36954085032763 t = 19.051775892181, s = 19.051775892181 t = 16.166907360303988, s = 16.166907360303988 t = 16.63494813622528, s = 16.63494813622528 t = 16.822761511293486, s = 16.822761511293486 t = 16.872761511293493, s = 16.872761511293493 t = 17.962268701737244, s = 17.962268701737244 t = 16.974410002989675, s = 16.974410002989675 t = 17.12678373517271, s = 17.12678373517271 t = 17.024410002989683, s = 17.024410002989683 Dimension 323, threshold 16.974410002989675... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.47688417692804, s = 54.47688417692804 t = 27.23844208846402, s = 27.23844208846402 t = 13.61922104423201, s = 13.61922104423201 t = 20.941597549060035, s = 20.941597549060035 t = 19.733946077822626, s = 19.733946077822626 t = 16.676583561027318, s = 16.676583561027318 t = 17.41199584440883, s = 17.41199584440883 t = 17.344237179661217, s = 17.344237179661217 Dimension 333, threshold 17.41199584440883... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.531837435081464, s = 53.531837435081464 t = 26.765918717540732, s = 26.765918717540732 t = 13.382959358770366, s = 13.382959358770366 t = 20.61042215056926, s = 20.61042215056926 t = 19.665357310189663, s = 19.665357310189663 t = 16.524158334480013, s = 16.524158334480013 t = 17.134742726928945, s = 17.134742726928945 t = 18.400050018559305, s = 18.400050018559305 t = 17.50004016204518, s = 17.50004016204518 t = 17.3300914405808, s = 17.3300914405808 t = 17.274523189182165, s = 17.274523189182165 Dimension 343, threshold 17.3300914405808... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.83863356982773, s = 53.83863356982773 t = 26.919316784913864, s = 26.919316784913864 t = 13.459658392456932, s = 13.459658392456932 t = 20.729037131843587, s = 20.729037131843587 t = 19.346193265864294, s = 19.346193265864294 t = 16.402925829160612, s = 16.402925829160612 t = 17.346819887943674, s = 17.346819887943674 t = 17.26390030795853, s = 17.26390030795853 Dimension 353, threshold 17.346819887943674... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 55.09854198610026, s = 55.09854198610026 t = 27.54927099305013, s = 27.54927099305013 t = 13.774635496525065, s = 13.774635496525065 t = 21.350024608086606, s = 21.350024608086606 t = 19.708648429423853, s = 19.708648429423853 t = 16.74164196297446, s = 16.74164196297446 t = 19.16375410952226, s = 19.16375410952226 t = 17.88106021370471, s = 17.88106021370471 t = 17.831060213704703, s = 17.831060213704703 t = 17.28635108833958, s = 17.28635108833958 t = 17.63184188859313, s = 17.63184188859313 t = 17.681841888593137, s = 17.681841888593137 Dimension 363, threshold 17.63184188859313... t = 0.0, s = 0.0
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn(
t = 100.0, s = 100.0 t = 53.998030868696326, s = 53.998030868696326 t = 26.999015434348163, s = 26.999015434348163 t = 13.499507717174081, s = 13.499507717174081 t = 20.753069807309206, s = 20.753069807309206 t = 19.654298920986786, s = 19.654298920986786 t = 16.772104076090187, s = 16.772104076090187 t = 18.750977528717485, s = 18.750977528717485 t = 17.522463704634827, s = 17.522463704634827 t = 18.204554867347838, s = 18.204554867347838 t = 18.41457975341627, s = 18.41457975341627 t = 18.364579753416262, s = 18.364579753416262 t = 18.28456731038205, s = 18.28456731038205 Dimension 373, threshold 18.28456731038205... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.6775691850271, s = 53.6775691850271 t = 26.83878459251355, s = 26.83878459251355 t = 13.419392296256776, s = 13.419392296256776 t = 21.289366237240486, s = 21.289366237240486 t = 19.801440114970312, s = 19.801440114970312 t = 16.610416205613543, s = 16.610416205613543 t = 17.51023503873484, s = 17.51023503873484 t = 17.31686417723332, s = 17.31686417723332 t = 17.460235038734833, s = 17.460235038734833 Dimension 383, threshold 17.51023503873484... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 52.943023820083255, s = 52.943023820083255 t = 26.471511910041627, s = 26.471511910041627 t = 13.235755955020814, s = 13.235755955020814 t = 22.279836927256923, s = 22.279836927256923 t = 20.52292714225072, s = 20.52292714225072 t = 16.879341548635765, s = 16.879341548635765 t = 18.59196397313089, s = 18.59196397313089 t = 18.21578705204395, s = 18.21578705204395 t = 18.165787052043942, s = 18.165787052043942 Dimension 393, threshold 18.21578705204395... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.2868160503888, s = 54.2868160503888 t = 27.1434080251944, s = 27.1434080251944 t = 13.5717040125972, s = 13.5717040125972 t = 22.257005444884946, s = 22.257005444884946 t = 20.363127334336195, s = 20.363127334336195 t = 16.967415673466697, s = 16.967415673466697 t = 19.869222840920024, s = 19.869222840920024 t = 18.41831925719336, s = 18.41831925719336 t = 18.87276363326603, s = 18.87276363326603 t = 18.82276363326602, s = 18.82276363326602 t = 18.62054144522969, s = 18.62054144522969 t = 18.6705414452297, s = 18.6705414452297 Dimension 403, threshold 18.62054144522969... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.115125432448664, s = 54.115125432448664 t = 27.057562716224332, s = 27.057562716224332 t = 14.466614630703418, s = 14.466614630703418 t = 23.97691845306413, s = 23.97691845306413 t = 22.463958563036936, s = 22.463958563036936 t = 18.46528659687018, s = 18.46528659687018 t = 20.248211331659018, s = 20.248211331659018 t = 19.286300574124365, s = 19.286300574124365 t = 19.62017101348642, s = 19.62017101348642 t = 19.48464381512035, s = 19.48464381512035 t = 19.534643815120358, s = 19.534643815120358 Dimension 413, threshold 19.534643815120358... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 52.911500728111676, s = 52.911500728111676 t = 26.455750364055838, s = 26.455750364055838 t = 23.8286559240979, s = 23.8286559240979 t = 11.91432796204895, s = 11.91432796204895 t = 22.217681404982212, s = 22.217681404982212 t = 17.06600468351558, s = 17.06600468351558 t = 21.507318115735295, s = 21.507318115735295 t = 19.286661399625437, s = 19.286661399625437 t = 19.23666139962543, s = 19.23666139962543 Dimension 423, threshold 19.286661399625437... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.59265976331859, s = 53.59265976331859 t = 26.796329881659297, s = 26.796329881659297 t = 23.37829972181713, s = 23.37829972181713 t = 11.689149860908564, s = 11.689149860908564 t = 21.483548775813045, s = 21.483548775813045 t = 16.586349318360803, s = 16.586349318360803 t = 20.820435676396574, s = 20.820435676396574 t = 18.703392497378687, s = 18.703392497378687 t = 19.555031079894185, s = 19.555031079894185 t = 19.605031079894193, s = 19.605031079894193 Dimension 433, threshold 19.555031079894185... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.988006204237145, s = 53.988006204237145 t = 26.994003102118572, s = 26.994003102118572 t = 20.113422128462425, s = 20.113422128462425 t = 19.563692208401118, s = 19.563692208401118 t = 19.858620410277204, s = 19.858620410277204 t = 19.68240792411891, s = 19.68240792411891 t = 19.788017288442592, s = 19.788017288442592 Dimension 443, threshold 19.788017288442592... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.309609966751985, s = 54.309609966751985 t = 27.154804983375993, s = 27.154804983375993 t = 23.077886864914007, s = 23.077886864914007 t = 11.538943432457003, s = 11.538943432457003 t = 21.577387629553876, s = 21.577387629553876 t = 16.55816553100544, s = 16.55816553100544 t = 20.738231123443622, s = 20.738231123443622 t = 20.078741625360273, s = 20.078741625360273 t = 20.37005200612236, s = 20.37005200612236 t = 20.251934073841095, s = 20.251934073841095 t = 20.165337849600682, s = 20.165337849600682 Dimension 453, threshold 20.165337849600682... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.999527445176284, s = 53.999527445176284 t = 26.999763722588142, s = 26.999763722588142 t = 20.254031591801244, s = 20.254031591801244 t = 21.85979382649254, s = 21.85979382649254 t = 20.792209129035637, s = 20.792209129035637 t = 20.686318856323396, s = 20.686318856323396 t = 20.47017522406232, s = 20.47017522406232 t = 20.60540998641792, s = 20.60540998641792 t = 20.530741714679415, s = 20.530741714679415 Dimension 463, threshold 20.530741714679415... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.67463266257902, s = 53.67463266257902 t = 26.83731633128951, s = 26.83731633128951 t = 20.91287696349575, s = 20.91287696349575 t = 17.067873890222724, s = 17.067873890222724 t = 20.63825471843982, s = 20.63825471843982 t = 20.588254718439813, s = 20.588254718439813 t = 18.82806430433127, s = 18.82806430433127 t = 19.9107678475742, s = 19.9107678475742 t = 20.18444842628001, s = 20.18444842628001 t = 20.386351572359914, s = 20.386351572359914 t = 20.487303145399864, s = 20.487303145399864 t = 20.53777893191984, s = 20.53777893191984 Dimension 473, threshold 20.53777893191984... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.1313589783752, s = 54.1313589783752 t = 27.0656794891876, s = 27.0656794891876 t = 23.23829546394786, s = 23.23829546394786 t = 11.61914773197393, s = 11.61914773197393 t = 21.484528917942413, s = 21.484528917942413 t = 19.64347605615664, s = 19.64347605615664 t = 20.328115411836663, s = 20.328115411836663 t = 20.649693275865896, s = 20.649693275865896 t = 20.760294426048688, s = 20.760294426048688 t = 21.12241167199555, s = 21.12241167199555 t = 20.906111958253955, s = 20.906111958253955 t = 20.9875524004155, s = 20.9875524004155 Dimension 483, threshold 20.906111958253955... t = 0.0, s = 0.0
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn(
t = 100.0, s = 100.0 t = 53.71619293282514, s = 53.71619293282514 t = 26.85809646641257, s = 26.85809646641257 t = 21.491305273149415, s = 21.491305273149415 t = 10.745652636574707, s = 10.745652636574707 t = 20.145187951613476, s = 20.145187951613476 t = 20.19590970015209, s = 20.19590970015209 Dimension 493, threshold 20.145187951613476... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 55.36035170168662, s = 55.36035170168662 t = 27.68017585084331, s = 27.68017585084331 t = 22.27874748710628, s = 22.27874748710628 t = 11.13937374355314, s = 11.13937374355314 t = 20.59370577333871, s = 20.59370577333871 t = 20.91232462040961, s = 20.91232462040961 t = 21.595536053757947, s = 21.595536053757947 t = 21.33007038291159, s = 21.33007038291159 t = 21.25237445351144, s = 21.25237445351144 t = 21.082349536960525, s = 21.082349536960525 t = 20.997337078685067, s = 20.997337078685067 Dimension 503, threshold 20.997337078685067... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.1290498644526, s = 54.1290498644526 t = 27.0645249322263, s = 27.0645249322263 t = 22.14485751408509, s = 22.14485751408509 t = 11.072428757042545, s = 11.072428757042545 t = 20.937352351643284, s = 20.937352351643284 t = 20.887352351643276, s = 20.887352351643276 Dimension 513, threshold 20.937352351643284... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.50559958459968, s = 53.50559958459968 t = 26.75279979229984, s = 26.75279979229984 t = 20.380743707082942, s = 20.380743707082942 t = 21.462301345773618, s = 21.462301345773618 t = 21.409854472919974, s = 21.409854472919974 Dimension 523, threshold 21.462301345773618... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.96570492615921, s = 53.96570492615921 t = 26.982852463079606, s = 26.982852463079606 t = 20.889022092651405, s = 20.889022092651405 t = 22.544108894347364, s = 22.544108894347364 t = 21.437131345158136, s = 21.437131345158136 t = 21.990620119752748, s = 21.990620119752748 t = 22.052192600506014, s = 22.052192600506014 Dimension 533, threshold 21.990620119752748... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.8557286197431, s = 53.8557286197431 t = 26.92786430987155, s = 26.92786430987155 t = 21.01011055105942, s = 21.01011055105942 t = 22.241286407922352, s = 22.241286407922352 t = 21.63511229552858, s = 21.63511229552858 t = 21.50283190023296, s = 21.50283190023296 t = 21.585112295528567, s = 21.585112295528567 Dimension 543, threshold 21.63511229552858... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.76402086545633, s = 53.76402086545633 t = 26.882010432728165, s = 26.882010432728165 t = 21.674357351487586, s = 21.674357351487586 t = 22.94074951018199, s = 22.94074951018199 t = 22.049089129754226, s = 22.049089129754226 t = 22.334814161917482, s = 22.334814161917482 t = 22.637781836049736, s = 22.637781836049736 t = 22.533527196027137, s = 22.533527196027137 t = 22.434170678972308, s = 22.434170678972308 Dimension 553, threshold 22.434170678972308... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.35683264299345, s = 54.35683264299345 t = 27.178416321496726, s = 27.178416321496726 t = 21.02294151478126, s = 21.02294151478126 t = 22.036992956848973, s = 22.036992956848973 t = 22.117555621381666, s = 22.117555621381666 Dimension 563, threshold 22.036992956848973... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.75071538182451, s = 53.75071538182451 t = 26.875357690912256, s = 26.875357690912256 t = 21.552262625407764, s = 21.552262625407764 t = 23.583665074895787, s = 23.583665074895787 t = 22.312496972481856, s = 22.312496972481856 t = 22.36848373460662, s = 22.36848373460662 t = 22.976074404751202, s = 22.976074404751202 t = 22.681233110186334, s = 22.681233110186334 t = 22.80769029557859, s = 22.80769029557859 t = 22.891882350164895, s = 22.891882350164895 Dimension 573, threshold 22.80769029557859... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.21032882334778, s = 53.21032882334778 t = 26.60516441167389, s = 26.60516441167389 t = 21.342155803866326, s = 21.342155803866326 t = 23.072112142702974, s = 23.072112142702974 t = 24.78731465859272, s = 24.78731465859272 t = 23.443309895898334, s = 23.443309895898334 t = 23.2016668667277, s = 23.2016668667277 t = 23.151666866727687, s = 23.151666866727687 Dimension 583, threshold 23.151666866727687... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.313340384940766, s = 54.313340384940766 t = 27.156670192470383, s = 27.156670192470383 t = 21.208638900974844, s = 21.208638900974844 t = 23.978497845204082, s = 23.978497845204082 t = 22.734954687244425, s = 22.734954687244425 t = 23.05622371100968, s = 23.05622371100968 t = 23.131422620492057, s = 23.131422620492057 Dimension 593, threshold 23.131422620492057... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.48808791166794, s = 53.48808791166794 t = 26.74404395583397, s = 26.74404395583397 t = 20.088224042520146, s = 20.088224042520146 t = 25.62353847620242, s = 25.62353847620242 t = 22.855881259361283, s = 22.855881259361283 t = 22.974701850759146, s = 22.974701850759146 t = 22.924701850759135, s = 22.924701850759135 Dimension 603, threshold 22.855881259361283... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.45085931430848, s = 53.45085931430848 t = 26.72542965715424, s = 26.72542965715424 t = 20.661738686246515, s = 20.661738686246515 t = 25.762286715842702, s = 25.762286715842702 t = 24.112068134791116, s = 24.112068134791116 t = 22.386903410518816, s = 22.386903410518816 t = 23.06032208406143, s = 23.06032208406143 t = 23.11032208406144, s = 23.11032208406144 t = 23.611195109426276, s = 23.611195109426276 t = 23.805555326667218, s = 23.805555326667218 t = 23.668629362969025, s = 23.668629362969025 Dimension 613, threshold 23.611195109426276... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.940812770909524, s = 53.940812770909524 t = 26.970406385454762, s = 26.970406385454762 t = 20.829480579537158, s = 20.829480579537158 t = 25.99922367140289, s = 25.99922367140289 t = 23.414352125470025, s = 23.414352125470025 t = 23.2859464302656, s = 23.2859464302656 t = 22.05771350490138, s = 22.05771350490138 t = 23.003004074039303, s = 23.003004074039303 t = 23.07797403094942, s = 23.07797403094942 t = 23.222166603907198, s = 23.222166603907198 t = 23.172166603907186, s = 23.172166603907186 Dimension 623, threshold 23.222166603907198... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.52316175376341, s = 53.52316175376341 t = 26.761580876881705, s = 26.761580876881705 t = 21.10098827088637, s = 21.10098827088637 t = 25.904023731571552, s = 25.904023731571552 t = 23.50250600122896, s = 23.50250600122896 t = 24.065379694496606, s = 24.065379694496606 t = 24.003645214481967, s = 24.003645214481967 t = 23.753075607855465, s = 23.753075607855465 t = 23.703075607855453, s = 23.703075607855453 t = 23.602790804542206, s = 23.602790804542206 t = 23.552790804542195, s = 23.552790804542195 Dimension 633, threshold 23.552790804542195... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.30231336126322, s = 53.30231336126322 t = 26.65115668063161, s = 26.65115668063161 t = 21.457809840942936, s = 21.457809840942936 t = 22.480194180822423, s = 22.480194180822423 t = 24.565675430727016, s = 24.565675430727016 t = 23.78081969830675, s = 23.78081969830675 t = 23.29424285212281, s = 23.29424285212281 t = 23.629254842201842, s = 23.629254842201842 t = 23.679254842201853, s = 23.679254842201853 t = 23.7300372702543, s = 23.7300372702543 Dimension 643, threshold 23.7300372702543... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.2027323978787, s = 53.2027323978787 t = 26.60136619893935, s = 26.60136619893935 t = 21.30834836865558, s = 21.30834836865558 t = 24.859481277521745, s = 24.859481277521745 t = 23.126194974591197, s = 23.126194974591197 t = 23.588685487177806, s = 23.588685487177806 t = 24.224083382349775, s = 24.224083382349775 t = 24.02033837290818, s = 24.02033837290818 t = 24.13866428737062, s = 24.13866428737062 t = 24.0795013301394, s = 24.0795013301394 Dimension 653, threshold 24.13866428737062... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.33149167749125, s = 54.33149167749125 t = 27.165745838745625, s = 27.165745838745625 t = 22.47338878851022, s = 22.47338878851022 t = 24.66715620749032, s = 24.66715620749032 t = 24.45191470183189, s = 24.45191470183189 t = 24.551548068665543, s = 24.551548068665543 Dimension 663, threshold 24.551548068665543... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.809053946944445, s = 54.809053946944445 t = 27.404526973472223, s = 27.404526973472223 t = 22.169849425396244, s = 22.169849425396244 t = 26.666212618906574, s = 26.666212618906574 t = 24.41803102215141, s = 24.41803102215141 t = 24.974761457270716, s = 24.974761457270716 t = 24.836960644895537, s = 24.836960644895537 t = 24.90849288878891, s = 24.90849288878891 Dimension 673, threshold 24.836960644895537... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.17457494826583, s = 54.17457494826583 t = 27.087287474132914, s = 27.087287474132914 t = 22.986935376125864, s = 22.986935376125864 t = 24.52749258915238, s = 24.52749258915238 t = 25.807390031642647, s = 25.807390031642647 t = 24.984449997794922, s = 24.984449997794922 t = 24.733909448583344, s = 24.733909448583344 t = 24.806889907317714, s = 24.806889907317714 t = 24.895669952556318, s = 24.895669952556318 Dimension 683, threshold 24.806889907317714... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.64600338809799, s = 53.64600338809799 t = 26.823001694048994, s = 26.823001694048994 t = 21.25542797901976, s = 21.25542797901976 t = 26.00263157022162, s = 26.00263157022162 t = 23.62902977462069, s = 23.62902977462069 t = 24.234536033257065, s = 24.234536033257065 t = 24.184536033257054, s = 24.184536033257054 t = 23.906782903938872, s = 23.906782903938872 t = 24.134536033257042, s = 24.134536033257042 Dimension 693, threshold 24.184536033257054... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.92741476204583, s = 53.92741476204583 t = 26.963707381022914, s = 26.963707381022914 t = 19.73856018635105, s = 19.73856018635105 t = 25.692489129881185, s = 25.692489129881185 t = 24.498976270957893, s = 24.498976270957893 t = 24.73821952718175, s = 24.73821952718175 t = 25.21535432853147, s = 25.21535432853147 t = 24.975787453444855, s = 24.975787453444855 t = 24.925787453444844, s = 24.925787453444844 t = 24.832003490313298, s = 24.832003490313298 Dimension 703, threshold 24.832003490313298... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.812496472612644, s = 53.812496472612644 t = 26.906248236306322, s = 26.906248236306322 t = 21.45882834867608, s = 21.45882834867608 t = 26.101166293243853, s = 26.101166293243853 t = 25.36011191840813, s = 25.36011191840813 t = 25.0897209071587, s = 25.0897209071587 t = 25.262688176205494, s = 25.262688176205494 Dimension 713, threshold 25.36011191840813... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.5930222196105, s = 53.5930222196105 t = 26.79651110980525, s = 26.79651110980525 t = 22.190389125896452, s = 22.190389125896452 t = 25.608005921114984, s = 25.608005921114984 t = 25.743242556696405, s = 25.743242556696405 t = 25.658005921114995, s = 25.658005921114995 Dimension 723, threshold 25.658005921114995... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.119867859029405, s = 53.119867859029405 t = 26.559933929514703, s = 26.559933929514703 t = 23.180411329487953, s = 23.180411329487953 t = 24.854169624921738, s = 24.854169624921738 t = 25.70705177721822, s = 25.70705177721822 t = 25.942773508622068, s = 25.942773508622068 t = 25.824095224285685, s = 25.824095224285685 t = 25.760065395015932, s = 25.760065395015932 Dimension 733, threshold 25.760065395015932... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.44007435017297, s = 54.44007435017297 t = 27.220037175086485, s = 27.220037175086485 t = 23.21730104641076, s = 23.21730104641076 t = 26.770346369389927, s = 26.770346369389927 t = 24.993823707900344, s = 24.993823707900344 t = 25.72725406692507, s = 25.72725406692507 t = 26.348595520895053, s = 26.348595520895053 t = 26.23775604414533, s = 26.23775604414533 t = 26.29859552089504, s = 26.29859552089504 Dimension 743, threshold 26.29859552089504... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.52566485919362, s = 54.52566485919362 t = 27.26283242959681, s = 27.26283242959681 t = 21.22373120102469, s = 21.22373120102469 t = 26.31001222985055, s = 26.31001222985055 t = 25.89948283781257, s = 25.89948283781257 t = 26.00653927886085, s = 26.00653927886085 t = 25.956539278860838, s = 25.956539278860838 Dimension 753, threshold 26.00653927886085... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.1313590216207, s = 53.1313590216207 t = 26.56567951081035, s = 26.56567951081035 t = 25.86582666953435, s = 25.86582666953435 t = 26.03936085807571, s = 26.03936085807571 t = 25.91582666953436, s = 25.91582666953436 Dimension 763, threshold 25.86582666953435... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.32822282061661, s = 54.32822282061661 t = 27.164111410308305, s = 27.164111410308305 t = 23.542110143995348, s = 23.542110143995348 t = 26.78527349332182, s = 26.78527349332182 t = 26.41232782680494, s = 26.41232782680494 t = 24.977218985400143, s = 24.977218985400143 t = 25.783629722268802, s = 25.783629722268802 t = 26.089597897319784, s = 26.089597897319784 t = 25.98125114538329, s = 25.98125114538329 t = 26.039597897319773, s = 26.039597897319773 Dimension 773, threshold 26.039597897319773... t = 0.0, s = 0.0
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/umap/spectral.py:260: UserWarning: WARNING: spectral initialisation failed! The eigenvector solver failed. This is likely due to too small an eigengap. Consider adding some noise or jitter to your data. Falling back to random initialisation! warn(
t = 100.0, s = 100.0 t = 53.7636020121432, s = 53.7636020121432 t = 26.8818010060716, s = 26.8818010060716 t = 25.172437800273347, s = 25.172437800273347 t = 26.074454104203294, s = 26.074454104203294 t = 26.355729567163078, s = 26.355729567163078 t = 26.596965466332094, s = 26.596965466332094 t = 26.437639417200874, s = 26.437639417200874 t = 26.517302441766482, s = 26.517302441766482 Dimension 783, threshold 26.517302441766482... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.980966489792564, s = 53.980966489792564 t = 26.990483244896282, s = 26.990483244896282 t = 25.25207594953495, s = 25.25207594953495 t = 26.157506411298158, s = 26.157506411298158 t = 25.884713752630446, s = 25.884713752630446 t = 26.102300653044562, s = 26.102300653044562 t = 26.05230065304455, s = 26.05230065304455 Dimension 793, threshold 26.102300653044562... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.621321114794526, s = 53.621321114794526 t = 26.810660557397263, s = 26.810660557397263 t = 27.473195227261787, s = 27.473195227261787 t = 26.953986025416782, s = 26.953986025416782 t = 27.213590626339283, s = 27.213590626339283 t = 27.33546389662175, s = 27.33546389662175 t = 27.263590626339294, s = 27.263590626339294 Dimension 803, threshold 27.213590626339283... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.19688883290351, s = 54.19688883290351 t = 27.098444416451756, s = 27.098444416451756 t = 27.172435566500525, s = 27.172435566500525 t = 40.68466219970202, s = 40.68466219970202 t = 27.333177293949337, s = 27.333177293949337 t = 27.283177293949326, s = 27.283177293949326 t = 27.222435566500536, s = 27.222435566500536 Dimension 813, threshold 27.222435566500536... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.56672712422047, s = 53.56672712422047 t = 26.783363562110235, s = 26.783363562110235 t = 26.833363562110247, s = 26.833363562110247 t = 40.20004534316536, s = 40.20004534316536 t = 27.128718906928345, s = 27.128718906928345 t = 27.078718906928334, s = 27.078718906928334 t = 27.028718906928322, s = 27.028718906928322 Dimension 823, threshold 27.078718906928334... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.21257876927163, s = 53.21257876927163 t = 26.606289384635815, s = 26.606289384635815 t = 27.672620095066684, s = 27.672620095066684 t = 28.711185940882512, s = 28.711185940882512 t = 27.767906162131826, s = 27.767906162131826 Dimension 833, threshold 27.672620095066684... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.13639654402818, s = 53.13639654402818 t = 26.56819827201409, s = 26.56819827201409 t = 27.34290071747596, s = 27.34290071747596 t = 40.23964863075207, s = 40.23964863075207 t = 28.155055243939593, s = 28.155055243939593 t = 27.645788545910424, s = 27.645788545910424 t = 27.536290986809018, s = 27.536290986809018 t = 27.595788545910413, s = 27.595788545910413 Dimension 843, threshold 27.536290986809018... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.78765474167423, s = 53.78765474167423 t = 26.893827370837116, s = 26.893827370837116 t = 27.359559136953763, s = 27.359559136953763 t = 27.409559136953774, s = 27.409559136953774 t = 40.598606939314, s = 40.598606939314 t = 27.619147980874175, s = 27.619147980874175 t = 27.487923321941036, s = 27.487923321941036 t = 27.553535651407607, s = 27.553535651407607 Dimension 853, threshold 27.487923321941036... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.18980805115088, s = 54.18980805115088 t = 27.09490402557544, s = 27.09490402557544 t = 28.425939984458815, s = 28.425939984458815 t = 27.54741465960876, s = 27.54741465960876 t = 27.59741465960877, s = 27.59741465960877 t = 28.01167732203379, s = 28.01167732203379 t = 28.218808653246302, s = 28.218808653246302 t = 28.32237431885256, s = 28.32237431885256 t = 28.27237431885255, s = 28.27237431885255 Dimension 863, threshold 28.27237431885255... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.46485894331772, s = 53.46485894331772 t = 26.73242947165886, s = 26.73242947165886 t = 29.60607617256103, s = 29.60607617256103 t = 27.87868683962357, s = 27.87868683962357 t = 28.573086470565034, s = 28.573086470565034 t = 28.084999428478426, s = 28.084999428478426 t = 28.329042949521728, s = 28.329042949521728 t = 28.213216116365047, s = 28.213216116365047 t = 28.273848585779895, s = 28.273848585779895 Dimension 873, threshold 28.273848585779895... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.09729585557782, s = 53.09729585557782 t = 26.54864792778891, s = 26.54864792778891 t = 27.47582197352086, s = 27.47582197352086 t = 40.28655891454934, s = 40.28655891454934 t = 29.21626350061877, s = 29.21626350061877 t = 27.97645896871325, s = 27.97645896871325 t = 28.573756365625258, s = 28.573756365625258 t = 28.62375636562527, s = 28.62375636562527 Dimension 883, threshold 28.573756365625258... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.24481969272407, s = 54.24481969272407 t = 27.122409846362036, s = 27.122409846362036 t = 27.664635386310692, s = 27.664635386310692 t = 27.74710001036185, s = 27.74710001036185 t = 40.99595985154296, s = 40.99595985154296 t = 28.79584339457936, s = 28.79584339457936 t = 28.454242934422332, s = 28.454242934422332 t = 28.40424293442232, s = 28.40424293442232 t = 28.075671472392088, s = 28.075671472392088 t = 28.26340269184247, s = 28.26340269184247 t = 28.16953708211728, s = 28.16953708211728 Dimension 893, threshold 28.26340269184247... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.666695397062334, s = 53.666695397062334 t = 26.833347698531167, s = 26.833347698531167 t = 27.970565205211575, s = 27.970565205211575 t = 30.57913702763547, s = 30.57913702763547 t = 28.350554257076617, s = 28.350554257076617 t = 29.464845642356043, s = 29.464845642356043 t = 28.56077733247934, s = 28.56077733247934 t = 28.496449898333506, s = 28.496449898333506 Dimension 903, threshold 28.496449898333506... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 52.8180006822951, s = 52.8180006822951 t = 26.40900034114755, s = 26.40900034114755 t = 30.55492144669553, s = 30.55492144669553 t = 28.274150388405495, s = 28.274150388405495 t = 28.57447465167848, s = 28.57447465167848 t = 28.470639412054794, s = 28.470639412054794 t = 28.524474651678467, s = 28.524474651678467 Dimension 913, threshold 28.57447465167848... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.28064563627905, s = 53.28064563627905 t = 26.640322818139524, s = 26.640322818139524 t = 26.549894802946167, s = 26.549894802946167 Dimension 923, threshold 26.640322818139524... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.82918845968134, s = 53.82918845968134 t = 26.91459422984067, s = 26.91459422984067 t = 29.71630664194539, s = 29.71630664194539 t = 28.375142206322323, s = 28.375142206322323 t = 28.823795952730027, s = 28.823795952730027 t = 28.771412127216998, s = 28.771412127216998 t = 28.57327716676966, s = 28.57327716676966 t = 28.688912626227324, s = 28.688912626227324 Dimension 933, threshold 28.771412127216998... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.93235206596263, s = 53.93235206596263 t = 26.966176032981316, s = 26.966176032981316 t = 30.148071061708265, s = 30.148071061708265 t = 28.66493798708969, s = 28.66493798708969 t = 29.368735791010007, s = 29.368735791010007 t = 29.46064681556248, s = 29.46064681556248 t = 29.804358938635374, s = 29.804358938635374 t = 29.58292745668519, s = 29.58292745668519 t = 29.53292745668518, s = 29.53292745668518 Dimension 943, threshold 29.58292745668519... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 52.62187945723119, s = 52.62187945723119 t = 26.310939728615594, s = 26.310939728615594 t = 29.77253767946043, s = 29.77253767946043 t = 28.46315480937585, s = 28.46315480937585 t = 28.180697509986345, s = 28.180697509986345 t = 28.335858255053225, s = 28.335858255053225 t = 28.41315480937584, s = 28.41315480937584 Dimension 953, threshold 28.41315480937584... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.38502720075004, s = 53.38502720075004 t = 26.69251360037502, s = 26.69251360037502 t = 31.104898567928736, s = 31.104898567928736 t = 28.824833784165463, s = 28.824833784165463 t = 28.610895361573636, s = 28.610895361573636 t = 28.750509016491062, s = 28.750509016491062 Dimension 963, threshold 28.824833784165463... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 54.40361087724064, s = 54.40361087724064 t = 27.20180543862032, s = 27.20180543862032 t = 31.029494756476687, s = 31.029494756476687 t = 29.196569617936767, s = 29.196569617936767 t = 29.146569617936755, s = 29.146569617936755 t = 28.174187528278537, s = 28.174187528278537 t = 29.04390660069665, s = 29.04390660069665 t = 29.096569617936744, s = 29.096569617936744 Dimension 973, threshold 29.146569617936755... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.339689863710525, s = 53.339689863710525 t = 26.669844931855263, s = 26.669844931855263 t = 41.09620608391207, s = 41.09620608391207 t = 38.743276538357236, s = 38.743276538357236 t = 32.70656073510625, s = 32.70656073510625 t = 29.68820283348076, s = 29.68820283348076 t = 29.43068222656953, s = 29.43068222656953 t = 28.08083958062908, s = 28.08083958062908 t = 29.109035022890694, s = 29.109035022890694 t = 29.23211308861121, s = 29.23211308861121 t = 29.374838081812676, s = 29.374838081812676 Dimension 983, threshold 29.374838081812676... t = 0.0, s = 0.0 t = 100.0, s = 100.0 t = 53.31859006930567, s = 53.31859006930567 t = 26.659295034652835, s = 26.659295034652835 t = 41.16326796937919, s = 41.16326796937919 t = 38.694259608738726, s = 38.694259608738726 t = 32.67677732169578, s = 32.67677732169578 t = 29.668036178174308, s = 29.668036178174308 t = 29.854011486512345, s = 29.854011486512345 t = 29.96585653683096, s = 29.96585653683096 t = 29.90401148651236, s = 29.90401148651236 Dimension 993, threshold 29.854011486512345...
Td, Tt = dim_range, threshes
# Td, Tt = np.genfromtxt("thresholds copy.txt")
plt.figure(figsize=(10, 5), dpi=100)
plt.scatter(Td, Tt, color='#4E2A84', s=15)
# plt.plot(Td, Tt, color='#4E2A84', linewidth=0.1)
plt.xlabel("Dimension")
plt.ylabel("Detection Threshold")
plt.title("UMAP Minimum Detected 3-Cluster Distances \n by Input Dimension")
Text(0.5, 1.0, 'UMAP Minimum Detected 3-Cluster Distances \n by Input Dimension')
Ooh ok now let's try plotting the two-cluster and 1-cluster setup on the same graph, using the data we saved
def plot_dimgraph(fname, color='pink', smooth=False):
Td, Tt = np.genfromtxt(fname)
plt.xlabel("Input Dimension")
plt.ylabel("Detection Threshold")
plt.scatter(Td, Tt, color=color, s=10)
if smooth:
min, max = Td[0], Td[-1]
xnew = np.linspace(min, max, round(max-min)*10)
spline = make_interp_spline(Td, Tt)
plt.plot(xnew, spline(xnew), c='m')
return Td, Tt
# plt.plot(Td, Tt, color='#4E2A84', linewidth=0.1)
# Plot 1 to 100 graph
plt.figure(figsize=(6, 5), dpi=100)
plt.title("UMAP Detection Thresholds \n by Input Dimension (for 2 and 3 clusters)")
plot_dimgraph("thresholds_1_100_1.txt", '#4E2A84')
plot_dimgraph("thresholds3_1_100_1.txt", 'm')
plt.show()
# Plot 1 to 1000 graph
plt.figure(figsize=(10, 5), dpi=100)
plt.title("UMAP Detection Thresholds \n by Input Dimension (for 2 and 3 clusters)")
plot_dimgraph("thresholds_1_1k_10.txt", '#4E2A84')
plot_dimgraph("thresholds3_1_1k_10.txt", 'm')
plt.show()
These aren't bad, but still kind of cluttered. It might be better to fit a smoothed curve (maybe a spline) to each one and plot those instead. First, let's try fitting a curve just for the 2-cluster 100dim dataset.
It looks like scipy
and scikit-learn
don't have easy ways of fitting smoothed curves like ggplot
can. I could try using plotnine
?
The relationship between input dimension and the minimum detection threshold seems to follow a predictable curve. We can model the relationshp with the model $T = \mathbf{\beta} F(D)$, where $\beta$ is the design matrix that includes the intercept and $F$ is a transformation function. If the model is a square root curve, we can apply the $F(D) = D^2$ transformation and fit a linear transformation; $F(D) = \mathrm{exp}(D)$ would fit a logarithmic relationship.
# Function for fitting and plotting a transformed linear model
def fit_mod(fname, fn, apply='x', inv_fn=None, plot=True, color='m'):
if plot:
Td, Tt = plot_dimgraph(fname)
else:
Td, Tt = np.genfromtxt(fname)
T = Tt.reshape(-1, 1)
D = Td.reshape(-1, 1)
Xnew = np.linspace(Td.min(), Td.max(), round(Td.max() - Td.min())*2).reshape(-1, 1)
if apply == 'x':
fit = LinReg().fit(fn(D), T)
print(fit.score(fn(D), T))
Ynew = fit.predict(fn(Xnew))
elif apply == 'y' and inv_fn is not None:
fit = LinReg().fit(D, fn(T))
print(fit.score(D, fn(T)))
Ynew = inv_fn(fit.predict(Xnew))
else:
raise Exception("Must specify 'x' or 'y' for apply and an inverse function if apply='y'")
if plot:
plt.plot(Xnew, Ynew, c=color)
# get the actual numbers out - return intercept, then coefficient
return (fit.intercept_[0], fit.coef_[0][0])
$T \sim \sqrt{D}$
# Plot graphs and fitted lines for first 100 dimensions
plt.figure(figsize=(8, 3), dpi=100)
plt.subplot(121)
plt.title("2 Clusters")
fit_mod('thresholds_1_100_1.txt', lambda x : x**0.5)
plt.subplot(122)
plt.title("3 Clusters")
fit_mod('thresholds3_1_100_1.txt', lambda x : x**0.5)
plt.subplots_adjust(wspace=0.5)
plt.show()
# Plot graphs and fitted lines for first 1000 dimensions
plt.figure(figsize=(10, 4), dpi=100)
plt.subplot(121)
plt.title("2 Clusters")
fit_mod('thresholds_1_1k_10.txt', lambda x : x**0.5)
plt.subplot(122)
plt.title("3 Clusters")
fit_mod('thresholds3_1_1k_10.txt', lambda x : x**0.5)
0.7781007106235502 0.9596151244262725
0.7939208338976498 0.9965006755604313
(0.07766219823938059, 0.9379464556369232)
$T^2 \sim D$
# Plot graphs and fitted lines for first 100 dimensions
plt.figure(figsize=(8, 3), dpi=100)
plt.subplot(121)
plt.title("2 Clusters")
fit_mod('thresholds_1_100_1.txt', lambda y: y**2, apply='y', inv_fn = lambda y: y**0.5)
plt.subplot(122)
plt.title("3 Clusters")
fit_mod('thresholds3_1_100_1.txt', lambda y: y**2, apply='y', inv_fn = lambda y: y**0.5)
plt.subplots_adjust(wspace=0.5)
plt.show()
# Plot graphs and fitted lines for first 1000 dimensions
plt.figure(figsize=(10, 4), dpi=100)
plt.subplot(121)
plt.title("2 Clusters")
fit_mod('thresholds_1_1k_10.txt', lambda y: y **2, apply='y', inv_fn=lambda y: y**0.5)
plt.subplot(122)
plt.title("3 Clusters")
fit_mod('thresholds3_1_1k_10.txt', lambda y: y **2, apply='y', inv_fn=lambda y: y**0.5)
0.6300451199342515 0.9345355639826407
0.8203336393612541 0.9952348820055732
/var/folders/d0/64bdzj095w509hfmv_5cd8sw0000gn/T/ipykernel_51122/3899025680.py:19: RuntimeWarning: invalid value encountered in sqrt fit_mod('thresholds3_1_1k_10.txt', lambda y: y **2, apply='y', inv_fn=lambda y: y**0.5)
Applying the square root to $T$ presents problems when $T < 0$ (when the clusters aren't separated). While this can be fixed with another transformation, the lower accuracy rate for this method combined with this complication suggests a preference for $T \sim \sqrt{D}$.
Some concerns are that the model does noticeably worse for the first 100 dimensions. This is a problem because:
(1) it violates the homoscedasticity assumption, suggesting model misspecification
(2) many real-world input datasets are of the first 100 dimensions, so it is important to model them correctly
$T \sim \log(D)$
# Plot graphs and fitted lines for first 100 dimensions
plt.figure(figsize=(8, 3), dpi=100)
plt.subplot(121)
plt.title("2 Clusters")
fit_mod('thresholds_1_100_1.txt', lambda x: np.log(x))
plt.subplot(122)
plt.title("3 Clusters")
fit_mod('thresholds3_1_100_1.txt', lambda x: np.log(x))
plt.subplots_adjust(wspace=0.5)
plt.show()
# Plot graphs and fitted lines for first 1000 dimensions
plt.figure(figsize=(10, 4), dpi=100)
plt.subplot(121)
plt.title("2 Clusters")
fit_mod('thresholds_1_1k_10.txt', lambda x: np.log(x))
plt.subplot(122)
plt.title("3 Clusters")
fit_mod('thresholds3_1_1k_10.txt', lambda x: np.log(x))
0.8115109388629443 0.9036387624910984
0.5674959183047068 0.8718098058283815
$exp(T) \sim D$
# Plot graphs and fitted lines for first 100 dimensions
plt.figure(figsize=(8, 3), dpi=100)
plt.subplot(121)
plt.title("2 Clusters")
fit_mod('thresholds_1_100_1.txt', np.exp, apply='y', inv_fn=np.log)
plt.subplot(122)
plt.title("3 Clusters")
fit_mod('thresholds3_1_100_1.txt', np.exp, apply='y', inv_fn=np.log)
plt.subplots_adjust(wspace=0.5)
plt.show()
# Plot graphs and fitted lines for first 1000 dimensions
plt.figure(figsize=(10, 4), dpi=100)
plt.subplot(121)
plt.title("2 Clusters")
fit_mod('thresholds_1_1k_10.txt', np.exp, apply='y', inv_fn=np.log)
plt.subplot(122)
plt.title("3 Clusters")
fit_mod('thresholds3_1_1k_10.txt', np.exp, apply='y', inv_fn=np.log)
0.05308636663168087 0.37102236603932637
/var/folders/d0/64bdzj095w509hfmv_5cd8sw0000gn/T/ipykernel_51122/3892792788.py:14: RuntimeWarning: invalid value encountered in log Ynew = inv_fn(fit.predict(Xnew)) /var/folders/d0/64bdzj095w509hfmv_5cd8sw0000gn/T/ipykernel_51122/3892792788.py:14: RuntimeWarning: invalid value encountered in log Ynew = inv_fn(fit.predict(Xnew))
0.6618327522142211 0.31756823997795214
/var/folders/d0/64bdzj095w509hfmv_5cd8sw0000gn/T/ipykernel_51122/659855156.py:16: RuntimeWarning: invalid value encountered in log fit_mod('thresholds_1_1k_10.txt', np.exp, apply='y', inv_fn=lambda y : np.log(y)) /var/folders/d0/64bdzj095w509hfmv_5cd8sw0000gn/T/ipykernel_51122/659855156.py:19: RuntimeWarning: invalid value encountered in log fit_mod('thresholds3_1_1k_10.txt', np.exp, apply='y', inv_fn=lambda y: np.log(y))
The log
-transformed models seem to perform worse than the square-root models, with lower $R^2$ values across the board. As of now, we'll select the $T \sim \sqrt{D}$ model going forward.
Since we're going to be drawing lots of dim-thresh curves, let's write a function to make this easier:
def dim_thresh(numrange, asw=0.5, fname=None, plot=True, upper_bound=1e2):
"""numrange = tuple of min, max, step of dimensions \\
asw = the target ASW score \\
fname = name of .txt and .png file to save results and graph to \\
returns the dimension range and thresholds
"""
dim_range = np.arange(*numrange)
threshes = np.zeros(dim_range.size)
for i, d in enumerate(dim_range):
thresh = find_threshold(D=d, asw=asw, upper_bound=upper_bound)
threshes[i] = thresh
if fname is not None:
np.savetxt(f"{fname}.txt", (dim_range, threshes))
print(f"Dimension {d}, threshold {thresh}...")
if plot:
plt.scatter(dim_range, threshes, color='#4E2A84')
plt.xlabel("Dimension")
plt.ylabel("Detection Threshold")
if fname is not None:
np.savetxt(f"{fname}.txt", (dim_range, threshes))
plt.savefig(f"{fname}.png")
return (dim_range, threshes)
Let's see how the dimension-threshold graphs change when we change the minimum cluster separation score from 0.5 to various other values from 0 to 1.
coefs = fit_mod('thresholds_1_1k_10.txt', lambda x: x**0.5, plot=True)
print(coefs)
0.7939208338976498 (5.507215974285145, 0.153598024373336)
So our dimension-threshold curve model for an average sillhouette width of 0.5 is $T \sim 5.5072 + 0.1536 \sqrt{D}$. Let's see how the curve changes when we set ASW to 0.8, for example:
dim_thresh((2, 100, 10), asw=0.8)
Dimension 2, threshold 5.613750497603059... Dimension 12, threshold 5.706393371323584... Dimension 22, threshold 6.050574757243772... Dimension 32, threshold 6.74166741641923... Dimension 42, threshold 7.889201729282776... Dimension 52, threshold 8.834738826814196... Dimension 62, threshold 9.824628768395186... Dimension 72, threshold 10.668244929966734... Dimension 82, threshold 11.534872692496897... Dimension 92, threshold 12.215431473525387...
(array([ 2, 12, 22, 32, 42, 52, 62, 72, 82, 92]), array([ 5.6137505 , 5.70639337, 6.05057476, 6.74166742, 7.88920173, 8.83473883, 9.82462877, 10.66824493, 11.53487269, 12.21543147]))
Now let's see how the dimension-detection curve changes by plotting the above graph for different values of ASW from 0 to 1.
# Plot the 1 to 100 by 10 curve from ASW = 0, 0.1, ..., 1
ASWs = np.linspace(0, 1, 11)
np.repeat(ASWs, 10)
color_labs = np.zeros((ASWs.size))
Y = np.zeros(10*ASWs.size)
for i, asw in enumerate(ASWs):
D, T = dim_thresh((2, 100, 10), fname=f"{asw}thresholds_1_100_10", asw=asw)
assert(T.size == 10)
Y[i*10:i*10+10] = T
print(Y)
plt.scatter(np.linspace(2, 100, 10), Y, c=color_labs)
Dimension 2, threshold 0.3621640150980363... Dimension 12, threshold 2.8552321984398388... Dimension 22, threshold 4.423545533246347... Dimension 32, threshold 4.755326968307792... Dimension 42, threshold 4.194989705542616... Dimension 52, threshold 5.1078564581060775... Dimension 62, threshold 5.359614069569666... Dimension 72, threshold 5.708626649075599... Dimension 82, threshold 5.366426077833518... Dimension 92, threshold 5.96495582646076... [0.36216402 2.8552322 4.42354553 4.75532697 4.19498971 5.10785646 5.35961407 5.70862665 5.36642608 5.96495583 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ] Dimension 2, threshold 1.2875515632906476... Dimension 12, threshold 3.078655234836156... Dimension 22, threshold 4.444166983675143... Dimension 32, threshold 4.930008997493937... Dimension 42, threshold 4.385020545161585... Dimension 52, threshold 4.996478261678475... Dimension 62, threshold 5.4394457945148... Dimension 72, threshold 6.61445079328707... Dimension 82, threshold 5.542170118710299... Dimension 92, threshold 5.896544148143586... [0.36216402 2.8552322 4.42354553 4.75532697 4.19498971 5.10785646 5.35961407 5.70862665 5.36642608 5.96495583 1.28755156 3.07865523 4.44416698 4.930009 4.38502055 4.99647826 5.43944579 6.61445079 5.54217012 5.89654415 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ] Dimension 2, threshold 1.6613422462140706... Dimension 12, threshold 3.12911115260216... Dimension 22, threshold 4.551522659426805... Dimension 32, threshold 5.2253918813561455... Dimension 42, threshold 4.852078501843154... Dimension 52, threshold 5.044801222458389... Dimension 62, threshold 5.203003703345319... Dimension 72, threshold 5.954542785767611... Dimension 82, threshold 5.716692089150789... Dimension 92, threshold 5.732088255115968... [0.36216402 2.8552322 4.42354553 4.75532697 4.19498971 5.10785646 5.35961407 5.70862665 5.36642608 5.96495583 1.28755156 3.07865523 4.44416698 4.930009 4.38502055 4.99647826 5.43944579 6.61445079 5.54217012 5.89654415 1.66134225 3.12911115 4.55152266 5.22539188 4.8520785 5.04480122 5.2030037 5.95454279 5.71669209 5.73208826 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ] Dimension 2, threshold 2.0209417076020286... Dimension 12, threshold 3.291580847798015... Dimension 22, threshold 4.550869782896616... Dimension 32, threshold 4.826003744114972... Dimension 42, threshold 4.739903927891191... Dimension 52, threshold 5.241992440423674... Dimension 62, threshold 5.2762438371184235... Dimension 72, threshold 6.427694100670831... Dimension 82, threshold 5.969244154894712... Dimension 92, threshold 5.754916188703097... [0.36216402 2.8552322 4.42354553 4.75532697 4.19498971 5.10785646 5.35961407 5.70862665 5.36642608 5.96495583 1.28755156 3.07865523 4.44416698 4.930009 4.38502055 4.99647826 5.43944579 6.61445079 5.54217012 5.89654415 1.66134225 3.12911115 4.55152266 5.22539188 4.8520785 5.04480122 5.2030037 5.95454279 5.71669209 5.73208826 2.02094171 3.29158085 4.55086978 4.82600374 4.73990393 5.24199244 5.27624384 6.4276941 5.96924415 5.75491619 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ] Dimension 2, threshold 2.5993313525803714... Dimension 12, threshold 3.623084401054671... Dimension 22, threshold 4.642960860035894... Dimension 32, threshold 5.210244927983069... Dimension 42, threshold 5.171820663845513... Dimension 52, threshold 5.361602452980076... Dimension 62, threshold 5.644590367435315... Dimension 72, threshold 6.131893805110455... Dimension 82, threshold 5.833514323396139... Dimension 92, threshold 5.934644057967144... [0.36216402 2.8552322 4.42354553 4.75532697 4.19498971 5.10785646 5.35961407 5.70862665 5.36642608 5.96495583 1.28755156 3.07865523 4.44416698 4.930009 4.38502055 4.99647826 5.43944579 6.61445079 5.54217012 5.89654415 1.66134225 3.12911115 4.55152266 5.22539188 4.8520785 5.04480122 5.2030037 5.95454279 5.71669209 5.73208826 2.02094171 3.29158085 4.55086978 4.82600374 4.73990393 5.24199244 5.27624384 6.4276941 5.96924415 5.75491619 2.59933135 3.6230844 4.64296086 5.21024493 5.17182066 5.36160245 5.64459037 6.13189381 5.83351432 5.93464406 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ] Dimension 2, threshold 3.1265207334625633... Dimension 12, threshold 3.8577213307152745... Dimension 22, threshold 4.843204769678859... Dimension 32, threshold 5.721047830496132... Dimension 42, threshold 6.427601291894331... Dimension 52, threshold 6.076944333028447... Dimension 62, threshold 6.224189027159753... Dimension 72, threshold 7.44470339151967... Dimension 82, threshold 6.440435620436755... Dimension 92, threshold 6.856972613392357... [0.36216402 2.8552322 4.42354553 4.75532697 4.19498971 5.10785646 5.35961407 5.70862665 5.36642608 5.96495583 1.28755156 3.07865523 4.44416698 4.930009 4.38502055 4.99647826 5.43944579 6.61445079 5.54217012 5.89654415 1.66134225 3.12911115 4.55152266 5.22539188 4.8520785 5.04480122 5.2030037 5.95454279 5.71669209 5.73208826 2.02094171 3.29158085 4.55086978 4.82600374 4.73990393 5.24199244 5.27624384 6.4276941 5.96924415 5.75491619 2.59933135 3.6230844 4.64296086 5.21024493 5.17182066 5.36160245 5.64459037 6.13189381 5.83351432 5.93464406 3.12652073 3.85772133 4.84320477 5.72104783 6.42760129 6.07694433 6.22418903 7.44470339 6.44043562 6.85697261 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ] Dimension 2, threshold 3.897611781582614... Dimension 12, threshold 4.169650442634372... Dimension 22, threshold 5.132808910167331... Dimension 32, threshold 6.072914494607649... Dimension 42, threshold 7.16788708981796... Dimension 52, threshold 8.19844869435329... Dimension 62, threshold 8.937601573907262... Dimension 72, threshold 9.854829571009804... Dimension 82, threshold 10.911424384554138... Dimension 92, threshold 8.464212772472537... [ 0.36216402 2.8552322 4.42354553 4.75532697 4.19498971 5.10785646 5.35961407 5.70862665 5.36642608 5.96495583 1.28755156 3.07865523 4.44416698 4.930009 4.38502055 4.99647826 5.43944579 6.61445079 5.54217012 5.89654415 1.66134225 3.12911115 4.55152266 5.22539188 4.8520785 5.04480122 5.2030037 5.95454279 5.71669209 5.73208826 2.02094171 3.29158085 4.55086978 4.82600374 4.73990393 5.24199244 5.27624384 6.4276941 5.96924415 5.75491619 2.59933135 3.6230844 4.64296086 5.21024493 5.17182066 5.36160245 5.64459037 6.13189381 5.83351432 5.93464406 3.12652073 3.85772133 4.84320477 5.72104783 6.42760129 6.07694433 6.22418903 7.44470339 6.44043562 6.85697261 3.89761178 4.16965044 5.13280891 6.07291449 7.16788709 8.19844869 8.93760157 9.85482957 10.91142438 8.46421277 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ] Dimension 2, threshold 5.185690621132979... Dimension 12, threshold 5.160981708508345... Dimension 22, threshold 5.616333115086975... Dimension 32, threshold 6.535851120955324... Dimension 42, threshold 7.460727250294686... Dimension 52, threshold 8.431478682024558... Dimension 62, threshold 9.285591933748487... Dimension 72, threshold 10.354189454048514... Dimension 82, threshold 11.194061459858663... Dimension 92, threshold 11.801244022887314... [ 0.36216402 2.8552322 4.42354553 4.75532697 4.19498971 5.10785646 5.35961407 5.70862665 5.36642608 5.96495583 1.28755156 3.07865523 4.44416698 4.930009 4.38502055 4.99647826 5.43944579 6.61445079 5.54217012 5.89654415 1.66134225 3.12911115 4.55152266 5.22539188 4.8520785 5.04480122 5.2030037 5.95454279 5.71669209 5.73208826 2.02094171 3.29158085 4.55086978 4.82600374 4.73990393 5.24199244 5.27624384 6.4276941 5.96924415 5.75491619 2.59933135 3.6230844 4.64296086 5.21024493 5.17182066 5.36160245 5.64459037 6.13189381 5.83351432 5.93464406 3.12652073 3.85772133 4.84320477 5.72104783 6.42760129 6.07694433 6.22418903 7.44470339 6.44043562 6.85697261 3.89761178 4.16965044 5.13280891 6.07291449 7.16788709 8.19844869 8.93760157 9.85482957 10.91142438 8.46421277 5.18569062 5.16098171 5.61633312 6.53585112 7.46072725 8.43147868 9.28559193 10.35418945 11.19406146 11.80124402 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ] Dimension 2, threshold 5.613750497603059... Dimension 12, threshold 5.706393371323584... Dimension 22, threshold 6.050574757243772... Dimension 32, threshold 6.74166741641923... Dimension 42, threshold 7.889201729282776... Dimension 52, threshold 8.834738826814196... Dimension 62, threshold 9.824628768395186... Dimension 72, threshold 10.668244929966734... Dimension 82, threshold 11.534872692496897... Dimension 92, threshold 12.215431473525387... [ 0.36216402 2.8552322 4.42354553 4.75532697 4.19498971 5.10785646 5.35961407 5.70862665 5.36642608 5.96495583 1.28755156 3.07865523 4.44416698 4.930009 4.38502055 4.99647826 5.43944579 6.61445079 5.54217012 5.89654415 1.66134225 3.12911115 4.55152266 5.22539188 4.8520785 5.04480122 5.2030037 5.95454279 5.71669209 5.73208826 2.02094171 3.29158085 4.55086978 4.82600374 4.73990393 5.24199244 5.27624384 6.4276941 5.96924415 5.75491619 2.59933135 3.6230844 4.64296086 5.21024493 5.17182066 5.36160245 5.64459037 6.13189381 5.83351432 5.93464406 3.12652073 3.85772133 4.84320477 5.72104783 6.42760129 6.07694433 6.22418903 7.44470339 6.44043562 6.85697261 3.89761178 4.16965044 5.13280891 6.07291449 7.16788709 8.19844869 8.93760157 9.85482957 10.91142438 8.46421277 5.18569062 5.16098171 5.61633312 6.53585112 7.46072725 8.43147868 9.28559193 10.35418945 11.19406146 11.80124402 5.6137505 5.70639337 6.05057476 6.74166742 7.88920173 8.83473883 9.82462877 10.66824493 11.53487269 12.21543147 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb Cell 75 in <cell line: 6>() <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=4'>5</a> Y = np.zeros(10*ASWs.size) <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=5'>6</a> for i, asw in enumerate(ASWs): ----> <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=6'>7</a> D, T = dim_thresh((2, 100, 10), fname=f"{asw}thresholds_1_100_10", asw=asw) <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=7'>8</a> assert(T.size == 10) <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=8'>9</a> Y[i*10:i*10+10] = T /Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb Cell 75 in dim_thresh(numrange, asw, fname, plot) <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=7'>8</a> threshes = np.zeros(dim_range.size) <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=8'>9</a> for i, d in enumerate(dim_range): ---> <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=9'>10</a> thresh = find_threshold(D=d, asw=asw) <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=10'>11</a> threshes[i] = thresh <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=11'>12</a> print(f"Dimension {d}, threshold {thresh}...") /Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb Cell 75 in find_threshold(choose_t, choose_s, set_t, D, size, asw) <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=9'>10</a> f = lambda t : cluster_test(t=t, s=None, D=D, size=size)[2] - asw <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=10'>11</a> # call optimizing algorithm, w/ margin of error 0.1 ---> <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000145?line=11'>12</a> return brentq(f, lower_bound, upper_bound, xtol=0.1) File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/scipy/optimize/_zeros_py.py:783, in brentq(f, a, b, args, xtol, rtol, maxiter, full_output, disp) 781 if rtol < _rtol: 782 raise ValueError("rtol too small (%g < %g)" % (rtol, _rtol)) --> 783 r = _zeros._brentq(f, a, b, xtol, rtol, maxiter, args, full_output, disp) 784 return results_c(full_output, r) ValueError: f(a) and f(b) must have different signs
The upper bound used in the optimization function was 100; apparently, this wasn't high enough of a distance threshold for some values under ASW=0.9
!
Plotting the results of the above ASW points from 0 to 0.8:
color_labs = np.repeat(ASWs, 10)
X = np.tile(np.arange(2, 100, 10), 11)
print(Y.size, X.size, color_labs.size)
# plt.scatter(X[:-20], Y[:-20], c=color_labs[:-20])
# plt.figure(figsize=(10, 6), dpi=80)
fig, axes = plt.subplots(1, 1, figsize=(8, 6))
# for i, asw in enumerate(ASWs):
# if i < 9:
# plt.scatter(X[i*10:i*10+10], Y[i*10:i*10+10], color=BI(asw), s=20)
plt.scatter(X[:-20], Y[:-20], c=color_labs[:-20], cmap=BI)
fig.colorbar(cm.ScalarMappable(norm=mpl.colors.Normalize(0, 0.8),
cmap=BI), ax=axes, location='right')
plt.xlabel("Dimension")
plt.ylabel("Detection Threshold")
plt.title("Dimension-Threshold Curve For ASWs from 0 to 1", fontsize=16)
110 110 110
Text(0.5, 1.0, 'Dimension-Threshold Curve For ASWs from 0 to 1')
Now, we will repeat the above experiment but from ASW = 0 to 1 on the dimension interval 1 to 1000, as we did for the 0.5 ASW setting.
plt.show()
fig, axes = plt.subplots(1, 1, figsize=(18, 12))
fig.colorbar(cm.ScalarMappable(norm=mpl.colors.Normalize(0, 0.8),
cmap=BI), ax=axes, location='right')
_ = plot_dimgraph('asw1k-outputs/0.0thresholds_1_1k_10.txt', color=BI(0))
_ = plot_dimgraph('asw1k-outputs/0.1thresholds_1_1k_10.txt', color=BI(0.1))
_ = plot_dimgraph('asw1k-outputs/0.2thresholds_1_1k_10.txt', color=BI(0.2))
_ = plot_dimgraph('asw1k-outputs/0.3thresholds_1_1k_10.txt', color=BI(0.3))
_ = plot_dimgraph('asw1k-outputs/0.4thresholds_1_1k_10.txt', color=BI(0.4))
_ = plot_dimgraph('asw1k-outputs/0.5thresholds_1_1k_10.txt', color=BI(0.5))
_ = plot_dimgraph('asw1k-outputs/0.6thresholds_1_1k_10.txt', color=BI(0.6))
_ = plot_dimgraph('asw1k-outputs/0.7thresholds_1_1k_10.txt', color=BI(0.7))
_ = plot_dimgraph('asw1k-outputs/0.8thresholds_1_1k_10.txt', color=BI(0.8))
# ASWs = np.linspace(0, 0.9, 10)
ASWs = np.array([0.9])
for asw in ASWs:
try:
upper_bound = 100
dim_thresh((2, 1000, 10), fname=f"asw1k-outputs/{asw}thresholds_1_1k_10", asw=asw, upper_bound=upper_bound)
print(f"~ ~ DONE WITH ASW = {asw} ~ ~")
except ValueError:
raise Exception(f"it looks like the bound wasn't high enough for ASW={asw}")
pass
print("all done ^_^")
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb Cell 81 in <cell line: 3>() <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=4'>5</a> upper_bound = 1000 ----> <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=5'>6</a> dim_thresh((2, 1000, 10), fname=f"asw1k-outputs/{asw}thresholds_1_1k_10", asw=asw, upper_bound=upper_bound) <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=6'>7</a> print(f"~ ~ DONE WITH ASW = {asw} ~ ~") /Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb Cell 81 in dim_thresh(numrange, asw, fname, plot, upper_bound) <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=8'>9</a> for i, d in enumerate(dim_range): ---> <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=9'>10</a> thresh = find_threshold(D=d, asw=asw, upper_bound=upper_bound) <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=10'>11</a> threshes[i] = thresh /Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb Cell 81 in find_threshold(choose_t, choose_s, set_t, D, size, asw, upper_bound) <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=9'>10</a> # call optimizing algorithm, w/ margin of error 0.1 ---> <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=10'>11</a> return brentq(f, lower_bound, upper_bound, xtol=0.1) File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/scipy/optimize/_zeros_py.py:783, in brentq(f, a, b, args, xtol, rtol, maxiter, full_output, disp) 782 raise ValueError("rtol too small (%g < %g)" % (rtol, _rtol)) --> 783 r = _zeros._brentq(f, a, b, xtol, rtol, maxiter, args, full_output, disp) 784 return results_c(full_output, r) ValueError: f(a) and f(b) must have different signs During handling of the above exception, another exception occurred: Exception Traceback (most recent call last) /Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb Cell 81 in <cell line: 3>() <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=6'>7</a> print(f"~ ~ DONE WITH ASW = {asw} ~ ~") <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=7'>8</a> except ValueError: ----> <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=8'>9</a> raise Exception(f"it looks like the bound wasn't high enough for ASW={asw}") <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=9'>10</a> pass <a href='vscode-notebook-cell:/Users/meeraray/Documents/NWU/test-umap1/umap-try.ipynb#ch0000158?line=10'>11</a> print("all done ^_^") Exception: it looks like the bound wasn't high enough for ASW=0.9
T. CaliĆski & J Harabasz (1974) A dendrite method for cluster analysis, Communications in Statistics, 3:1, 1-27, DOI: 10.1080/03610927408827101