import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import ruptures as rpt
import warnings

from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.mixture import GaussianMixture
from sklearn.cluster import Birch
from sklearn.metrics import silhouette_samples, silhouette_score

#warnings.filterwarnings("ignore")

def gini(x):
    total = 0
    
    for xi in x:
        for xj in x:
            total += np.abs(xi - xj)

    return total / ((len(x) ** 2) * 2 * np.mean(x))

def theil(x):
    u = np.mean(x)
    t = 0

    for xi in x:
        t += np.log(xi/u) * (xi / (u * len(x)))

    return t

def mld(x):  # não tolera dados negativos ou nulos
    u = np.mean(x)
    t = 0

    for xi in x:
        t += np.log(u/xi)

    return t

def changepoint(data, x, y):  # a série tem o "patamar novo" do ponto detectado em diante
    model = "rbf"
    algo = rpt.Pelt(model=model).fit(data[y].values)
    result = algo.predict(pen = 2 * np.log(len(data[x])))

    fig = px.line(data, x=x, y=y)
    fig.add_vline(x=data.iloc[0][x])

    for resul in result:
        fig.add_vline(x=data.iloc[resul-1][x])

    fig.show()

def cluster(data):
    fig = go.Figure(data=go.Scatter(x=data[data['Labels'] == 0]["points"], mode="markers", text=data[data["Labels"] == 0]["driverId"]))

    for label in data["Labels"].unique():
        if label == 0:
            continue

        else:
            fig.add_trace(go.Scatter(x=data[data['Labels'] == label]["points"], mode="markers", text=data[data["Labels"] == label]["driverId"]))

    fig.show()

def clusterc(data):
    fig = go.Figure(data=go.Scatter(x=data[data['Labels'] == 0]["points"], mode="markers", text=data[data["Labels"] == 0]["constructorId"]))

    for label in data["Labels"].unique():
        if label == 0:
            continue

        else:
            fig.add_trace(go.Scatter(x=data[data['Labels'] == label]["points"], mode="markers", text=data[data["Labels"] == label]["constructorId"]))

    fig.show()

def clusterd(data, medida):
    fig = go.Figure(data=go.Scatter(x=data[data['Labels'] == 0][medida], mode="markers", text=data[data["Labels"] == 0]["Ano"]))

    for label in data["Labels"].unique():
        if label == 0:
            continue

        else:
            fig.add_trace(go.Scatter(x=data[data['Labels'] == label][medida], mode="markers", text=data[data["Labels"] == label]["Ano"]))

    fig.show()

df = pd.read_csv("D:\\Esportes\\F1\\F1 db\\f1db-csv\\f1db-seasons-driver-standings.csv")
df0 = df[df["points"] != 0]
dfc = pd.read_csv("D:\\Esportes\\F1\\F1 db\\f1db-csv\\f1db-seasons-constructor-standings.csv")
dfc0 = dfc[dfc["points"] != 0]
dfc

for ano in df["year"].unique():
    temp = df[df["year"] == ano]
    silhouette_avg = []
    print(ano)
    for n_clusters in range(2, 11):
        clusterer = KMeans(n_clusters = n_clusters)
        cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
        silhouette_avg.append(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    
    melhor = silhouette_avg.index(max(silhouette_avg)) + 2
    clusterer = KMeans(n_clusters = melhor)
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    cluster(temp)

1950
0.8313141495980811

1951
0.7487690497861516

1952
0.804082211580146

1953
0.7181759129434699

1954
0.7856025347535867

1955
0.801459428143493

1956
0.8832655406458562

1957
0.7632246125420362

1958
0.759830185378558

1959
0.688067126766979

for ano in df["year"].unique():
    temp = df[df["year"] == ano]
    silhouette_avg = []
    print(ano)
    for n_clusters in range(2, 11):
        clusterer = GaussianMixture(n_components = n_clusters)
        cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
        silhouette_avg.append(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    
    melhor = silhouette_avg.index(max(silhouette_avg)) + 2
    clusterer = GaussianMixture(n_components = melhor)
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    cluster(temp)

1950
0.8313141495980811

1951
0.7487690497861516

1952
0.804082211580146

1953
0.7115215716881396

1954
0.7464253688834682

1955
0.801459428143493

1956
0.8832655406458562

1957
0.7632246125420362

1958
0.759830185378558

1959
0.6629953922261406

for ano in df["year"].unique():
    temp = df[df["year"] == ano]
    print(ano)
    clusterer = Birch()
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    cluster(temp)

1950
0.7623876636280248

1951
0.6722252829315847

1952
0.7644382101126403

1953
0.7051863591354304

1954
0.7245351963523796

1955
0.7005619432558534

1956
0.7759388976060635

1957
0.5713550405955361

1958
0.6021865246847107

1959
0.6348435278839822

for ano in df0["year"].unique():
    temp = df0[df0["year"] == ano]
    silhouette_avg = []
    print(ano)
    for n_clusters in range(2, 11):
        clusterer = KMeans(n_clusters = n_clusters)
        cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
        silhouette_avg.append(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    
    melhor = silhouette_avg.index(max(silhouette_avg)) + 2
    clusterer = KMeans(n_clusters = melhor)
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    cluster(temp)

1950
0.8313141495980811

1951
0.7487690497861516

1952
0.804082211580146

1953
0.7115215716881396

1954
0.7933154786406731

1955
0.801459428143493

1956
0.8832655406458562

1957
0.7632246125420362

1958
0.8064024533798089

1959
0.688067126766979

for ano in df0["year"].unique():
    temp = df0[df0["year"] == ano]
    silhouette_avg = []
    print(ano)
    for n_clusters in range(2, 11):
        clusterer = GaussianMixture(n_components = n_clusters)
        cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
        silhouette_avg.append(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    
    melhor = silhouette_avg.index(max(silhouette_avg)) + 2
    clusterer = GaussianMixture(n_components = melhor)
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    cluster(temp)

1950
0.8313141495980811

1951
0.7487690497861516

1952
0.804082211580146

1953
0.7115215716881396

1954
0.7464253688834682

1955
0.801459428143493

1956
0.8832655406458562

1957
0.7632246125420362

1958
0.759830185378558

1959
0.6629953922261406

for ano in df0["year"].unique():
    temp = df0[df0["year"] == ano]
    print(ano)
    clusterer = Birch()
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    cluster(temp)

1950
0.7623876636280248

1951
0.6722252829315847

1952
0.7644382101126403

1953
0.7051863591354304

1954
0.7245351963523796

1955
0.7005619432558534

1956
0.7759388976060635

1957
0.5713550405955361

1958
0.6021865246847107

1959
0.6348435278839822

for ano in dfc["year"].unique():
    temp = dfc[dfc["year"] == ano]
    silhouette_avg = []
    print(ano)
    for n_clusters in range(2, 11):
        if n_clusters > len(temp["points"].values.tolist()) - 1:
            continue
        clusterer = KMeans(n_clusters = n_clusters)
        cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
        silhouette_avg.append(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    
    melhor = silhouette_avg.index(max(silhouette_avg)) + 2
    clusterer = KMeans(n_clusters = melhor)
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    clusterc(temp)

1958
0.6205134657386334

1959
0.5468693205607777

1960
0.7295528104884147

1961
0.5387666984934388

1962
0.6073329658615393

1963
0.7061953212267857

1964
0.7325163183785673

1965
0.7089715779370951

1966
0.739916865593341

1967
0.5816287781713932

for ano in dfc["year"].unique():
    temp = dfc[dfc["year"] == ano]
    silhouette_avg = []
    print(ano)
    for n_clusters in range(2, 11):
        if n_clusters > len(temp["points"].values.tolist()) - 1:
            continue
        clusterer = GaussianMixture(n_components = n_clusters)
        cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
        silhouette_avg.append(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    
    melhor = silhouette_avg.index(max(silhouette_avg)) + 2
    clusterer = GaussianMixture(n_components = melhor)
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    clusterc(temp)

1958
0.6205134657386334

1959
0.2662337662337662

1960
0.7295528104884147

1961
0.5387666984934388

1962
0.6832273038566634

1963
0.7061953212267857

1964
0.7325163183785673

1965
0.7089715779370951

1966
0.65763668189505

1967
0.6752451230053201

for ano in dfc["year"].unique():
    temp = dfc[dfc["year"] == ano]
    print(ano)
    clusterer = Birch()
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    clusterc(temp)

1958
0.4797975711634977

1959
0.2662337662337662

1960
0.6440118599396103

1961
0.28277777777777774

1962
0.6073329658615393

1963
0.7061953212267857

1964
0.5744878280994015

1965
0.6806695622321944

1966
0.65763668189505

1967
0.6146557931180187

for ano in dfc0["year"].unique():
    temp = dfc0[dfc0["year"] == ano]
    silhouette_avg = []
    print(ano)
    for n_clusters in range(2, 11):
        if n_clusters > len(temp["points"].values.tolist()) - 1:
            continue
        clusterer = KMeans(n_clusters = n_clusters)
        cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
        silhouette_avg.append(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    
    melhor = silhouette_avg.index(max(silhouette_avg)) + 2
    clusterer = KMeans(n_clusters = melhor)
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    clusterc(temp)

1958
0.6205134657386334

1959
0.5468693205607777

1960
0.7295528104884147

1961
0.5387666984934388

1962
0.6832273038566634

1963
0.7061953212267857

1964
0.7325163183785673

1965
0.7089715779370951

1966
0.739916865593341

1967
0.6752451230053201

for ano in dfc0["year"].unique():
    temp = dfc0[dfc0["year"] == ano]
    silhouette_avg = []
    print(ano)
    for n_clusters in range(2, 11):
        if n_clusters > len(temp["points"].values.tolist()) - 1:
            continue
        clusterer = GaussianMixture(n_components = n_clusters)
        cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
        silhouette_avg.append(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    
    melhor = silhouette_avg.index(max(silhouette_avg)) + 2
    clusterer = GaussianMixture(n_components = melhor)
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    clusterc(temp)

1958
0.6205134657386334

1959
0.5468693205607777

1960
0.7295528104884147

1961
0.5387666984934388

1962
0.6832273038566634

1963
0.7061953212267857

1964
0.7325163183785673

1965
0.7089715779370951

1966
0.6155334561954411

1967
0.6752451230053201

for ano in dfc0["year"].unique():
    temp = dfc0[dfc0["year"] == ano]
    print(ano)
    clusterer = Birch()
    cluster_labels = clusterer.fit_predict(temp["points"].values.reshape(-1, 1).tolist())
    temp["Labels"] = cluster_labels
    print(silhouette_score(temp["points"].values.reshape(-1, 1).tolist(), cluster_labels))
    clusterc(temp)

1958
0.4797975711634977

1959
0.2662337662337662

1960
0.6440118599396103

1961
0.28277777777777774

1962
0.6073329658615393

1963
0.7061953212267857

1964
0.5744878280994015

1965
0.6806695622321944

1966
0.65763668189505

1967
0.6146557931180187

dados = []

for ano in df["year"].unique():
    dados.append([ano, gini(df[df["year"] == ano]["points"].values.tolist())])

dados = pd.DataFrame(dados, columns=["Ano", "Gini"])
    
silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = KMeans(n_clusters = n_clusters)
    cluster_labels = clusterer.fit_predict(dados["Gini"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dados["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = KMeans(n_clusters = melhor)
cluster_labels = clusterer.fit_predict(dados["Gini"].values.reshape(-1, 1).tolist())
dados["Labels"] = cluster_labels
print(silhouette_score(dados["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados, "Gini")  # mudar função

0.5560353956728895

dados0 = []

for ano in df["year"].unique():
    #print(df[df["year"] == ano]["points"].values.tolist())
    dados0.append([ano, gini(df0[df0["year"] == ano]["points"].values.tolist()), mld(df0[df0["year"] == ano]["points"].values.tolist()), theil(df0[df0["year"] == ano]["points"].values.tolist())])

dados0 = pd.DataFrame(dados0, columns=["Ano", "Gini", "MLD", "Theil"])
    
silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = KMeans(n_clusters = n_clusters)
    cluster_labels = clusterer.fit_predict(dados0["Gini"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dados0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = KMeans(n_clusters = melhor)
cluster_labels = clusterer.fit_predict(dados0["Gini"].values.reshape(-1, 1).tolist())
dados0["Labels"] = cluster_labels
print(silhouette_score(dados0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados0, "Gini")  # mudar função

0.5387554076587593

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = GaussianMixture(n_components = n_clusters)
    cluster_labels = clusterer.fit_predict(dados["Gini"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dados["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = GaussianMixture(n_components = melhor)
cluster_labels = clusterer.fit_predict(dados["Gini"].values.reshape(-1, 1).tolist())
dados["Labels"] = cluster_labels
print(silhouette_score(dados["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados, "Gini")  # mudar função

0.5474988113575222

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = GaussianMixture(n_components = n_clusters)
    cluster_labels = clusterer.fit_predict(dados0["Gini"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dados0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = GaussianMixture(n_components = melhor)
cluster_labels = clusterer.fit_predict(dados0["Gini"].values.reshape(-1, 1).tolist())
dados0["Labels"] = cluster_labels
print(silhouette_score(dados0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados0, "Gini")  # mudar função

0.5851349158327567

clusterer = Birch()
cluster_labels = clusterer.fit_predict(dados0["Gini"].values.reshape(-1, 1).tolist())
dados0["Labels"] = cluster_labels
print(silhouette_score(dados0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados0, "Gini")

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[48], line 4
      2 cluster_labels = clusterer.fit_predict(dados0["Gini"].values.reshape(-1, 1).tolist())
      3 dados0["Labels"] = cluster_labels
----> 4 print(silhouette_score(dados0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
      5 clusterd(dados0, "Gini")

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    207 try:
    208     with config_context(
    209         skip_parameter_validation=(
    210             prefer_skip_nested_validation or global_skip_validation
    211         )
    212     ):
--> 213         return func(*args, **kwargs)
    214 except InvalidParameterError as e:
    215     # When the function is just a wrapper around an estimator, we allow
    216     # the function to delegate validation to the estimator, but we replace
    217     # the name of the estimator by the name of the function in the error
    218     # message to avoid confusion.
    219     msg = re.sub(
    220         r"parameter of \w+ must be",
    221         f"parameter of {func.__qualname__} must be",
    222         str(e),
    223     )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:141, in silhouette_score(X, labels, metric, sample_size, random_state, **kwds)
    139     else:
    140         X, labels = X[indices], labels[indices]
--> 141 return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:186, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    184 global_skip_validation = get_config()["skip_parameter_validation"]
    185 if global_skip_validation:
--> 186     return func(*args, **kwargs)
    188 func_sig = signature(func)
    190 # Map *args/**kwargs to the function signature

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:299, in silhouette_samples(X, labels, metric, **kwds)
    297 n_samples = len(labels)
    298 label_freqs = np.bincount(labels)
--> 299 check_number_of_labels(len(le.classes_), n_samples)
    301 kwds["metric"] = metric
    302 reduce_func = functools.partial(
    303     _silhouette_reduce, labels=labels, label_freqs=label_freqs
    304 )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:38, in check_number_of_labels(n_labels, n_samples)
     27 """Check that number of labels are valid.
     28 
     29 Parameters
   (...)
     35     Number of samples.
     36 """
     37 if not 1 < n_labels < n_samples:
---> 38     raise ValueError(
     39         "Number of labels is %d. Valid values are 2 to n_samples - 1 (inclusive)"
     40         % n_labels
     41     )

ValueError: Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)

clusterer = Birch()
cluster_labels = clusterer.fit_predict(dados["Gini"].values.reshape(-1, 1).tolist())
dados["Labels"] = cluster_labels
print(silhouette_score(dados["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados, "Gini")

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[50], line 4
      2 cluster_labels = clusterer.fit_predict(dados["Gini"].values.reshape(-1, 1).tolist())
      3 dados["Labels"] = cluster_labels
----> 4 print(silhouette_score(dados["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
      5 clusterd(dados, "Gini")

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    207 try:
    208     with config_context(
    209         skip_parameter_validation=(
    210             prefer_skip_nested_validation or global_skip_validation
    211         )
    212     ):
--> 213         return func(*args, **kwargs)
    214 except InvalidParameterError as e:
    215     # When the function is just a wrapper around an estimator, we allow
    216     # the function to delegate validation to the estimator, but we replace
    217     # the name of the estimator by the name of the function in the error
    218     # message to avoid confusion.
    219     msg = re.sub(
    220         r"parameter of \w+ must be",
    221         f"parameter of {func.__qualname__} must be",
    222         str(e),
    223     )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:141, in silhouette_score(X, labels, metric, sample_size, random_state, **kwds)
    139     else:
    140         X, labels = X[indices], labels[indices]
--> 141 return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:186, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    184 global_skip_validation = get_config()["skip_parameter_validation"]
    185 if global_skip_validation:
--> 186     return func(*args, **kwargs)
    188 func_sig = signature(func)
    190 # Map *args/**kwargs to the function signature

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:299, in silhouette_samples(X, labels, metric, **kwds)
    297 n_samples = len(labels)
    298 label_freqs = np.bincount(labels)
--> 299 check_number_of_labels(len(le.classes_), n_samples)
    301 kwds["metric"] = metric
    302 reduce_func = functools.partial(
    303     _silhouette_reduce, labels=labels, label_freqs=label_freqs
    304 )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:38, in check_number_of_labels(n_labels, n_samples)
     27 """Check that number of labels are valid.
     28 
     29 Parameters
   (...)
     35     Number of samples.
     36 """
     37 if not 1 < n_labels < n_samples:
---> 38     raise ValueError(
     39         "Number of labels is %d. Valid values are 2 to n_samples - 1 (inclusive)"
     40         % n_labels
     41     )

ValueError: Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = KMeans(n_clusters = n_clusters)
    cluster_labels = clusterer.fit_predict(dados0["Theil"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dados0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = KMeans(n_clusters = melhor)
cluster_labels = clusterer.fit_predict(dados0["Theil"].values.reshape(-1, 1).tolist())
dados0["Labels"] = cluster_labels
print(silhouette_score(dados0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados0, "Theil")  # mudar função

0.6062141561768923

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = GaussianMixture(n_components = n_clusters)
    cluster_labels = clusterer.fit_predict(dados0["Theil"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dados0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = GaussianMixture(n_components = melhor)
cluster_labels = clusterer.fit_predict(dados0["Theil"].values.reshape(-1, 1).tolist())
dados0["Labels"] = cluster_labels
print(silhouette_score(dados0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados0, "Theil")  # mudar função

0.5484690385725198

clusterer = Birch()
cluster_labels = clusterer.fit_predict(dados0["Theil"].values.reshape(-1, 1).tolist())
dados0["Labels"] = cluster_labels
print(silhouette_score(dados0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados0, "Theil")

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[60], line 4
      2 cluster_labels = clusterer.fit_predict(dados0["Theil"].values.reshape(-1, 1).tolist())
      3 dados0["Labels"] = cluster_labels
----> 4 print(silhouette_score(dados0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
      5 clusterd(dados0, "Theil")

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    207 try:
    208     with config_context(
    209         skip_parameter_validation=(
    210             prefer_skip_nested_validation or global_skip_validation
    211         )
    212     ):
--> 213         return func(*args, **kwargs)
    214 except InvalidParameterError as e:
    215     # When the function is just a wrapper around an estimator, we allow
    216     # the function to delegate validation to the estimator, but we replace
    217     # the name of the estimator by the name of the function in the error
    218     # message to avoid confusion.
    219     msg = re.sub(
    220         r"parameter of \w+ must be",
    221         f"parameter of {func.__qualname__} must be",
    222         str(e),
    223     )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:141, in silhouette_score(X, labels, metric, sample_size, random_state, **kwds)
    139     else:
    140         X, labels = X[indices], labels[indices]
--> 141 return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:186, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    184 global_skip_validation = get_config()["skip_parameter_validation"]
    185 if global_skip_validation:
--> 186     return func(*args, **kwargs)
    188 func_sig = signature(func)
    190 # Map *args/**kwargs to the function signature

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:299, in silhouette_samples(X, labels, metric, **kwds)
    297 n_samples = len(labels)
    298 label_freqs = np.bincount(labels)
--> 299 check_number_of_labels(len(le.classes_), n_samples)
    301 kwds["metric"] = metric
    302 reduce_func = functools.partial(
    303     _silhouette_reduce, labels=labels, label_freqs=label_freqs
    304 )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:38, in check_number_of_labels(n_labels, n_samples)
     27 """Check that number of labels are valid.
     28 
     29 Parameters
   (...)
     35     Number of samples.
     36 """
     37 if not 1 < n_labels < n_samples:
---> 38     raise ValueError(
     39         "Number of labels is %d. Valid values are 2 to n_samples - 1 (inclusive)"
     40         % n_labels
     41     )

ValueError: Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = KMeans(n_clusters = n_clusters)
    cluster_labels = clusterer.fit_predict(dados0["MLD"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dados0["MLD"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = KMeans(n_clusters = melhor)
cluster_labels = clusterer.fit_predict(dados0["MLD"].values.reshape(-1, 1).tolist())
dados0["Labels"] = cluster_labels
print(silhouette_score(dados0["MLD"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados0, "MLD")  # mudar função

0.590304205410129

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = GaussianMixture(n_components = n_clusters)
    cluster_labels = clusterer.fit_predict(dados0["MLD"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dados0["MLD"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = GaussianMixture(n_components = melhor)
cluster_labels = clusterer.fit_predict(dados0["MLD"].values.reshape(-1, 1).tolist())
dados0["Labels"] = cluster_labels
print(silhouette_score(dados0["MLD"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados0, "MLD")  # mudar função

0.5973601424126799

clusterer = Birch()
cluster_labels = clusterer.fit_predict(dados0["MLD"].values.reshape(-1, 1).tolist())
dados0["Labels"] = cluster_labels
print(silhouette_score(dados0["MLD"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dados0, "MLD")

0.5429833160513684

dadosc = []

for ano in dfc["year"].unique():
    dadosc.append([ano, gini(dfc[dfc["year"] == ano]["points"].values.tolist())])

dadosc = pd.DataFrame(dadosc, columns=["Ano", "Gini"])
    
silhouette_avg = []

for n_clusters in range(2, 11):
    clusterer = KMeans(n_clusters = n_clusters)
    cluster_labels = clusterer.fit_predict(dadosc["Gini"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dadosc["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = KMeans(n_clusters = melhor)
cluster_labels = clusterer.fit_predict(dadosc["Gini"].values.reshape(-1, 1).tolist())
dadosc["Labels"] = cluster_labels
print(silhouette_score(dadosc["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc, "Gini")  # mudar função

0.6383484414359761

dadosc0 = []

for ano in dfc0["year"].unique():
    #print(df[df["year"] == ano]["points"].values.tolist())
    #print(gini(dfc0[dfc0["year"] == ano]["points"].values.tolist()))
    dadosc0.append([ano, gini(dfc0[dfc0["year"] == ano]["points"].values.tolist()), mld(dfc0[dfc0["year"] == ano]["points"].values.tolist()), theil(dfc0[dfc0["year"] == ano]["points"].values.tolist())])

dadosc0 = pd.DataFrame(dadosc0, columns=["Ano", "Gini", "MLD", "Theil"])
    
silhouette_avg = []

for n_clusters in range(2, 11):
    #print(dadosc0["Gini"])
    clusterer = KMeans(n_clusters = n_clusters)
    cluster_labels = clusterer.fit_predict(dadosc0["Gini"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dadosc0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = KMeans(n_clusters = melhor)
cluster_labels = clusterer.fit_predict(dadosc0["Gini"].values.reshape(-1, 1).tolist())
dadosc0["Labels"] = cluster_labels
print(silhouette_score(dadosc0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc0, "Gini")  # mudar função

0.6075436166495269

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = GaussianMixture(n_components = n_clusters)
    cluster_labels = clusterer.fit_predict(dadosc["Gini"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dadosc["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = GaussianMixture(n_components = melhor)
cluster_labels = clusterer.fit_predict(dadosc["Gini"].values.reshape(-1, 1).tolist())
dadosc["Labels"] = cluster_labels
print(silhouette_score(dadosc["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc, "Gini")  # mudar função

0.6372312141241268

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = GaussianMixture(n_components = n_clusters)
    cluster_labels = clusterer.fit_predict(dadosc0["Gini"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dadosc0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = GaussianMixture(n_components = melhor)
cluster_labels = clusterer.fit_predict(dadosc0["Gini"].values.reshape(-1, 1).tolist())
dadosc0["Labels"] = cluster_labels
print(silhouette_score(dadosc0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc0, "Gini")  # mudar função

0.6051809852295769

clusterer = Birch()
cluster_labels = clusterer.fit_predict(dadosc["Gini"].values.reshape(-1, 1).tolist())
dadosc["Labels"] = cluster_labels
print(silhouette_score(dadosc["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc, "Gini")

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[141], line 4
      2 cluster_labels = clusterer.fit_predict(dadosc["Gini"].values.reshape(-1, 1).tolist())
      3 dadosc["Labels"] = cluster_labels
----> 4 print(silhouette_score(dadosc["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
      5 clusterd(dadosc, "Gini")

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    207 try:
    208     with config_context(
    209         skip_parameter_validation=(
    210             prefer_skip_nested_validation or global_skip_validation
    211         )
    212     ):
--> 213         return func(*args, **kwargs)
    214 except InvalidParameterError as e:
    215     # When the function is just a wrapper around an estimator, we allow
    216     # the function to delegate validation to the estimator, but we replace
    217     # the name of the estimator by the name of the function in the error
    218     # message to avoid confusion.
    219     msg = re.sub(
    220         r"parameter of \w+ must be",
    221         f"parameter of {func.__qualname__} must be",
    222         str(e),
    223     )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:141, in silhouette_score(X, labels, metric, sample_size, random_state, **kwds)
    139     else:
    140         X, labels = X[indices], labels[indices]
--> 141 return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:186, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    184 global_skip_validation = get_config()["skip_parameter_validation"]
    185 if global_skip_validation:
--> 186     return func(*args, **kwargs)
    188 func_sig = signature(func)
    190 # Map *args/**kwargs to the function signature

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:299, in silhouette_samples(X, labels, metric, **kwds)
    297 n_samples = len(labels)
    298 label_freqs = np.bincount(labels)
--> 299 check_number_of_labels(len(le.classes_), n_samples)
    301 kwds["metric"] = metric
    302 reduce_func = functools.partial(
    303     _silhouette_reduce, labels=labels, label_freqs=label_freqs
    304 )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:38, in check_number_of_labels(n_labels, n_samples)
     27 """Check that number of labels are valid.
     28 
     29 Parameters
   (...)
     35     Number of samples.
     36 """
     37 if not 1 < n_labels < n_samples:
---> 38     raise ValueError(
     39         "Number of labels is %d. Valid values are 2 to n_samples - 1 (inclusive)"
     40         % n_labels
     41     )

ValueError: Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)

clusterer = Birch()
cluster_labels = clusterer.fit_predict(dadosc0["Gini"].values.reshape(-1, 1).tolist())
dadosc0["Labels"] = cluster_labels
print(silhouette_score(dadosc0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc0, "Gini")

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[143], line 4
      2 cluster_labels = clusterer.fit_predict(dadosc0["Gini"].values.reshape(-1, 1).tolist())
      3 dadosc0["Labels"] = cluster_labels
----> 4 print(silhouette_score(dadosc0["Gini"].values.reshape(-1, 1).tolist(), cluster_labels))
      5 clusterd(dadosc0, "Gini")

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    207 try:
    208     with config_context(
    209         skip_parameter_validation=(
    210             prefer_skip_nested_validation or global_skip_validation
    211         )
    212     ):
--> 213         return func(*args, **kwargs)
    214 except InvalidParameterError as e:
    215     # When the function is just a wrapper around an estimator, we allow
    216     # the function to delegate validation to the estimator, but we replace
    217     # the name of the estimator by the name of the function in the error
    218     # message to avoid confusion.
    219     msg = re.sub(
    220         r"parameter of \w+ must be",
    221         f"parameter of {func.__qualname__} must be",
    222         str(e),
    223     )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:141, in silhouette_score(X, labels, metric, sample_size, random_state, **kwds)
    139     else:
    140         X, labels = X[indices], labels[indices]
--> 141 return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:186, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    184 global_skip_validation = get_config()["skip_parameter_validation"]
    185 if global_skip_validation:
--> 186     return func(*args, **kwargs)
    188 func_sig = signature(func)
    190 # Map *args/**kwargs to the function signature

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:299, in silhouette_samples(X, labels, metric, **kwds)
    297 n_samples = len(labels)
    298 label_freqs = np.bincount(labels)
--> 299 check_number_of_labels(len(le.classes_), n_samples)
    301 kwds["metric"] = metric
    302 reduce_func = functools.partial(
    303     _silhouette_reduce, labels=labels, label_freqs=label_freqs
    304 )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:38, in check_number_of_labels(n_labels, n_samples)
     27 """Check that number of labels are valid.
     28 
     29 Parameters
   (...)
     35     Number of samples.
     36 """
     37 if not 1 < n_labels < n_samples:
---> 38     raise ValueError(
     39         "Number of labels is %d. Valid values are 2 to n_samples - 1 (inclusive)"
     40         % n_labels
     41     )

ValueError: Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = KMeans(n_clusters = n_clusters)
    cluster_labels = clusterer.fit_predict(dadosc0["Theil"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dadosc0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = KMeans(n_clusters = melhor)
cluster_labels = clusterer.fit_predict(dadosc0["Theil"].values.reshape(-1, 1).tolist())
dadosc0["Labels"] = cluster_labels
print(silhouette_score(dadosc0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc0, "Theil")  # mudar função

0.6048100180202782

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = GaussianMixture(n_components = n_clusters)
    cluster_labels = clusterer.fit_predict(dadosc0["Theil"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dadosc0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = GaussianMixture(n_components = melhor)
cluster_labels = clusterer.fit_predict(dadosc0["Theil"].values.reshape(-1, 1).tolist())
dadosc0["Labels"] = cluster_labels
print(silhouette_score(dadosc0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc0, "Theil")  # mudar função

0.586211982453521

clusterer = Birch()
cluster_labels = clusterer.fit_predict(dadosc0["Theil"].values.reshape(-1, 1).tolist())
dadosc0["Labels"] = cluster_labels
print(silhouette_score(dadosc0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc0, "Theil")

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[149], line 4
      2 cluster_labels = clusterer.fit_predict(dadosc0["Theil"].values.reshape(-1, 1).tolist())
      3 dadosc0["Labels"] = cluster_labels
----> 4 print(silhouette_score(dadosc0["Theil"].values.reshape(-1, 1).tolist(), cluster_labels))
      5 clusterd(dadosc0, "Theil")

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    207 try:
    208     with config_context(
    209         skip_parameter_validation=(
    210             prefer_skip_nested_validation or global_skip_validation
    211         )
    212     ):
--> 213         return func(*args, **kwargs)
    214 except InvalidParameterError as e:
    215     # When the function is just a wrapper around an estimator, we allow
    216     # the function to delegate validation to the estimator, but we replace
    217     # the name of the estimator by the name of the function in the error
    218     # message to avoid confusion.
    219     msg = re.sub(
    220         r"parameter of \w+ must be",
    221         f"parameter of {func.__qualname__} must be",
    222         str(e),
    223     )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:141, in silhouette_score(X, labels, metric, sample_size, random_state, **kwds)
    139     else:
    140         X, labels = X[indices], labels[indices]
--> 141 return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:186, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    184 global_skip_validation = get_config()["skip_parameter_validation"]
    185 if global_skip_validation:
--> 186     return func(*args, **kwargs)
    188 func_sig = signature(func)
    190 # Map *args/**kwargs to the function signature

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:299, in silhouette_samples(X, labels, metric, **kwds)
    297 n_samples = len(labels)
    298 label_freqs = np.bincount(labels)
--> 299 check_number_of_labels(len(le.classes_), n_samples)
    301 kwds["metric"] = metric
    302 reduce_func = functools.partial(
    303     _silhouette_reduce, labels=labels, label_freqs=label_freqs
    304 )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\cluster\_unsupervised.py:38, in check_number_of_labels(n_labels, n_samples)
     27 """Check that number of labels are valid.
     28 
     29 Parameters
   (...)
     35     Number of samples.
     36 """
     37 if not 1 < n_labels < n_samples:
---> 38     raise ValueError(
     39         "Number of labels is %d. Valid values are 2 to n_samples - 1 (inclusive)"
     40         % n_labels
     41     )

ValueError: Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = KMeans(n_clusters = n_clusters)
    cluster_labels = clusterer.fit_predict(dadosc0["MLD"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dadosc0["MLD"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = KMeans(n_clusters = melhor)
cluster_labels = clusterer.fit_predict(dadosc0["MLD"].values.reshape(-1, 1).tolist())
dadosc0["Labels"] = cluster_labels
print(silhouette_score(dadosc0["MLD"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc0, "MLD")  # mudar função

0.6553926483957443

silhouette_avg = []

for n_clusters in range(2, 11):
    
    clusterer = GaussianMixture(n_components = n_clusters)
    cluster_labels = clusterer.fit_predict(dadosc0["MLD"].values.reshape(-1, 1).tolist())
    silhouette_avg.append(silhouette_score(dadosc0["MLD"].values.reshape(-1, 1).tolist(), cluster_labels))
    
melhor = silhouette_avg.index(max(silhouette_avg)) + 2
clusterer = GaussianMixture(n_components = melhor)
cluster_labels = clusterer.fit_predict(dadosc0["MLD"].values.reshape(-1, 1).tolist())
dadosc0["Labels"] = cluster_labels
print(silhouette_score(dadosc0["MLD"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc0, "MLD")  # mudar função

0.6514478127463159

clusterer = Birch()
cluster_labels = clusterer.fit_predict(dadosc0["MLD"].values.reshape(-1, 1).tolist())
dadosc0["Labels"] = cluster_labels
print(silhouette_score(dadosc0["MLD"].values.reshape(-1, 1).tolist(), cluster_labels))
clusterd(dadosc0, "MLD")

0.5539655299922572

	year	positionDisplayOrder	positionNumber	positionText	constructorId	engineManufacturerId	points
0	1958	1	1.0	1	vanwall	vanwall	48.0
1	1958	2	2.0	2	ferrari	ferrari	40.0
2	1958	3	3.0	3	cooper	climax	31.0
3	1958	4	4.0	4	brm	brm	18.0
4	1958	5	5.0	5	maserati	maserati	6.0
...	...	...	...	...	...	...	...
705	2025	6	6.0	6	aston-martin	mercedes	52.0
706	2025	7	7.0	7	kick-sauber	ferrari	51.0
707	2025	8	8.0	8	racing-bulls	honda-rbpt	45.0
708	2025	9	9.0	9	haas	ferrari	35.0
709	2025	10	10.0	10	alpine	renault	20.0

Bibliotecas¶

Funções¶

Pré-Processamento¶

Clusterização¶

Pilotos (incluindo 0)¶

Pilotos (excluindo 0)¶

Construtores (incluindo 0)¶

Construtores (excluindo 0)¶

Medidas de desigualdade¶

Campeonato de pilotos¶

Gini¶

K-Means¶

Mistura Gaussiana¶

Birch¶

Theil¶

K-Means¶

Mistura Gaussiana¶

Birch¶

MLD¶

K-Means¶

Mistura Gaussiana¶

Birch¶

Campeonato de construtores¶

Gini¶

K-Means¶

Mistura Gaussiana¶

Birch¶

Theil¶

K-Means¶

Mistura Gaussiana¶

Birch¶

MLD¶

K-Means¶

Mistura Gaussiana¶

Birch¶