commit bc626125ab84d594a076a620f9f3201a79ff95bb
Author: azertop <aslaneme@gmail.com>
Date:   Tue Jul 9 13:41:08 2024 +0200

    init

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..aa138fa
--- /dev/null
+++ b/README.md
@@ -0,0 +1,119 @@
+# Prédiction de Matchs de Tennis avec FastAPI
+
+Ce projet utilise FastAPI pour fournir des prédictions de matchs de tennis basées sur un modèle de machine learning pré-entraîné. L'API permet de faire des prédictions en lot ou pour un seul match, en utilisant diverses caractéristiques des joueurs et des matchs.
+
+## Prérequis
+
+Assurez-vous d'avoir Docker installé sur votre machine.
+
+## Installation
+
+1. Clonez ce dépôt sur votre machine locale :
+    ```sh
+    git clone https://votre-repo-git.git
+    cd votre-repo-git
+    ```
+
+2. Placez votre modèle `pipeline_xgb.pkl` dans le répertoire du projet.
+
+3. Assurez-vous que votre fichier `requirements.txt` contient les dépendances nécessaires, y compris `fastapi`, `uvicorn`, `gunicorn`, `numpy`, `pydantic`, etc.
+
+## Construction et Exécution du Conteneur Docker
+
+1. Construisez l'image Docker :
+    ```sh
+    docker build -t tennis-prediction-app .
+    ```
+
+2. Exécutez le conteneur :
+    ```sh
+    docker run -p 8000:8000 tennis-prediction-app
+    ```
+
+## Utilisation de l'API
+
+### Endpoint de Prédiction en Lot
+
+- **URL** : `/predict_batch`
+- **Méthode** : `POST`
+- **Corps de la Requête** :
+    ```json
+    {
+        "matches": [
+            {
+                "match_id": 1,
+                "player_id1": 101,
+                "player_id2": 102,
+                "p1_niveau_rank": 5.0,
+                "p1_niveau_win_total": 10.0,
+                "p1_niveau_win_surface_total": 3.0,
+                ...
+                "idc_p2_historique_estimated": 0.5,
+                "tournament_round": 2,
+                "tournament_type": 1,
+                "tournament_surface": 0
+            },
+            {
+                "match_id": 2,
+                "player_id1": 103,
+                "player_id2": 104,
+                ...
+            }
+        ]
+    }
+    ```
+
+- **Réponse** :
+    ```json
+    {
+        "predictions": [
+            {
+                "match_id": 1,
+                "win_probability_j1": 0.65,
+                "win_probability_j2": 0.35
+            },
+            {
+                "match_id": 2,
+                ...
+            }
+        ]
+    }
+    ```
+
+### Endpoint de Prédiction pour un Seul Match
+
+- **URL** : `/predict`
+- **Méthode** : `POST`
+- **Corps de la Requête** :
+    ```json
+    {
+        "match_id": 1,
+        "player_id1": 101,
+        "player_id2": 102,
+        "p1_niveau_rank": 5.0,
+        "p1_niveau_win_total": 10.0,
+        "p1_niveau_win_surface_total": 3.0,
+        ...
+        "idc_p2_historique_estimated": 0.5,
+        "tournament_round": 2,
+        "tournament_type": 1,
+        "tournament_surface": 0
+    }
+    ```
+
+- **Réponse** :
+    ```json
+    {
+        "match_id": 1,
+        "win_probability_j1": 0.65,
+        "win_probability_j2": 0.35
+    }
+    ```
+
+## Structure du Projet
+
+- `app.py` : Contient le code principal de l'application FastAPI.
+- `pipeline_xgb.pkl` : Fichier du modèle pré-entraîné.
+- `requirements.txt` : Liste des dépendances Python.
+- `Dockerfile` : Fichier de configuration pour Docker.
+- `start.sh` : Script pour démarrer l'application avec Gunicorn.
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..6d68afb
--- /dev/null
+++ b/app.py
@@ -0,0 +1,133 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+import asyncio
+import pickle
+from typing import List, Dict
+import numpy as np
+
+app = FastAPI()
+
+# Charger le modèle
+with open('pipeline_xgb.pkl', 'rb') as file:
+    model = pickle.load(file)
+
+# Request and response schemas
+class TennisMatch(BaseModel):  # request schema for one match: ids plus the numeric model inputs
+    match_id: int  # echoed back in the matching MatchPrediction
+    player_id1: int
+    player_id2: int
+    p1_niveau_rank: float  # p1_* block (18 fields): presumably player 1 statistics - TODO confirm
+    p1_niveau_win_total: float
+    p1_niveau_win_surface_total: float
+    p1_h2h_win_total: float
+    p1_h2h_win_1an: float
+    p1_h2h_win_surface_total: float
+    p1_h2h_win_sets: float
+    p1_progression_win_1an: float
+    p1_progression_win_surface_1an: float
+    p1_progression_win_last_50matchs: float
+    p1_progression_win_last_10matchs: float
+    p1_forme_matchs_30j: float
+    p1_forme_matchs_length_tournament: float
+    p1_tendance_loose_favori_30j: float
+    p1_tendance_win_outsider_30j: float
+    p1_confiance_first_service_last_match: float
+    p1_confiance_points_first_service_last_match: float
+    p1_confiance_break_saved_last_match: float
+    p2_niveau_rank: float  # p2_* block: the same 18 field suffixes as p1_*
+    p2_niveau_win_total: float
+    p2_niveau_win_surface_total: float
+    p2_h2h_win_total: float
+    p2_h2h_win_1an: float
+    p2_h2h_win_surface_total: float
+    p2_h2h_win_sets: float
+    p2_progression_win_1an: float
+    p2_progression_win_surface_1an: float
+    p2_progression_win_last_50matchs: float
+    p2_progression_win_last_10matchs: float
+    p2_forme_matchs_30j: float
+    p2_forme_matchs_length_tournament: float
+    p2_tendance_loose_favori_30j: float
+    p2_tendance_win_outsider_30j: float
+    p2_confiance_first_service_last_match: float
+    p2_confiance_points_first_service_last_match: float
+    p2_confiance_break_saved_last_match: float
+    idc_p1_motivation_enjeu: float  # idc_p1_* block (14 fields): meaning not defined in this file
+    idc_p1_motivation_revanche: float
+    idc_p1_motivation_surface: float
+    idc_p1_tactique_profil: float
+    idc_p1_tactique_service1: float
+    idc_p1_tactique_service2: float
+    idc_p1_tactique_retourservice1: float
+    idc_p1_tactique_retourservice2: float
+    idc_p1_tactique_breaksauve: float
+    idc_p1_tactique_breakgagne: float
+    idc_p1_tactique_tiebreak: float
+    idc_p1_historique_match: float
+    idc_p1_historique_book: float
+    idc_p1_historique_estimated: float
+    idc_p2_motivation_enjeu: float  # idc_p2_* block: the same 14 field suffixes as idc_p1_*
+    idc_p2_motivation_revanche: float
+    idc_p2_motivation_surface: float
+    idc_p2_tactique_profil: float
+    idc_p2_tactique_service1: float
+    idc_p2_tactique_service2: float
+    idc_p2_tactique_retourservice1: float
+    idc_p2_tactique_retourservice2: float
+    idc_p2_tactique_breaksauve: float
+    idc_p2_tactique_breakgagne: float
+    idc_p2_tactique_tiebreak: float
+    idc_p2_historique_match: float
+    idc_p2_historique_book: float
+    idc_p2_historique_estimated: float
+    tournament_round: float  # tournament_* are declared as floats; the README examples send integers
+    tournament_type: float
+    tournament_surface: float
+
+class MatchPrediction(BaseModel):  # response item holding the result for one match
+    match_id: int  # copied from the request's match_id
+    win_probability_j1: float  # taken from column 0 of the model's predict_proba row
+    win_probability_j2: float  # taken from column 1 of the model's predict_proba row
+
+class BatchPredictionRequest(BaseModel):  # JSON body of POST /predict_batch
+    matches: List[TennisMatch]  # each entry produces one prediction
+
+class BatchPredictionResponse(BaseModel):  # JSON response of POST /predict_batch
+    predictions: List[MatchPrediction]  # one item per requested match, in request order
+
+# Définir la fonction de prédiction
+async def predict_match(features: Dict[str, float]):
+    match_id = int(features.pop('match_id'))  # Extraire l'identifiant du match
+    prediction = await asyncio.to_thread(model.predict_proba, [features])
+    return MatchPrediction(
+        match_id=match_id,
+        win_probability_j1=prediction[0][0],
+        win_probability_j2=prediction[0][1]
+    )
+
+# Définir le point de terminaison pour la prédiction en lot
+@app.post("/predict_batch", response_model=BatchPredictionResponse)
+async def predict_batch(request: BatchPredictionRequest):
+
+    prediction_tasks = [
+        predict_match(match.dict())
+        for match in request.matches
+    ]
+    
+    predictions = await asyncio.gather(*prediction_tasks)
+
+    return BatchPredictionResponse(predictions=predictions)
+
+# Définir le point de terminaison pour une seule prédiction
+@app.post("/predict", response_model=MatchPrediction)
+async def predict_match(features: Dict[str, float]):
+    match_id = int(features['match_id'])  # Extraire l'identifiant du match
+    # Assurez-vous que les caractéristiques sont dans le bon format pour le modèle
+    feature_list = [features[key] for key in sorted(features.keys())]
+    feature_array = np.array(feature_list).reshape(1, -1)  # Transformer en tableau 2D avec une seule ligne
+    prediction = await asyncio.to_thread(model.predict_proba, feature_array)
+    return MatchPrediction(
+        match_id=match_id,
+        win_probability_j1=prediction[0][0],
+        win_probability_j2=prediction[0][1]
+    )
diff --git a/pipeline_xgb.pkl b/pipeline_xgb.pkl
new file mode 100644
index 0000000..b85580c
Binary files /dev/null and b/pipeline_xgb.pkl differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..cfeed35
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+fastapi
+pydantic
+uvicorn
+gunicorn
+numpy
+xgboost
+scikit-learn
\ No newline at end of file
diff --git a/start.sh b/start.sh
new file mode 100644
index 0000000..6e07cbf
--- /dev/null
+++ b/start.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+cores=$(nproc)
+workers=$((cores * WORKERS_PER_CORE))
+if [ $workers -gt $MAX_WORKERS ]; then
+    workers=$MAX_WORKERS
+fi
+exec gunicorn app:app -k uvicorn.workers.UvicornWorker -w $workers --bind 0.0.0.0:8000