init

2024-07-09 13:41:08 +02:00 · 2024-07-09 13:41:08 +02:00 · bc626125ab
commit bc626125ab
5 changed files with 266 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,119 @@
 # Prédiction de Matchs de Tennis avec FastAPI
 Ce projet utilise FastAPI pour fournir des prédictions de matchs de tennis basées sur un modèle de machine learning pré-entraîné. L'API permet de faire des prédictions en lot ou pour un seul match, en utilisant diverses caractéristiques des joueurs et des matchs.
 ## Prérequis
 Assurez-vous d'avoir Docker installé sur votre machine.
 ## Installation
 1. Clonez ce dépôt sur votre machine locale :
    ```sh
    git clone https://votre-repo-git.git
    cd votre-repo-git
    ```
 2. Placez votre modèle `pipeline_xgb.pkl` dans le répertoire du projet.
 3. Assurez-vous que votre fichier `requirements.txt` contient les dépendances nécessaires : `fastapi`, `pydantic`, `uvicorn`, `gunicorn`, `numpy`, `xgboost` et `scikit-learn`.
 ## Construction et Exécution du Conteneur Docker
 1. Construisez l'image Docker :
    ```sh
    docker build -t tennis-prediction-app .
    ```
 2. Exécutez le conteneur :
    ```sh
    docker run -p 8000:8000 tennis-prediction-app
    ```
 ## Utilisation de l'API
 ### Endpoint de Prédiction en Lot
 - **URL** : `/predict_batch`
 - **Méthode** : `POST`
 - **Corps de la Requête** :
    ```json
    {
        "matches": [
            {
                "match_id": 1,
                "player_id1": 101,
                "player_id2": 102,
                "p1_niveau_rank": 5.0,
                "p1_niveau_win_total": 10.0,
                "p1_niveau_win_surface_total": 3.0,
                ...
                "idc_p2_historique_estimated": 0.5,
                "tournament_round": 2,
                "tournament_type": 1,
                "tournament_surface": 0
            },
            {
                "match_id": 2,
                "player_id1": 103,
                "player_id2": 104,
                ...
            }
        ]
    }
    ```
 - **Réponse** :
    ```json
    {
        "predictions": [
            {
                "match_id": 1,
                "win_probability_j1": 0.65,
                "win_probability_j2": 0.35
            },
            {
                "match_id": 2,
                ...
            }
        ]
    }
    ```
 ### Endpoint de Prédiction pour un Seul Match
 - **URL** : `/predict`
 - **Méthode** : `POST`
 - **Corps de la Requête** :
    ```json
    {
        "match_id": 1,
        "player_id1": 101,
        "player_id2": 102,
        "p1_niveau_rank": 5.0,
        "p1_niveau_win_total": 10.0,
        "p1_niveau_win_surface_total": 3.0,
        ...
        "idc_p2_historique_estimated": 0.5,
        "tournament_round": 2,
        "tournament_type": 1,
        "tournament_surface": 0
    }
    ```
 - **Réponse** :
    ```json
    {
        "match_id": 1,
        "win_probability_j1": 0.65,
        "win_probability_j2": 0.35
    }
    ```
 ## Structure du Projet
 - `app.py` : Contient le code principal de l'application FastAPI.
 - `pipeline_xgb.pkl` : Fichier du modèle pré-entraîné.
 - `requirements.txt` : Liste des dépendances Python.
 - `Dockerfile` : Fichier de configuration pour Docker.
 - `start.sh` : Script pour démarrer l'application avec Gunicorn.
--- a/app.py
+++ b/app.py
@ -0,0 +1,133 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
 import asyncio
 import pickle
 from typing import List, Dict
 import numpy as np
 # ASGI application object; the route decorators below register endpoints on it.
 app = FastAPI()
 # Load the model once at import time so every request reuses the same object.
 # The relative path is resolved against the process working directory.
 # NOTE(security): pickle.load can execute arbitrary code embedded in the file;
 # only ship a pipeline_xgb.pkl that comes from a trusted source.
 with open('pipeline_xgb.pkl', 'rb') as file:
    model = pickle.load(file)
 # Request and response models
 # TennisMatch is the validated input schema for one match in /predict_batch.
 # All fields are required (no defaults are declared).
 class TennisMatch(BaseModel):
    # Identifiers. match_id is echoed back in the response.
    # NOTE(review): the player ids look like lookup keys rather than model
    # features - confirm how the model was trained.
    match_id: int
    player_id1: int
    player_id2: int
    # p1_* fields - presumably statistics for player 1 (level, head-to-head,
    # progression, form, trend, confidence); TODO confirm units/ranges.
    p1_niveau_rank: float
    p1_niveau_win_total: float
    p1_niveau_win_surface_total: float
    p1_h2h_win_total: float
    p1_h2h_win_1an: float
    p1_h2h_win_surface_total: float
    p1_h2h_win_sets: float
    p1_progression_win_1an: float
    p1_progression_win_surface_1an: float
    p1_progression_win_last_50matchs: float
    p1_progression_win_last_10matchs: float
    p1_forme_matchs_30j: float
    p1_forme_matchs_length_tournament: float
    p1_tendance_loose_favori_30j: float
    p1_tendance_win_outsider_30j: float
    p1_confiance_first_service_last_match: float
    p1_confiance_points_first_service_last_match: float
    p1_confiance_break_saved_last_match: float
    # p2_* fields - same set of fields as p1_*, presumably for player 2.
    p2_niveau_rank: float
    p2_niveau_win_total: float
    p2_niveau_win_surface_total: float
    p2_h2h_win_total: float
    p2_h2h_win_1an: float
    p2_h2h_win_surface_total: float
    p2_h2h_win_sets: float
    p2_progression_win_1an: float
    p2_progression_win_surface_1an: float
    p2_progression_win_last_50matchs: float
    p2_progression_win_last_10matchs: float
    p2_forme_matchs_30j: float
    p2_forme_matchs_length_tournament: float
    p2_tendance_loose_favori_30j: float
    p2_tendance_win_outsider_30j: float
    p2_confiance_first_service_last_match: float
    p2_confiance_points_first_service_last_match: float
    p2_confiance_break_saved_last_match: float
    # idc_p1_* fields - presumably derived indicators for player 1
    # (motivation, tactics, history); TODO confirm definitions.
    idc_p1_motivation_enjeu: float
    idc_p1_motivation_revanche: float
    idc_p1_motivation_surface: float
    idc_p1_tactique_profil: float
    idc_p1_tactique_service1: float
    idc_p1_tactique_service2: float
    idc_p1_tactique_retourservice1: float
    idc_p1_tactique_retourservice2: float
    idc_p1_tactique_breaksauve: float
    idc_p1_tactique_breakgagne: float
    idc_p1_tactique_tiebreak: float
    idc_p1_historique_match: float
    idc_p1_historique_book: float
    idc_p1_historique_estimated: float
    # idc_p2_* fields - same set of indicators, presumably for player 2.
    idc_p2_motivation_enjeu: float
    idc_p2_motivation_revanche: float
    idc_p2_motivation_surface: float
    idc_p2_tactique_profil: float
    idc_p2_tactique_service1: float
    idc_p2_tactique_service2: float
    idc_p2_tactique_retourservice1: float
    idc_p2_tactique_retourservice2: float
    idc_p2_tactique_breaksauve: float
    idc_p2_tactique_breakgagne: float
    idc_p2_tactique_tiebreak: float
    idc_p2_historique_match: float
    idc_p2_historique_book: float
    idc_p2_historique_estimated: float
    # Tournament context. Declared as float although the README examples send
    # integer codes; NOTE(review): confirm the encoding expected by the model.
    tournament_round: float
    tournament_type: float
    tournament_surface: float
 # Response schema for one match. The prediction code fills the two probability
 # fields from columns 0 and 1 of model.predict_proba for that match.
 # NOTE(review): "j1"/"j2" presumably mean player 1 / player 2 - confirm that
 # the model's class order matches this naming.
 class MatchPrediction(BaseModel):
    match_id: int
    win_probability_j1: float
    win_probability_j2: float
 # Request body of /predict_batch: a JSON object whose "matches" key holds the
 # list of matches to score.
 class BatchPredictionRequest(BaseModel):
    matches: List[TennisMatch]
 # Response body of /predict_batch: one MatchPrediction per submitted match,
 # under the "predictions" key.
 class BatchPredictionResponse(BaseModel):
    predictions: List[MatchPrediction]
 # Shared prediction helper.
 # It used to be a second function also named `predict_match`; the endpoint
 # defined later rebound that name, so the helper was never called and
 # /predict_batch silently invoked the /predict route handler instead. The helper
 # now has its own private name and carries the logic that was actually running.
 async def _predict_from_features(features: Dict[str, float]) -> MatchPrediction:
    """Score one match and wrap the two class probabilities.

    Args:
        features: Mapping of feature name to value. Must contain ``match_id``.

    Returns:
        A ``MatchPrediction`` holding ``match_id`` and columns 0 and 1 of
        ``model.predict_proba`` for the single input row.

    Raises:
        KeyError: If ``match_id`` is missing from ``features``.
    """
    match_id = int(features['match_id'])
    # Columns are ordered by sorted feature name, and every key is included
    # (so `match_id` and the player ids are part of the row as well).
    # NOTE(review): confirm this column order and column set match what the
    # pickled pipeline was trained on.
    ordered_values = [features[name] for name in sorted(features)]
    feature_row = np.array(ordered_values).reshape(1, -1)  # 2D array, one row
    # predict_proba is a blocking call; run it in a worker thread so the event
    # loop stays free to serve other requests.
    probabilities = await asyncio.to_thread(model.predict_proba, feature_row)
    return MatchPrediction(
        match_id=match_id,
        # Convert to plain Python floats so response validation does not
        # depend on how numpy scalar types are handled.
        win_probability_j1=float(probabilities[0][0]),
        win_probability_j2=float(probabilities[0][1]),
    )
 # Batch prediction endpoint
 @app.post("/predict_batch", response_model=BatchPredictionResponse)
 async def predict_batch(request: BatchPredictionRequest):
    """Predict every match in the request.

    Each match is scored independently; ``asyncio.gather`` returns results in
    the same order as ``request.matches``. An empty list yields an empty
    ``predictions`` list.
    """
    predictions = await asyncio.gather(
        *(_predict_from_features(match.dict()) for match in request.matches)
    )
    return BatchPredictionResponse(predictions=predictions)
 # Single prediction endpoint
 @app.post("/predict", response_model=MatchPrediction)
 async def predict_match(features: Dict[str, float]):
    """Predict a single match from a flat JSON object of numeric features.

    The body is not validated against ``TennisMatch``; any set of numeric keys
    is accepted and forwarded to the model.
    NOTE(review): consider accepting ``TennisMatch`` here so both endpoints
    validate the same schema.

    Raises:
        HTTPException: 422 when ``match_id`` is absent from the body.
    """
    if 'match_id' not in features:
        # Report a client error instead of letting a KeyError surface as a 500.
        from fastapi import HTTPException
        raise HTTPException(
            status_code=422,
            detail="Missing required field: match_id",
        )
    return await _predict_from_features(features)
--- a/pipeline_xgb.pkl
+++ b/pipeline_xgb.pkl
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,7 @@
 fastapi
 pydantic
 uvicorn
 gunicorn
 numpy
 xgboost
 scikit-learn
--- a/start.sh
+++ b/start.sh
@ -0,0 +1,7 @@
 #!/bin/bash
 # Start the FastAPI app under Gunicorn with Uvicorn workers, sizing the worker
 # pool from the number of available CPU cores.
 #
 # Environment variables:
 #   WORKERS_PER_CORE  integer multiplier applied to the core count (default: 1)
 #   MAX_WORKERS       optional integer upper bound on the worker count
 cores=$(nproc)
 # Without a default, an unset WORKERS_PER_CORE evaluates to 0 in shell
 # arithmetic and Gunicorn would be asked for zero workers.
 workers_per_core="${WORKERS_PER_CORE:-1}"
 workers=$((cores * workers_per_core))
 # Apply the cap only when MAX_WORKERS is set; an empty operand would make the
 # numeric test fail with a syntax error.
 if [ -n "${MAX_WORKERS:-}" ] && [ "$workers" -gt "$MAX_WORKERS" ]; then
    workers=$MAX_WORKERS
 fi
 # Always run at least one worker.
 if [ "$workers" -lt 1 ]; then
    workers=1
 fi
 exec gunicorn app:app -k uvicorn.workers.UvicornWorker -w "$workers" --bind 0.0.0.0:8000