147 lines
4.5 KiB
Python
147 lines
4.5 KiB
Python
from flask import Flask, render_template, request, jsonify
|
|
import requests
|
|
import json
|
|
import os
|
|
|
|
# Flask application instance for the translation web service.
app = Flask(__name__)

# Ollama configuration
# NOTE(review): base URL and model are hard-coded to a LAN host — consider
# reading them from environment variables (e.g. os.environ) for deployment.
OLLAMA_BASE_URL = "http://192.168.0.170:11434"
OLLAMA_MODEL = "xieweicong95/HY-MT1.5-1.8B"
# Sampling parameters merged into every /api/generate payload.
OLLAMA_PARAMS = {
    "top_p": 0.6,
    "repeat_penalty": 1.1
}
|
|
|
|
@app.route('/')
def index():
    """Serve the application's main page."""
    return render_template('index.html')
|
|
|
|
@app.route('/api/translate', methods=['POST'])
def translate():
    """Translate text by proxying a request to the Ollama generate API.

    Expects a JSON body with:
    - prompt: the prompt to send to Ollama (required, non-empty)
    - model: the model to use (default: OLLAMA_MODEL)
    - stream: whether Ollama should stream its answer (default: False)

    Returns a JSON payload with the cleaned translation on success, or an
    error payload with an appropriate HTTP status (400 bad input, 408
    timeout, 503 connection failure, 500 unexpected error).
    """
    try:
        data = request.get_json()

        # Reject a missing body as well as a missing or empty prompt —
        # forwarding an empty prompt to Ollama would be pointless.
        if not data or not data.get('prompt'):
            return jsonify({'error': 'Prompt non fornito'}), 400

        prompt = data.get('prompt')
        model = data.get('model', OLLAMA_MODEL)
        stream = data.get('stream', False)

        # Build the Ollama payload; OLLAMA_PARAMS contributes the
        # top_p / repeat_penalty sampling settings.
        payload = {
            "model": model,
            "prompt": prompt,
            "stream": stream,
            **OLLAMA_PARAMS
        }

        # Forward the request to Ollama. Passing stream=stream makes
        # `requests` deliver the body incrementally in streaming mode
        # instead of buffering the entire response in memory first.
        response = requests.post(
            f"{OLLAMA_BASE_URL}/api/generate",
            json=payload,
            stream=stream,
            timeout=300  # 5-minute timeout for long texts
        )

        if response.status_code != 200:
            return jsonify({
                'error': f'Errore Ollama: {response.status_code}',
                'details': response.text
            }), response.status_code

        if not stream:
            # Non-streaming: Ollama returns one JSON document.
            result = response.json()
            translation = _clean_translation(result.get('response', ''))
        else:
            # Streaming: each line is a JSON chunk; concatenate the pieces.
            full_response = ""
            for line in response.iter_lines():
                if line:
                    chunk = json.loads(line)
                    full_response += chunk.get('response', '')
            translation = _clean_translation(full_response)

        return jsonify({
            'success': True,
            'translation': translation,
            'response': translation,  # duplicate key kept for backward compatibility
            'model': model
        })

    except requests.exceptions.Timeout:
        return jsonify({
            'error': 'Timeout della richiesta',
            'details': 'La richiesta a Ollama ha impiegato troppo tempo'
        }), 408

    except requests.exceptions.ConnectionError:
        return jsonify({
            'error': 'Errore di connessione',
            'details': f'Impossibile connettersi a Ollama su {OLLAMA_BASE_URL}'
        }), 503

    except Exception as e:
        return jsonify({
            'error': 'Errore interno del server',
            'details': str(e)
        }), 500


def _clean_translation(text):
    """Strip surrounding whitespace and one leading 'Translation:' prefix."""
    text = text.strip()
    if text.startswith('Translation:'):
        text = text.replace('Translation:', '', 1).strip()
    return text
|
|
|
|
@app.route('/api/health', methods=['GET'])
def health():
    """Report Ollama reachability and whether the target model is installed."""
    try:
        response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)

        # Guard clause: anything but 200 means Ollama is up but unhealthy.
        if response.status_code != 200:
            return jsonify({
                'status': 'unhealthy',
                'error': f'Ollama ha risposto con status {response.status_code}'
            }), 503

        available = [entry.get('name')
                     for entry in response.json().get('models', [])]

        return jsonify({
            'status': 'healthy',
            'ollama_url': OLLAMA_BASE_URL,
            'available_models': available,
            'target_model': OLLAMA_MODEL,
            'model_available': OLLAMA_MODEL in available
        })

    except Exception as e:
        # Connection refused, DNS failure, timeout, bad JSON, etc.
        return jsonify({
            'status': 'error',
            'error': str(e),
            'ollama_url': OLLAMA_BASE_URL
        }), 503
|
|
|
|
if __name__ == '__main__':
    # NOTE(review): debug=True combined with host='0.0.0.0' exposes the
    # Werkzeug interactive debugger to the whole network — disable debug
    # (or bind to 127.0.0.1) outside local development.
    app.run(debug=True, host='0.0.0.0', port=5000)
|