You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
125 lines
4.7 KiB
125 lines
4.7 KiB
from flask import Flask, request, send_from_directory, jsonify, abort
|
|
import os
|
|
import logging
|
|
import requests
|
|
from pdf_to_word import pdf_to_word_txt, pdf_to_word_docx
|
|
from datetime import datetime
|
|
from threading import Thread
|
|
|
|
# Root logging: INFO and above go to one log file, which is truncated on
# every start because of filemode='w'.
logging.basicConfig(
    filename='/app/logs/app.log',
    filemode='w',
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Quieten the 'watchdog' logger: only ERROR and above are let through.
watchdog_logger = logging.getLogger('watchdog')
watchdog_logger.setLevel(logging.ERROR)
|
|
|
|
app = Flask(__name__)

# Working directories used by the routes below.
UPLOAD_FOLDER = '/app/uploads'      # PDFs saved by /upload-pdf
OUTPUT_FOLDER = '/app/outputs'      # handed to the converter as its output folder
CALLBACK_FOLDER = '/app/callbacks'  # files received on /callback

# exist_ok=True makes creation idempotent and removes the window between an
# os.path.exists() check and os.makedirs() in which another process (for
# example a second worker starting at the same time) could create the
# directory and make makedirs raise FileExistsError.
for _folder in (UPLOAD_FOLDER, OUTPUT_FOLDER, CALLBACK_FOLDER):
    os.makedirs(_folder, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
def process_pdf_in_background(pdf_path, output_folder, callback_url=None,
                              callback_timeout=30):
    """Convert a PDF with ``pdf_to_word_docx`` and optionally POST the result.

    This function never raises: every failure is logged with a traceback
    instead, so it is safe to use as a thread target.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_folder: Folder passed to ``pdf_to_word_docx`` for its output.
        callback_url: When given, the converted file is uploaded to this URL
            as the multipart form field ``file``.
        callback_timeout: Seconds to wait for the callback server before
            giving up. Without a timeout ``requests`` waits indefinitely and
            a stalled server would pin this thread forever.
    """
    try:
        output_path = pdf_to_word_docx(pdf_path, output_folder)

        if not output_path:
            # A falsy result leaves nothing to deliver; presumably the
            # converter signals failure this way. Record it rather than
            # returning silently.
            logging.error(f'PDF conversion produced no output for {pdf_path}')
            return

        logging.info(f'Converted file created at {output_path}')

        if not callback_url:
            return

        # NOTE(review): callback_url is used as-is. If it can come from an
        # untrusted client, validate it before calling this function.
        with open(output_path, 'rb') as f:
            files = {'file': (os.path.basename(output_path), f)}
            response = requests.post(callback_url, files=files,
                                     timeout=callback_timeout)
        # Turn 4xx/5xx answers into an exception so they are logged below.
        response.raise_for_status()

    except Exception as e:
        # Top-level boundary of the background task: log and swallow.
        logging.error(f'Background processing error: {e}', exc_info=True)
|
|
|
|
def start_background_task(target, *args):
    """Run ``target(*args)`` on a new thread and return immediately.

    The thread is started but not joined, and nothing is returned to the
    caller.
    """
    Thread(target=target, args=args).start()
|
|
|
|
@app.route('/upload-pdf', methods=['POST'])
def upload_pdf():
    """Accept a PDF upload and start its conversion on a background thread.

    Form fields:
        file: The PDF to convert (multipart file part).
        callback_url: Optional URL passed on to the background task.

    Returns:
        A JSON message with status 202 once the file is saved and the
        background task has been started.

    Aborts with 400 when no file was sent or it is not a ``.pdf`` file.
    """
    # .get() rather than ['file']: a missing part then reaches the logged
    # 400 below instead of raising before any of our code runs.
    file = request.files.get('file')
    # NOTE(review): callback_url is client-supplied and is handed unchanged
    # to the background task; consider restricting the allowed targets.
    callback_url = request.form.get('callback_url')

    # The filename is client-controlled. Keep only its last path component:
    # os.path.join() discards UPLOAD_FOLDER entirely when given an absolute
    # path, and '../' segments would escape it. Backslashes are normalised
    # first so Windows-style paths are stripped as well.
    raw_name = file.filename if file and file.filename else ''
    safe_name = os.path.basename(raw_name.replace('\\', '/'))
    stem, ext = os.path.splitext(safe_name)

    # Compare the extension case-insensitively so 'REPORT.PDF' is accepted.
    if not stem or ext.lower() != '.pdf':
        logging.warning('Invalid file upload attempt')
        abort(400, 'Invalid file type or no file uploaded')

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    pdf_filename = stem + '_' + timestamp + '.pdf'
    pdf_path = os.path.join(UPLOAD_FOLDER, pdf_filename)
    file.save(pdf_path)
    logging.info(f'PDF uploaded and saved to {pdf_path}')

    # Start the PDF processing in a background thread
    start_background_task(process_pdf_in_background, pdf_path, OUTPUT_FOLDER, callback_url)

    # Respond immediately; the conversion result is not awaited here.
    return jsonify({'message': 'File upload accepted, processing started'}), 202
|
|
|
|
|
|
|
|
|
|
|
|
# @app.route('/upload-pdf', methods=['POST'])
|
|
# def upload_pdf():
|
|
# file = request.files['file']
|
|
# callback_url = request.form.get('callback_url')
|
|
# if file and file.filename.endswith('.pdf'):
|
|
|
|
# pdf_filename = os.path.splitext(file.filename)[0] + '_' + datetime.now().strftime("%Y%m%d_%H%M%S") + '.pdf'
|
|
# pdf_path = os.path.join(UPLOAD_FOLDER, pdf_filename)
|
|
# file.save(pdf_path)
|
|
|
|
# logging.info(f'PDF uploaded and saved to {pdf_path}')
|
|
|
|
# # Convert PDF to text
|
|
# #output_path = pdf_to_word_txt(pdf_path, OUTPUT_FOLDER)
|
|
# output_path = pdf_to_word_docx(pdf_path, OUTPUT_FOLDER)
|
|
# if output_path:
|
|
# logging.info(f'Text file created at {output_path}')
|
|
# if callback_url:
|
|
# try:
|
|
# with open(output_path, 'rb') as f:
|
|
# files = {'file': (os.path.basename(output_path), f)}
|
|
# response = requests.post(callback_url, files=files)
|
|
# response.raise_for_status() # Will raise an HTTPError for bad requests
|
|
# return jsonify({'message': 'File processed and sent successfully'}), 200
|
|
# except requests.exceptions.RequestException as e:
|
|
# logging.error(f'Failed to send file to callback URL: {e}')
|
|
# abort(500, 'Failed to send file to callback URL')
|
|
# else:
|
|
# return send_from_directory(OUTPUT_FOLDER, os.path.basename(output_path), as_attachment=True)
|
|
# else:
|
|
# logging.error('Failed to convert PDF')
|
|
# abort(500, 'Conversion failed')
|
|
# else:
|
|
# logging.warning('Invalid file upload attempt')
|
|
# abort(400, 'Invalid file type or no file uploaded')
|
|
|
|
@app.route('/callback', methods=['POST'])
def callback():
    """Receive a file posted as the ``file`` part and store it in CALLBACK_FOLDER.

    Returns:
        A JSON message with status 200 once the file has been saved.

    Aborts with 400 when no usable file was sent.
    """
    # .get() rather than ['file']: a missing part then reaches the logged
    # 400 below instead of raising before any of our code runs.
    file = request.files.get('file')

    # The filename is sender-controlled. Keep only its last path component
    # so an absolute path or '../' segments cannot write outside
    # CALLBACK_FOLDER. Backslashes are normalised first so Windows-style
    # paths are stripped as well.
    raw_name = file.filename if file and file.filename else ''
    safe_name = os.path.basename(raw_name.replace('\\', '/'))

    if safe_name in ('', '.', '..'):
        logging.error('No file received at callback')
        abort(400, 'No file received')

    filepath = os.path.join(CALLBACK_FOLDER, safe_name)
    file.save(filepath)
    logging.info(f'File received and saved at {filepath}')
    return jsonify({'message': 'File received and saved successfully'}), 200
|
|
|
|
if __name__ == '__main__':
    # The server binds to every interface (0.0.0.0). Flask's debug mode
    # enables the interactive Werkzeug debugger, which lets anyone who can
    # reach the port execute arbitrary Python. Debug is therefore opt-in:
    # set FLASK_DEBUG=1 (or true/yes/on) for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)
|