I'm encountering an issue with synchronizing data between my Python application and a MongoDB Atlas cluster. Here are the details:
I have a Python script that synchronizes data between a CSV file and a MongoDB database.When I run the script with a localhost MongoDB connection string, it works perfectly fine. However, when I switch to using a MongoDB Atlas cluster connection string, the synchronization process fails.
The MongoDB Atlas cluster is hosted on AWS (the free tier) and I'm using the following connection string: mongodb+srv://NewUser:**********@cluster0.nnyqklj.mongodb.net/ExcelDB.The MongoDB user "NewUser" has been granted the necessary permissions to access the "ExcelDB" database in the Atlas cluster.
I have tried manually changing documents in my Compass, it is updated in my Atlas with no issues, but when i run the code, it throws an error.
The thing is the code works perfectly fine when change the connection string to 'mongodb://localhost:27017/'(I have the same DB here).
`
import pandas as pdimport pymongoimport loggingdef synchronize_data(csv_file, mongo_uri, database_name, collection_name, primary_key): # Setup logging logging.basicConfig(filename='sync_log.log', level=logging.INFO) try: # Connect to MongoDB client = pymongo.MongoClient(mongo_uri) db = client[database_name] collection = db[collection_name] # Read CSV file Exdf = pd.read_csv(csv_file) Exdf_cleaned = Exdf.dropna(how='all').dropna(axis=1, how='all') # Convert DataFrame to list of dictionaries new_records = Exdf_cleaned.where(pd.notnull(Exdf_cleaned), "N/A").to_dict(orient='records') existing_records = list(collection.find({}, {"_id": 0})) # Extract column names from the DataFrame csv_columns = set(Exdf_cleaned.columns) # Extract column names from existing MongoDB documents mongo_columns = set(existing_records[0].keys()) if existing_records else set() # Determine columns to add and remove columns_to_add = csv_columns - mongo_columns columns_to_remove = mongo_columns - csv_columns # Initialize counters num_updated = 0 num_deleted = 0 num_inserted = 0 if len(existing_records) == 0: num_inserted = len(new_records) else: # Update or delete existing records for existing_record in existing_records: found = False for new_record in new_records: if existing_record[primary_key] == new_record[primary_key]: found = True # Check for field-level changes if existing_record != new_record: # Create update operation by excluding fields deleted from CSV update_operation = {"$set": {key: new_record[key] for key in new_record if key in existing_record}} # Include $unset for fields removed from CSV for key in existing_record: if key not in new_record: update_operation["$unset"] = {key: ""} collection.update_one({primary_key: new_record[primary_key]}, update_operation) num_updated += 1 break if not found: # Delete the record if not found in new data collection.delete_one({primary_key: existing_record[primary_key]}) num_deleted += 1 # Insert new records for new_record in new_records: if not any(existing_record[primary_key] == new_record[primary_key] for existing_record in existing_records): collection.insert_one(new_record) num_inserted += 1 # Remove columns from MongoDB documents for column in columns_to_remove: collection.update_many({}, {"$unset": {column: ""}}) num_deleted += 1 # Add new columns to MongoDB documents for column in columns_to_add: for index, row in Exdf_cleaned.iterrows(): collection.update_one({primary_key: row[primary_key]}, {"$set": {column: row[column]}}) num_updated += 1 # Log synchronization results logging.info(f"Number of documents updated: {num_updated}") logging.info(f"Number of documents deleted: {num_deleted}") logging.info(f"Number of documents inserted: {num_inserted}") # Print synchronization results print(f"Number of documents updated: {num_updated}") print(f"Number of documents deleted: {num_deleted}") print(f"Number of documents inserted: {num_inserted}") except FileNotFoundError: logging.exception(f"CSV file {csv_file} not found.") print(f"CSV file {csv_file} not found.") except Exception as e: # Log any exceptions logging.exception("An error occurred during synchronization:") print("An error occurred during synchronization:", e)synchronize_data("C:/Users/aksha/Desktop/mongoexceltest.csv", "mongodb+srv://NewUser:mongo123456@cluster0.nnyqklj.mongodb.net/ExcelDB", "ExcelDB", "Excel", "EEID")
Im shown this errorAn error occurred during synchronization: 'EEID'
note that this works perfectly in my local host and the column exists in both the collection and excel