## CSC 369  Winter 2019
## Alex Dekhtyar (dekhtyar@calpoly.edu)
##
## MongoDB Python API example
##
## Note: This program does not call remove({}) (delete_many({}) in newer
##       pymongo versions) before inserting. If you run it multiple times,
##       you will have multiple copies of the data in the same collection.

## Run this with python3.6 command - pymongo is installed only for python 3.6  
## on ambari-head server
##
##  $python3.6 example.py


import  pymongo as pm
from pymongo import MongoClient
import json


# Client for the MongoDB server at localhost:27017.
# Put your login credentials in the username/password fields before running.
client = MongoClient(
    host='localhost',
    port=27017,
    username='',                  # put username
    password='',                  # put password
    authSource='admin',
    authMechanism='SCRAM-SHA-1',
)


# analog of the "use example" command: pick the database by name
db = client["example"]        # you can use your own database

# pick the collection the rest of the script works on
collection = db["bands"]      # choose whatever collection name you want


## prepare data

# List of objects for insert_many(): one document per band, each with a
# "name" and a two-element "years" array (the queries further down treat
# the first element as the start year and the second as the stop year).
data = [
    {"name": bandName, "years": [firstYear, lastYear]}
    for bandName, firstYear, lastYear in (
        ("The Beatles", 1962, 1970),
        ("Pink Floyd", 1966, 2015),
        ("The Clash", 1977, 1985),
        ("Dire Straits", 1977, 1993),
    )
]

# additional dictionary (JSON object) for insert_one()
oneMore = dict(name="King Crimson", years=[1969, 2018])


# insert objects

# insert_many() returns an InsertManyResult. Print its inserted_ids (the _id
# values assigned to the new documents) instead of the result object itself,
# which in pymongo 3.x prints only as an opaque object address.
result = collection.insert_many(data)

print("Inserting four objects: ", result.inserted_ids)

# insert_one() returns an InsertOneResult; inserted_id is the _id of the
# newly inserted document.
result = collection.insert_one(oneMore)

print("Inserting one more object: ", result.inserted_id)


## get objects

# find() with no filter document: iterate over everything in the collection
bands = collection.find()

print()
print("Retrieve all data", end="\n\n")

for band in bands:
    print(band)

# two blank lines to separate this section from the next one
print(end="\n\n")

# Find query

print("Find all bands that started their career in 1977")
print()

# "years.0" addresses the first element of the "years" array (the start year)
queryDoc = {"years.0": 1977}

queryResult = collection.find(queryDoc)

for b in queryResult:
    print(b)

# Blank lines so the next section's header does not run straight into these
# results; same spacing as after the "Retrieve all data" section.
print()
print()


# Simple aggregation pipeline

print("For bands that started in the 1960s, report start and stop years as individual keys",
      end="\n\n")

# Stage 1: keep only documents whose first "years" element is in [1960, 1970)
matchStage = {"$match": {"$and": [{"years.0": {"$gte": 1960}},
                                  {"years.0": {"$lt": 1970}}]}}

# Stage 2: drop _id, keep name, and expose the two "years" elements as
# separate "start" and "stop" keys
projectStage = {"$project": {"_id": 0,
                             "name": 1,
                             "start": {"$arrayElemAt": ["$years", 0]},
                             "stop": {"$arrayElemAt": ["$years", 1]}}}

pipeline = [matchStage, projectStage]

aggResult = collection.aggregate(pipeline)

for band in aggResult:
    print(band)

print()
print("------------- DONE ---------------")