| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 
 from urllib.parse import urlencode

 import requests
 from pymongo import MongoClient
 from pyquery import PyQuery as pq
 
 
 
 # Endpoint of the m.weibo.cn container API; the URL-encoded query string is
 # appended directly after the trailing '?'.
 base_url = 'https://m.weibo.cn/api/container/getIndex?'

 # HTTP headers sent with every API request.
 headers = {
     'Host': 'm.weibo.cn',
     'Referer': 'https://m.weibo.cn/u/2830678474',
     # Adjacent string literals are joined by the compiler. A backslash
     # continuation inside the literal would instead embed the next line's
     # leading whitespace into the header value.
     'User-Agent': (
         'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
         'AppleWebKit/537.36 (KHTML, like Gecko) '
         'Chrome/67.0.3396.99 Safari/537.36'
     ),
     'X-Requested-With': 'XMLHttpRequest',
 }
 
 def get_page(page):
     """Fetch one page of the timeline from the m.weibo.cn container API.

     Args:
         page: Page number, sent as the ``page`` query parameter.

     Returns:
         The decoded JSON body when the server answers with HTTP 200.
         ``None`` when the request fails, the status is not 200, or the body
         is not valid JSON.
     """
     params = {
         'type': 'uid',
         'value': '2830678474',
         'containerid': '1076032830678474',
         'page': page,
     }
     url = base_url + urlencode(params)
     print(url)

     try:
         # Without a timeout a stalled connection would block the crawl forever.
         response = requests.get(url, headers=headers, timeout=10)
     except requests.RequestException as e:
         # RequestException also covers ConnectionError and Timeout.
         print('Error', e.args)
         return None

     if response.status_code != 200:
         return None

     try:
         return response.json()
     except ValueError as e:
         # The server answered 200 but the body was not JSON.
         print('Error', e.args)
         return None
 
 def parse_page(htmls):
     """Yield one summary dict per post contained in an API response.

     Args:
         htmls: Decoded JSON response as returned by ``get_page``, or a falsy
             value when the request failed.

     Yields:
         A dict with the keys ``id``, ``text``, ``attitudes``, ``comments``
         and ``reposts_count``, read from each card's ``mblog`` entry.
         ``text`` is the post markup reduced to plain text by pyquery.
     """
     if not htmls:
         return

     # 'data' or 'cards' may be absent or null; treat that as "no posts"
     # instead of raising.
     data = htmls.get('data') or {}
     for card in data.get('cards') or []:
         mblog = card.get('mblog')
         if not mblog:
             # Cards without an 'mblog' entry carry no post to extract.
             continue

         raw_text = mblog.get('text')
         yield {
             'id': mblog.get('id'),
             # Avoid handing pyquery a missing or empty document.
             'text': pq(raw_text).text() if raw_text else '',
             'attitudes': mblog.get('attitudes_count'),
             'comments': mblog.get('comments_count'),
             'reposts_count': mblog.get('reposts_count'),
         }
 
 # Process-wide MongoDB client, created on first use and then reused.
 _mongo_client = None


 def _get_collection():
     """Return the ``cweibo`` collection, connecting on first use."""
     global _mongo_client
     if _mongo_client is None:
         _mongo_client = MongoClient(host='localhost', port=27017)
     # NOTE(review): documents go into the 'admin' database, as in the
     # original code - confirm this is intended.
     return _mongo_client.admin.cweibo


 def save_to_mongo(result):
     """Insert one parsed post into the ``cweibo`` collection.

     A single client is shared across calls instead of opening a new,
     never-closed client for every record.

     Args:
         result: Document (dict) to insert.
     """
     _get_collection().insert_one(result)
 
 if __name__ == '__main__':
     # Crawl pages 2 through 10 and store every parsed post in MongoDB.
     for page_number in range(2, 11):
         page_json = get_page(page_number)
         for weibo in parse_page(page_json):
             save_to_mongo(weibo)
 
 |