## 9.1
import requests
from bs4 import BeautifulSoup
## 9.1: print every list item (<li> element) found on the page
response = requests.get("http://codingsocialscience.org/")
## .text decodes with the charset taken from the HTTP headers; presumably the header is missing or wrong for this site, because
## curly quotes came out garbled. apparent_encoding guesses the charset from the bytes of the page itself instead.
response.encoding = response.apparent_encoding
website_content = response.text
page = BeautifulSoup( website_content, 'html.parser' )
## the loop variable is deliberately not called 'list', which would hide Python's builtin list type
for list_item in page.find_all('li'):
    print( list_item )
<li class="nav-item"> <a class="nav-link page-scroll" href=".">HOME <span class="sr-only">(current)</span></a> </li> <li class="nav-item"> <a class="nav-link page-scroll" href="/book/r1/">Review book now</a> </li> <li>The blended approach used in this book reflects my teaching and situates computational methods in social research, providing both ideas on research design and hands-on tools to implement such research.</li> <li>It covers both top-level research design and more detailed approaches on how to implement and conduct research using computational methods.</li> <li>By exploring some key traditions in computer science, it helps readers to understand computational approaches and problems from a social science perspective.</li> <li>Situate computational social science research to fully appreciate its interdisciplinary nature, and understand the core challenges and benefits that computational methods provide for social sciences.</li> <li>Understand how computational processes work through improving their computational thinking. 
They need a short introduction to programming to help "translate" a research problem into an algorithm, and to understand how algorithms work.</li> <li>Gain familiarity with different method âfamiliesâ (data science, network analysis, simulation models, constructive work) to help understand the range of application domains and possibilities of computational methods.</li> <li>Situating Computational Social Science</li> <li>Programming and computational thinking</li> <li>Algorithmic data analysis</li> <li>Network analysis</li> <li>Simulations and complex systems</li> <li>Constructing interactive systems</li> <li>Data structures</li> <li>Best practices for software development</li> <li>Research ethics adn Computational Social Science</li> <li>Validity, reliability and computational social sciences</li> <li>Integrating Computational Methods in a Social Science Research</li> <li><a href="https://opetus.mante.li/computationalsocialscience/">Introduction to computational social science</a></li> <li><a href="https://opetus.mante.li/programming/">Programming for social scientists</a></li> <li><a href="https://opetus.mante.li/datascience/">Data Science for social scientists</a></li> <li>Politics of Human-Computer Interaction.</li>
## 9.2
import requests
from bs4 import BeautifulSoup
## 9.2: count the images (<img> elements) on the page
response = requests.get("https://uk.sagepub.com/")
page = BeautifulSoup( response.text, 'html.parser' )
## find_all collects every <img> tag, so the size of that collection is the number of images
image_count = len( page.find_all('img') )
print( image_count )
5
## 9.3
import requests
from bs4 import BeautifulSoup
## 9.3: report which social media services are mentioned on the page
## both the page and the search words are lowercased, so the comparison ignores case
page_text = requests.get("https://www.helsinki.fi/").text.lower()
search_words = [ 'Twitter', 'Facebook', 'YouTube' ]
mentioned = [ word for word in search_words if word.lower() in page_text ]
for word in mentioned:
    print("Found", word)
Found Twitter Found Facebook Found YouTube
## 9.4
import requests
from bs4 import BeautifulSoup
## 9.4: for each university front page, count how many times each service is mentioned
universities = ['https://www.helsinki.fi', 'https://www.aalto.fi']
search_words = [ 'Twitter', 'Facebook', 'YouTube' ]
results = {} ## university -> { service: count }
for university in universities:
    ## lowercase the page once; each search word is lowercased too, so counting ignores case
    lowered_page = requests.get( university ).text.lower()
    results[ university ] = { word: lowered_page.count( word.lower() ) for word in search_words }
print( results )
{'https://www.helsinki.fi': {'Twitter': 6, 'Facebook': 4, 'YouTube': 3}, 'https://www.aalto.fi': {'Twitter': 10, 'Facebook': 5, 'YouTube': 5}}
## 9.5
if False: ## automated testing failed here with KeyError: 'href', so the crawl stays disabled; re-enable once the .get('href') fix below is verified
    ## 9.5: print the external links of the start page and, one step further, the external links of each linked page
    import requests
    from bs4 import BeautifulSoup
    start_url = "http://codingsocialscience.org/"
    start_page = requests.get( start_url ).text
    start_page = BeautifulSoup ( start_page , 'html.parser' )
    for link in start_page.find_all('a'):
        ## not every <a> tag has an href (e.g. <a id="main-content">): tag['href'] raises KeyError for those, tag.get('href') gives None
        link = link.get('href')
        ## there are both internal and external links, for simplicity let's identify external links as those starting with http
        if link and link.startswith('http'):
            print( start_url + "-" + link )
            followup = requests.get( link ).text
            followup = BeautifulSoup( followup, 'html.parser' )
            for followuplink in followup.find_all('a'):
                ## same guard as above: skip anchors without an href instead of crashing
                followuplink = followuplink.get('href')
                if followuplink and followuplink.startswith('http'):
                    print( link + "-" + followuplink )
http://codingsocialscience.org/-https://www.helsinki.fi/en/faculty-of-social-sciences/centre-for-social-data-science-csds <a class="visually-hidden focusable" href="#main-content"> Skip to main content </a> <a id="main-content" tabindex="-1"></a>
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-35-e2f2bb5ed058> in <module> 19 for followuplink in followup.find_all('a'): 20 print(followuplink) ---> 21 followuplink = followuplink['href'] 22 if followuplink.startswith('http'): 23 print( link + "-" + followuplink ) /usr/local/lib/python3.9/site-packages/bs4/element.py in __getitem__(self, key) 1404 """tag[key] returns the value of the 'key' attribute for the Tag, 1405 and throws an exception if it's not there.""" -> 1406 return self.attrs[key] 1407 1408 def __iter__(self): KeyError: 'href'
## 9.6
import requests
import json
## 9.6: count the reported crimes per category
url = 'https://data.police.uk/api/crimes-street/all-crime?lat=51.5073&lng=-0.171505' ## latest month is shown by default, see documentation
response = requests.get( url )
data = json.loads( response.text ) ## could just be response.json() for simplicity, but doing here step by step
categories = {} ## category name -> number of entries seen with that category
for entry in data:
    name = entry['category']
    ## .get() supplies 0 the first time a category shows up
    categories[ name ] = categories.get( name, 0 ) + 1
print( categories )
{'anti-social-behaviour': 237, 'bicycle-theft': 29, 'burglary': 98, 'criminal-damage-arson': 48, 'drugs': 56, 'other-theft': 400, 'possession-of-weapons': 4, 'public-order': 102, 'robbery': 74, 'shoplifting': 86, 'theft-from-the-person': 247, 'vehicle-crime': 116, 'violent-crime': 258, 'other-crime': 8}
## 9.7
import requests
import json
## 9.7: print the capital city of Finland
url = 'http://api.worldbank.org/v2/country/fi?format=json'
response = requests.get( url )
data = json.loads( response.text ) ## could just be response.json() for simplicity, but doing here step by step
## the country records are at index 1 of the reply; take the first record
finland = data[1][0]
print( finland['capitalCity'] )
Helsinki
## 9.8
import requests
import json
## 9.8: print the capital city of each country in the list
adjacents = ['se', 'no', 'ru', 'ee']
for country in adjacents:
    ## same request as for a single country, with the country code placed into the address
    url = f'http://api.worldbank.org/v2/country/{country}?format=json'
    response = requests.get( url )
    data = json.loads( response.text ) ## could just be response.json() for simplicity, but doing here step by step
    first_record = data[1][0]
    print( first_record['capitalCity'] )
Stockholm Oslo Moscow Tallinn
## 9.9
import requests
import json
## 9.9: print latitude, longitude and income level for the countries in the reply
url = 'http://api.worldbank.org/v2/country/all?format=json'
response = requests.get( url )
reply = json.loads( response.text ) ## could just be response.json() for simplicity, but doing here step by step
## the book does not cover pagination, so note that this is not a full result
data = reply[1] ## index 0 is for pagination details
for country in data:
    income_level = country['incomeLevel']['value']
    print( country['latitude'], country['longitude'], income_level )
12.5167 -70.0167 High income Aggregates 34.5228 69.1761 Low income Aggregates Aggregates -8.81155 13.242 Lower middle income 41.3317 19.8172 Upper middle income 42.5075 1.5218 High income Aggregates 24.4764 54.3705 High income -34.6118 -58.4173 Upper middle income 40.1596 44.509 Upper middle income -14.2846 -170.691 Upper middle income 17.1175 -61.8456 High income -35.282 149.129 High income 48.2201 16.3798 High income 40.3834 49.8932 Upper middle income -3.3784 29.3639 Low income Aggregates Aggregates 50.8371 4.36761 High income 6.4779 2.6323 Lower middle income 12.3605 -1.53395 Low income 23.7055 90.4113 Lower middle income 42.7105 23.3238 Upper middle income Aggregates 26.1921 50.5354 High income 25.0661 -77.339 High income 43.8607 18.4214 Upper middle income Aggregates 53.9678 27.5766 Upper middle income 17.2534 -88.7713 Upper middle income Aggregates 32.3293 -64.706 High income -13.9908 -66.1936 Lower middle income -15.7801 -47.9292 Upper middle income 13.0935 -59.6105 High income 4.94199 114.946 High income Aggregates 27.5768 89.6177 Lower middle income -24.6544 25.9201 Upper middle income Aggregates 5.63056 21.6407 Low income 45.4215 -75.6919 High income Aggregates Aggregates Aggregates 46.948 7.44821 High income High income -33.475 -70.6475 High income
## 9.10
## 9.10: pick the year, month and day out of a web server log line
example = '127.0.0.1 - - [10/Nov/2020:08:43:18 +0200] "GET / HTTP/1.1" 200 4471 "-" "Mozilla/5.0"'
## the timestamp is the text after the first [ and before the following ]
timestamp = example.split('[')[1].split(']')[0]
## timestamp is now: 10/Nov/2020:08:43:18 +0200 # you can print it out here
## print( timestamp )
example = timestamp.split(':')[0] ## the date is everything left from the first :
date = example.split('/')
day, month, year = date[0], date[1], date[2]
print( year, month, day )
2020 Nov 10
## 9.11
from PyPDF2 import PdfReader
## 9.11: read all the text from a PDF file
reader = PdfReader('alice.pdf')
## extract the text page by page and glue the pieces together in page order, with nothing in between
all_text = ''.join( page.extract_text() for page in reader.pages )
print( all_text )
the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her. There was nothing so VERY remarkable in that; nor did Alice think it so VERY much out of the way to he ar the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT OF ITS WAISTCOAT -POCKET, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat -pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit -hole under the hedge. In another moment down went Alice after it, never once considering how in the world she was to get out again. The rabbit -hole went str aight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well. Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and no ticed that they were filled with cupboards and book -shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empt y: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it. 'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! 
How brave th ey'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.) Down, down, down. Would the fall NEVER come to an end! 'I wonder how many miles I've fallen by this time?' she said aloud. 'I must be getting somewhere near the centre of the earth. Let me see: that would be four thousand miles down, I think —' (for, you see, Alice had learnt several things of this sort in her lessons in the schoolroom, and though this was not a VERY goo d opportunity for showing off her knowledge, as there was no one to listen to her, still it was good practice to say it over) ' —yes, that's about the right distance —but then I wonder what Latitude or Longitude I've got to?' (Alice had no idea what Latitude was, or Longitude either, but thought they were nice grand words to say.) Presently she began again. 'I wonder if I shall fall right THROUGH the earth! How funny it'll seem to come out among the people that walk with their heads downward! The Antipathies , I think —' (she was rather glad there WAS no one listening, this time, as it didn't sound at all the right word) ' —but I shall have to ask them what the name of the country is, you know. Please, Ma'am, is this New Zealand or Australia?' (and she tried to curtsey as she spoke —fancy CURTSEYING as you're falling through the air! Do you think you could manage it?) 'And what an ignorant little girl she'll think me for asking! No, it'll never do to ask: perhaps I shall see it written up somewhere.' Down, down, down. There was nothing else to do, so Alice soon began talking again. 'Dinah'll miss me very much to -night, I should think!' (Dinah was the cat.) 'I hope they'll remember her saucer of milk at tea -time. Dinah my dear! I wish you were down here with me! Th ere are no mice in the air, I'm afraid, but you might catch a bat, and that's very like a mouse, you know. But do cats eat bats, I wonder?' 
And here Alice began to get rather sleepy, and went on saying to herself, in a dreamy sort of way, 'Do cats eat bats ? Do cats eat bats?' and sometimes, 'Do bats eat cats?' for, you see, as she couldn't answer either question, it didn't much matter which way she put it. She felt that she was dozing off, and had just begun to dream that she was walking hand in hand with D inah, and saying to her very earnestly, 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! thump! down she came upon a heap of sticks and dry leaves, and the fall was over. Alice was not a bit hurt, and she jumped up on to her feet in a moment: she looked up, but it was all dark overhead; before her was another long passage, and the White Rabbit was still in sight, hurrying down it. There was not a moment to be lost: away wen t Alice like the wind, and was just in time to hear it say, as it turned a corner, 'Oh my ears and whiskers, how late it's getting!' She was close behind it when she turned the corner, but the Rabbit was no longer to be seen: she found herself in a long, l ow hall, which was lit up by a row of lamps hanging from the roof. There were doors all round the hall, but they were all locked; and when Alice had been all the way down one side and up the other, trying every door, she walked sadly down the middle, wond ering how she was ever to get out again. Suddenly she came upon a little three -legged table, all made of solid glass; there was nothing on it except a tiny golden key, and Alice's first thought was that it might belong to one of the doors of the hall; but , alas! either the locks were too large, or the key was too small, but at any rate it would not open any of them. However, on the second time round, she came upon a low curtain she had not noticed before, and behind it was a little door about fifteen inche s high: she tried the little golden key in the lock, and to her great delight it fitted! 
Alice opened the door and found that it led into a small passage, not much larger than a rat-hole: she knelt down and looked along the passage into the loveliest gard en you ever saw. How she longed to get out of that dark hall, and wander about among those beds of bright flowers and those cool fountains, but she could not even get her head through the doorway; 'and even if my head would go through,' thought poor Alice, 'it would be of very little use without my shoulders. Oh, how I wish I could shut up like a telescope! I think I could, if I only know how to begin.' For, you see, so many out -of-the-way things had happened lately, that Alice had begun to think that very few things indeed were really impossible. There seemed to be no use in waiting by the little door, so she went back to the table, half hoping she might find another key on it, or at any rate a book of rules for shutting people up like telescopes: this tim e she found a little bottle on it, ('which certainly was not here
## 9.12
from docx import Document
## 9.12: read all the text from a Word document
document = Document('alice.docx')
## paragraph.text does not end with a line break, so gluing paragraphs directly fuses the last word of one
## paragraph with the first word of the next (e.g. "her.There"); a newline between them keeps the words apart
all_text = '\n'.join( paragraph.text for paragraph in document.paragraphs )
print( all_text )
the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her.There was nothing so VERY remarkable in that; nor did Alice think it so VERY much out of the way to hear the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT OF ITS WAISTCOAT-POCKET, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge.In another moment down went Alice after it, never once considering how in the world she was to get out again.The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well.Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it.'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! 
How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.)Down, down, down. Would the fall NEVER come to an end! 'I wonder how many miles I've fallen by this time?' she said aloud. 'I must be getting somewhere near the centre of the earth. Let me see: that would be four thousand miles down, I think—' (for, you see, Alice had learnt several things of this sort in her lessons in the schoolroom, and though this was not a VERY good opportunity for showing off her knowledge, as there was no one to listen to her, still it was good practice to say it over) '—yes, that's about the right distance—but then I wonder what Latitude or Longitude I've got to?' (Alice had no idea what Latitude was, or Longitude either, but thought they were nice grand words to say.)Presently she began again. 'I wonder if I shall fall right THROUGH the earth! How funny it'll seem to come out among the people that walk with their heads downward! The Antipathies, I think—' (she was rather glad there WAS no one listening, this time, as it didn't sound at all the right word) '—but I shall have to ask them what the name of the country is, you know. Please, Ma'am, is this New Zealand or Australia?' (and she tried to curtsey as she spoke—fancy CURTSEYING as you're falling through the air! Do you thinkyou could manage it?) 'And what an ignorant little girl she'll think me for asking! No, it'll never do to ask: perhaps I shall see it written up somewhere.'Down, down, down. There was nothing else to do, so Alice soon began talking again. 'Dinah'll miss me very much to-night, I should think!' (Dinah was the cat.) 'I hope they'll remember her saucer of milk at tea-time. Dinah my dear! I wish you were down here with me! There are no mice in the air, I'm afraid, but you might catch a bat, and that's very like a mouse, you know. But do cats eat bats, I wonder?' 
And here Alice began to get rather sleepy, and went on saying to herself, in a dreamy sort of way, 'Do cats eat bats? Do cats eat bats?' and sometimes, 'Do bats eat cats?' for, you see, as she couldn't answer either question, it didn't much matter which way she put it. She felt that she was dozing off, and had just begun to dream that she was walking hand in hand with Dinah, and saying to her very earnestly, 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! thump! down she came upon a heap of sticks and dry leaves, and the fall was over.Alice was not a bit hurt, and she jumped up on to her feet in a moment: she looked up, but it was all dark overhead; before her was another long passage, and the White Rabbit was still in sight, hurrying down it. There was not a moment to be lost: away went Alice like the wind, and was just in time to hear it say, as it turned a corner, 'Oh my ears and whiskers, how late it's getting!' She was close behind it when she turned the corner, but the Rabbit was no longer to be seen: she found herself in a long, low hall, which was lit up by a row of lamps hanging from the roof.There were doors all round the hall, but they were all locked; and when Alice had been all the way down one side and up the other, trying every door, she walked sadly down the middle, wondering how she was ever to get out again.Suddenly she came upon a little three-legged table, all made of solid glass; there was nothing on it except a tiny golden key, and Alice's first thought was that it might belong to one of the doors of the hall; but, alas! either the locks were too large, or the key was too small, but at any rate it would not open any of them. 
However, on the second time round, she came upon a low curtain she had not noticed before, and behind it was a little door about fifteen inches high: she tried the little golden key in the lock, and to her great delight it fitted!Alice opened the door and found that it led into a small passage, not much larger than a rat-hole: she knelt down and looked along the passage into the loveliest garden you ever saw. How she longed to get out of that dark hall, and wander about among those beds of bright flowers and those cool fountains, but she could not even get her head through the doorway; 'and even if my head would go through,' thought poor Alice, 'it would be of very little use without my shoulders. Oh, how I wish I could shut up like a telescope! I think I could, if I only know how to begin.' For, you see, so many out-of-the-way things had happened lately, that Alice had begun to think that very few things indeed were really impossible.There seemed to be no use in waiting by the little door, so she went back to the table, half hoping she might find another key on it, or at any rate a book of rules for shutting people up like telescopes: this time she found a little bottle on it, ('which certainly was not here
## 9.13
import requests
from bs4 import BeautifulSoup
## 9.13: count service mentions per university front page and save the counts into universities.json
universities = ['https://www.helsinki.fi', 'https://www.aalto.fi']
results = {}
search_words = [ 'Twitter', 'Facebook', 'YouTube' ]
for university in universities:
    website_content = requests.get( university ).text
    website_content = website_content.lower()
    res = {} ## this is for storing service, count-pairs
    for search_word in search_words:
        res[ search_word ] = website_content.count( search_word.lower() )
    results[ university ] = res
import json
## the with-block closes the file when done, which guarantees everything is written to disk before the file is read again
with open("universities.json", "w") as output_file:
    json.dump( results, output_file )
## 9.14
import json
## 9.14: read universities.json and print the total number of service mentions per university
## the with-block closes the file as soon as its content has been loaded
with open("universities.json") as input_file:
    data = json.load( input_file )
for university, results in data.items():
    ## results maps service -> count, so the total is the sum of its values
    counts = sum( results.values() )
    print( university, counts )
https://www.helsinki.fi 13 https://www.aalto.fi 20
## 9.15
import json
## this could also be a dictionary in dictionary
## 9.15: count countries per income level separately for each quadrant of the globe
## this could also be a dictionary in dictionary
northwest = {}
northeast = {}
southwest = {}
southeast = {}
## the with-block closes the file as soon as its content has been loaded
with open("countries.json") as input_file:
    data = json.load( input_file )
for country in data:
    income = country['income']
    ## choose the collector for this country: north/south by the sign of lat, east only when long is above 0
    if country['lat'] > 0:
        quadrant = northeast if country['long'] > 0 else northwest
    elif country['lat'] < 0:
        quadrant = southeast if country['long'] > 0 else southwest
    else:
        continue ## lat is neither above nor below 0: the country is not counted in any quadrant
    ## .get() supplies 0 the first time an income level shows up in this quadrant
    quadrant[ income ] = quadrant.get( income, 0 ) + 1
print( northwest, northeast, southwest, southeast )
{'High income': 6, 'Low income': 1, 'Upper middle income': 1} {'Low income': 2, 'Upper middle income': 6, 'High income': 7, 'Lower middle income': 3} {'Upper middle income': 3, 'Lower middle income': 1, 'High income': 1} {'Lower middle income': 1, 'High income': 1, 'Low income': 1, 'Upper middle income': 1}
## 9.16
import pandas
## 9.16: count the reported crimes per category, this time with pandas
url = 'https://data.police.uk/api/crimes-street/all-crime?lat=51.5073&lng=-0.171505' ## latest month is shown by default, see documentation
dataframe = pandas.read_json(url)
## note: a dataframe can also be constructed manually, and rows can be appended to or removed from it. this simple example
## shows why one might want to use tools such as pandas: the counting is a single call on the 'category' column
category_counts = dataframe['category'].value_counts()
print( category_counts )
other-theft 400 violent-crime 258 theft-from-the-person 247 anti-social-behaviour 237 vehicle-crime 116 public-order 102 burglary 98 shoplifting 86 robbery 74 drugs 56 criminal-damage-arson 48 bicycle-theft 29 other-crime 8 possession-of-weapons 4 Name: category, dtype: int64