+1 (315) 557-6473 

Create a Program to Implement Meta Data in Python Assignment Solution.


Instructions

Objective
Write a Python program that reads arxiv.org article metadata and answers queries about it.

Requirements and Specifications

program to implement meta data in python
program to implement meta data in python 1

Source Code

A3

"""CSC108: Fall 2021 -- Assignment 3: arxiv.org

This code is provided solely for the personal and private use of

students taking the CSC108/CSCA08 course at the University of

Toronto. Copying for purposes other than this use is expressly

prohibited. All forms of distribution of this code, whether as given

or with any changes, are expressly prohibited.

All of the files in this directory and all subdirectories are:

Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia

Sharmin, and Jacqueline Smith.

"""

from io import StringIO

from typing import Any, Dict

import unittest

import arxiv_functions as arxiv

import checker_generic as checker

# Name of the student module under test and the PyTA configuration file.
MODULENAME = 'arxiv_functions'
PYTA_CONFIG = 'a3_pyta.json'

# Width and fill character of the banners printed around each phase.
TARGET_LEN = 79
SEP = '='

# Expected values of the constants that the student module must expose.
CONSTANTS = {
    'ID': 'identifier',
    'TITLE': 'title',
    'CREATED': 'created',
    'MODIFIED': 'modified',
    'AUTHORS': 'authors',
    'ABSTRACT': 'abstract',
    'END': 'END',
}

# Sample input file. Layout of each record, mirroring DATA_DICT below:
# ID, title, created date, modified date (blank when absent), one
# 'last,first' author per line, a blank separator line, the abstract
# lines, and finally a line reading END.
DATA_FILE = """008
Intro to CS is the best course ever
2021-09-01

Ponce,Marcelo
Tafliovich,Anya Y.

We present clear evidence that Introduction to
Computer Science is the best course.
END
827
University of Toronto is the best university
2021-08-20
2021-10-02
Ponce,Marcelo
Bretscher,Anna
Tafliovich,Anya Y.

We show a formal proof that the University of
Toronto is the best university.
END
"""

# The same two articles in dictionary form; used as input to the checks.
DATA_DICT = {
    '008': {
        'identifier': '008',
        'title': 'Intro to CS is the best course ever',
        'created': '2021-09-01',
        'modified': '',
        'authors': [('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')],
        'abstract': '''We present clear evidence that Introduction to
Computer Science is the best course.'''},
    '827': {
        'identifier': '827',
        'title': 'University of Toronto is the best university',
        'created': '2021-08-20',
        'modified': '2021-10-02',
        'authors': [('Bretscher', 'Anna'),
                    ('Ponce', 'Marcelo'),
                    ('Tafliovich', 'Anya Y.')],
        'abstract': '''We show a formal proof that the University of
Toronto is the best university.'''}
}

class CheckTest(unittest.TestCase):
    """Type-contract checks (NOT a full tester!) for assignment functions.

    Each test calls one function of the student module with sample
    arguments and verifies only the type of the value that comes back.
    """

    def test_contains_keyword(self) -> None:
        """Check that contains_keyword returns a list of str."""

        call_args = [DATA_DICT, 'is']
        self._check_list_of_type(arxiv.contains_keyword, call_args, str)

    def test_created_in_year(self) -> None:
        """Check that created_in_year returns a bool."""

        call_args = [DATA_DICT, '008', 2021]
        self._check_simple_type(arxiv.created_in_year, call_args, bool)

    def test_read_arxiv_file(self) -> None:
        """Check that read_arxiv_file returns a dict of str to article dict,
        and that every article value has the expected type."""

        print('\nChecking read_arxiv_file...')

        outcome = checker.returns_dict_of(
            arxiv.read_arxiv_file, [StringIO(DATA_FILE)], str, dict)
        self.assertTrue(outcome[0], outcome[1])

        valid_keys = {'identifier', 'title', 'created', 'modified',
                      'authors', 'abstract'}
        template = 'Value corresponding to key "{}" should be a {}.'

        for article in outcome[1].values():
            authors = article['authors']
            authors_ok = isinstance(authors, list) and _all_names(authors)
            self.assertTrue(authors_ok,
                            template.format('authors', 'list of names'))

            # Every remaining key must map to a str (or None).
            for key in valid_keys - {'authors'}:
                value = article[key]
                self.assertTrue(value is None or isinstance(value, str),
                                template.format(key, 'str or None'))

        print(' check complete')

    def test_make_author_to_article(self) -> None:
        """Check that make_author_to_articles returns a dict that maps
        names to lists of str."""

        print('\nChecking make_author_to_articles...')

        outcome = checker.type_check_simple(
            arxiv.make_author_to_articles, [DATA_DICT], dict)
        self.assertTrue(outcome[0], outcome[1])

        msg = ('make_author_to_articles should return a dict in which\n'
               'keys are names (Tuple[str, str]) and values are lists of\n'
               'article IDs (List[str]).')

        for name, article_ids in outcome[1].items():
            entry_ok = (_is_name(name) and
                        isinstance(article_ids, list) and
                        all(isinstance(elt, str) for elt in article_ids))
            self.assertTrue(entry_ok, msg)

        print(' check complete')

    def test_get_coauthors(self) -> None:
        """Check that get_coauthors returns a list of names."""

        call_args = [DATA_DICT, ('Tafliovich', 'Anya Y.')]
        self._check_list_of_names(arxiv.get_coauthors, call_args)

    def test_get_most_published_authors(self) -> None:
        """Check that get_most_published_authors returns a list of names."""

        self._check_list_of_names(arxiv.get_most_published_authors,
                                  [DATA_DICT])

    def test_suggest_collaborators(self) -> None:
        """Check that suggest_collaborators returns a list of names."""

        call_args = [DATA_DICT, ('Tafliovich', 'Anya Y.')]
        self._check_list_of_names(arxiv.suggest_collaborators, call_args)

    def test_has_prolific_authors(self) -> None:
        """Check that has_prolific_authors returns a bool."""

        by_author = {
            ('Ponce', 'Marcelo'): ['008', '827'],
            ('Tafliovich', 'Anya Y.'): ['008', '827'],
            ('Bretscher', 'Anna'): ['827']
        }
        call_args = [by_author, DATA_DICT['008'], 2]
        self._check_simple_type(arxiv.has_prolific_authors, call_args, bool)

    def test_keep_prolific_authors(self) -> None:
        """Check that keep_prolific_authors returns None (nothing else is
        verified)."""

        self._check_simple_type(arxiv.keep_prolific_authors,
                                [DATA_DICT, 2],
                                type(None))

    def test_check_constants(self) -> None:
        """Check that the module's constants still have their original
        values."""

        print('\nChecking that constants refer to their original values')
        self._check_constants(CONSTANTS, arxiv)
        print(' check complete')

    def _check_list_of_names(self, func: callable, args: list) -> None:
        """Check that func called with arguments args returns a list in
        which every item is a name (a tuple of two strs). Display progress
        and outcome of the check.

        """

        func_name = func.__name__
        print('\nChecking {}...'.format(func_name))

        outcome = checker.type_check_simple(func, args, list)
        self.assertTrue(outcome[0], outcome[1])

        msg = ('{} should return a list of tuples of two strs:\n'
               '[(lastname1, firstname1), (lastname2, firstname2), ...]\n'
               'Test your function thoroughly!').format(func_name)
        self.assertTrue(_all_names(outcome[1]), msg)

        print(' check complete')

    def _check_simple_type(self, func: callable, args: list,
                           expected: type) -> None:
        """Check that func called with arguments args returns a value of
        type expected. Display the progress and the result of the check.

        """

        print('\nChecking {}...'.format(func.__name__))

        outcome = checker.type_check_simple(func, args, expected)
        self.assertTrue(outcome[0], outcome[1])

        print(' check complete')

    def _check_list_of_type(self, func: callable, args: list,
                            typ: type) -> None:
        """Check that func called with arguments args returns a list whose
        values are all of type typ. Display the progress and the result of
        the check.

        """

        print('\nChecking {}...'.format(func.__name__))

        outcome = checker.returns_list_of(func, args, typ)
        self.assertTrue(outcome[0], outcome[1])

        print(' check complete')

    def _check_constants(self, name2value: Dict[str, Any], mod: Any) -> None:
        """Check that, for each (name, value) pair in name2value, the
        variable called name in module mod has the value value.

        """

        for name, expected in name2value.items():
            actual = getattr(mod, name)
            msg = 'The value of {} should be {} but is {}.'.format(
                name, expected, actual)
            self.assertEqual(expected, actual, msg)

def _all_names(obj: Any) -> bool:
    """Return True iff every item of the iterable obj is a name, as judged
    by _is_name. An empty obj yields True."""

    for candidate in obj:
        if not _is_name(candidate):
            return False
    return True

def _is_name(obj: Any) -> bool:

"""Return whether obj is a name, i.e. a Tuple[str, str]."""

return (isinstance(obj, tuple) and len(obj) == 2 and

isinstance(obj[0], str) and isinstance(obj[1], str))

def _print_banner(text: str = '') -> None:
    """Print text centred in a line of TARGET_LEN characters padded with
    SEP."""

    print(text.center(TARGET_LEN, SEP))


# The student module must not perform input/output.
checker.ensure_no_io(MODULENAME)

# Phase 1: coding style, via PyTA.
_print_banner()
_print_banner(' Start: checking coding style ')
checker.run_pyta(MODULENAME + '.py', PYTA_CONFIG)
_print_banner(' End checking coding style ')

# Phase 2: type contracts, via the unittest cases defined in this module.
# exit=False keeps the interpreter alive so the summary below is printed.
_print_banner(' Start: checking type contracts ')
unittest.main(exit=False)
_print_banner(' End checking type contracts ')

for summary_line in ('\nScroll up to see ALL RESULTS:',
                     ' - checking coding style',
                     ' - checking type contract\n'):
    print(summary_line)

ARXIV FUNCTION

"""CSC108: Fall 2021 -- Assignment 3: arxiv.org

This code is provided solely for the personal and private use of

students taking the CSC108/CSCA08 course at the University of

Toronto. Copying for purposes other than this use is expressly

prohibited. All forms of distribution of this code, whether as given

or with any changes, are expressly prohibited.

All of the files in this directory and all subdirectories are:

Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia

Sharmin, and Jacqueline Smith.

"""

# importing copy for use in the keep_prolific_authors docstring

# you do not need to use it anywhere else

import copy

from typing import Dict, List, TextIO

from constants import (ID, TITLE, CREATED, MODIFIED, AUTHORS, ABSTRACT, END,

NameType, ArticleValueType, ArticleType, ArxivType)

# Sample article collection used by the docstring examples in this module.
# It deliberately includes edge cases: a missing created date ('031'), a
# missing modified date ('008'), and an article with no title, no authors
# and a blank line inside its abstract ('042').
EXAMPLE_ARXIV = {
    '031': {
        ID: '031',
        TITLE: 'Calculus is the Best Course Ever',
        CREATED: '',
        MODIFIED: '2021-09-02',
        AUTHORS: [('Breuss', 'Nataliya')],
        ABSTRACT: 'We discuss the reasons why Calculus is the best course.',
    },
    '067': {
        ID: '067',
        TITLE: 'Discrete Mathematics is the Best Course Ever',
        CREATED: '2021-09-02',
        MODIFIED: '2021-10-01',
        AUTHORS: [('Pancer', 'Richard'), ('Bretscher', 'Anna')],
        ABSTRACT: ('We explain why Discrete Mathematics is the best '
                   'course of all times.'),
    },
    '827': {
        ID: '827',
        TITLE: 'University of Toronto is the Best University',
        CREATED: '2021-08-20',
        MODIFIED: '2021-10-02',
        AUTHORS: [('Ponce', 'Marcelo'), ('Bretscher', 'Anna'),
                  ('Tafliovich', 'Anya Y.')],
        ABSTRACT: ('We show a formal proof that the University of\n'
                   'Toronto is the best university.'),
    },
    '008': {
        ID: '008',
        TITLE: 'Intro to CS is the Best Course Ever',
        CREATED: '2021-09-01',
        MODIFIED: '',
        AUTHORS: [('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')],
        ABSTRACT: ('We present clear evidence that Introduction to\n'
                   'Computer Science is the best course.'),
    },
    '042': {
        ID: '042',
        TITLE: '',
        CREATED: '2021-05-04',
        MODIFIED: '2021-05-05',
        AUTHORS: [],
        ABSTRACT: ('This is a strange article with no title\n'
                   'and no authors.\n\nIt also has a blank line in its abstract!'),
    },
}

# EXAMPLE_ARXIV regrouped by author: each name maps to the sorted IDs of
# the articles that list that author.
EXAMPLE_BY_AUTHOR = {
    ('Ponce', 'Marcelo'): ['008', '827'],
    ('Tafliovich', 'Anya Y.'): ['008', '827'],
    ('Bretscher', 'Anna'): ['067', '827'],
    ('Breuss', 'Nataliya'): ['031'],
    ('Pancer', 'Richard'): ['067'],
}

################################################

## Task 1

################################################

# a helper to remove non-alphabetic characters

def clean_word(word: str) -> str:
    """Return a lowercase copy of word that keeps only its alphabetic
    characters.

    Precondition: word contains no whitespace

    >>> clean_word('Hello!!!')
    'hello'
    >>> clean_word('12cat.dog?')
    'catdog'
    >>> clean_word("DON'T")
    'dont'
    """

    return ''.join(ch.lower() for ch in word if ch.isalpha())

# Add your other Task 1 functions here

def created_in_year(ArxivType, id: str, year: int):
    """Return True iff the article with identifier id in the collection
    ArxivType was created in the year year.

    Return False if the article has no (or an unparsable) created date,
    and None if the collection has no article with identifier id.

    NOTE: the first parameter is the id-to-article dict; its name shadows
    the ArxivType type alias inside this function and is kept only so that
    existing callers keep working.

    >>> created_in_year(EXAMPLE_ARXIV, '008', 2021)
    True
    >>> created_in_year(EXAMPLE_ARXIV, '008', 2020)
    False
    >>> created_in_year(EXAMPLE_ARXIV, '031', 2021)
    False
    """

    article = ArxivType.get(id)
    if article is None:
        # No article with that ID in the data.
        return None

    # The created date may be missing ('' or None); dates look like
    # 'YYYY-MM-DD', so the year is the part before the first dash.
    created = article[CREATED] or ''
    try:
        return int(created.split('-')[0]) == year
    except ValueError:
        return False

def contains_keyword(id_to_article: ArxivType, word: str) -> List[str]:
    """Return the IDs, in lexicographic order, of the articles in
    id_to_article whose title or abstract contains the keyword word.

    Both the keyword and every whitespace-separated word of the text are
    normalized with clean_word before being compared, so matching ignores
    case and punctuation and only whole words match. A keyword with no
    alphabetic characters matches nothing.

    >>> contains_keyword(EXAMPLE_ARXIV, 'is')
    ['008', '031', '042', '067', '827']
    >>> contains_keyword(EXAMPLE_ARXIV, 'University!')
    ['827']
    >>> contains_keyword(EXAMPLE_ARXIV, '123')
    []
    """

    keyword = clean_word(word)
    if not keyword:
        # Nothing left to look for; without this guard the empty string
        # would equal every all-punctuation word in the text.
        return []

    matching_ids = []
    for article_id, article in id_to_article.items():
        # Title and abstract may be empty (or None); treat both as text.
        text = (article[TITLE] or '') + ' ' + (article[ABSTRACT] or '')
        if any(clean_word(token) == keyword for token in text.split()):
            matching_ids.append(article_id)

    return sorted(matching_ids)

################################################

## Task 2

################################################

def extract_article_data(data: List[str]) -> ArticleType:
    """Return the article described by data, the lines of a single record
    (without the terminating END line).

    data holds, in order: the ID, the title, the created date, the
    modified date, zero or more author lines of the form 'last,first',
    one blank separator line, and the lines of the abstract. The abstract
    lines are joined with newlines, so blank lines inside the abstract are
    preserved. Authors are kept in the order in which they are listed.

    Precondition: len(data) >= 4

    >>> article = extract_article_data(
    ...     ['1', 'T', '2021-01-01', '', 'Doe,Jane', '', 'Line one', 'two'])
    >>> article[AUTHORS]
    [('Doe', 'Jane')]
    >>> article[ABSTRACT]
    'Line one\\ntwo'
    """

    # Author lines start at index 4 and run until the blank separator
    # (or until the record ends, if the separator is missing).
    authors = []
    i = 4
    while i < len(data) and data[i] != '':
        # Split on the first comma only so a name is always a 2-tuple.
        last_name, _, first_name = data[i].partition(',')
        authors.append((last_name, first_name))
        i += 1

    # data[i] is the blank separator; everything after it is the abstract.
    abstract = '\n'.join(data[i + 1:])

    return {
        ID: data[0],
        TITLE: data[1],
        CREATED: data[2],
        MODIFIED: data[3],
        AUTHORS: authors,
        ABSTRACT: abstract
    }

def read_arxiv_file(f: TextIO) -> ArxivType:
    """Return a ArxivType dictionary containing the arxiv metadata in f.

    f consists of records, each terminated by a line that reads END.
    Every line is stripped of surrounding whitespace before it is parsed.
    Blank lines before the first line of a record, and any lines after
    the last END, are ignored. The entries of the returned dict are
    ordered by title, with untitled articles last.

    Note we do not include example calls for functions that take open files.

    """

    id_to_article = {}
    record = []

    for raw_line in f:
        line = raw_line.strip()
        if line == END:
            # End of a record: parse what was collected, then start over.
            if record:
                article = extract_article_data(record)
                id_to_article[article[ID]] = article
            record = []
        elif line or record:
            # Skip blank lines until the record's first (ID) line is seen;
            # after that, blank lines are meaningful and must be kept.
            record.append(line)

    # Order by title; the leading flag sorts untitled articles last.
    by_title = sorted(id_to_article.items(),
                      key=lambda item: (not item[1][TITLE], item[1][TITLE]))
    return dict(by_title)

# Add your helper functions for Task 2 here

################################################

## Task 3

################################################

def make_author_to_articles(id_to_article: ArxivType
                            ) -> Dict[NameType, List[str]]:
    """Return a dict that maps each author name to a list (sorted in
    lexicographic order) of IDs of articles written by that author,
    based on the information in id_to_article.

    >>> make_author_to_articles(EXAMPLE_ARXIV) == EXAMPLE_BY_AUTHOR
    True
    >>> make_author_to_articles({})
    {}
    """

    author_to_ids = {}

    # Group the article IDs under each of the article's authors.
    for article_id, article in id_to_article.items():
        for name in article[AUTHORS]:
            author_to_ids.setdefault(name, []).append(article_id)

    # Put every author's IDs in lexicographic order.
    for article_ids in author_to_ids.values():
        article_ids.sort()

    return author_to_ids

def get_coauthors(data: ArxivType, author: NameType) -> List[NameType]:
    """Return the names of everyone who shares at least one article in
    data with author, without duplicates and excluding author, sorted in
    lexicographic order (by last name, then first name).

    >>> get_coauthors(EXAMPLE_ARXIV, ('Tafliovich', 'Anya Y.'))
    [('Bretscher', 'Anna'), ('Ponce', 'Marcelo')]
    >>> get_coauthors(EXAMPLE_ARXIV, ('Breuss', 'Nataliya'))
    []
    """

    coauthors = set()
    for article in data.values():
        names = article[AUTHORS]
        if author in names:
            coauthors.update(name for name in names if name != author)

    # Sort whole tuples so that equal last names are ordered by first name.
    return sorted(coauthors)

def get_most_published_authors(data: ArxivType) -> List[NameType]:
    """Return the names of the authors who appear on the largest number
    of articles in data, sorted in lexicographic order (by last name, then
    first name). All authors tied for the maximum are returned; the result
    is empty if data lists no authors at all.

    >>> get_most_published_authors(EXAMPLE_ARXIV)
    [('Bretscher', 'Anna'), ('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')]
    >>> get_most_published_authors({})
    []
    """

    # Count the articles of every author.
    article_counts = {}
    for article in data.values():
        for author in article[AUTHORS]:
            article_counts[author] = article_counts.get(author, 0) + 1

    if not article_counts:
        # max() of an empty sequence would raise ValueError.
        return []

    highest = max(article_counts.values())
    return sorted(author for author, count in article_counts.items()
                  if count == highest)

def suggest_collaborators(data: ArxivType, author: NameType) -> List[NameType]:
    """Return the names of suggested collaborators for author: the
    coauthors of author's coauthors in data, excluding author and anyone
    who already is a coauthor of author. The result has no duplicates and
    is sorted in lexicographic order (by last name, then first name).

    >>> suggest_collaborators(EXAMPLE_ARXIV, ('Pancer', 'Richard'))
    [('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')]
    >>> suggest_collaborators(EXAMPLE_ARXIV, ('Breuss', 'Nataliya'))
    []
    """

    coauthors = get_coauthors(data, author)

    # Names that must never be suggested: the author and existing coauthors.
    excluded = set(coauthors)
    excluded.add(author)

    suggestions = set()
    for coauthor in coauthors:
        suggestions.update(name for name in get_coauthors(data, coauthor)
                           if name not in excluded)

    # Sort whole tuples so that equal last names are ordered by first name.
    return sorted(suggestions)

# Add your other functions for Task 3 here

################################################

## Task 4

################################################

# Add your Task 4 functions here

def has_prolific_authors(data: Dict[NameType, List[str]], id: object,
                         min_number: int) -> bool:
    """Return True iff at least one author of the given article has
    min_number or more articles according to data, which maps author
    names to lists of article IDs.

    The article may be given either as its ID (a str) or as a whole
    article dict, in which case the value stored under ID is used.

    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, '008', 2)
    True
    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, '031', 2)
    False
    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, EXAMPLE_ARXIV['008'], 2)
    True
    """

    # Accept an article dict as well as a bare ID; a dict used directly as
    # an ID would silently match nothing.
    article_id = id[ID] if isinstance(id, dict) else id

    # An author of the article is any author whose ID list contains it.
    return any(article_id in article_ids and len(article_ids) >= min_number
               for article_ids in data.values())

def keep_prolific_authors(id_to_article: ArxivType,
                          min_publications: int) -> None:
    """Update id_to_article so that it contains only articles published by
    authors with min_publications or more articles published. As long
    as at least one of the authors has min_publications, the article
    is kept.

    >>> arxiv_copy = copy.deepcopy(EXAMPLE_ARXIV)
    >>> keep_prolific_authors(arxiv_copy, 2)
    >>> len(arxiv_copy)
    3
    >>> '008' in arxiv_copy and '067' in arxiv_copy and '827' in arxiv_copy
    True
    >>> arxiv_copy = copy.deepcopy(EXAMPLE_ARXIV)
    >>> keep_prolific_authors(arxiv_copy, 3)
    >>> arxiv_copy
    {}
    """

    # Publication lists are computed once, before anything is removed.
    author_to_ids = make_author_to_articles(id_to_article)

    # Decide first and delete afterwards: a dict must not change size
    # while it is being iterated over.
    unwanted_ids = [
        article_id for article_id in id_to_article
        if not has_prolific_authors(author_to_ids, article_id,
                                    min_publications)
    ]

    for article_id in unwanted_ids:
        del id_to_article[article_id]

if __name__ == '__main__':
    # Run the docstring examples of this module.
    # note that doctest requires your docstring examples to be perfectly
    # formatted, and we will not be running doctest on your code
    import doctest
    doctest.testmod()

    # Work with the small data set. The with statement closes the file
    # even if parsing raises.
    with open('example_data.txt') as example_data:
        example_arxiv = read_arxiv_file(example_data)

    if example_arxiv == EXAMPLE_ARXIV:
        print('The dictionary you produced matches EXAMPLE_ARXIV!')
        print('This is a good sign, but do more of your own testing!')
    else:
        # If you are getting this message, try setting a breakpoint on the
        # line that calls read_arxiv_file above and running the debugger
        print('Expected to get', EXAMPLE_ARXIV)
        print('But got', example_arxiv)

    # Work with a larger data set.
    with open('data.txt') as data:
        arxiv = read_arxiv_file(data)

    author_to_articles = make_author_to_articles(arxiv)
    most_published = get_most_published_authors(arxiv)
    print(most_published)
    print(get_coauthors(arxiv, ('Varanasi', 'Mahesh K.')))  # one
    print(get_coauthors(arxiv, ('Chablat', 'Damien')))  # many

CONSTANTS

"""CSC108: Fall 2021 -- Assignment 3: arxiv.org

This code is provided solely for the personal and private use of

students taking the CSC108/CSCA08 course at the University of

Toronto. Copying for purposes other than this use is expressly

prohibited. All forms of distribution of this code, whether as given

or with any changes, are expressly prohibited.

All of the files in this directory and all subdirectories are:

Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia

Sharmin, and Jacqueline Smith.

"""

from typing import Dict, List, Tuple, Union

# Keys of an article dictionary (see ArticleType below).
ID = 'identifier'
TITLE = 'title'
CREATED = 'created'
MODIFIED = 'modified'
AUTHORS = 'authors'
ABSTRACT = 'abstract'

# Presumably the text of the line that terminates each article record in
# a data file (read_arxiv_file ends a record at a line reading 'END').
END = 'END'

# We store names as tuples of two strs: (last-name, first-name(s)).
NameType = Tuple[str, str]

# ArticleValueType is the type for valid values in the ArticleType
# dict. All values are str, except for the value associated with
# key AUTHORS, which is a List of NameType.
# Note that we have not introduced Union - you can read it as "or".
ArticleValueType = Union[str, List[NameType]]

# ArticleType is a dict that maps keys ID, TITLE, CREATED, MODIFIED,
# AUTHORS, and ABSTRACT to their values (of type ArticleValueType).
ArticleType = Dict[str, ArticleValueType]

# ArxivType is a dict that maps article identifiers to articles,
# i.e. to values of type ArticleType.
ArxivType = Dict[str, ArticleType]