Skip to content

Commit

Permalink
fix parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
mfenner committed Nov 25, 2024
1 parent fc9ff3b commit 62afe10
Show file tree
Hide file tree
Showing 22 changed files with 18,434 additions and 645 deletions.
8 changes: 8 additions & 0 deletions authorutils/authorutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,14 @@ func ParseName(name string) (string, string, string) {
}
}

// check for comma-separated names in "family, given" order, e.g. "Doe, John"
comma := strings.Split(name, ", ")
if len(comma) > 1 {
givenName = comma[1]
familyName = comma[0]
return givenName, familyName, ""
}

// default to the last word as family name
words := strings.Split(name, " ")
if len(words) == 1 {
Expand Down
2 changes: 2 additions & 0 deletions authorutils/authorutils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ func TestParseName(t *testing.T) {
}
testCases := []testCase{
{input: "John Doe", givenName: "John", familyName: "Doe", name: ""},
{input: "Doe, John", givenName: "John", familyName: "Doe", name: ""},
{input: "Rainer Maria Rilke", givenName: "Rainer Maria", familyName: "Rilke", name: ""},
{input: "Rilke, Rainer Maria", givenName: "Rainer Maria", familyName: "Rilke", name: ""},
{input: "Harvard University", givenName: "", familyName: "", name: "Harvard University"},
{input: "LiberateScience", givenName: "", familyName: "", name: "LiberateScience"},
{input: "Jane Smith, MD", givenName: "Jane", familyName: "Smith", name: ""},
Expand Down
8 changes: 6 additions & 2 deletions cmd/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ var convertCmd = &cobra.Command{
Use: "convert",
Short: "Convert scholarly metadata from one format to another",
Long: `Convert scholarly metadata between formats. Currently
supported input formats are Crossref and DataCite DOIs, currently
the only supported output format is Commonmeta. Example usage:
supported input formats are Crossref (default) and DataCite DOIs, currently
the only supported output format is Commonmeta (default). Example usage:
commonmeta 10.5555/12345678`,

Expand Down Expand Up @@ -88,6 +88,8 @@ commonmeta 10.5555/12345678`,
data, err = crossrefxml.Fetch(id)
} else if from == "datacite" {
data, err = datacite.Fetch(id)
} else if from == "inveniordm" {
data, err = inveniordm.Fetch(id)
} else if from == "jsonfeed" {
data, err = jsonfeed.Fetch(id)
} else {
Expand All @@ -103,6 +105,8 @@ commonmeta 10.5555/12345678`,
data, err = crossrefxml.Load(str)
} else if from == "datacite" {
data, err = datacite.Load(str)
} else if from == "inveniordm" {
data, err = inveniordm.Load(str)
} else if from == "csl" {
data, err = csl.Load(str)
} else {
Expand Down
12 changes: 10 additions & 2 deletions cmd/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ var listCmd = &cobra.Command{
client_, _ := cmd.Flags().GetString("client")
member, _ := cmd.Flags().GetString("member")
type_, _ := cmd.Flags().GetString("type")
year, _ := cmd.Flags().GetString("year")
language, _ := cmd.Flags().GetString("language")
orcid, _ := cmd.Flags().GetString("orcid")
ror, _ := cmd.Flags().GetString("ror")
fromHost, _ := cmd.Flags().GetString("from-host")
community, _ := cmd.Flags().GetString("community")
hasORCID, _ := cmd.Flags().GetBool("has-orcid")
hasROR, _ := cmd.Flags().GetBool("has-ror-id")
hasReferences, _ := cmd.Flags().GetBool("has-references")
Expand Down Expand Up @@ -89,9 +95,11 @@ var listCmd = &cobra.Command{
} else if str != "" && from == "csl" {
data, err = csl.LoadAll(str)
} else if from == "crossref" {
data, err = crossref.FetchAll(number, member, type_, sample, hasORCID, hasROR, hasReferences, hasRelation, hasAbstract, hasAward, hasLicense, hasArchive)
data, err = crossref.FetchAll(number, member, type_, sample, year, ror, orcid, hasORCID, hasROR, hasReferences, hasRelation, hasAbstract, hasAward, hasLicense, hasArchive)
} else if from == "datacite" {
data, err = datacite.FetchAll(number, client_, type_, sample)
data, err = datacite.FetchAll(number, client_, type_, sample, year, language, orcid, ror, hasORCID, hasROR, hasRelation, hasAbstract, hasAward, hasLicense)
} else if from == "inveniordm" {
data, err = inveniordm.FetchAll(number, fromHost, community)
} else {
fmt.Println("Please provide a valid input format")
return
Expand Down
14 changes: 12 additions & 2 deletions cmd/push.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ commonmeta push --sample -f crossref -t inveniordm -h rogue-scholar.org --token
client_, _ := cmd.Flags().GetString("client")
member, _ := cmd.Flags().GetString("member")
type_, _ := cmd.Flags().GetString("type")
year, _ := cmd.Flags().GetString("year")
language, _ := cmd.Flags().GetString("language")
orcid, _ := cmd.Flags().GetString("orcid")
ror, _ := cmd.Flags().GetString("ror")
fromHost, _ := cmd.Flags().GetString("from-host")
community, _ := cmd.Flags().GetString("community")
hasORCID, _ := cmd.Flags().GetBool("has-orcid")
hasROR, _ := cmd.Flags().GetBool("has-ror-id")
hasReferences, _ := cmd.Flags().GetBool("has-references")
Expand Down Expand Up @@ -78,9 +84,11 @@ commonmeta push --sample -f crossref -t inveniordm -h rogue-scholar.org --token
}

if sample && from == "crossref" {
data, err = crossref.FetchAll(number, member, type_, sample, hasORCID, hasROR, hasReferences, hasRelation, hasAbstract, hasAward, hasLicense, hasArchive)
data, err = crossref.FetchAll(number, member, type_, sample, year, ror, orcid, hasORCID, hasROR, hasReferences, hasRelation, hasAbstract, hasAward, hasLicense, hasArchive)
} else if sample && from == "datacite" {
data, err = datacite.FetchAll(number, client_, type_, sample)
data, err = datacite.FetchAll(number, client_, type_, sample, year, language, orcid, ror, hasORCID, hasROR, hasRelation, hasAbstract, hasAward, hasLicense)
} else if from == "inveniordm" {
data, err = inveniordm.FetchAll(number, fromHost, community)
} else if str != "" && from == "commonmeta" {
data, err = commonmeta.LoadAll(str)
} else if str != "" && from == "crossref" {
Expand All @@ -89,6 +97,8 @@ commonmeta push --sample -f crossref -t inveniordm -h rogue-scholar.org --token
data, err = crossrefxml.LoadAll(str)
} else if str != "" && from == "datacite" {
data, err = datacite.LoadAll(str)
} else if str != "" && from == "inveniordm" {
data, err = inveniordm.LoadAll(str)
} else if str != "" && from == "jsonfeed" {
data, err = jsonfeed.LoadAll(str)
} else if str != "" && from == "csl" {
Expand Down
6 changes: 6 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ func init() {
rootCmd.PersistentFlags().StringP("client", "", "", "DataCite client ID")
rootCmd.PersistentFlags().StringP("member", "", "", "Crossref member ID")
rootCmd.PersistentFlags().StringP("type", "", "", "work type")
rootCmd.PersistentFlags().StringP("year", "", "", "work publication year")
rootCmd.PersistentFlags().StringP("language", "", "", "work language")
rootCmd.PersistentFlags().StringP("orcid", "", "", "ORCID ID")
rootCmd.PersistentFlags().StringP("ror", "", "", "ROR ID")
rootCmd.PersistentFlags().StringP("from-host", "", "", "from InvenioRDM host")
rootCmd.PersistentFlags().StringP("community", "", "", "InvenioRDM community slug")
rootCmd.PersistentFlags().BoolP("sample", "", false, "random sample")
rootCmd.PersistentFlags().BoolP("has-orcid", "", false, "has one or more ORCID IDs")
rootCmd.PersistentFlags().BoolP("has-ror-id", "", false, "has one or more ROR IDs")
Expand Down
8 changes: 6 additions & 2 deletions cmd/sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ var sampleCmd = &cobra.Command{
client_, _ := cmd.Flags().GetString("client")
member, _ := cmd.Flags().GetString("member")
type_, _ := cmd.Flags().GetString("type")
year, _ := cmd.Flags().GetString("year")
language, _ := cmd.Flags().GetString("language")
orcid, _ := cmd.Flags().GetString("orcid")
ror, _ := cmd.Flags().GetString("ror")
hasORCID, _ := cmd.Flags().GetBool("has-orcid")
hasROR, _ := cmd.Flags().GetBool("has-ror-id")
hasReferences, _ := cmd.Flags().GetBool("has-references")
Expand All @@ -57,9 +61,9 @@ var sampleCmd = &cobra.Command{
var err error
sample := true
if from == "crossref" {
data, err = crossref.FetchAll(number, member, type_, sample, hasORCID, hasROR, hasReferences, hasRelation, hasAbstract, hasAward, hasLicense, hasArchive)
data, err = crossref.FetchAll(number, member, type_, sample, year, ror, orcid, hasORCID, hasROR, hasReferences, hasRelation, hasAbstract, hasAward, hasLicense, hasArchive)
} else if from == "datacite" {
data, err = datacite.FetchAll(number, client_, type_, sample)
data, err = datacite.FetchAll(number, client_, type_, sample, year, language, orcid, ror, hasORCID, hasROR, hasRelation, hasAbstract, hasAward, hasLicense)
}
if err != nil {
fmt.Println(err)
Expand Down
27 changes: 22 additions & 5 deletions crossref/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,10 @@ func Fetch(str string) (commonmeta.Data, error) {
}

// FetchAll gets the metadata for a list of works from the Crossref API and converts it to the Commonmeta format
func FetchAll(number int, member string, type_ string, sample bool, hasORCID bool, hasROR bool, hasReferences bool, hasRelation bool, hasAbstract bool, hasAward bool, hasLicense bool, hasArchive bool) ([]commonmeta.Data, error) {
func FetchAll(number int, member string, type_ string, sample bool, year string, ror string, orcid string, hasORCID bool, hasROR bool, hasReferences bool, hasRelation bool, hasAbstract bool, hasAward bool, hasLicense bool, hasArchive bool) ([]commonmeta.Data, error) {

var data []commonmeta.Data
content, err := GetAll(number, member, type_, sample, hasORCID, hasROR, hasReferences, hasRelation, hasAbstract, hasAward, hasLicense, hasArchive)
content, err := GetAll(number, member, type_, sample, year, ror, orcid, hasORCID, hasROR, hasReferences, hasRelation, hasAbstract, hasAward, hasLicense, hasArchive)
if err != nil {
return data, err
}
Expand Down Expand Up @@ -333,7 +333,7 @@ func Get(pid string) (Content, error) {
}

// GetAll gets the metadata for a list of works from the Crossref API
func GetAll(number int, member string, type_ string, sample bool, hasORCID bool, hasROR bool, hasReferences bool, hasRelation bool, hasAbstract bool, hasAward bool, hasLicense bool, hasArchive bool) ([]Content, error) {
func GetAll(number int, member string, type_ string, sample bool, year string, ror string, orcid string, hasORCID bool, hasROR bool, hasReferences bool, hasRelation bool, hasAbstract bool, hasAward bool, hasLicense bool, hasArchive bool) ([]Content, error) {
// the envelope for the JSON response from the Crossref API
type Response struct {
Status string `json:"status"`
Expand All @@ -351,7 +351,7 @@ func GetAll(number int, member string, type_ string, sample bool, hasORCID bool,
client := &http.Client{
Timeout: 20 * time.Second,
}
url := QueryURL(number, member, type_, sample, hasORCID, hasROR, hasReferences, hasRelation, hasAbstract, hasAward, hasLicense, hasArchive)
url := QueryURL(number, member, type_, sample, year, ror, orcid, hasORCID, hasROR, hasReferences, hasRelation, hasAbstract, hasAward, hasLicense, hasArchive)
req, err := http.NewRequest(http.MethodGet, url, nil)
v := "0.1"
u := "[email protected]"
Expand Down Expand Up @@ -774,7 +774,7 @@ func ReadAll(content []Content) ([]commonmeta.Data, error) {
}

// QueryURL returns the URL for the Crossref API query
func QueryURL(number int, member string, _type string, sample bool, hasORCID bool, hasROR bool, hasReferences bool, hasRelation bool, hasAbstract bool, hasAward bool, hasLicense bool, hasArchive bool) string {
func QueryURL(number int, member string, _type string, sample bool, year string, ror string, orcid string, hasORCID bool, hasROR bool, hasReferences bool, hasRelation bool, hasAbstract bool, hasAward bool, hasLicense bool, hasArchive bool) string {
types := []string{
"book",
"book-chapter",
Expand Down Expand Up @@ -826,6 +826,23 @@ func QueryURL(number int, member string, _type string, sample bool, hasORCID boo
if _type != "" && slices.Contains(types, _type) {
filters = append(filters, "type:"+_type)
}
if ror != "" {
r, _ := utils.ValidateROR(ror)
if r != "" {
filters = append(filters, "ror-id:"+r)
}
}
if orcid != "" {
o, _ := utils.ValidateORCID(orcid)
if o != "" {
filters = append(filters, "orcid:"+o)
}
}
if year != "" {
filters = append(filters, "from-pub-date:"+year+"-01-01")
filters = append(filters, "until-pub-date:"+year+"-12-31")
}

if hasORCID {
filters = append(filters, "has-orcid:true")
}
Expand Down
Loading

0 comments on commit 62afe10

Please sign in to comment.